% Conference paper from the EACL 2026 proceedings, Volume 5 (Industry Track), pages 119--131.
% NOTE(review): the url field points at a "preview.aclanthology.org/ingest-eacl" host,
% which looks like a staging address -- confirm the canonical URL before relying on it.
@inproceedings{gokhale-etal-2026-kv,
  author    = {Gokhale, Sai and Das, Devleena and Patwari, Rajeev and Sirasao, Ashish and Delaye, Elliott},
  title     = {{KV} {Pareto}: Systems-Level Optimization of {KV} Cache and Model Compression for Long Context Inference},
  editor    = {Matusevych, Yevgen and Eryi{\u{g}}it, G{\"u}l{\c{s}}en and Aletras, Nikolaos},
  booktitle = {Proceedings of the 19th Conference of the {European} Chapter of the {Association} for {Computational} {Linguistics} (Volume 5: Industry Track)},
  month     = mar,
  year      = {2026},
  address   = {Rabat, Morocco},
  publisher = {Association for Computational Linguistics},
  pages     = {119--131},
  isbn      = {979-8-89176-384-5},
  url       = {https://preview.aclanthology.org/ingest-eacl/2026.eacl-industry.9/},
}