% PagedEviction (Findings of ACL: EACL 2026) -- conference paper record in
% ACL Anthology export form, reformatted to one field per line.
% Case-sensitive words are brace-protected as whole words so sentence-casing
% styles keep them intact. `address` holds the conference city, as in the
% Anthology export. NOTE(review): the export pointed `url` at a
% preview/ingest staging host; it now uses the canonical Anthology URL built
% from the same paper ID -- confirm it resolves.
@inproceedings{chitty-venkata-etal-2026-pagedeviction,
  author    = {Chitty-Venkata, Krishna Teja and
               Ye, Jie and
               Raskar, Siddhisanket and
               Kougkas, Anthony and
               Sun, Xian and
               Emani, Murali and
               Vishwanath, Venkatram and
               Nicolae, Bogdan},
  title     = {{PagedEviction}: Structured Block-wise {KV} Cache Pruning for Efficient Large Language Model Inference},
  editor    = {Demberg, Vera and
               Inui, Kentaro and
               Marquez, Llu{\'\i}s},
  booktitle = {Findings of the {Association} for {Computational} {Linguistics}: {EACL} 2026},
  month     = mar,
  year      = {2026},
  address   = {Rabat, Morocco},
  publisher = {Association for Computational Linguistics},
  pages     = {3207--3218},
  isbn      = {979-8-89176-386-9},
  url       = {https://aclanthology.org/2026.findings-eacl.168/},
}