% PagedEviction paper, Findings of ACL: EACL 2026 (pp. 3207--3218); url is the canonical ACL Anthology landing page.
@inproceedings{chitty-venkata-etal-2026-pagedeviction,
    title     = {{PagedEviction}: Structured Block-wise {KV} Cache Pruning for Efficient Large Language Model Inference},
    author    = {Chitty-Venkata, Krishna Teja and
                 Ye, Jie and
                 Raskar, Siddhisanket and
                 Kougkas, Anthony and
                 Sun, Xian and
                 Emani, Murali and
                 Vishwanath, Venkatram and
                 Nicolae, Bogdan},
    editor    = {Demberg, Vera and
                 Inui, Kentaro and
                 Marquez, Llu{\'i}s},
    booktitle = {Findings of the {Association} for {Computational} {Linguistics}: {EACL} 2026},
    month     = mar,
    year      = {2026},
    address   = {Rabat, Morocco},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2026.findings-eacl.168/},
    pages     = {3207--3218},
    isbn      = {979-8-89176-386-9}
}