@inproceedings{gong-etal-2025-hata,
    title     = "{HATA}: Trainable and Hardware-Efficient Hash-Aware Top-{$k$} Attention for Scalable Large Model Inference",
    author    = "Gong, Ping and
      Yi, Jiawei and
      Wang, Shengnan and
      Zhang, Juncheng and
      Jin, Zewen and
      Zhou, Ouxiang and
      Liu, Ruibo and
      Xu, Guanbin and
      Bai, Youhui and
      Ye, Bowen and
      Yuan, Kun and
      Yang, Tong and
      Zhang, Gong and
      Chen, Renhai and
      Wu, Feng and
      Li, Cheng",
    editor    = "Che, Wanxiang and
      Nabende, Joyce and
      Shutova, Ekaterina and
      Pilehvar, Mohammad Taher",
    booktitle = "Findings of the Association for Computational Linguistics: ACL 2025",
    month     = jul,
    year      = "2025",
    address   = "Vienna, Austria",
    publisher = "Association for Computational Linguistics",
    url       = "https://aclanthology.org/2025.findings-acl.1275/",
    pages     = "24856--24871",
    isbn      = "979-8-89176-256-5"
}