% Conference paper: MobileLLM-Flash, in the ACL 2026 proceedings.
% The model name in the title is braced as a whole word so that styles which
% sentence-case titles keep "MobileLLM-Flash" intact (previously "Flash" was
% unprotected and would be lowercased).
% NOTE(review): the url below points at a preview.aclanthology.org ingest
% host, presumably a staging location; confirm and switch to the canonical
% Anthology link once the volume is published.
@inproceedings{huang-etal-2026-mobilellm,
  title     = {{MobileLLM-Flash}: Latency-Guided On-Device {LLM} Design for Industry Scale Deployment},
  author    = {Huang, Hanxian and
               Fedorov, Igor and
               Gromov, Andrey and
               Beckerman, Bernard and
               Suda, Naveen and
               Eriksson, David and
               Balandat, Maximilian and
               Conway, Rylan and
               Huber, Patrick and
               Sankar, Chinnadhurai and
               Dalmia, Ayushi and
               Liu, Zechun and
               Wu, Lemeng and
               Elgamal, Tarek and
               Sagar, Adithya and
               Chandra, Vikas and
               Krishnamoorthi, Raghuraman},
  editor    = {Li, Yunyao and
               Rehm, Georg and
               Tu, Mei},
  booktitle = {Proceedings of the 64th Annual Meeting of the {Association for Computational Linguistics} ({ACL} 2026)},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, USA},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-acl/2026.acl-industry.51/},
  pages     = {749--760},
  isbn      = {979-8-89176-394-4},
}