% Workshop paper from the Proceedings of the First BabyLM Workshop (ACL Anthology ID 2025.babylm-main.3).
% NOTE(review): the exported record carried the placeholder ISBN = "TODO"; the field is
% omitted here so no style prints the placeholder. Add `isbn` once the real value is known.
% NOTE(review): url was a preview.aclanthology.org staging link; replaced with the canonical
% aclanthology.org form for the same Anthology ID -- confirm it resolves.
@inproceedings{mehta-etal-2025-unifying,
  author    = {Mehta, Sushant and
               Dandekar, Raj and
               Dandekar, Rajat and
               Panat, Sreedath},
  title     = {Unifying Mixture of Experts and Multi-Head Latent Attention for Efficient Language Models},
  editor    = {Charpentier, Lucas and
               Choshen, Leshem and
               Cotterell, Ryan and
               Gul, Mustafa Omer and
               Hu, Michael Y. and
               Liu, Jing and
               Jumelet, Jaap and
               Linzen, Tal and
               Mueller, Aaron and
               Ross, Candace and
               Shah, Raj Sanjay and
               Warstadt, Alex and
               Wilcox, Ethan Gotlieb and
               Williams, Adina},
  booktitle = {Proceedings of the First {BabyLM} Workshop},
  month     = nov,
  year      = {2025},
  address   = {Suzhou, China},
  publisher = {Association for Computational Linguistics},
  pages     = {42--51},
  url       = {https://aclanthology.org/2025.babylm-main.3/},
}