% Conference paper entry for "Grammar as Control" (ACL 2026, Volume 1: Long Papers).
% The address field holds the location given in the export (San Diego).
% NOTE(review): the url below points at a preview/ingest host of the ACL Anthology;
% confirm the final published URL (and add a doi if one is assigned) before release.
@inproceedings{nakashole-2026-grammar,
  title     = {Grammar as Control: Modular Language Generation for the Long Tail},
  author    = {Nakashole, Ndapa},
  editor    = {Liakata, Maria and
               Moreira, Viviane P. and
               Zhang, Jiajun and
               Jurgens, David},
  booktitle = {Proceedings of the 64th Annual Meeting of the {Association} for {Computational} {Linguistics} (Volume 1: Long Papers)},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, United States},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-acl/2026.acl-long.1725/},
  pages     = {37192--37222},
  isbn      = {979-8-89176-390-6},
  abstract  = {Large language models (LLMs) can, in principle, bootstrap language technologies for long-tail languages due to their pattern recognition capabilities. Yet in practice, without structured guidance, they produce narrow, unrepresentative samples that fail to cover the morphosyntactic space of typologically underrepresented languages. We propose Modular Typology-Informed Generation (mTIG), a prompting framework that transforms descriptive grammars into explicit control mechanisms that guide LLMs to generate typologically balanced synthetic data for downstream training. mTIG decomposes grammars into modular grammar slices, each targeting a specific morphosyntactic phenomenon (e.g., passive voice, causative morphology). Across three low-resource languages, mTIG improves typological entropy by up to 19{\%} and yields a ``student-beats-teacher'' effect, where distilled models outperform the source LLM by up to +20 chrF in machine translation. These findings show that grammar-as-control can construct training corpora wherever formal linguistic descriptions exist.},
}
Markdown (Informal)
[Grammar as Control: Modular Language Generation for the Long Tail](https://preview.aclanthology.org/ingest-acl/2026.acl-long.1725/) (Nakashole, ACL 2026)
ACL