@comment{
  LM-Critic, an EMNLP 2021 paper by Yasunaga, Leskovec and Liang.
  The url field holds the canonical ACL Anthology landing page (not a
  preview/staging build); the doi field is the stable identifier.
}
@inproceedings{yasunaga-etal-2021-lm,
  title     = {{LM}-Critic: Language Models for Unsupervised Grammatical Error Correction},
  author    = {Yasunaga, Michihiro and
               Leskovec, Jure and
               Liang, Percy},
  editor    = {Moens, Marie-Francine and
               Huang, Xuanjing and
               Specia, Lucia and
               Yih, Scott Wen-tau},
  booktitle = {Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing},
  month     = nov,
  year      = {2021},
  address   = {Online and Punta Cana, Dominican Republic},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2021.emnlp-main.611/},
  doi       = {10.18653/v1/2021.emnlp-main.611},
  pages     = {7752--7763},
  abstract  = {Grammatical error correction (GEC) requires a set of labeled ungrammatical / grammatical sentence pairs for training, but obtaining such annotation can be prohibitively expensive. Recently, the Break-It-Fix-It (BIFI) framework has demonstrated strong results on learning to repair a broken program without any labeled examples, but this relies on a perfect critic (e.g., a compiler) that returns whether an example is valid or not, which does not exist for the GEC task. In this work, we show how to leverage a pretrained language model (LM) in defining an LM-Critic, which judges a sentence to be grammatical if the LM assigns it a higher probability than its local perturbations. We apply this LM-Critic and BIFI along with a large set of unlabeled sentences to bootstrap realistic ungrammatical / grammatical pairs for training a corrector. We evaluate our approach on GEC datasets on multiple domains (CoNLL-2014, BEA-2019, GMEG-wiki and GMEG-yahoo) and show that it outperforms existing methods in both the unsupervised setting (+7.7 F0.5) and the supervised setting (+0.5 F0.5).},
}
Markdown (Informal)
[LM-Critic: Language Models for Unsupervised Grammatical Error Correction](https://aclanthology.org/2021.emnlp-main.611/) (Yasunaga et al., EMNLP 2021)
ACL