% Findings of ACL 2025 paper by Palma Gomez and Rozovskaya on selective
% round-trip machine translation (SeLex-RT) for low-resource grammatical
% error correction. The url field uses the canonical ACL Anthology address
% rather than the temporary preview/ingestion branch.
@inproceedings{palma-gomez-rozovskaya-2025-low,
  title     = {Low-Resource Grammatical Error Correction: Selective Data Augmentation with Round-Trip Machine Translation},
  author    = {Palma Gomez, Frank and
               Rozovskaya, Alla},
  editor    = {Che, Wanxiang and
               Nabende, Joyce and
               Shutova, Ekaterina and
               Pilehvar, Mohammad Taher},
  booktitle = {Findings of the Association for Computational Linguistics: {ACL} 2025},
  month     = jul,
  year      = {2025},
  address   = {Vienna, Austria},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.findings-acl.1322/},
  pages     = {25749--25770},
  isbn      = {979-8-89176-256-5},
  abstract  = {Supervised state-of-the-art methods for grammatical error correction require large amounts of parallel data for training. Due to lack of gold-labeled data, techniques that create synthetic training data have become popular. We show that models trained on synthetic data tend to correct a limited range of grammar and spelling mistakes that involve character-level changes, but perform poorly on (more complex) phenomena that require word-level changes. We propose to address the performance gap on such errors by generating synthetic data through selective data augmentation via round-trip machine translation. We show that the proposed technique, SeLex-RT, is capable of generating mistakes that are similar to those observed with language learners. Using the approach with two types of state-of-the-art learning frameworks and two low-resource languages (Russian and Ukrainian), we achieve substantial improvements, compared to training on synthetic data produced with standard techniques. Analysis of the output reveals that models trained on data noisified with the SeLex-RT approach are capable of making word-level changes and correct lexical errors common with language learners.},
}
Markdown (Informal)
[Low-Resource Grammatical Error Correction: Selective Data Augmentation with Round-Trip Machine Translation](https://aclanthology.org/2025.findings-acl.1322/) (Palma Gomez & Rozovskaya, Findings 2025)
ACL