% Palma Gomez, Rozovskaya, and Roth (UNLP 2023): shared-task system paper on Ukrainian GEC.
% The url field uses the canonical ACL Anthology address, not a branch-preview host.
@inproceedings{gomez-etal-2023-low,
    title = "A Low-Resource Approach to the Grammatical Error Correction of {Ukrainian}",
    author = "Palma Gomez, Frank and
      Rozovskaya, Alla and
      Roth, Dan",
    editor = "Romanyshyn, Mariana",
    booktitle = "Proceedings of the Second Ukrainian Natural Language Processing Workshop (UNLP)",
    month = may,
    year = "2023",
    address = "Dubrovnik, Croatia",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2023.unlp-1.14/",
    doi = "10.18653/v1/2023.unlp-1.14",
    pages = "114--120",
    abstract = "We present our system that participated in the shared task on the grammatical error correction of Ukrainian. We have implemented two approaches that make use of large pre-trained language models and synthetic data, that have been used for error correction of English as well as low-resource languages. The first approach is based on fine-tuning a large multilingual language model (mT5) in two stages: first, on synthetic data, and then on gold data. The second approach trains a (smaller) seq2seq Transformer model pre-trained on synthetic data and fine-tuned on gold data. Our mT5-based model scored first in {\textquotedblleft}GEC only{\textquotedblright} track, and a very close second in the {\textquotedblleft}GEC+Fluency{\textquotedblright} track. Our two key innovations are (1) finetuning in stages, first on synthetic, and then on gold data; and (2) a high-quality corruption method based on roundtrip machine translation to complement existing noisification approaches."
}
Markdown (Informal)
[A Low-Resource Approach to the Grammatical Error Correction of Ukrainian](https://aclanthology.org/2023.unlp-1.14/) (Palma Gomez et al., UNLP 2023)
ACL