@inproceedings{oda-2023-training,
title = "Training for Grammatical Error Correction Without Human-Annotated {L}2 Learners' Corpora",
author = "Oda, Mikio",
editor = {Kochmar, Ekaterina and
Burstein, Jill and
Horbach, Andrea and
Laarmann-Quante, Ronja and
Madnani, Nitin and
Tack, Ana{\"i}s and
Yaneva, Victoria and
Yuan, Zheng and
Zesch, Torsten},
booktitle = "Proceedings of the 18th Workshop on Innovative Use of NLP for Building Educational Applications (BEA 2023)",
month = jul,
year = "2023",
address = "Toronto, Canada",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.bea-1.38/",
doi = "10.18653/v1/2023.bea-1.38",
pages = "455--465",
abstract = "Grammatical error correction (GEC) is a challenging task for non-native second language (L2) learners and learning machines. Data-driven GEC learning requires as much human-annotated genuine training data as possible. However, it is difficult to produce large-scale human-annotated data, so synthetically generated large-scale parallel training data is valuable for GEC systems. In this paper, we propose a method for rebuilding a corpus of synthetic parallel data using target sentences predicted by a GEC model to improve performance. Experimental results show that pre-training on the rebuilt corpus outperforms pre-training on the original synthetic datasets. Moreover, training without human-annotated L2 learners' corpora is shown to be as practical as conventional full-pipeline training with both synthetic datasets and L2 learners' corpora in terms of accuracy."
}
Markdown (Informal)
[Training for Grammatical Error Correction Without Human-Annotated L2 Learners’ Corpora](https://aclanthology.org/2023.bea-1.38/) (Oda, BEA 2023)
ACL
Mikio Oda. 2023. Training for Grammatical Error Correction Without Human-Annotated L2 Learners’ Corpora. In Proceedings of the 18th Workshop on Innovative Use of NLP for Building Educational Applications (BEA 2023), pages 455–465, Toronto, Canada. Association for Computational Linguistics.
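
The abstract proposes rebuilding a synthetic parallel corpus by replacing its target sentences with the predictions of an already-trained GEC model, then pre-training on the rebuilt pairs. Purely as an illustration of that idea, and not the authors' code, the sketch below assumes a Hugging Face seq2seq GEC checkpoint; the checkpoint name, file paths, batch size, and decoding settings are hypothetical placeholders.

```python
# A minimal sketch (assumptions, not the authors' released code): re-predict the
# target side of a synthetic parallel corpus with a trained GEC model, as the
# abstract describes, writing tab-separated (source, predicted target) pairs.
import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

def rebuild_synthetic_corpus(src_path, out_path,
                             model_name="path/to/gec-seq2seq-checkpoint",  # placeholder
                             batch_size=32, max_length=128):
    """Rewrite the target side of a synthetic corpus with GEC model predictions."""
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
    model.eval()

    def predict(sources):
        # Correct a batch of noisy source sentences with the GEC model.
        inputs = tokenizer(sources, return_tensors="pt", padding=True,
                           truncation=True, max_length=max_length)
        with torch.no_grad():
            outputs = model.generate(**inputs, max_length=max_length, num_beams=5)
        return tokenizer.batch_decode(outputs, skip_special_tokens=True)

    with open(src_path, encoding="utf-8") as src, \
         open(out_path, "w", encoding="utf-8") as out:
        batch = []
        for line in src:
            batch.append(line.strip())
            if len(batch) == batch_size:
                for s, t in zip(batch, predict(batch)):
                    out.write(f"{s}\t{t}\n")
                batch = []
        if batch:  # flush the final partial batch
            for s, t in zip(batch, predict(batch)):
                out.write(f"{s}\t{t}\n")
```

In this sketch the rebuilt pairs would simply replace the original synthetic targets for a further round of pre-training; how the rebuilt corpus is actually constructed and used in the paper's pipeline is detailed in the paper itself.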