% Kasewa et al., EMNLP 2018 (ACL Anthology ID D18-1541).
% The url field holds the canonical anthology address rather than a preview-build link.
@inproceedings{kasewa-etal-2018-wronging,
  title     = {Wronging a Right: Generating Better Errors to Improve Grammatical Error Detection},
  author    = {Kasewa, Sudhanshu and
               Stenetorp, Pontus and
               Riedel, Sebastian},
  editor    = {Riloff, Ellen and
               Chiang, David and
               Hockenmaier, Julia and
               Tsujii, Jun{'}ichi},
  booktitle = {Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing},
  month     = oct # "--" # nov,
  year      = {2018},
  address   = {Brussels, Belgium},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/D18-1541/},
  doi       = {10.18653/v1/D18-1541},
  pages     = {4977--4983},
  abstract  = {Grammatical error correction, like other machine learning tasks, greatly benefits from large quantities of high quality training data, which is typically expensive to produce. While writing a program to automatically generate realistic grammatical errors would be difficult, one could learn the distribution of naturally-occurring errors and attempt to introduce them into other datasets. Initial work on inducing errors in this way using statistical machine translation has shown promise; we investigate cheaply constructing synthetic samples, given a small corpus of human-annotated data, using an off-the-rack attentive sequence-to-sequence model and a straight-forward post-processing procedure. Our approach yields error-filled artificial data that helps a vanilla bi-directional LSTM to outperform the previous state of the art at grammatical error detection, and a previously introduced model to gain further improvements of over 5{\%} F0.5 score. When attempting to determine if a given sentence is synthetic, a human annotator at best achieves 39.39 F1 score, indicating that our model generates mostly human-like instances.},
}
Markdown (Informal)
[Wronging a Right: Generating Better Errors to Improve Grammatical Error Detection](https://aclanthology.org/D18-1541/) (Kasewa et al., EMNLP 2018)
ACL