% Wang and Yuan, LREC-COLING 2024: human evaluation of the SeqTagger grammar
% error detection/correction model on Japanese university students' writing.
% The url field uses the canonical ACL Anthology address for paper ID
% 2024.lrec-main.146 rather than a temporary preview build.
@inproceedings{wang-yuan-2024-assessing,
  title     = {Assessing the Efficacy of Grammar Error Correction: A Human Evaluation Approach in the {Japanese} Context},
  author    = {Wang, Qiao and
               Yuan, Zheng},
  editor    = {Calzolari, Nicoletta and
               Kan, Min-Yen and
               Hoste, Veronique and
               Lenci, Alessandro and
               Sakti, Sakriani and
               Xue, Nianwen},
  booktitle = {Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)},
  month     = may,
  year      = {2024},
  address   = {Torino, Italia},
  publisher = {ELRA and ICCL},
  url       = {https://aclanthology.org/2024.lrec-main.146/},
  pages     = {1666--1672},
  abstract  = {In this study, we evaluated the performance of the state-of-the-art sequence tagging grammar error detection and correction model (SeqTagger) using Japanese university students' writing samples. With an automatic annotation toolkit, ERRANT, we first evaluated SeqTagger's performance on error correction with human expert correction as the benchmark. Then a human-annotated approach was adopted to evaluate Seqtagger's performance in error detection using a subset of the writing dataset. Results indicated a precision of 63.66{\%} and a recall of 20.19{\%} for error correction in the full dataset. For the subset, after manual exclusion of irrelevant errors such as semantic and mechanical ones, the model shows an adjusted precision of 97.98{\%} and an adjusted recall of 42.98{\%} for error detection, indicating the model's high accuracy but also its conservativeness. Thematic analysis on errors undetected by the model revealed that determiners and articles, especially the latter, were predominant. Specifically, in terms of context-independent errors, the model occasionally overlooked basic ones and faced challenges with overly erroneous or complex structures. Meanwhile, context-dependent errors, notably those related to tense and noun number, as well as those possibly influenced by the students' first language (L1), remained particularly challenging.},
}
Markdown (Informal)
[Assessing the Efficacy of Grammar Error Correction: A Human Evaluation Approach in the Japanese Context](https://preview.aclanthology.org/jlcl-multiple-ingestion/2024.lrec-main.146/) (Wang & Yuan, LREC-COLING 2024)
ACL