@comment{Koyama et al., INLG 2024: proposes GREEN, an alignment-free n-gram F-score for evaluating grammatical error correction. Key and field set follow the ACL Anthology export for paper 2024.inlg-main.25.}
@inproceedings{koyama-etal-2024-n-gram,
  title     = {{n-gram} {F-score} for Evaluating Grammatical Error Correction},
  author    = {Koyama, Shota and
               Nagata, Ryo and
               Takamura, Hiroya and
               Okazaki, Naoaki},
  editor    = {Mahamood, Saad and
               Minh, Nguyen Le and
               Ippolito, Daphne},
  booktitle = {Proceedings of the 17th International Natural Language Generation Conference},
  month     = sep,
  year      = {2024},
  address   = {Tokyo, Japan},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2024.inlg-main.25/},
  pages     = {303--313},
  abstract  = {M2 and its variants are the most widely used automatic evaluation metrics for grammatical error correction (GEC), which calculate an F-score using a phrase-based alignment between sentences. However, it is not straightforward at all to align learner sentences containing errors to their correct sentences. In addition, alignment calculations are computationally expensive. We propose GREEN, an alignment-free F-score for GEC evaluation. GREEN treats a sentence as a multiset of n-grams and extracts edits between sentences by set operations instead of computing an alignment. Our experiments confirm that GREEN performs better than existing methods for the corpus-level metrics and comparably for the sentence-level metrics even without computing an alignment. GREEN is available at https://github.com/shotakoyama/green.},
}
Markdown (Informal)
[n-gram F-score for Evaluating Grammatical Error Correction](https://aclanthology.org/2024.inlg-main.25/) (Koyama et al., INLG 2024)
ACL