% CLiC-it 2024 paper comparing LLM and human annotators on error detection in L2 Italian texts.
% NOTE(review): the url below points at an ACL Anthology preview branch
% ("jlcl-multiple-ingestion"); confirm the canonical address and switch to it.
@inproceedings{fioravanti-etal-2024-automatic,
  title     = {Automatic Error Detection: Comparing {AI} vs. Human Performance on {L2} {Italian} Texts},
  author    = {Fioravanti, Irene and
               Forti, Luciana and
               Spina, Stefania},
  editor    = {Dell'Orletta, Felice and
               Lenci, Alessandro and
               Montemagni, Simonetta and
               Sprugnoli, Rachele},
  booktitle = {Proceedings of the 10th Italian Conference on Computational Linguistics (CLiC-it 2024)},
  month     = dec,
  year      = {2024},
  address   = {Pisa, Italy},
  publisher = {CEUR Workshop Proceedings},
  url       = {https://preview.aclanthology.org/jlcl-multiple-ingestion/2024.clicit-1.44/},
  pages     = {366--372},
  isbn      = {979-12-210-7060-6},
  abstract  = {This paper reports on a study aimed at comparing AI vs. human performance in detecting and categorising errors in L2 Italian texts. Four LLMs were considered: ChatGPT, Copilot, Gemini and Llama3. Two groups of human annotators were involved: L1 and L2 speakers of Italian. A gold standard set of annotations was developed. A fine-grained annotation scheme was adopted, to reflect the specific traits of Italian morphosyntax, with related potential learner errors. Overall, we found that human annotation outperforms AI, with some degree of variation with respect to specific error types. An increased attention to languages other than English in NLP may significantly improve AI performance in this pivotal task for the many domains of language-related disciplines.}
}
Markdown (Informal)
[Automatic Error Detection: Comparing AI vs. Human Performance on L2 Italian Texts](https://preview.aclanthology.org/jlcl-multiple-ingestion/2024.clicit-1.44/) (Fioravanti et al., CLiC-it 2024)
ACL