% Bestgen (LREC 2022) conference paper. The title contraction uses a plain
% apostrophe: a backtick would be typeset by LaTeX as an opening quote.
@inproceedings{bestgen-2022-please,
  title     = {Please, Don't Forget the Difference and the Confidence Interval when Seeking for the State-of-the-Art Status},
  author    = {Bestgen, Yves},
  editor    = {Calzolari, Nicoletta and
               B{\'e}chet, Fr{\'e}d{\'e}ric and
               Blache, Philippe and
               Choukri, Khalid and
               Cieri, Christopher and
               Declerck, Thierry and
               Goggi, Sara and
               Isahara, Hitoshi and
               Maegaard, Bente and
               Mariani, Joseph and
               Mazo, H{\'e}l{\`e}ne and
               Odijk, Jan and
               Piperidis, Stelios},
  booktitle = {Proceedings of the Thirteenth Language Resources and Evaluation Conference},
  month     = jun,
  year      = {2022},
  address   = {Marseille, France},
  publisher = {European Language Resources Association},
  url       = {https://preview.aclanthology.org/jlcl-multiple-ingestion/2022.lrec-1.640/},
  pages     = {5956--5962},
  abstract  = {This paper argues for the widest possible use of bootstrap confidence intervals for comparing NLP system performances instead of the state-of-the-art status (SOTA) and statistical significance testing. Their main benefits are to draw attention to the difference in performance between two systems and to help assessing the degree of superiority of one system over another. Two cases studies, one comparing several systems and the other based on a K-fold cross-validation procedure, illustrate these benefits.},
}
Markdown (Informal)
[Please, Don’t Forget the Difference and the Confidence Interval when Seeking for the State-of-the-Art Status](https://preview.aclanthology.org/jlcl-multiple-ingestion/2022.lrec-1.640/) (Bestgen, LREC 2022)
ACL