% ACL Anthology record 2025.emnlp-main.1273 (paper in the EMNLP 2025 proceedings).
% The url field uses the canonical Anthology address for that ID rather than a
% temporary ingest-preview host.
@inproceedings{diianni-deutsch-2025-dont,
  title     = {Don{'}t Sweat the Small Stuff: Segment-Level Meta-Evaluation Based on Pairwise Difference Correlation},
  author    = {DiIanni, Colten and
               Deutsch, Daniel},
  editor    = {Christodoulopoulos, Christos and
               Chakraborty, Tanmoy and
               Rose, Carolyn and
               Peng, Violet},
  booktitle = {Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing},
  month     = nov,
  year      = {2025},
  address   = {Suzhou, China},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.emnlp-main.1273/},
  pages     = {25073--25081},
  isbn      = {979-8-89176-332-6},
  abstract  = {This paper introduces Pairwise Difference Pearson (PDP), a novel segment-level meta-evaluation metric for Machine Translation (MT) that addresses limitations in previous Pearson{'}s $\rho$-based and Kendall{'}s $\tau$-based meta-evaluation approaches. PDP is a correlation-based metric that utilizes pairwise differences rather than raw scores. It draws on information from all segments for a more robust understanding of score distributions and uses only pairwise differences to refine Global Pearson to intra-segment comparisons. Analysis on the WMT{'}24 shared task shows PDP properly ranks sentinel evaluation metrics and better aligns with human error weightings than $acc_{eq}$.},
}
Markdown (Informal)
[Don’t Sweat the Small Stuff: Segment-Level Meta-Evaluation Based on Pairwise Difference Correlation](https://aclanthology.org/2025.emnlp-main.1273/) (DiIanni & Deutsch, EMNLP 2025)
ACL