@comment{
  Paper in the CODI-CRAC 2026 workshop proceedings (pages 53--64),
  published by the Association for Computational Linguistics.
  NOTE(review): the url field points at a preview.aclanthology.org
  "ingest-acl-workshops" path, which looks like a staging address;
  presumably it should be replaced by the permanent Anthology URL
  once the volume is live -- confirm before citing.
}
@inproceedings{ignatev-etal-2026-dataset,
  title     = {Dataset Cartography for Implicit Discourse Relation Recognition: Promises and Pitfalls},
  author    = {Ignatev, Daniil and Paperno, Denis and Poesio, Massimo},
  editor    = {Braud, Chlo{\'e} and
               Hardmeier, Christian and
               Ogrodniczuk, Maciej and
               Loaiciga, Sharid and
               Zeldes, Amir and
               Nov{\'a}k, Michal and
               Li, Chuyuan and
               Strube, Michael and
               Li, Junyi Jessy},
  booktitle = {Proceedings of the 2nd Joint Workshop on Computational Approaches to Discourse, Context and Document-Level Inferences and Computational Models of Reference, Anaphora and Coreference ({CODI}-{CRAC} 2026)},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, USA},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-acl-workshops/2026.codi-1.8/},
  pages     = {53--64},
  isbn      = {979-8-89176-400-2},
  abstract  = {Crowdsourced data for implicit discourse relation recognition, IDRR, has been shown to contain both plausible interpretations and noisy annotations. We present a case study of dataset cartography (Swayamdipta 2020) on IDRR-focused DiscoGeM corpus (Scholman et al., 2022). Our findings show that error identification via low confidence proves unreliable, as confidence is strongly affected by label rarity. However, high-confidence datapoints reveal a different use case: auditing the cue-rich regions of the dataset. Our lexical probe demonstrates an association between high confidence items and (mostly temporal) intra-argument cue words. Dataset cartography can thus serve a diagnostic of cue-driven easy-to-learn cases, which need to be balanced out to ensure the robustness of IDRR learning.},
}
Markdown (Informal)
[Dataset Cartography for Implicit Discourse Relation Recognition: Promises and Pitfalls](https://preview.aclanthology.org/ingest-acl-workshops/2026.codi-1.8/) (Ignatev et al., CODI-CRAC 2026)
ACL
- Daniil Ignatev, Denis Paperno, and Massimo Poesio. 2026. Dataset Cartography for Implicit Discourse Relation Recognition: Promises and Pitfalls. In Proceedings of the 2nd Joint Workshop on Computational Approaches to Discourse, Context and Document-Level Inferences and Computational Models of Reference, Anaphora and Coreference (CODI-CRAC 2026), pages 53–64, San Diego, California, USA. Association for Computational Linguistics.