% Workshop paper in the CODI-CRAC 2026 proceedings (pages 1--11).
% NOTE(review): the url field points at a preview.aclanthology.org ingest path,
% presumably a staging location -- confirm and swap in the permanent link once known.
% The plain-text lines after the closing brace are page-export residue; they sit
% outside any entry, so BibTeX skips them.
@inproceedings{di-polo-etal-2026-baselines,
  title     = {Baselines for Detection and Classification of Discourse Presentation in {English} Narrative},
  author    = {Di Polo, Reinaldo and
               Ocal, Mustafa and
               Finlayson, Mark},
  editor    = {Braud, Chlo{\'e} and
               Hardmeier, Christian and
               Ogrodniczuk, Maciej and
               Loaiciga, Sharid and
               Zeldes, Amir and
               Nov{\'a}k, Michal and
               Li, Chuyuan and
               Strube, Michael and
               Li, Junyi Jessy},
  booktitle = {Proceedings of the 2nd Joint Workshop on Computational Approaches to Discourse, Context and Document-Level Inferences and Computational Models of Reference, Anaphora and Coreference ({CODI}-{CRAC} 2026)},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, USA},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-acl-workshops/2026.codi-1.3/},
  pages     = {1--11},
  isbn      = {979-8-89176-400-2},
  abstract  = {Discourse presentation is when speech, writing, or thought (SW{\&}T) attributed to a discourse entity (such as a character in a narrative) is presented within a discourse. Discourse presentations can be generally broken into direct or indirect: direct presentation is when the text quotes the words or thoughts verbatim, whereas in indirect presentation the text expresses the SW{\&}T in the narrator{'}s or writer{'}s own words. Automatically detecting and categorizing discourse presentations supports discourse and narrative analysis and improves attribution for downstream NLP tasks, but detecting indirect discourse presentations remains challenging due to diverse surface forms and subtle perspective shifts. We study detection and categorization of discourse presentations on a corrected version of the Semino {\&} Short{'}s English Narrative SW{\&}TP corpus. We cast the task as five-way clause classification: Direct Speech {\&} Writing, Direct Thought, Indirect Speech {\&} Writing, Indirect Thought, and Narrative (i.e., no discourse presentation). We compare four approaches: (1) CNN; (2) generative baseline (Claude Sonnet 4.6); (3) untuned BERT, and (4) fine-tuned BERT. The CNN baseline achieves 0.43 F1 and exhibits substantial confusion with the Narrative class. Claude achieves 0.71 F1 but performs unevenly across classes and fails to recover Indirect Thought. BERT achieves 0.81 F1 overall but struggles on indirect categories. The fine-tuning BERT yields strong performance (0.88 F1), with remaining errors concentrated in Indirect Speech {\&} Writing (F1 = 0.60). We release our code and the corrected dataset to support reproducibility. To our knowledge, this is the first time computational approaches have been evaluated across the full range of SW{\&}TP discourse presentation types.},
}
Markdown (Informal)
[Baselines for Detection and Classification of Discourse Presentation in English Narrative](https://preview.aclanthology.org/ingest-acl-workshops/2026.codi-1.3/) (Di Polo et al., CODI-CRAC 2026)
ACL