@inproceedings{papoudakis-etal-2024-bookworm,
title = "{B}ook{W}orm: A Dataset for Character Description and Analysis",
author = "Papoudakis, Argyrios and
Lapata, Mirella and
Keller, Frank",
editor = "Al-Onaizan, Yaser and
Bansal, Mohit and
Chen, Yun-Nung",
booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2024",
month = nov,
year = "2024",
address = "Miami, Florida, USA",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/Add-Cong-Liu-Florida-Atlantic-University-author-id/2024.findings-emnlp.258/",
doi = "10.18653/v1/2024.findings-emnlp.258",
pages = "4471--4500",
abstract = "Characters are at the heart of every story, driving the plot and engaging readers. In this study, we explore the understanding of characters in full-length books, which contain complex narratives and numerous interacting characters. We define two tasks: character description, which generates a brief factual profile, and character analysis, which offers an in-depth interpretation, including character development, personality, and social context. We introduce the BookWorm dataset, pairing books from the Gutenberg Project with human-written descriptions and analyses. Using this dataset, we evaluate state-of-the-art long-context models in zero-shot and fine-tuning settings, utilizing both retrieval-based and hierarchical processing for book-length inputs. Our findings show that retrieval-based approaches outperform hierarchical ones in both tasks. Additionally, fine-tuned models using coreference-based retrieval produce the most factual descriptions, as measured by fact- and entailment-based metrics. We hope our dataset, experiments, and analysis will inspire further research in character-based narrative understanding."
}
Markdown (Informal)
[BookWorm: A Dataset for Character Description and Analysis](https://preview.aclanthology.org/Add-Cong-Liu-Florida-Atlantic-University-author-id/2024.findings-emnlp.258/) (Papoudakis et al., Findings 2024)
ACL