@inproceedings{li-etal-2024-contradoc,
title = "{C}ontra{D}oc: Understanding Self-Contradictions in Documents with Large Language Models",
author = "Li, Jierui and
Raheja, Vipul and
Kumar, Dhruv",
editor = "Duh, Kevin and
Gomez, Helena and
Bethard, Steven",
booktitle = "Proceedings of the 2024 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 1: Long Papers)",
month = jun,
year = "2024",
address = "Mexico City, Mexico",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/fix-sig-urls/2024.naacl-long.362/",
doi = "10.18653/v1/2024.naacl-long.362",
pages = "6509--6523",
abstract = "In recent times, large language models (LLMs) have shown impressive performance on various document-level tasks such as document classification, summarization, and question-answering. However, research on understanding their capabilities on the task of self-contradictions in long documents has been very limited. In this work, we introduce ContraDoc, the first human-annotated dataset to study self-contradictions in long documents across multiple domains, varying document lengths, self-contradiction types, and appearance scope. We then analyze the current capabilities of four state-of-the-art open-source and commercially available LLMs: GPT3.5, GPT4, PaLM2, and LLaMAv2 on this dataset. While GPT4 performs the best and can outperform humans on this task, we find that it is still unreliable and struggles with self-contradictions that require more nuance and context. We release the dataset and all the code associated with the experiments."
}
Markdown (Informal)
[ContraDoc: Understanding Self-Contradictions in Documents with Large Language Models](https://preview.aclanthology.org/fix-sig-urls/2024.naacl-long.362/) (Li et al., NAACL 2024)
ACL
Jierui Li, Vipul Raheja, and Dhruv Kumar. 2024. ContraDoc: Understanding Self-Contradictions in Documents with Large Language Models. In Proceedings of the 2024 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 1: Long Papers), pages 6509–6523, Mexico City, Mexico. Association for Computational Linguistics.