@inproceedings{shi-etal-2023-hallucination,
title = "Hallucination Mitigation in Natural Language Generation from Large-Scale Open-Domain Knowledge Graphs",
author = "Shi, Xiao and
Zhu, Zhengyuan and
Zhang, Zeyu and
Li, Chengkai",
editor = "Bouamor, Houda and
Pino, Juan and
Bali, Kalika",
booktitle = "Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing",
month = dec,
year = "2023",
address = "Singapore",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/jlcl-multiple-ingestion/2023.emnlp-main.770/",
doi = "10.18653/v1/2023.emnlp-main.770",
pages = "12506--12521",
abstract = "In generating natural language descriptions for knowledge graph triples, prior works used either small-scale, human-annotated datasets or datasets with limited variety of graph shapes, e.g., those having mostly star graphs. Graph-to-text models trained and evaluated on such datasets are largely not assessed for more realistic large-scale, open-domain settings. We introduce a new dataset, GraphNarrative, to fill this gap. Fine-tuning transformer-based pre-trained language models has achieved state-of-the-art performance among graph-to-text models. However, this method suffers from information hallucination{---}the generated text may contain fabricated facts not present in input graphs. We propose a novel approach that, given a graph-sentence pair in GraphNarrative, trims the sentence to eliminate portions that are not present in the corresponding graph, by utilizing the sentence`s dependency parse tree. Our experiment results verify this approach using models trained on GraphNarrative and existing datasets. The dataset, source code, and trained models are released at https://github.com/idirlab/graphnarrator."
}
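
The trimming step described in the abstract lends itself to a short illustration. Below is a minimal, hypothetical Python sketch of dependency-parse-based sentence trimming, assuming spaCy with its `en_core_web_sm` model. It is not the authors' released implementation (see the `graphnarrator` repository linked above); the function name `trim_sentence` and the exact-match entity heuristic are illustrative assumptions.

```python
# Hypothetical sketch of trimming a sentence via its dependency parse,
# keeping only the portions grounded in the input graph's entities.
# NOT the paper's released code (see https://github.com/idirlab/graphnarrator).
import spacy

nlp = spacy.load("en_core_web_sm")  # assumes this spaCy model is installed

def trim_sentence(sentence: str, graph_entities: set[str]) -> str:
    """Keep tokens that mention a graph entity, plus their syntactic
    ancestors, so the trimmed output stays connected in the parse tree."""
    doc = nlp(sentence)
    keep: set[int] = set()
    lowered = [t.text.lower() for t in doc]
    # Mark token spans that lexically match an entity from the graph.
    for ent in graph_entities:
        ent_tokens = ent.lower().split()
        n = len(ent_tokens)
        for i in range(len(doc) - n + 1):
            if lowered[i:i + n] == ent_tokens:
                keep.update(range(i, i + n))
    # Walk up the dependency tree so kept tokens remain connected
    # through their governing heads (verbs, prepositions, etc.).
    for i in list(keep):
        for anc in doc[i].ancestors:
            keep.add(anc.i)
    return " ".join(doc[i].text for i in sorted(keep))

# The appositive about the city is absent from the graph, so it is trimmed.
print(trim_sentence(
    "Barack Obama was born in Honolulu, a city on the island of Oahu.",
    {"Barack Obama", "Honolulu"},
))
```

Walking up to ancestors keeps matched tokens connected through their governing words; a fuller implementation along the paper's lines would also retain function-word children (auxiliaries, determiners) so the trimmed sentence remains grammatical.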