@inproceedings{fang-etal-2024-hgot,
title = "{HGOT}: Hierarchical Graph of Thoughts for Retrieval-Augmented In-Context Learning in Factuality Evaluation",
author = "Fang, Yihao and
Thomas, Stephen and
Zhu, Xiaodan",
editor = "Ovalle, Anaelia and
Chang, Kai-Wei and
Cao, Yang Trista and
Mehrabi, Ninareh and
Zhao, Jieyu and
Galstyan, Aram and
Dhamala, Jwala and
Kumar, Anoop and
Gupta, Rahul",
booktitle = "Proceedings of the 4th Workshop on Trustworthy Natural Language Processing (TrustNLP 2024)",
month = jun,
year = "2024",
address = "Mexico City, Mexico",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/fix-sig-urls/2024.trustnlp-1.12/",
doi = "10.18653/v1/2024.trustnlp-1.12",
pages = "118--144",
abstract = "With the widespread adoption of large language models (LLMs) in numerous applications, the challenge of factuality and the propensity for hallucinations has emerged as a significant concern. To address this issue, particularly in retrieval-augmented in-context learning, we introduce the hierarchical graph of thoughts (HGOT), a structured, multi-layered graph approach designed to enhance the retrieval of pertinent passages during in-context learning. The framework utilizes the emergent planning capabilities of LLMs, employing the divide-and-conquer strategy to break down complex queries into manageable sub-queries. It refines self-consistency majority voting for answer selection, which incorporates the recently proposed citation recall and precision metrics to assess the quality of thoughts, linking an answer{'}s credibility intrinsically to the thought{'}s quality. This methodology introduces a weighted system in majority voting, prioritizing answers based on the citation quality of their thoughts. Additionally, we propose a scoring mechanism for evaluating retrieved passages, considering factors such as citation frequency and quality, self-consistency confidence, and the retrieval module{'}s ranking. Experiments indicate that HGOT excels as a versatile approach, outperforming competing models in FEVER by up to 7{\%} and matching leading models such as Retrieve-then-Read in Open-SQuAD, and DSP in HotPotQA, demonstrating its efficacy in enhancing LLMs' factuality."
}
Markdown (Informal)
[HGOT: Hierarchical Graph of Thoughts for Retrieval-Augmented In-Context Learning in Factuality Evaluation](https://aclanthology.org/2024.trustnlp-1.12/) (Fang et al., TrustNLP 2024)
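The abstract describes a weighted variant of self-consistency majority voting, in which each sampled answer's vote is scaled by the citation quality of the thought that produced it. The following is a minimal Python sketch of that idea; the F1-style weighting and all names are illustrative assumptions, not the paper's exact formulation.

```python
# Minimal sketch of citation-quality-weighted majority voting, as described
# in the abstract. The weighting formula and all identifiers here are
# illustrative assumptions, not the paper's exact method.
from collections import defaultdict

def weighted_majority_vote(samples):
    """Pick the answer whose samples carry the most citation-quality weight.

    `samples` is a list of (answer, citation_recall, citation_precision)
    tuples, one per sampled chain of thought. Each vote's weight is an
    F1-style combination of recall and precision (an assumption; the paper
    defines its own weighting scheme).
    """
    scores = defaultdict(float)
    for answer, recall, precision in samples:
        if recall + precision > 0:
            weight = 2 * recall * precision / (recall + precision)
        else:
            weight = 0.0
        scores[answer] += weight
    # Plain self-consistency would count every vote equally; here each vote
    # is scaled by the citation quality of the thought behind it.
    return max(scores, key=scores.get)

if __name__ == "__main__":
    votes = [
        ("Paris", 0.9, 0.8),  # well-cited thought
        ("Lyon", 0.2, 0.3),   # poorly cited thought
        ("Paris", 0.7, 0.9),
    ]
    print(weighted_majority_vote(votes))  # -> "Paris"
```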