@inproceedings{xiong-etal-2023-examining,
title = "Examining Inter-Consistency of Large Language Models Collaboration: An In-depth Analysis via Debate",
author = "Xiong, Kai and
Ding, Xiao and
Cao, Yixin and
Liu, Ting and
Qin, Bing",
editor = "Bouamor, Houda and
Pino, Juan and
Bali, Kalika",
booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2023",
month = dec,
year = "2023",
address = "Singapore",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/fix-sig-urls/2023.findings-emnlp.508/",
doi = "10.18653/v1/2023.findings-emnlp.508",
pages = "7572--7590",
abstract = "Large Language Models (LLMs) have shown impressive capabilities in various applications, but they still face various inconsistency issues. Existing works primarily focus on the inconsistency issues within a single LLM, while we complementarily explore the inter-consistency among multiple LLMs for collaboration. To examine whether LLMs can collaborate effectively to achieve a consensus for a shared goal, we focus on commonsense reasoning, and introduce a formal debate framework (FORD) to conduct a three-stage debate among LLMs with real-world scenarios alignment: fair debate, mismatched debate, and roundtable debate. Through extensive experiments on various datasets, LLMs can effectively collaborate to reach a consensus despite noticeable inter-inconsistencies, but imbalances in their abilities can lead to domination by superior LLMs. Leveraging a more advanced LLM like GPT-4 as an authoritative judge can boost collaboration performance. Our work contributes to understanding the inter-consistency among LLMs and lays the foundation for developing future collaboration methods. Codes and data are available at https://github.com/Waste-Wood/FORD."
}