@inproceedings{kobs-etal-2024-pollice,
title = "Pollice Verso at {S}em{E}val-2024 Task 6: The {R}oman Empire Strikes Back",
author = "Kobs, Konstantin and
Pfister, Jan and
Hotho, Andreas",
editor = {Ojha, Atul Kr. and
Do{\u{g}}ru{\"o}z, A. Seza and
Tayyar Madabushi, Harish and
Da San Martino, Giovanni and
Rosenthal, Sara and
Ros{\'a}, Aiala},
booktitle = "Proceedings of the 18th International Workshop on Semantic Evaluation (SemEval-2024)",
month = jun,
year = "2024",
address = "Mexico City, Mexico",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/fix-sig-urls/2024.semeval-1.219/",
doi = "10.18653/v1/2024.semeval-1.219",
pages = "1529--1536",
abstract = "We present an intuitive approach for hallucination detection in LLM outputs that is modeled after how humans would go about this task. We engage several LLM ``experts'' to independently assess whether a response is hallucinated. For this we select recent and popular LLMs smaller than 7B parameters. By analyzing the log probabilities for tokens that signal a positive or negative judgment, we can determine the likelihood of hallucination. Additionally, we enhance the performance of our ``experts'' by automatically refining their prompts using the recently introduced OPRO framework. Furthermore, we ensemble the replies of the different experts in a uniform or weighted manner, which builds a quorum from the expert replies. Overall this leads to accuracy improvements of up to 10.6 p.p. compared to the challenge baseline. We show that a Zephyr 3B model is well suited for the task. Our approach can be applied in the model-agnostic and model-aware subtasks without modification and is flexible and easily extendable to related tasks."
}