% NOTE(review): the url below points at an ACL Anthology *preview* branch
% (corrections-2025-08); confirm whether the canonical aclanthology.org URL
% should be used instead before relying on this link.
@inproceedings{ballout-etal-2025-funghifunghi,
  title     = {{FunghiFunghi} at {SemEval-2025} Task 3: {Mu-SHROOM}, the Multilingual Shared-task on Hallucinations and Related Observable Overgeneration Mistakes},
  author    = {Ballout, Tariq and
               Jansma, Pieter and
               Koops, Nander and
               Zhou, Yong Hui},
  editor    = {Rosenthal, Sara and
               Ros{\'a}, Aiala and
               Ghosh, Debanjan and
               Zampieri, Marcos},
  booktitle = {Proceedings of the 19th International Workshop on Semantic Evaluation (SemEval-2025)},
  month     = jul,
  year      = {2025},
  address   = {Vienna, Austria},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/corrections-2025-08/2025.semeval-1.211/},
  pages     = {1602--1608},
  isbn      = {979-8-89176-273-2},
  abstract  = {Large Language Models (LLMs) often generate hallucinated content, which is factually incorrect or misleading, posing reliability challenges. The Mu-SHROOM shared task addresses hallucination detection in multilingual LLM-generated text. This study employs SpanBERT, a transformer model optimized for span-based predictions, to identify hallucinated spans across multiple languages. To address limited training data, we apply dataset augmentation through translation and synthetic generation. The model is evaluated using Intersection over Union (IoU) for span detection and Spearman{'}s correlation for ranking consistency. While the model detects hallucinated spans with moderate accuracy, it struggles with ranking confidence scores. These findings highlight the need for improved probability calibration and multilingual robustness. Future work should refine ranking methods and explore ensemble models for better performance.},
}
Markdown (Informal)
[FunghiFunghi at SemEval-2025 Task 3: Mu-SHROOM, the Multilingual Shared-task on Hallucinations and Related Observable Overgeneration Mistakes](https://preview.aclanthology.org/corrections-2025-08/2025.semeval-1.211/) (Ballout et al., SemEval 2025)
ACL