@inproceedings{iordanidou-etal-2024-duth,
title = "{DUT}h at {S}em{E}val-2024 Task 6: Comparing Pre-trained Models on Sentence Similarity Evaluation for Detecting of Hallucinations and Related Observable Overgeneration Mistakes",
author = "Iordanidou, Ioanna and
Maslaris, Ioannis and
Arampatzis, Avi",
editor = {Ojha, Atul Kr. and
Do{\u{g}}ru{\"o}z, A. Seza and
Tayyar Madabushi, Harish and
Da San Martino, Giovanni and
Rosenthal, Sara and
Ros{\'a}, Aiala},
booktitle = "Proceedings of the 18th International Workshop on Semantic Evaluation (SemEval-2024)",
month = jun,
year = "2024",
address = "Mexico City, Mexico",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/jlcl-multiple-ingestion/2024.semeval-1.154/",
doi = "10.18653/v1/2024.semeval-1.154",
pages = "1064--1070",
abstract = "In this paper, we present our approach toSemEval-2024 Task 6: SHROOM, a Sharedtask on Hallucinations and Related ObservableOvergeneration Mistakes, which aims to determine weather AI generated text is semanticallycorrect or incorrect. This work is a comparative study of Large Language Models (LLMs)in the context of the task, shedding light ontheir effectiveness and nuances. We present asystem that leverages pre-trained LLMs, suchas LaBSE, T5, and DistilUSE, for binary classification of given sentences into {\textquoteleft}Hallucination`or {\textquoteleft}Not Hallucination' classes by evaluatingthe model`s output against the reference correct text. Moreover, beyond utilizing labeleddatasets, our methodology integrates syntheticlabel creation in unlabeled datasets, followedby the prediction of test labels."
}
Markdown (Informal)
[DUTh at SemEval-2024 Task 6: Comparing Pre-trained Models on Sentence Similarity Evaluation for Detecting of Hallucinations and Related Observable Overgeneration Mistakes](https://preview.aclanthology.org/jlcl-multiple-ingestion/2024.semeval-1.154/) (Iordanidou et al., SemEval 2024)
ACL