@comment{
  Conference-paper record: EACL 2026 Student Research Workshop (Volume 4), pages 207--218.
  Words that must keep their capitalisation under sentence-casing styles are
  brace-protected as whole words in title and booktitle.
  NOTE(review): the url field points at a preview.aclanthology.org ingest path;
  presumably it should be replaced by the canonical Anthology URL once the
  volume is published -- confirm before relying on it.
}
@inproceedings{tanaka-etal-2026-constructing,
  title     = {Constructing a Dataset for Hallucination Detection in {Japanese} Summarization with Fine-grained Faithfulness Labels},
  author    = {Tanaka, Hikari and
               Keyaki, Atsushi and
               Komachi, Mamoru},
  editor    = {Baez Santamaria, Selene and
               Somayajula, Sai Ashish and
               Yamaguchi, Atsuki},
  booktitle = {Proceedings of the 19th Conference of the {European} Chapter of the {Association} for {Computational} {Linguistics} (Volume 4: Student Research Workshop)},
  month     = mar,
  year      = {2026},
  address   = {Rabat, Morocco},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-eacl/2026.eacl-srw.15/},
  pages     = {207--218},
  isbn      = {979-8-89176-383-8},
  abstract  = {Large language models (LLMs) can generate fluent text, but the quality of generated content crucially depends on its consistency with the given input. This aspect is commonly referred to as faithfulness, which concerns whether the output is properly grounded in the input context. A major challenge related to faithfulness is that generated content may include information not supported by the input or may contradict it. This phenomenon is often referred to as hallucination, and increasing attention has been paid to automatic hallucination detection, which determines whether an LLM{'}s output is hallucinated. To evaluate the performance of hallucination detection systems, researchers use evaluation datasets with labels indicating the presence or absence of hallucinations. While such datasets have been developed for English and Chinese, Japanese evaluation resources for hallucination detection remain limited. Therefore, we constructed a Japanese evaluation dataset for hallucination detection in summarization by manually annotating sentence-level faithfulness labels in LLM-generated summaries of Japanese documents. We annotate 390 summaries (1,938 sentences) generated by three LLMs with sentence-level multi-label annotations for faithfulness with respect to the input document. The taxonomy extends a prior classification scheme and captures distinct patterns of model errors, enabling both binary hallucination detection and fine-grained error-type analysis of Japanese LLM summarization.},
}
Markdown (Informal)
[Constructing a Dataset for Hallucination Detection in Japanese Summarization with Fine-grained Faithfulness Labels](https://preview.aclanthology.org/ingest-eacl/2026.eacl-srw.15/) (Tanaka et al., EACL 2026)
ACL