@inproceedings{hu-etal-2025-fine,
title = "Fine-tuning Large Language Models for Improving Factuality in Legal Question Answering",
author = "Hu, Yinghao and
Gan, Leilei and
Xiao, Wenyi and
Kuang, Kun and
Wu, Fei",
editor = "Rambow, Owen and
Wanner, Leo and
Apidianaki, Marianna and
Al-Khalifa, Hend and
Di Eugenio, Barbara and
Schockaert, Steven",
booktitle = "Proceedings of the 31st International Conference on Computational Linguistics",
month = jan,
year = "2025",
address = "Abu Dhabi, UAE",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/fix-sig-urls/2025.coling-main.298/",
pages = "4410--4427",
abstract = "Hallucination, or the generation of incorrect or fabricated information, remains a critical challenge in large language models (LLMs), particularly in high-stake domains such as legal question answering (QA). In order to mitigate the hallucination rate in legal QA, we first introduce a benchmark called LegalHalBench and three automatic metrics to evaluate the common hallucinations when LLMs answer legal questions. We then propose a hallucination mitigation method that integrates behavior cloning and a novel Hard Sample-aware Iterative Direct Preference Optimization (HIPO). We conduct extensive real-data experiments to validate the effectiveness of our approach. Our results demonstrate remarkable improvements in various metrics, including the newly proposed Non-Hallucinated Statute Rate, Statute Relevance Rate, Legal Claim Truthfulness, as well as traditional metrics such as METEOR, BERTScore, ROUGE-L, and win rates."
}