% EMNLP 2025 main-conference paper; ACL Anthology ID 2025.emnlp-main.395.
% The url field uses the canonical Anthology address rather than a staging preview link.
@inproceedings{du-etal-2025-investigating,
  title     = {Investigating Value-Reasoning Reliability in Small Large Language Models},
  author    = {Du, Xia and
               Sun, Shuhan and
               Liu, Pengyuan and
               Yu, Dong},
  editor    = {Christodoulopoulos, Christos and
               Chakraborty, Tanmoy and
               Rose, Carolyn and
               Peng, Violet},
  booktitle = {Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing},
  month     = nov,
  year      = {2025},
  address   = {Suzhou, China},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.emnlp-main.395/},
  doi       = {10.18653/v1/2025.emnlp-main.395},
  pages     = {7757--7797},
  isbn      = {979-8-89176-332-6},
  abstract  = {Although small Large Language models (sLLMs) have been widely deployed in practical applications, little attention has been paid to their value-reasoning abilities, particularly in terms of reasoning reliability. To address this gap, we propose a systematic evaluation framework for assessing the Value-Reasoning Reliability of sLLMs. We define Value-Reasoning Reliability as comprising: (1) Output consistency under identical prompts, (2) Output Robustness under semantically equivalent prompts, (3) Maintaining stable value reasoning in the face of attacks, and (4) Consistency of value reasoning in open-ended value expression tasks. Our framework includes three core tasks: Repetition Consistency task, Interaction Stability task, and Open-ended Expression Consistency task. We further incorporate self-reported confidence scores to evaluate the model{'}s value reasoning reliability from two perspectives: the model{'}s self-awareness of its values, and its value-based decision-making. Our findings show that models vary significantly in their stability when responding to value-related questions. Moreover, we observe considerable output randomness, which is not always correlated with the self-reported confidence or expressed value preferences. This suggests that current models lack a reliable internal mechanism for stable value reasoning when addressing value-sensitive queries.},
}
Markdown (Informal)
[Investigating Value-Reasoning Reliability in Small Large Language Models](https://aclanthology.org/2025.emnlp-main.395/) (Du et al., EMNLP 2025)
ACL