@inproceedings{li-etal-2025-decoding-llm,
title = "Decoding {LLM} Personality Measurement: Forced-Choice vs. {L}ikert",
author = "Li, Xiaoyu and
Shi, Haoran and
Yu, Zengyi and
Tu, Yukun and
Zheng, Chanjin",
editor = "Che, Wanxiang and
Nabende, Joyce and
Shutova, Ekaterina and
Pilehvar, Mohammad Taher",
booktitle = "Findings of the Association for Computational Linguistics: ACL 2025",
month = jul,
year = "2025",
address = "Vienna, Austria",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/mtsummit-25-ingestion/2025.findings-acl.480/",
doi = "10.18653/v1/2025.findings-acl.480",
pages = "9234--9247",
ISBN = "979-8-89176-256-5",
abstract = "Recent research has focused on investigating the psychological characteristics of Large Language Models (LLMs), emphasizing the importance of comprehending their behavioral traits. Likert scale personality questionnaires have become the primary tool for assessing these characteristics in LLMs. However, such scales can be skewed by factors such as social desirability, distorting the assessment of true personality traits. To address this issue, we firstly incorporate the forced-choice test, a method known for reducing response bias in human personality assessments, into the evaluation of LLM. Specifically, we evaluated six LLMs: Llama-3.1-8B, GLM-4-9B, GPT-3.5-turbo, GPT-4o, Claude-3.5-sonnet, and Deepseek-V3. We compared the Likert scale and forced-choice test results for LLMs' Big Five personality scores, as well as their reliability. In addition, we looked at how temperature parameter and language affected LLM personality scores. The results show that the forced-choice test better captures differences between LLMs across various personality dimensions and is less influenced by temperature parameters. Furthermore, we found both broad trends and specific variations in personality scores across models and languages."
}
Markdown (Informal)
[Decoding LLM Personality Measurement: Forced-Choice vs. Likert](https://aclanthology.org/2025.findings-acl.480/) (Li et al., Findings 2025)