% Conference paper from the AIME-Con 2025 "Full Papers" proceedings (ACL Anthology export).
% Notes for maintainers:
%   - url is the canonical Anthology address derived from the paper ID 2025.aimecon-main.41;
%     the export originally carried a temporary preview-branch URL.
%   - address holds the conference venue (ACL Anthology convention), not the publisher's city.
%   - The editor surname "Beiting Parrish" is a two-word surname; the comma form keeps it intact.
@inproceedings{demirkaya-etal-2025-simulating,
  title     = {Simulating Rating Scale Responses with {LLM}s for Early-Stage Item Evaluation},
  author    = {Demirkaya, Onur and
               Wei, Hsin-Ro and
               Johnson, Evelyn},
  editor    = {Wilson, Joshua and
               Ormerod, Christopher and
               Beiting Parrish, Magdalen},
  booktitle = {Proceedings of the Artificial Intelligence in Measurement and Education Conference ({AIME-Con}): Full Papers},
  month     = oct,
  year      = {2025},
  address   = {Wyndham Grand Pittsburgh, Downtown, Pittsburgh, Pennsylvania, United States},
  publisher = {National Council on Measurement in Education (NCME)},
  url       = {https://aclanthology.org/2025.aimecon-main.41/},
  pages     = {385--392},
  isbn      = {979-8-218-84228-4},
  abstract  = {This study explores the use of large language models to simulate human responses to Likert-scale items. A DeBERTa-base model fine-tuned with item text and examinee ability emulates a graded response model (GRM). High alignment with GRM probabilities and reasonable threshold recovery support LLMs as scalable tools for early-stage item evaluation.},
}
Markdown (Informal)
[Simulating Rating Scale Responses with LLMs for Early-Stage Item Evaluation](https://aclanthology.org/2025.aimecon-main.41/) (Demirkaya et al., AIME-Con 2025)
ACL
- Onur Demirkaya, Hsin-Ro Wei, and Evelyn Johnson. 2025. Simulating Rating Scale Responses with LLMs for Early-Stage Item Evaluation. In Proceedings of the Artificial Intelligence in Measurement and Education Conference (AIME-Con): Full Papers, pages 385–392, Wyndham Grand Pittsburgh, Downtown, Pittsburgh, Pennsylvania, United States. National Council on Measurement in Education (NCME).