@inproceedings{li-etal-2025-comparing,
    title = "Comparing {AI} tools and Human Raters in Predicting Reading Item Difficulty",
    author = "Li, Hongli and
      Aldib, Roula and
      Marchong, Chad and
      Fan, Kevin",
    editor = "Wilson, Joshua and
      Ormerod, Christopher and
      Beiting Parrish, Magdalen",
    booktitle = "Proceedings of the Artificial Intelligence in Measurement and Education Conference (AIME-Con): Works in Progress",
    month = oct,
    year = "2025",
    address = "Wyndham Grand Pittsburgh, Downtown, Pittsburgh, Pennsylvania, United States",
    publisher = "National Council on Measurement in Education (NCME)",
    url = "https://aclanthology.org/2025.aimecon-wip.10/",
    pages = "84--89",
    isbn = "979-8-218-84229-1",
    abstract = "This study compares AI tools and human raters in predicting the difficulty of reading comprehension items without response data. Predictions from AI models (ChatGPT, Gemini, Claude, and DeepSeek) and human raters are evaluated against empirical difficulty values derived from student responses. Findings will inform AI{'}s potential to support test development."
}

Markdown (Informal)
[Comparing AI tools and Human Raters in Predicting Reading Item Difficulty](https://aclanthology.org/2025.aimecon-wip.10/) (Li et al., AIME-Con 2025)
ACL
- Hongli Li, Roula Aldib, Chad Marchong, and Kevin Fan. 2025. Comparing AI tools and Human Raters in Predicting Reading Item Difficulty. In Proceedings of the Artificial Intelligence in Measurement and Education Conference (AIME-Con): Works in Progress, pages 84–89, Wyndham Grand Pittsburgh, Downtown, Pittsburgh, Pennsylvania, United States. National Council on Measurement in Education (NCME).