% Conference paper entry; booktitle is the Findings of ACL 2026 volume.
% NOTE(review): the url field below points at the Anthology preview/ingest
% staging host. The permanent link is presumably
% https://aclanthology.org/2026.findings-acl.1498/ -- confirm once the
% volume is published, then update the url field.
@inproceedings{liu-etal-2026-cognitive-uncertainty,
  title     = {Cognitive-Uncertainty Guided Knowledge Distillation for Accurate Classification of Student Misconceptions},
  author    = {Liu, Qirui and
               Chen, Hao and
               Shi, Weijie and
               Xu, Jiajie and
               Zhu, Jia},
  editor    = {Liakata, Maria and
               Moreira, Viviane P. and
               Zhang, Jiajun and
               Jurgens, David},
  booktitle = {Findings of the {Association} for {Computational} {Linguistics}: {ACL} 2026},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, United States},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-acl/2026.findings-acl.1498/},
  pages     = {29964--29980},
  isbn      = {979-8-89176-395-1},
  abstract  = {Accurately identifying student misconceptions is crucial for personalized education but faces three challenges: (1) data scarcity with long-tail distribution, where authentic student reasoning is difficult to synthesize; (2) fuzzy boundaries between error categories with high annotation noise; (3) deployment paradox{---}large models overlook unconventional approaches due to pretraining bias and cannot be deployed on edge, while small models overfit to noise. Unlike traditional methods that increase diversity through large-scale data synthesis, we propose a two-stage knowledge distillation framework that mines high-value samples from existing data. The first stage performs standard distillation to transfer task capabilities. The second stage introduces a dual-layer marginal selection mechanism based on cognitive uncertainty, identifying four types of critical samples based on teacher model uncertainty and confidence differences. For different data subsets, we design difficulty-adaptive mechanism to balance hard/soft label contributions, enabling student models to inherit inter-class relationships from teacher soft labels while distinguishing ambiguous error types. Experiments show that with augmented training on only 10.30{\%} of filtered samples, we achieve MAP@3 of 0.9585 (+17.8{\%}) on the MAP-Charting dataset, and using only a 4B parameter model, we attain 84.38{\%} accuracy on cross-topic tests of middle school algebra misconception benchmarks, significantly outperforming sota LLM (67.73{\%}) and standard fine-tuned 72B models (81.25{\%}). Our code is available at \url{https://anonymous.4open.science/r/acl2026_map-5847/}.},
}
Markdown (Informal)
[Cognitive-Uncertainty Guided Knowledge Distillation for Accurate Classification of Student Misconceptions](https://preview.aclanthology.org/ingest-acl/2026.findings-acl.1498/) (Liu et al., Findings 2026)
ACL