@inproceedings{clark-etal-2025-well,
    title = "How Well Does First-Token Entropy Approximate Word Entropy as a Psycholinguistic Predictor?",
    author = "Clark, Christian and
      Oh, Byung-Doh and
      Schuler, William",
    editor = "Inui, Kentaro and
      Sakti, Sakriani and
      Wang, Haofen and
      Wong, Derek F. and
      Bhattacharyya, Pushpak and
      Banerjee, Biplab and
      Ekbal, Asif and
      Chakraborty, Tanmoy and
      Singh, Dhirendra Pratap",
    booktitle = "Proceedings of the 14th International Joint Conference on Natural Language Processing and the 4th Conference of the Asia-Pacific Chapter of the Association for Computational Linguistics",
    month = dec,
    year = "2025",
    address = "Mumbai, India",
    publisher = "The Asian Federation of Natural Language Processing and The Association for Computational Linguistics",
    url = "https://preview.aclanthology.org/ingest-ijcnlp-aacl/2025.ijcnlp-short.4/",
    pages = "47--57",
    isbn = "979-8-89176-299-2",
    abstract = "Contextual entropy is a psycholinguistic measure capturing the anticipated difficulty of processing a word just before it is encountered. Recent studies have tested for entropy-related effects as a potential complement to well-known effects from surprisal. For convenience, entropy is typically estimated based on a language model{'}s probability distribution over a word{'}s first subword token. However, this approximation results in underestimation and potential distortion of true word entropy. To address this, we generate Monte Carlo (MC) estimates of word entropy that allow words to span a variable number of tokens. Regression experiments on reading times show divergent results between first-token and MC word entropy, suggesting a need for caution in using first-token approximations of contextual entropy."
}

Markdown (Informal)
[How Well Does First-Token Entropy Approximate Word Entropy as a Psycholinguistic Predictor?](https://preview.aclanthology.org/ingest-ijcnlp-aacl/2025.ijcnlp-short.4/) (Clark et al., IJCNLP-AACL 2025)
ACL
- Christian Clark, Byung-Doh Oh, and William Schuler. 2025. How Well Does First-Token Entropy Approximate Word Entropy as a Psycholinguistic Predictor?. In Proceedings of the 14th International Joint Conference on Natural Language Processing and the 4th Conference of the Asia-Pacific Chapter of the Association for Computational Linguistics, pages 47–57, Mumbai, India. The Asian Federation of Natural Language Processing and The Association for Computational Linguistics.