% Conference paper entry (ACL 2026 main proceedings, long papers volume).
% NOTE(review): the url field below points at the preview.aclanthology.org
% ingest host; confirm and switch to the canonical Anthology URL once the
% volume is published.
@inproceedings{hou-etal-2026-subtokentest,
  author    = {Hou, Shuyang and
               Hu, Yi and
               Zhang, Muhan},
  title     = {{SubTokenTest}: A Practical Benchmark for Real-World Sub-token Understanding},
  editor    = {Liakata, Maria and
               Moreira, Viviane P. and
               Zhang, Jiajun and
               Jurgens, David},
  booktitle = {Proceedings of the 64th Annual Meeting of the {Association} for {Computational} {Linguistics} (Volume 1: Long Papers)},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, United States},
  publisher = {Association for Computational Linguistics},
  pages     = {19957--19999},
  isbn      = {979-8-89176-390-6},
  url       = {https://preview.aclanthology.org/ingest-acl/2026.acl-long.915/},
  abstract  = {Recent advancements in large language models (LLMs) have significantly enhanced their reasoning capabilities. However, they continue to struggle with basic character-level tasks, such as counting letters in words{---}a problem rooted in their tokenization process. While existing benchmarks have highlighted this weakness through basic character operations, such failures are often dismissed due to lacking practical relevance. Yet, many real-world applications, such as navigating text-based maps or interpreting structured tables, rely heavily on precise sub-token understanding. In this regard, we introduce SubTokenTest, a comprehensive benchmark that assesses sub-token understanding through practical, utility-driven tasks. Our benchmark includes ten tasks across four domains and isolates tokenization-related failures by decoupling performance from complex reasoning. We provide a comprehensive evaluation of nine advanced LLMs. Additionally, we investigate the impact of test-time scaling on sub-token reasoning and explore how character-level information is encoded within the hidden states.},
}
Markdown (Informal)
[SubTokenTest: A Practical Benchmark for Real-World Sub-token Understanding](https://preview.aclanthology.org/ingest-acl/2026.acl-long.915/) (Hou et al., ACL 2026)
ACL