% Conference paper: "CrosSing: Cross-Scale Reasoning Evaluation on LLMs
% against Humans", in the Proceedings of the Society for Computation in
% Linguistics 2026, pages 379--407.
%
% Name handling: the last author is entered in "von Last, First" form so the
% particle is parsed as part of the surname instead of as a second given name.
% NOTE(review): lowercase "van" is assumed to be the author's preferred
% spelling -- confirm against the author's other publications.
%
% Title casing: the coined name and the acronym are protected as whole words,
% so sentence-casing styles keep their capitals.
%
% NOTE(review): the url field points at a preview.aclanthology.org ingest
% path; confirm the permanent address before this entry is cited in print.
@inproceedings{han-etal-2026-crossing,
  title     = {{CrosSing}: Cross-Scale Reasoning Evaluation on {LLMs} against Humans},
  author    = {Han, Qi and
               Wu, Yifan and
               van Schijndel, Marten},
  editor    = {Voigt, Rob and
               Warstadt, Alex and
               Feldman, Naomi and
               Linzen, Tal},
  booktitle = {Proceedings of the Society for Computation in Linguistics 2026},
  month     = jul,
  year      = {2026},
  address   = {San Diego, CA},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-acl-workshops/2026.scil-main.36/},
  pages     = {379--407},
  isbn      = {979-8-89176-412-5},
  abstract  = {While many studies have shown LLMs perform well in various reasoning tasks, few have examined their capacity on semantic reasoning tasks. As LLMs reason with language, it is crucial to understand how well they grasp and use the underlying scalar relationships in language. In this study, we introduced a new dataset CrosSing (Cross-Scale reasoning), providing a human baseline against which to evaluate LLMs' ability to reason across lexical scales in gradable adjectives. We further probed how their understanding is influenced by overinformative contexts. We evaluated ten high-performing LLMs and found that some outperformed humans when no extra information was provided, but that LLM performance declined in certain overinformative contexts while human performance improved significantly. This contrast reveals a fundamental difference between recent LLMs and humans in understanding adjectives' scalar relationships and how such understanding behaves in overinformative contexts.},
}
Markdown (Informal)
[CrosSing: Cross-Scale Reasoning Evaluation on LLMs against Humans](https://preview.aclanthology.org/ingest-acl-workshops/2026.scil-main.36/) (Han et al., SCiL 2026)
ACL