@comment{
  System-description paper in the SemEval-2026 workshop proceedings (Task 11).
  NOTE(review): the url field points at a preview.aclanthology.org ingest
  build, which looks like a staging address -- confirm and replace it with the
  permanent URL (or add a doi) once one is available.
}
@inproceedings{faisal-chowdhury-2026-cuetluminaries,
  title     = {{CUETLuminaries} at {SemEval}-2026 Task 11 Disentangling Logical Validity from Semantic Plausibility through Canonical Abstraction},
  author    = {Faisal, Adnan and
               Chowdhury, Shiti},
  editor    = {Kochmar, Ekaterina and
               Ghosh, Debanjan and
               North, Kai and
               Komachi, Mamoru},
  booktitle = {Proceedings of the 20th {International} {Workshop} on {Semantic} {Evaluation} (2026)},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, USA},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-acl-workshops/2026.semeval-1.70/},
  pages     = {490--496},
  isbn      = {979-8-89176-414-9},
  abstract  = {Determining whether large language models (LLMs) perform genuine formal reasoning or rely on semantic heuristics is a key challenge in NLP. Syllogistic reasoning constitutes a theoretically principled evaluation paradigm where validity is fully determined by quantifier structure, allowing systematic analysis of structural inference disentangled from semantic plausibility. SemEval-2026 Task-11, Subtask-1: Disentangling Content and Formal Reasoning in Language Models, establishes a multilingual benchmark designed to rigorously isolate formal logical validity from semantic plausibility effects. The subtask evaluates English syllogistic reasoning under a binary classification setting using Overall Accuracy (ACC) and Total Content Effect (TCE), where lower TCE indicates stronger resistance to content-induced bias. Our proposed approach combines cross-validation, structured aggregation and bias-aware evaluation to optimize the robustness{--}performance trade-off. It achieves 93.19\% accuracy with a TCE of 3.13, yielding a strong combined score of 38.56 under the official evaluation metric. Condition-wise and multi-run analysis confirms that robustness-focused optimization curbs content-driven errors, reinforcing the necessity of bias-aware training for formal inference.},
}
Markdown (Informal)
[CUETLuminaries at SemEval-2026 Task 11 Disentangling Logical Validity from Semantic Plausibility through Canonical Abstraction](https://preview.aclanthology.org/ingest-acl-workshops/2026.semeval-1.70/) (Faisal & Chowdhury, SemEval 2026)
ACL