% System-description paper for SemEval-2026 Task 12 (ACL Anthology export).
% NOTE(review): the url below points at an Anthology preview/ingest build;
% presumably it should be swapped for the canonical Anthology URL once the
% volume is published -- TODO confirm.
% NOTE(review): address holds the event location as exported by the Anthology,
% not a publisher city; left unchanged.
@inproceedings{liang-etal-2026-tubingen,
  title     = {{T{\"u}bingen-CL} at {SemEval-2026} Task 12: Reinforcement Learning and Verification for Abductive Reasoning},
  author    = {Liang, Bolun and
               Khudaybergenova, Ayperi and
               Kankanamge, Shashikala},
  editor    = {Kochmar, Ekaterina and
               Ghosh, Debanjan and
               North, Kai and
               Komachi, Mamoru},
  booktitle = {Proceedings of the 20th {International} {Workshop} on {Semantic} {Evaluation} (2026)},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, USA},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-acl-workshops/2026.semeval-1.330/},
  pages     = {2621--2629},
  isbn      = {979-8-89176-414-9},
  abstract  = {We investigate the reliability of verifier-based pipelines for abductive reasoning in SemEval-2026 Task 12. While reinforcement learning improves the base generator's performance, we find that incorporating a small-model verifier introduces a significant generalization gap: although effective on validation data, the verifier systematically degrades correct predictions on the unseen test set by appending false positives. Furthermore, we reveal a critical vulnerability in the official evaluation metric, which assigns zero reward to abstentions but does not sufficiently penalize incorrect selections. This asymmetry enables trivial heuristic strategies such as blindly selecting a default option to substantially inflate performance, even outperforming more principled reasoning systems. Our analysis demonstrates that current evaluation protocols can misrepresent true reasoning ability and highlights the need for more robust verification methods and scoring schemes.},
}
Markdown (Informal)
[Tübingen-CL at SemEval-2026 Task 12: Reinforcement Learning and Verification for Abductive Reasoning](https://preview.aclanthology.org/ingest-acl-workshops/2026.semeval-1.330/) (Liang et al., SemEval 2026)
ACL