@inproceedings{li-liang-2026-bbgame,
    title = "{BBgame} at {SemEval}-2026 Task 12: Small Language Model Finetuning for Abductive Event Reasoning task",
    author = "Li, Shu and
      Liang, Huizhi (Elly)",
    editor = "Kochmar, Ekaterina and
      Ghosh, Debanjan and
      North, Kai and
      Komachi, Mamoru",
    booktitle = "Proceedings of the 20th {International} {Workshop} on {Semantic} {Evaluation} (2026)",
    month = jul,
    year = "2026",
    address = "San Diego, California, USA",
    publisher = "Association for Computational Linguistics",
    url = "https://preview.aclanthology.org/ingest-acl-workshops/2026.semeval-1.122/",
    pages = "894--898",
    isbn = "979-8-89176-414-9",
    abstract = "We introduce a three-stage training framework for abductive event reasoning (AER). The task dataset was decomposed into 3 subsets: causal judgment, cause generation, and multiple choice answering (MCQA). Abductive reasoning requires understanding complex causal relationships between events. However, small language models typically struggle due to the multi-step inference required. Our approach provided supervised fine-tuning with group relative policy optimization (GRPO) to enlarge the reasoning capabilities based on a 0.5B-parameter model. On the SemEval-2026 Task 12 development set, our Casual-Qwen 0.5B model achieves 64.75\%, absolutely outperforming the 63.78\% of Qwen2.5:0.5b by 0.0975\%. Our ablation study reveals that binary causal judgment, rather than cause generation or direct MCQA training, is the key skill for the AER task, with more complex stages significantly underperforming due to task misalignment or task complexity."
}
Markdown (Informal)
[BBgame at SemEval-2026 Task 12: Small Language Model Finetuning for Abductive Event Reasoning task](https://preview.aclanthology.org/ingest-acl-workshops/2026.semeval-1.122/) (Li & Liang, SemEval 2026)
ACL