% SemEval-2026 Task 12 system description paper (CausalMinds).
% NOTE(review): the url field below uses a "preview" host and an ingest path,
% which looks like a staging build -- confirm the canonical URL before citing.
@inproceedings{gupta-etal-2026-causalminds,
  title     = {{CausalMinds} at {SemEval-2026} Task 12: Simple Fine-Tuning with Option Shuffling Outperforms Complex Pipelines for Abductive Event Reasoning},
  author    = {Gupta, Vidur and
               Zhao, Xiaofei and
               Shaye, Jason},
  editor    = {Kochmar, Ekaterina and
               Ghosh, Debanjan and
               North, Kai and
               Komachi, Mamoru},
  booktitle = {Proceedings of the 20th {International} {Workshop} on {Semantic} {Evaluation} (2026)},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, USA},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-acl-workshops/2026.semeval-1.358/},
  pages     = {2852--2859},
  isbn      = {979-8-89176-414-9},
  abstract  = {We describe our system for SemEval-2026 Task 12 on Abductive Event Reasoning, which requires identifying plausible direct cause(s) of real-world events. We conduct a systematic evaluation of 23 configurations spanning prompting, retrieval-augmented generation, multi-stage verification, and supervised fine-tuning across models of different scales. Across experiments, we found that fine-tuning GPT-4.1-mini with data augmentation via option shuffling consistently outperformed more complex multi-stage pipelines and larger-model prompting strategies. Our system scores 0.88 on the test dataset, ranking 19th out of 221 submissions, which is only 0.07 away from the highest scoring submission of 0.95. Interestingly, chain-of-thought prompting and multi-stage verification hurt performance compared to simpler baselines. This reinforces that simplicity can outperform complex pipelines. We document these negative results and examine the persistent gap between development (0.991) and test (0.88) scores.},
}
Markdown (Informal)
[CausalMinds at SemEval-2026 Task 12: Simple Fine-Tuning with Option Shuffling Outperforms Complex Pipelines for Abductive Event Reasoning](https://preview.aclanthology.org/ingest-acl-workshops/2026.semeval-1.358/) (Gupta et al., SemEval 2026)
ACL