% GigitAI system-description paper for SemEval-2026 Task 11 (syllogistic validity classification).
% NOTE(review): the url field points at a preview.aclanthology.org ingest path, which looks like
% a staging address -- confirm the permanent URL before relying on it.
@inproceedings{krishnasamy-2026-gigitai,
  title     = {{GigitAI} at {SemEval-2026} Task 11: Hybrid Symbolic-Neural Approach for Syllogistic Validity Classification},
  author    = {Krishnasamy, Saran},
  editor    = {Kochmar, Ekaterina and
               Ghosh, Debanjan and
               North, Kai and
               Komachi, Mamoru},
  booktitle = {Proceedings of the 20th {International} {Workshop} on {Semantic} {Evaluation} (2026)},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, USA},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-acl-workshops/2026.semeval-1.425/},
  pages     = {3432--3441},
  isbn      = {979-8-89176-414-9},
  abstract  = {We present our system for SemEval-2026 Task 11 on classifying whether syllogisms are logically valid. The main challenge is that language models tend to judge arguments based on whether the conclusion sounds true in the real world, rather than whether it follows logically from the premises. We evaluate direct prompting across six models (GPT-4o, GPT-5.2, o3, o3-mini, Claude Opus 4.6, Claude Sonnet 4) with three prompt strategies, finding that even the best achieves only 89.5{\%} accuracy. Our best-performing system splits the task into two parts: GPT-4o-mini extracts the logical structure, then deterministic rules check validity, enhanced with bidirectional premise checking, predicate negation post-processing, and a targeted rule-based fallback for double negation. This achieves 98.95{\%} accuracy on Subtask 1 (combined score 57.74) and 85.8{\%} validity accuracy on Subtask 2. We also explore self-consistency with symbolic verification (93.1{\%}), content abstraction, activation steering, contrastive fine-tuning, RLVR, and diffusion-based reasoning, finding that content abstraction surprisingly degrades performance, revealing that semantic content provides essential parsing scaffolding alongside the bias it introduces.},
}

Markdown (Informal)
[GigitAI at SemEval-2026 Task 11: Hybrid Symbolic-Neural Approach for Syllogistic Validity Classification](https://preview.aclanthology.org/ingest-acl-workshops/2026.semeval-1.425/) (Krishnasamy, SemEval 2026)
ACL