% SemEval-2026 Task 11 system description paper by Team Ellat (ACL Anthology export).
% NOTE(review): the url field points at an ACL Anthology preview (staging) build;
% confirm whether it should be swapped for the canonical aclanthology.org link
% once the volume is published.
% NOTE(review): address holds the event location, following the ACL Anthology
% export convention, not the publisher's city.
@inproceedings{bayan-memar-etal-2026-ellat,
  title     = {Ellat at {SemEval}-2026 Task 11: Comparing Encoder and Decoder Models for Syllogistic Reasoning},
  author    = {Bayan Memar, Farzaneh and
               Huls, Hanneke and
               Ten Hove, Matthijs},
  editor    = {Kochmar, Ekaterina and
               Ghosh, Debanjan and
               North, Kai and
               Komachi, Mamoru},
  booktitle = {Proceedings of the 20th {International} {Workshop} on {Semantic} {Evaluation} (2026)},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, USA},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-acl-workshops/2026.semeval-1.270/},
  pages     = {2130--2138},
  isbn      = {979-8-89176-414-9},
  abstract  = {For SemEval-2026 Task 11 (Subtask 1: English), Team Ellat investigates whether language models can assess logical validity independently of semantic plausibility. Since these models learn statistical patterns instead of explicit logical rules, they often rely on world knowledge and semantic shortcuts rather than formal logic. To address this challenge, we evaluate three architectures: MiniLM-L6-mnli-binary, DeBERTa-v3-small, and Llama 3.1-8B-Instruct, applying task-specific fine-tuning for encoder models and Abstract Logic Augmentation with QLoRA for LLaMA. DeBERTa achieved the strongest overall performance, MiniLM showed clear reductions in content bias after fine-tuning, and Llama 3.1-8B exhibited strong plausibility bias in the zero-shot setting. However, our augmented fine-tuning approach led to only modest improvements and a partial shift toward structure-based reasoning. Overall, fine-tuning and abstraction-based augmentation reduce plausibility bias, but fully separating logical validity from semantic content remains challenging across architectures.},
}
Markdown (Informal)
[Ellat at SemEval-2026 Task 11: Comparing Encoder and Decoder Models for Syllogistic Reasoning](https://preview.aclanthology.org/ingest-acl-workshops/2026.semeval-1.270/) (Bayan Memar et al., SemEval 2026)
ACL