@inproceedings{sheng-etal-2025-evaluating,
  title     = {Evaluating Generalization Capability of Language Models across Abductive, Deductive and Inductive Logical Reasoning},
  author    = {Sheng, Yu and
               Wen, Wanting and
               Li, Linjing and
               Zeng, Daniel},
  editor    = {Rambow, Owen and
               Wanner, Leo and
               Apidianaki, Marianna and
               Al-Khalifa, Hend and
               Di Eugenio, Barbara and
               Schockaert, Steven},
  booktitle = {Proceedings of the 31st International Conference on Computational Linguistics},
  month     = jan,
  year      = {2025},
  address   = {Abu Dhabi, UAE},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.coling-main.330/},
  pages     = {4945--4957},
  abstract  = {Transformer-based language models (LMs) have demonstrated remarkable performance on many natural language tasks, yet to what extent LMs possess the capability of generalizing to unseen logical rules remains not explored sufficiently. In classical logic category, abductive, deductive and inductive (ADI) reasoning are defined as the fundamental reasoning types, sharing the identical reasoning primitives and properties, and some research have proposed that there exists mutual generalization across them. However, in the field of natural language processing, previous research generally study LMs' ADI reasoning capabilities separately, overlooking the generalization across them. To bridge this gap, we propose UniADILR, a novel logical reasoning dataset crafted for assessing the generalization capabilities of LMs across different logical rules. Based on UniADILR, we conduct extensive investigations from various perspectives of LMs' performance on ADI reasoning. The experimental results reveal the weakness of current LMs in terms of extrapolating to unseen rules and inspire a new insight for future research in logical reasoning.},
}
Markdown (Informal)
[Evaluating Generalization Capability of Language Models across Abductive, Deductive and Inductive Logical Reasoning](https://aclanthology.org/2025.coling-main.330/) (Sheng et al., COLING 2025)
ACL