@inproceedings{koberidze-etal-2025-benchmark,
  title     = {A Benchmark for Evaluating Logical Reasoning in {Georgian} For Large Language Models},
  author    = {Koberidze, Irakli and
               Elizbarashvili, Archil and
               Tsintsadze, Magda},
  editor    = {Estevanell-Valladares, Ernesto Luis and
               Picazo-Izquierdo, Alicia and
               Ranasinghe, Tharindu and
               Mikaberidze, Besik and
               Ostermann, Simon and
               Gurgurov, Daniil and
               Mueller, Philipp and
               Borg, Claudia and
               {\v{S}}imko, Mari{\'a}n},
  booktitle = {Proceedings of the First Workshop on Advancing {NLP} for Low-Resource Languages},
  month     = sep,
  year      = {2025},
  address   = {Varna, Bulgaria},
  publisher = {INCOMA Ltd., Shoumen, Bulgaria},
  url       = {https://preview.aclanthology.org/corrections-2026-01/2025.lowresnlp-1.13/},
  pages     = {121--130},
  abstract  = {Advancements in LLMs have largely overlooked low-resource languages (LRLs), creating a gap in evaluation benchmarks. To address this for Georgian, a Kartvelian language, we introduce GeoLogicQA. This novel, manually-curated benchmark assesses LLMs' logical and inferential reasoning through 100 questions. Questions cover syllogistic deduction, inferential reading comprehension, common-sense reasoning, and arithmetic, adapted from challenging sources (Kangaroo Mathematics Competition) and validated by native Georgian speakers for linguistic nuances. Initial evaluations of state-of-the-art LLMs (Gemini 2.5 Flash, DeepSeek-V3, Grok-3, GPT-4o) show an average accuracy of 64{\%} to 83{\%}, significantly exceeding the human baseline of 47{\%}. While demonstrating strong reasoning potential, error analysis reveals persistent challenges in multi-step combinatorial and highly constrained inferential tasks. GeoLogicQA is a public resource for tracking progress and diagnosing weaknesses in Georgian LLMs. We plan to expand the benchmark and establish a public leader-board to foster continuous improvement.},
}

Markdown (Informal)
[A Benchmark for Evaluating Logical Reasoning in Georgian For Large Language Models](https://preview.aclanthology.org/corrections-2026-01/2025.lowresnlp-1.13/) (Koberidze et al., LowResNLP 2025)
ACL