% GanitLLM paper (Dipta, Mahbub, Najjar), Findings of ACL 2026, pp. 40133--40148.
% Case-sensitive words in title/booktitle are brace-protected as whole words so
% sentence-casing styles keep them intact.
% NOTE(review): `url` points at a preview.aclanthology.org ingest path; presumably
% it should be replaced by the canonical Anthology URL once published -- confirm.
% NOTE(review): `address` appears to hold the conference location (ACL Anthology
% export convention) rather than the publisher's city -- confirm against target style.
@inproceedings{dipta-etal-2026-ganitllm,
  title     = {{GanitLLM}: Difficulty-Aware {Bengali} Mathematical Reasoning through {Curriculum-GRPO}},
  author    = {Dipta, Shubhashis Roy and
               Mahbub, Khairul and
               Najjar, Nadia},
  editor    = {Liakata, Maria and
               Moreira, Viviane P. and
               Zhang, Jiajun and
               Jurgens, David},
  booktitle = {Findings of the {Association} for {Computational} {Linguistics}: {ACL} 2026},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, United States},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-acl/2026.findings-acl.1995/},
  pages     = {40133--40148},
  isbn      = {979-8-89176-395-1},
  abstract  = {We present a Bengali mathematical reasoning model called GanitLLM (named after the Bangla word for mathematics, ``Ganit''), together with a new difficulty-aware Bengali math corpus and a curriculum-based GRPO pipeline. Bengali is one of the world's most widely spoken languages, yet existing LLMs either reason in English and then translate, or simply fail on multi-step Bengali math, in part because reinforcement learning recipes are tuned for high-resource languages and collapse under reward sparsity in low-resource settings. To address this, we construct Ganit, a rigorously filtered and decontaminated Bengali math dataset with automatic difficulty tags derived from the pass@k of a strong evaluator model. Building on this dataset, we propose Curriculum-GRPO, which combines multi-stage training (SFT + GRPO) with difficulty-aware sampling and verifiable rewards for format, numerical correctness, and Bengali reasoning. On Bn-MGSM and Bn-MSVAMP, GanitLLM-4B improves over its Qwen3-4B base by +8 and +7 accuracy points, respectively, while increasing the percentage of Bengali reasoning tokens from 14\% to over 88\% and reducing average solution length from 943 to 193 words.},
}
Markdown (Informal)
[GanitLLM: Difficulty-Aware Bengali Mathematical Reasoning through Curriculum-GRPO](https://preview.aclanthology.org/ingest-acl/2026.findings-acl.1995/) (Dipta et al., Findings 2026)
ACL