@inproceedings{dhamecha-etal-2025-team,
title = "Team Horizon at {BHASHA} Task 1: Multilingual {I}ndic{GEC} with Transformer-based Grammatical Error Correction Models",
author = "Dhamecha, Manav and
Jaat, Sunil and
Damor, Gaurav and
Mishra, Pruthwik",
editor = "Bhattacharya, Arnab and
Goyal, Pawan and
Ghosh, Saptarshi and
Ghosh, Kripabandhu",
booktitle = "Proceedings of the 1st Workshop on Benchmarks, Harmonization, Annotation, and Standardization for Human-Centric AI in Indian Languages (BHASHA 2025)",
month = dec,
year = "2025",
address = "Mumbai, India",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/ingest-ijcnlp-aacl/2025.bhasha-1.14/",
pages = "142--146",
ISBN = "979-8-89176-313-5",
abstract = "This paper presents Team Horizon{'}s approach to the BHASHA Shared Task 1: Indic Grammatical Error Correction (IndicGEC). We explore transformer-based multilingual models {---} mT5-small and IndicBART {---} to correct grammatical and semantic errors across five Indian languages: Bangla, Hindi, Tamil, Telugu, and Malayalam. Due to limited annotated data, we developed a synthetic data augmentation pipeline that introduces realistic linguistic errors under ten categories, simulating natural mistakes found in Indic scripts. Our fine-tuned models achieved competitive performance with GLEU scores of 86.03 (Tamil), 72.00 (Telugu), 82.69 (Bangla), 80.44 (Hindi), and 84.36 (Malayalam). We analyze the impact of dataset scaling, multilingual fine-tuning, and training epochs, showing that linguistically grounded augmentation can significantly improve grammatical correction accuracy in low-resource Indic languages."
}