% Conference paper in the *SEM 2026 proceedings (ACL Anthology export).
% Proper nouns and acronyms in the titles are brace-protected as whole words.
@inproceedings{panda-etal-2026-annotating,
    title     = {Annotating {Indian} Regional Biases using Large Language Models: Evaluation and Analysis},
    author    = {Panda, Debasmita and
                 Anil, Akash and
                 Shukla, Neelesh Kumar},
    editor    = {Mohammad, Saif M. and
                 Ousidhoum, Nedjma},
    booktitle = {Proceedings of the 15th Joint Conference on Lexical and Computational Semantics (*{SEM} 2026)},
    month     = jul,
    year      = {2026},
    address   = {San Diego, California, United States},
    publisher = {Association for Computational Linguistics},
    url       = {https://preview.aclanthology.org/ingest-acl-workshops/2026.starsem-conference.16/},
    pages     = {255--263},
    isbn      = {979-8-89176-413-2},
    abstract  = {Social biases based on regional identity (or regional bias) are often observed in Indian contexts on major online social networks and require critical attention. However, due to large linguistic and cultural diversity, high annotation costs, and inherent human biases, very little annotated data exists on regional biases in the Indian context. Recently, Large Language Models (LLMs) have garnered attention for the automatic annotation of text. However, such annotation efforts are largely limited to English texts, and LLMs often perform poorly when applied to low-resource languages. Therefore, this paper focuses on understanding the capabilities and challenges of popular open-source LLMs in annotating Indian regional biases. We utilize the recently proposed IndRegBias dataset, which consists of Indian regionally biased social media comments in both English and code-mixed formats. First, we assess the annotation capabilities of LLMs in a zero-shot setting and critically analyze their performance across different writing styles, including code-mixing, transliteration, and English. We find that the majority of LLMs exhibit low agreement with human annotations (measured using Cohen{'}s kappa). Consequently, we extend our study by fine-tuning the models using 50{\%} of the data and evaluating them on the remaining 50{\%}. We observe a significant improvement in annotation agreement (kappa) for all the LLMs. To further assess the capabilities of the fine-tuned models, we evaluate them on 500 newly collected social media comments discussing regional issues in India. The results show that most fine-tuned LLMs outperform their zero-shot counterparts when annotating these new comments.},
}
Markdown (Informal)
[Annotating Indian Regional Biases using Large Language Models: Evaluation and Analysis](https://preview.aclanthology.org/ingest-acl-workshops/2026.starsem-conference.16/) (Panda et al., *SEM 2026)
ACL