% Conference paper describing the IndiAnn annotation platform, published in the
% proceedings of the 20th Linguistic Annotation Workshop (LAW XX), 2026.
% NOTE(review): the url field targets an ACL Anthology preview/ingest branch;
%   presumably it should be replaced by the canonical Anthology URL once the
%   volume is published -- confirm before release.
% NOTE(review): the address field appears to hold the workshop venue rather than
%   the publisher's city -- confirm this matches the target bibliography style.
@inproceedings{lavadeep-etal-2026-indiann,
  title     = {{IndiAnn}: A Web-based Annotation Platform for {Indic} Languages},
  author    = {Lavadeep, Bandaru and
               Raghav, Ritwik and
               Jana, Abhik},
  editor    = {Liu, Yang Janet and
               Gessler, Luke},
  booktitle = {Proceedings of the 20th Linguistic Annotation Workshop ({LAW} {XX})},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, USA},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-acl-workshops/2026.law-main.10/},
  pages     = {130--145},
  isbn      = {979-8-89176-404-0},
  abstract  = {Linguistic annotation tools that work well for non-Indic languages (e.g. English, German, Spanish, etc.) often fail with Indic scripts due to complex Unicode properties, including visual reordering of vowel matras, conjunct characters, and grapheme clusters spanning multiple code points. In this paper, we present a web-based annotation platform IndiAnn, designed for low-resource Indic languages, which uses native browser Unicode rendering, offset-based storage that preserves grapheme clusters, and no forced tokenization in the user interface. The tool supports annotation for tasks such as part-of-speech (POS) tagging, named entity recognition (NER), dependency relation annotation, and semantic role labelling (SRL), that maintain correct character boundaries and enable seamless interoperability with standard NLP pipelines and tools. The framework is designed for Indic languages and has been tested on Telugu, Hindi, Tamil, Malayalam, Bengali, Odia, Marathi, and Kannada, with no script breakage during annotation. To the best of our knowledge, this is the first ever attempt at building a unified annotation framework (IndiAnn), which covers annotation for such varieties of key NLP tasks, having provision for eight Indic languages. The code repository is made publicly available at https://github.com/Lavadeep/INDIANN.},
}