% Workshop paper in the BHASHA 2025 proceedings (Association for Computational Linguistics).
% The abstract below has been cleaned of text-extraction artifacts: dropped hyphens,
% a fused word pair, and a misplaced combining acute accent.
% NOTE(review): the url field points at a preview/ingest host; confirm the canonical
% address before this entry is cited in a published document.
@inproceedings{p-kulkarni-2025-accent,
  title     = {Accent Placement Models for {Rigvedic} {Sanskrit} Text},
  author    = {P, Akhil Rajeev and
               Kulkarni, Annarao},
  editor    = {Bhattacharya, Arnab and
               Goyal, Pawan and
               Ghosh, Saptarshi and
               Ghosh, Kripabandhu},
  booktitle = {Proceedings of the 1st Workshop on Benchmarks, Harmonization, Annotation, and Standardization for Human-Centric AI in Indian Languages (BHASHA 2025)},
  month     = dec,
  year      = {2025},
  address   = {Mumbai, India},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-ijcnlp-aacl/2025.bhasha-1.11/},
  pages     = {122--126},
  isbn      = {979-8-89176-313-5},
  abstract  = {The Rigveda, among the oldest Indian texts in Vedic Sanskrit, employs a distinctive pitch-accent system - udatta, anudatta, svarita - whose marks encode melodic and interpretive cues but are often absent from modern e-texts. This work develops a parallel corpus of accented-unaccented {\'s}lokas and conducts a controlled comparison of three strategies for automatic accent placement in Rigvedic verse: (i) full fine-tuning of ByT5, a byte-level Transformer that operates directly on Unicode combining marks, (ii) a from-scratch BiLSTM-CRF sequence-labeling baseline, and (iii) LoRA-based parameter-efficient fine-tuning atop ByT5. Evaluation uses Word Error Rate (WER) and Character Error Rate (CER) for orthographic fidelity, plus a task-specific Diacritic Error Rate (DER) that isolates accent edits. Full ByT5 fine-tuning attains the lowest error across all metrics; LoRA offers strong efficiency-accuracy trade-offs, and BiLSTM-CRF serves as a transparent baseline. The study underscores practical requirements for accent restoration - Unicode-safe preprocessing, mark-aware tokenization, and evaluation that separates grapheme from accent errors - and positions heritage-language technology as an emerging NLP area connecting computational modeling with philological and pedagogical aims. Results establish reproducible baselines for Rigvedic accent restoration and provide guidance for downstream tasks such as accent-aware OCR, ASR/chant synthesis, and digital scholarship.},
}
Markdown (Informal)
[Accent Placement Models for Rigvedic Sanskrit Text](https://preview.aclanthology.org/ingest-ijcnlp-aacl/2025.bhasha-1.11/) (P & Kulkarni, BHASHA 2025)
ACL
- Akhil Rajeev P and Annarao Kulkarni. 2025. Accent Placement Models for Rigvedic Sanskrit Text. In Proceedings of the 1st Workshop on Benchmarks, Harmonization, Annotation, and Standardization for Human-Centric AI in Indian Languages (BHASHA 2025), pages 122–126, Mumbai, India. Association for Computational Linguistics.