% Workshop paper record (inproceedings) from the CHiPSAL 2025 proceedings,
% pages 248--252. The url field holds the canonical ACL Anthology permalink
% built from the paper ID 2025.chipsal-1.26, not a temporary preview build.
% Case-sensitive words in the title are brace-protected as whole words so
% sentence-casing styles keep them intact.
% NOTE(review): the editor written as "Krishna Bal, Bal" parses with the
% surname "Krishna Bal"; confirm against the proceedings front matter
% whether "Bal, Bal Krishna" is the intended form.
@inproceedings{manukonda-kodali-2025-bytesizedllm,
  title     = {{byteSizedLLM@NLU} of {Devanagari} Script Languages 2025: Language Identification Using Customized Attention {BiLSTM} and {XLM-RoBERTa} base Embeddings},
  author    = {Manukonda, Durga Prasad and
               Kodali, Rohith Gowtham},
  editor    = {Sarveswaran, Kengatharaiyer and
               Vaidya, Ashwini and
               Krishna Bal, Bal and
               Shams, Sana and
               Thapa, Surendrabikram},
  booktitle = {Proceedings of the First Workshop on Challenges in Processing South Asian Languages ({CHiPSAL} 2025)},
  month     = jan,
  year      = {2025},
  address   = {Abu Dhabi, UAE},
  publisher = {International Committee on Computational Linguistics},
  url       = {https://aclanthology.org/2025.chipsal-1.26/},
  pages     = {248--252},
  abstract  = {This study explores the challenges of natural language understanding (NLU) in multilingual contexts, focusing on Devanagari-scripted languages such as Nepali, Marathi, Sanskrit, Bhojpuri, and Hindi. Language identification within these languages is complex due to their structural and lexical similarities. We present a hybrid Attention BiLSTM-XLM-RoBERTa model, achieving a state-of-the-art F1 score of 0.9974 on the test set, despite limited resources. Our model effectively distinguishes between closely related Devanagari-scripted languages, providing a solid foundation for context-aware NLU systems that enhance language-specific processing and promote inclusive digital interactions across diverse linguistic communities.},
}
Markdown (Informal)
[byteSizedLLM@NLU of Devanagari Script Languages 2025: Language Identification Using Customized Attention BiLSTM and XLM-RoBERTa base Embeddings](https://aclanthology.org/2025.chipsal-1.26/) (Manukonda & Kodali, CHiPSAL 2025)
ACL