@inproceedings{devadiga-chopra-2026-making,
title = "Making Large Language Models Speak {T}ulu: Structured Prompting for an Extremely Low-Resource Language",
author = "Devadiga, Prathamesh and
Chopra, Paras",
editor = "Hettiarachchi, Hansi and
Ranasinghe, Tharindu and
Plum, Alistair and
Rayson, Paul and
Mitkov, Ruslan and
Gaber, Mohamed and
Premasiri, Damith and
Tan, Fiona Anting and
Uyangodage, Lasitha",
booktitle = "Proceedings of the Second Workshop on Language Models for Low-Resource Languages ({L}o{R}es{LM} 2026)",
month = mar,
year = "2026",
address = "Rabat, Morocco",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/manual-author-scripts/2026.loreslm-1.5/",
pages = "50--61",
ISBN = "979-8-89176-377-7",
abstract = "Can large language models converse in languages virtually absent from their training data? We investigate this question through a case study on Tulu, a Dravidian language with over two million speakers but minimal digital presence. Rather than fine-tuning, we examine whether structured prompt engineering alone can elicit basic conversational ability under extreme data scarcity. Our framework combines explicit grammar documentation, negative constraints to suppress high-probability tokens from related languages, romanization standardization, and quality-controlled synthetic data generation via self-play. Evaluated on a manually curated held-out set across three LLMs (Gemini 2.0 Flash, GPT-4o, and Llama 3.1 70B) and validated by native speakers, our approach reduces vocabulary contamination from 80{\%} to 5{\%} while achieving 85{\%} grammatical accuracy. Cross-model analysis shows that negative constraints provide consistent improvements (12{--}18 percentage points), while the effectiveness of grammar documentation varies by model architecture (8{--}22 points). These results demonstrate that structured in-context learning can meaningfully extend LLM capabilities to extremely low-resource languages without parameter updates."
}