@inproceedings{tran-etal-2026-representation,
  title     = "Representation-Aware Prompting for Zero-Shot {M}arathi Text Classification: {IPA}, {R}omanization, Repetition",
  author    = "Tran, Van-Hien and
               Vu, Huy Hien and
               Tanaka, Hideki and
               Utiyama, Masao",
  editor    = "Hettiarachchi, Hansi and
               Ranasinghe, Tharindu and
               Plum, Alistair and
               Rayson, Paul and
               Mitkov, Ruslan and
               Gaber, Mohamed and
               Premasiri, Damith and
               Tan, Fiona Anting and
               Uyangodage, Lasitha",
  booktitle = "Proceedings of the Second Workshop on Language Models for Low-Resource Languages ({L}o{R}es{LM} 2026)",
  month     = mar,
  year      = "2026",
  address   = "Rabat, Morocco",
  publisher = "Association for Computational Linguistics",
  url       = "https://preview.aclanthology.org/manual-author-scripts/2026.loreslm-1.37/",
  pages     = "436--443",
  isbn      = "979-8-89176-377-7",
  abstract  = "Large language models (LLMs) often underperform in zero-shot text classification for low-resource, non-Latin languages due to script and tokenization mismatches. We propose \textit{representation-aware prompting} for Marathi that augments the original script with International Phonetic Alphabet (IPA) transcriptions, romanization, or a repetition-based fallback when external converters are unavailable. Experiments with two instruction-tuned LLMs on Marathi sentiment analysis and hate detection show consistent gains over script-only prompting (up to +2.6 accuracy points). We further find that the most effective augmentation is model-dependent, and that combining all variants is not consistently beneficial, suggesting that concise, targeted cues are preferable in zero-shot settings.",
}
@comment{Informal markdown citation (from ACL Anthology page):
  [Representation-Aware Prompting for Zero-Shot Marathi Text Classification: IPA, Romanization, Repetition](https://preview.aclanthology.org/manual-author-scripts/2026.loreslm-1.37/) (Tran et al., LoResLM 2026)
  ACL
}