@inproceedings{alam-anastasopoulos-2025-large,
    title     = {Large Language Models as a Normalizer for Transliteration and Dialectal Translation},
    author    = {Alam, Md Mahfuz Ibn and
                 Anastasopoulos, Antonios},
    editor    = {Scherrer, Yves and
                 Jauhiainen, Tommi and
                 Ljube{\v{s}}i{\'c}, Nikola and
                 Nakov, Preslav and
                 Tiedemann, J{\"o}rg and
                 Zampieri, Marcos},
    booktitle = {Proceedings of the 12th Workshop on {NLP} for Similar Languages, Varieties and Dialects},
    month     = jan,
    year      = {2025},
    address   = {Abu Dhabi, UAE},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2025.vardial-1.5/},
    pages     = {39--67},
    abstract  = {NLP models trained on standardized language data often struggle with variations. We assess various Large Language Models (LLMs) for transliteration and dialectal normalization. Tuning open-source LLMs with as little as 10,000 parallel examples using LoRA can achieve results comparable to or better than closed-source LLMs. We perform dialectal normalization experiments for twelve South Asian languages and dialectal translation experiments for six language continua worldwide. The dialectal normalization task can also be a preliminary step for the downstream dialectal translation task. Among the six languages used in dialectal translation, our approach enables Italian and Swiss German to surpass the baseline model by 21.5 and 25.8 BLEU points, respectively.},
}
Markdown (Informal)
[Large Language Models as a Normalizer for Transliteration and Dialectal Translation](https://aclanthology.org/2025.vardial-1.5/) (Alam & Anastasopoulos, VarDial 2025)
ACL