% WAT 2024 workshop paper on machine translation of Kadodi (ACL Anthology ID 2024.wat-1.3).
% Proper nouns in the title are braced as whole words so sentence-casing styles keep their capitals.
% The url field uses the canonical Anthology address rather than a temporary preview-build link.
@inproceedings{dabre-etal-2024-machine,
    title = "Machine Translation Of {Marathi} Dialects: A Case Study Of {Kadodi}",
    author = "Dabre, Raj and
      Dabre, Mary and
      Pereira, Teresa",
    editor = "Nakazawa, Toshiaki and
      Goto, Isao",
    booktitle = "Proceedings of the Eleventh Workshop on Asian Translation (WAT 2024)",
    month = nov,
    year = "2024",
    address = "Miami, Florida, USA",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2024.wat-1.3/",
    doi = "10.18653/v1/2024.wat-1.3",
    pages = "36--44",
    abstract = "While Marathi is considered as a low- to middle-resource language, its 42 dialects have mostly been ignored, mainly because these dialects are mostly spoken and rarely written, making them extremely low-resource. In this paper we explore the machine translation (MT) of Kadodi, also known as Samvedi, which is a dialect of Marathi. We first discuss the Kadodi dialect, highlighting the differences from the standard dialect, followed by presenting a manually curated dataset called Suman consisting of a trilingual Kadodi-Marathi-English dictionary of 949 entries and 942 simple sentence triples and idioms created by native Kadodi speakers. We then evaluate 3 existing large language models (LLMs) supporting Marathi, namely Gemma-2-9b, Sarvam-2b-0.5 and LLaMa-3.1-8b, in few-shot prompting style to determine their efficacy for translation involving Kadodi. We observe that these models exhibit rather lackluster performance in handling Kadodi even for simple sentences, indicating a dire situation."
}
Markdown (Informal)
[Machine Translation Of Marathi Dialects: A Case Study Of Kadodi](https://aclanthology.org/2024.wat-1.3/) (Dabre et al., WAT 2024)
ACL