% VarDial 2024 workshop paper (ACL Anthology ID 2024.vardial-1.11).
% The url field uses the canonical Anthology address rather than a preview-branch link.
% The dialect name in the title is wrapped in an extra brace pair so that
% sentence-casing styles do not lowercase it.
@inproceedings{ondrejova-suppa-2024-llms,
  title     = {Can {LLMs} Handle Low-Resource Dialects? {A} Case Study on Translation and Common Sense Reasoning in {{\v{S}}ari{\v{s}}}},
  author    = {Ondrejov{\'a}, Vikt{\'o}ria and
               {\v{S}}uppa, Marek},
  editor    = {Scherrer, Yves and
               Jauhiainen, Tommi and
               Ljube{\v{s}}i{\'c}, Nikola and
               Zampieri, Marcos and
               Nakov, Preslav and
               Tiedemann, J{\"o}rg},
  booktitle = {Proceedings of the Eleventh Workshop on NLP for Similar Languages, Varieties, and Dialects (VarDial 2024)},
  month     = jun,
  year      = {2024},
  address   = {Mexico City, Mexico},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2024.vardial-1.11/},
  doi       = {10.18653/v1/2024.vardial-1.11},
  pages     = {130--139},
  abstract  = {While Large Language Models (LLMs) have demonstrated considerable potential in advancing natural language processing in dialect-specific contexts, their effectiveness in these settings has yet to be thoroughly assessed. This study introduces a case study on {\v{S}}ari{\v{s}}, a dialect of Slovak, which is itself a language with fewer resources, focusing on Machine Translation and Common Sense Reasoning tasks. We employ LLMs in a zero-shot configuration and for data augmentation to refine Slovak-{\v{S}}ari{\v{s}} and {\v{S}}ari{\v{s}}-Slovak translation models. The accuracy of these models is then manually verified by native speakers. Additionally, we introduce {\v{S}}ari{\v{s}}COPA, a new dataset for causal common sense reasoning, which, alongside SlovakCOPA, serves to evaluate LLM's performance in a zero-shot framework. Our findings highlight LLM's capabilities in processing low-resource dialects and suggest a viable approach for initiating dialect-specific translation models in such contexts.},
}
Markdown (Informal)
[Can LLMs Handle Low-Resource Dialects? A Case Study on Translation and Common Sense Reasoning in Šariš](https://aclanthology.org/2024.vardial-1.11/) (Ondrejová & Šuppa, VarDial 2024)
ACL