% LREC-COLING 2024 main-conference paper by Scalvini and Debess (ACL Anthology ID 2024.lrec-main.576).
@inproceedings{scalvini-debess-2024-evaluating,
  title     = {Evaluating the Potential of Language-family-specific Generative Models for Low-resource Data Augmentation: A {Faroese} Case Study},
  author    = {Scalvini, Barbara and
               Debess, Iben Nyholm},
  editor    = {Calzolari, Nicoletta and
               Kan, Min-Yen and
               Hoste, Veronique and
               Lenci, Alessandro and
               Sakti, Sakriani and
               Xue, Nianwen},
  booktitle = {Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation ({LREC-COLING} 2024)},
  month     = may,
  year      = {2024},
  address   = {Torino, Italia},
  publisher = {ELRA and ICCL},
  url       = {https://aclanthology.org/2024.lrec-main.576/},
  pages     = {6496--6503},
  abstract  = {We investigate GPT-SW3, a generative language model for the Nordic languages, to assess its understanding of the low-resourced Faroese language. Our aim is to demonstrate the advantages of using language-family-specific generative models to augment data for related languages with fewer resources. We evaluate GPT-SW3 by prompting it for Faroese to English translation in a zero, one, and few-shot setting. We assess such translations with an ensemble score consisting of an arithmetic average between the BLEU and a semantic similarity score (SBERT). Moreover, we challenge the model's Faroese language understanding capabilities on a small dataset of curated Faroese trick sentences. There, we make a qualitative comparison of the model's performance with respect to Open AI's GPT-3.5 and GPT-4, demonstrating the advantages of using a language-family-specific generative model for navigating non-trivial scenarios. We evaluate the pipeline thus created and use it, as a proof of concept, to create an automatically annotated Faroese semantic textual similarity (STS) dataset.},
}
Markdown (Informal)
[Evaluating the Potential of Language-family-specific Generative Models for Low-resource Data Augmentation: A Faroese Case Study](https://aclanthology.org/2024.lrec-main.576/) (Scalvini & Debess, LREC-COLING 2024)
ACL