% Hohl & Shim, VarDial 2023 workshop paper (ACL Anthology ID 2023.vardial-1.21).
% Abstract whitespace repaired by hand: the export had dropped spaces between some words.
@inproceedings{hohl-shim-2023-vardial,
  title     = {{VarDial} in the Wild: Industrial Applications of {LID} Systems for Closely-Related Language Varieties},
  author    = {Hohl, Fritz and
               Shim, Soh-eun},
  editor    = {Scherrer, Yves and
               Jauhiainen, Tommi and
               Ljube{\v{s}}i{\'c}, Nikola and
               Nakov, Preslav and
               Tiedemann, J{\"o}rg and
               Zampieri, Marcos},
  booktitle = {Tenth Workshop on {NLP} for Similar Languages, Varieties and Dialects ({VarDial} 2023)},
  month     = may,
  year      = {2023},
  address   = {Dubrovnik, Croatia},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/fix-sig-urls/2023.vardial-1.21/},
  doi       = {10.18653/v1/2023.vardial-1.21},
  pages     = {213--221},
  abstract  = {This report describes first an industrial use case for identifying closely related languages, e.g. dialects, namely the detection of languages of movie subtitle documents. We then present a 2-stage architecture that is able to detect macrolanguages in the first stage and language variants in the second. Using our architecture, we participated in the DSL-TL Shared Task of the VarDial 2023 workshop. We describe the results of our experiments. In the first experiment we report an accuracy of 97.8{\%} on a set of 460 subtitle files. In our second experiment we used DSL-TL data and achieve a macroaverage F1 of 76{\%} for the binary task, and 54{\%} for the three-way task in the dev set. In the open track, we augment the data with named entities retrieved from Wikidata and achieve minor increases of about 1{\%} for both tracks.},
}
Markdown (Informal)
[VarDial in the Wild: Industrial Applications of LID Systems for Closely-Related Language Varieties](https://preview.aclanthology.org/fix-sig-urls/2023.vardial-1.21/) (Hohl & Shim, VarDial 2023)
ACL