@inproceedings{koloski-etal-2022-thin,
title = "Out of Thin Air: Is Zero-Shot Cross-Lingual Keyword Detection Better Than Unsupervised?",
author = "Koloski, Boshko and
Pollak, Senja and
{\v{S}}krlj, Bla{\v{z}} and
Martinc, Matej",
editor = "Calzolari, Nicoletta and
B{\'e}chet, Fr{\'e}d{\'e}ric and
Blache, Philippe and
Choukri, Khalid and
Cieri, Christopher and
Declerck, Thierry and
Goggi, Sara and
Isahara, Hitoshi and
Maegaard, Bente and
Mariani, Joseph and
Mazo, H{\'e}l{\`e}ne and
Odijk, Jan and
Piperidis, Stelios",
booktitle = "Proceedings of the Thirteenth Language Resources and Evaluation Conference",
month = jun,
year = "2022",
address = "Marseille, France",
publisher = "European Language Resources Association",
url = "https://preview.aclanthology.org/jlcl-multiple-ingestion/2022.lrec-1.42/",
pages = "400--409",
abstract = "Keyword extraction is the task of retrieving words that are essential to the content of a given document. Researchers proposed various approaches to tackle this problem. At the top-most level, approaches are divided into ones that require training - supervised and ones that do not - unsupervised. In this study, we are interested in settings, where for a language under investigation, no training data is available. More specifically, we explore whether pretrained multilingual language models can be employed for zero-shot cross-lingual keyword extraction on low-resource languages with limited or no available labeled training data and whether they outperform state-of-the-art unsupervised keyword extractors. The comparison is conducted on six news article datasets covering two high-resource languages, English and Russian, and four low-resource languages, Croatian, Estonian, Latvian, and Slovenian. We find that the pretrained models fine-tuned on a multilingual corpus covering languages that do not appear in the test set (i.e. in a zero-shot setting), consistently outscore unsupervised models in all six languages."
}
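A minimal sketch of the two settings the abstract contrasts, assuming Python with the `yake` and `transformers` packages. This is not the authors' released code: YAKE stands in as one representative unsupervised extractor, and the fine-tuned checkpoint name is a hypothetical placeholder for a multilingual model trained for keyword tagging on languages other than the target one.

```python
# Sketch: unsupervised vs. zero-shot cross-lingual keyword detection.
# Assumptions: YAKE as the unsupervised baseline; keyword detection cast
# as token classification on top of a multilingual transformer.
import yake
from transformers import pipeline

text = "Besedilo slovenskega novinarskega clanka ..."  # target-language document

# Unsupervised: YAKE needs no training data at all, only the raw text.
unsup = yake.KeywordExtractor(lan="sl", n=2, top=10)
print([kw for kw, score in unsup.extract_keywords(text)])

# Zero-shot cross-lingual: a model fine-tuned for keyword tagging on
# *other* languages is applied directly to the unseen target language.
tagger = pipeline(
    "token-classification",
    model="my-org/xlmr-keyword-tagger",  # hypothetical checkpoint, not from the paper
    aggregation_strategy="simple",       # merge subword pieces into keyword spans
)
print([span["word"] for span in tagger(text)])
```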