@inproceedings{khakhmovich-etal-2020-cross,
title = "Cross-lingual Named Entity List Search via Transliteration",
author = "Khakhmovich, Aleksandr and
Pavlova, Svetlana and
Kirillova, Kira and
Arefyev, Nikolay and
Savilova, Ekaterina",
booktitle = "Proceedings of the 12th Language Resources and Evaluation Conference",
month = may,
year = "2020",
address = "Marseille, France",
publisher = "European Language Resources Association",
url = "https://aclanthology.org/2020.lrec-1.524",
pages = "4247--4255",
abstract = "Out-of-vocabulary words are still a challenge in cross-lingual Natural Language Processing tasks, for which transliteration from source to target language or script is one of the solutions. In this study, we collect a personal name dataset in 445 Wikidata languages (37 scripts), train Transformer-based multilingual transliteration models on 6 high- and 4 less-resourced languages, compare them with bilingual models from (Merhav and Ash, 2018) and determine that multilingual models perform better for less-resourced languages. We discover that intrinsic evaluation, i.e comparison to a single gold standard, might not be appropriate in the task of transliteration due to its high variability. For this reason, we propose using extrinsic evaluation of transliteration via the cross-lingual named entity list search task (e.g. personal name search in contacts list). Our code and datasets are publicly available online.",
language = "English",
ISBN = "979-10-95546-34-4",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="khakhmovich-etal-2020-cross">
<titleInfo>
<title>Cross-lingual Named Entity List Search via Transliteration</title>
</titleInfo>
<name type="personal">
<namePart type="given">Aleksandr</namePart>
<namePart type="family">Khakhmovich</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Svetlana</namePart>
<namePart type="family">Pavlova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kira</namePart>
<namePart type="family">Kirillova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nikolay</namePart>
<namePart type="family">Arefyev</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ekaterina</namePart>
<namePart type="family">Savilova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-may</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<language>
<languageTerm type="text">English</languageTerm>
<languageTerm type="code" authority="iso639-2b">eng</languageTerm>
</language>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 12th Language Resources and Evaluation Conference</title>
</titleInfo>
<originInfo>
<publisher>European Language Resources Association</publisher>
<place>
<placeTerm type="text">Marseille, France</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-10-95546-34-4</identifier>
</relatedItem>
<abstract>Out-of-vocabulary words are still a challenge in cross-lingual Natural Language Processing tasks, for which transliteration from source to target language or script is one of the solutions. In this study, we collect a personal name dataset in 445 Wikidata languages (37 scripts), train Transformer-based multilingual transliteration models on 6 high- and 4 less-resourced languages, compare them with bilingual models from (Merhav and Ash, 2018) and determine that multilingual models perform better for less-resourced languages. We discover that intrinsic evaluation, i.e comparison to a single gold standard, might not be appropriate in the task of transliteration due to its high variability. For this reason, we propose using extrinsic evaluation of transliteration via the cross-lingual named entity list search task (e.g. personal name search in contacts list). Our code and datasets are publicly available online.</abstract>
<identifier type="citekey">khakhmovich-etal-2020-cross</identifier>
<location>
<url>https://aclanthology.org/2020.lrec-1.524</url>
</location>
<part>
<date>2020-may</date>
<extent unit="page">
<start>4247</start>
<end>4255</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Cross-lingual Named Entity List Search via Transliteration
%A Khakhmovich, Aleksandr
%A Pavlova, Svetlana
%A Kirillova, Kira
%A Arefyev, Nikolay
%A Savilova, Ekaterina
%S Proceedings of the 12th Language Resources and Evaluation Conference
%D 2020
%8 may
%I European Language Resources Association
%C Marseille, France
%@ 979-10-95546-34-4
%G English
%F khakhmovich-etal-2020-cross
%X Out-of-vocabulary words are still a challenge in cross-lingual Natural Language Processing tasks, for which transliteration from source to target language or script is one of the solutions. In this study, we collect a personal name dataset in 445 Wikidata languages (37 scripts), train Transformer-based multilingual transliteration models on 6 high- and 4 less-resourced languages, compare them with bilingual models from (Merhav and Ash, 2018) and determine that multilingual models perform better for less-resourced languages. We discover that intrinsic evaluation, i.e comparison to a single gold standard, might not be appropriate in the task of transliteration due to its high variability. For this reason, we propose using extrinsic evaluation of transliteration via the cross-lingual named entity list search task (e.g. personal name search in contacts list). Our code and datasets are publicly available online.
%U https://aclanthology.org/2020.lrec-1.524
%P 4247-4255
Markdown (Informal)
[Cross-lingual Named Entity List Search via Transliteration](https://aclanthology.org/2020.lrec-1.524) (Khakhmovich et al., LREC 2020)
ACL
- Aleksandr Khakhmovich, Svetlana Pavlova, Kira Kirillova, Nikolay Arefyev, and Ekaterina Savilova. 2020. Cross-lingual Named Entity List Search via Transliteration. In Proceedings of the 12th Language Resources and Evaluation Conference, pages 4247–4255, Marseille, France. European Language Resources Association.