@inproceedings{fourrier-sagot-2020-methodological,
title = "Methodological Aspects of Developing and Managing an Etymological Lexical Resource: Introducing {E}tym{DB}-2.0",
author = "Fourrier, Cl{\'e}mentine and
Sagot, Beno{\^i}t",
editor = "Calzolari, Nicoletta and
B{\'e}chet, Fr{\'e}d{\'e}ric and
Blache, Philippe and
Choukri, Khalid and
Cieri, Christopher and
Declerck, Thierry and
Goggi, Sara and
Isahara, Hitoshi and
Maegaard, Bente and
Mariani, Joseph and
Mazo, H{\'e}l{\`e}ne and
Moreno, Asuncion and
Odijk, Jan and
Piperidis, Stelios",
booktitle = "Proceedings of the Twelfth Language Resources and Evaluation Conference",
month = may,
year = "2020",
address = "Marseille, France",
publisher = "European Language Resources Association",
url = "https://preview.aclanthology.org/fix-sig-urls/2020.lrec-1.392/",
pages = "3207--3216",
language = "eng",
ISBN = "979-10-95546-34-4",
abstract = "Diachronic lexical information is not only important in the field of historical linguistics, but is also increasingly used in NLP, most recently for machine translation of low resource languages. Therefore, there is a need for fine-grained, large-coverage and accurate etymological lexical resources. In this paper, we propose a set of guidelines to generate such resources, for each step of the life-cycle of an etymological lexicon: creation, update, evaluation, dissemination, and exploitation. To illustrate the guidelines, we introduce EtymDB 2.0, an etymological database automatically generated from the Wiktionary, which contains 1.8 million lexemes, linked by more than 700,000 fine-grained etymological relations, across 2,536 living and dead languages. We also introduce use cases for which EtymDB 2.0 could represent a key resource, such as phylogenetic tree generation, low resource machine translation or medieval languages study."
}