% Labat & Lefever, RANLP 2019 conference paper (pp. 602--610).
% The url field uses the canonical ACL Anthology address for record R19-1071
% rather than a temporary preview-build host; the DOI remains the primary identifier.
@inproceedings{labat-lefever-2019-classification,
  title     = {A Classification-Based Approach to Cognate Detection Combining Orthographic and Semantic Similarity Information},
  author    = {Labat, Sofie and
               Lefever, Els},
  editor    = {Mitkov, Ruslan and
               Angelova, Galia},
  booktitle = {Proceedings of the International Conference on Recent Advances in Natural Language Processing ({RANLP} 2019)},
  month     = sep,
  year      = {2019},
  address   = {Varna, Bulgaria},
  publisher = {INCOMA Ltd.},
  url       = {https://aclanthology.org/R19-1071/},
  doi       = {10.26615/978-954-452-056-4_071},
  pages     = {602--610},
  abstract  = {This paper presents proof-of-concept experiments for combining orthographic and semantic information to distinguish cognates from non-cognates. To this end, a context-independent gold standard is developed by manually labelling English-Dutch pairs of cognates and false friends in bilingual term lists. These annotated cognate pairs are then used to train and evaluate a supervised binary classification system for the automatic detection of cognates. Two types of information sources are incorporated in the classifier: fifteen string similarity metrics capture form similarity between source and target words, while word embeddings model semantic similarity between the words. The experimental results show that even though the system already achieves good results by only incorporating orthographic information, the performance further improves by including semantic information in the form of embeddings.},
}
Markdown (Informal)
[A Classification-Based Approach to Cognate Detection Combining Orthographic and Semantic Similarity Information](https://aclanthology.org/R19-1071/) (Labat & Lefever, RANLP 2019)
ACL