@inproceedings{lendvai-wick-2022-finetuning,
  title     = {Finetuning {Latin} {BERT} for Word Sense Disambiguation on the {Thesaurus Linguae Latinae}},
  author    = {Lendvai, Piroska and
               Wick, Claudia},
  editor    = {Zock, Michael and
               Chersoni, Emmanuele and
               Hsu, Yu-Yin and
               Santus, Enrico},
  booktitle = {Proceedings of the Workshop on Cognitive Aspects of the Lexicon},
  month     = nov,
  year      = {2022},
  address   = {Taipei, Taiwan},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2022.cogalex-1.5/},
  doi       = {10.18653/v1/2022.cogalex-1.5},
  pages     = {37--41},
  abstract  = {The Thesaurus Linguae Latinae (TLL) is a comprehensive monolingual dictionary that records contextualized meanings and usages of Latin words in antique sources at an unprecedented scale. We created a new dataset based on a subset of sense representations in the TLL, with which we finetuned the Latin-BERT neural language model (Bamman and Burns, 2020) on a supervised Word Sense Disambiguation task. We observe that the contextualized BERT representations finetuned on TLL data score better than static embeddings used in a bidirectional LSTM classifier on the same dataset, and that our per-lemma BERT models achieve higher and more robust performance than reported by Bamman and Burns (2020) based on data from a bilingual Latin dictionary. We demonstrate the differences in sense organizational principles between these two lexical resources, and report about our dataset construction and improved evaluation methodology.},
}
@comment{
Markdown (Informal)
[Finetuning Latin BERT for Word Sense Disambiguation on the Thesaurus Linguae Latinae](https://aclanthology.org/2022.cogalex-1.5/) (Lendvai & Wick, CogALex 2022)
ACL
}