@inproceedings{miranda-2024-allen,
title = "{A}llen Institute for {AI} @ {SIGTYP} 2024 Shared Task on Word Embedding Evaluation for Ancient and Historical Languages",
author = "Miranda, Lester James V.",
editor = "Hahn, Michael and
Sorokin, Alexey and
Kumar, Ritesh and
Shcherbakov, Andreas and
Otmakhova, Yulia and
Yang, Jinrui and
Serikov, Oleg and
Rani, Priya and
Ponti, Edoardo M. and
Murado{\u{g}}lu, Saliha and
Gao, Rena and
Cotterell, Ryan and
Vylomova, Ekaterina",
booktitle = "Proceedings of the 6th Workshop on Research in Computational Linguistic Typology and Multilingual NLP",
month = mar,
year = "2024",
address = "St. Julian's, Malta",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/fix-sig-urls/2024.sigtyp-1.18/",
pages = "151--159",
abstract = "In this paper, we describe Allen AI{'}s submission to the constrained track of the SIGTYP 2024 Shared Task. Using only the data provided by the organizers, we pretrained a transformer-based multilingual model, then finetuned it on the Universal Dependencies (UD) annotations of a given language for a downstream task. Our systems achieved decent performance on the test set, beating the baseline in most language-task pairs, yet struggles with subtoken tags in multiword expressions as seen in Coptic and Ancient Hebrew. On the validation set, we obtained {\ensuremath{\geq}}70{\%} F1- score on most language-task pairs. In addition, we also explored the cross-lingual capability of our trained models. This paper highlights our pretraining and finetuning process, and our findings from our internal evaluations."
}
Markdown (Informal)
[Allen Institute for AI @ SIGTYP 2024 Shared Task on Word Embedding Evaluation for Ancient and Historical Languages](https://preview.aclanthology.org/fix-sig-urls/2024.sigtyp-1.18/) (Miranda, SIGTYP 2024)
ACL