% Cambridge team's system description paper for SemEval-2021 Task 2
% (Multilingual and Cross-lingual Word-in-Context Disambiguation).
% The url field uses the main ACL Anthology address, not a preview deployment.
@inproceedings{yuan-strohmaier-2021-cambridge,
  title     = {{Cambridge} at {SemEval}-2021 Task 2: Neural {WiC}-Model with Data Augmentation and Exploration of Representation},
  author    = {Yuan, Zheng and
               Strohmaier, David},
  editor    = {Palmer, Alexis and
               Schneider, Nathan and
               Schluter, Natalie and
               Emerson, Guy and
               Herbelot, Aurelie and
               Zhu, Xiaodan},
  booktitle = {Proceedings of the 15th International Workshop on Semantic Evaluation (SemEval-2021)},
  month     = aug,
  year      = {2021},
  address   = {Online},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2021.semeval-1.96/},
  doi       = {10.18653/v1/2021.semeval-1.96},
  pages     = {730--737},
  abstract  = {This paper describes the system of the Cambridge team submitted to the SemEval-2021 shared task on Multilingual and Cross-lingual Word-in-Context Disambiguation. Building on top of a pre-trained masked language model, our system is first pre-trained on out-of-domain data, and then fine-tuned on in-domain data. We demonstrate the effectiveness of the proposed two-step training strategy and the benefits of data augmentation from both existing examples and new resources. We further investigate different representations and show that the addition of distance-based features is helpful in the word-in-context disambiguation task. Our system yields highly competitive results in the cross-lingual track without training on any cross-lingual data; and achieves state-of-the-art results in the multilingual track, ranking first in two languages (Arabic and Russian) and second in French out of 171 submitted systems.},
}
Markdown (Informal)
[Cambridge at SemEval-2021 Task 2: Neural WiC-Model with Data Augmentation and Exploration of Representation](https://aclanthology.org/2021.semeval-1.96/) (Yuan & Strohmaier, SemEval 2021)
ACL