% SemEval-2021 Task 1 (lexical complexity prediction) system description paper.
% The url field uses the canonical ACL Anthology address rather than a temporary preview build.
@inproceedings{gombert-bartsch-2021-tuda,
  title     = {{TUDA}-{CCL} at {SemEval}-2021 Task 1: Using Gradient-boosted Regression Tree Ensembles Trained on a Heterogeneous Feature Set for Predicting Lexical Complexity},
  author    = {Gombert, Sebastian and
               Bartsch, Sabine},
  editor    = {Palmer, Alexis and
               Schneider, Nathan and
               Schluter, Natalie and
               Emerson, Guy and
               Herbelot, Aurelie and
               Zhu, Xiaodan},
  booktitle = {Proceedings of the 15th International Workshop on Semantic Evaluation (SemEval-2021)},
  month     = aug,
  year      = {2021},
  address   = {Online},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2021.semeval-1.12/},
  doi       = {10.18653/v1/2021.semeval-1.12},
  pages     = {130--137},
  abstract  = {In this paper, we present our systems submitted to SemEval-2021 Task 1 on lexical complexity prediction. The aim of this shared task was to create systems able to predict the lexical complexity of word tokens and bigram multiword expressions within a given sentence context, a continuous value indicating the difficulty in understanding a respective utterance. Our approach relies on gradient boosted regression tree ensembles fitted using a heterogeneous feature set combining linguistic features, static and contextualized word embeddings, psycholinguistic norm lexica, WordNet, word- and character bigram frequencies and inclusion in wordlists to create a model able to assign a word or multiword expression a context-dependent complexity score. We can show that especially contextualised string embeddings can help with predicting lexical complexity.},
}
Markdown (Informal)
[TUDA-CCL at SemEval-2021 Task 1: Using Gradient-boosted Regression Tree Ensembles Trained on a Heterogeneous Feature Set for Predicting Lexical Complexity](https://aclanthology.org/2021.semeval-1.12/) (Gombert & Bartsch, SemEval 2021)
ACL