% DaN+ corpus paper (Plank, Jensen, van der Goot), COLING 2020 main conference.
% Abstract spacing/hyphenation repaired after PDF text extraction; url is the canonical Anthology permalink.
@inproceedings{plank-etal-2020-dan,
  title     = {{DaN+}: {Danish} Nested Named Entities and Lexical Normalization},
  author    = {Plank, Barbara and
               Jensen, Kristian N{\o}rgaard and
               van der Goot, Rob},
  editor    = {Scott, Donia and
               Bel, Nuria and
               Zong, Chengqing},
  booktitle = {Proceedings of the 28th International Conference on Computational Linguistics},
  month     = dec,
  year      = {2020},
  address   = {Barcelona, Spain (Online)},
  publisher = {International Committee on Computational Linguistics},
  url       = {https://aclanthology.org/2020.coling-main.583/},
  doi       = {10.18653/v1/2020.coling-main.583},
  pages     = {6649--6662},
  abstract  = {This paper introduces DAN+, a new multi-domain corpus and annotation guidelines for Danish nested named entities (NEs) and lexical normalization to support research on cross-lingual cross-domain learning for a less-resourced language. We empirically assess three strategies to model the two-layer Named Entity Recognition (NER) task. We compare transfer capabilities from German versus in-language annotation from scratch. We examine language-specific versus multilingual BERT, and study the effect of lexical normalization on NER. Our results show that 1) the most robust strategy is multi-task learning which is rivaled by multi-label decoding, 2) BERT-based NER models are sensitive to domain shifts, and 3) in-language BERT and lexical normalization are the most beneficial on the least canonical data. Our results also show that an out-of-domain setup remains challenging, while performance on news plateaus quickly. This highlights the importance of cross-domain evaluation of cross-lingual transfer.},
}
Markdown (Informal)
[DaN+: Danish Nested Named Entities and Lexical Normalization](https://aclanthology.org/2020.coling-main.583/) (Plank et al., COLING 2020)
ACL
- Barbara Plank, Kristian Nørgaard Jensen, and Rob van der Goot. 2020. DaN+: Danish Nested Named Entities and Lexical Normalization. In Proceedings of the 28th International Conference on Computational Linguistics, pages 6649–6662, Barcelona, Spain (Online). International Committee on Computational Linguistics.