@comment{JHU system description paper for the MADAR shared task, WANLP 2019 (ACL Anthology ID W19-4634). The url field holds the permanent Anthology landing page, not a staging preview link.}
@inproceedings{lippincott-etal-2019-jhu,
  title     = {{JHU} System Description for the {MADAR} {Arabic} Dialect Identification Shared Task},
  author    = {Lippincott, Tom and
               Shapiro, Pamela and
               Duh, Kevin and
               McNamee, Paul},
  editor    = {El-Hajj, Wassim and
               Belguith, Lamia Hadrich and
               Bougares, Fethi and
               Magdy, Walid and
               Zitouni, Imed and
               Tomeh, Nadi and
               El-Haj, Mahmoud and
               Zaghouani, Wajdi},
  booktitle = {Proceedings of the Fourth Arabic Natural Language Processing Workshop},
  month     = aug,
  year      = {2019},
  address   = {Florence, Italy},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/W19-4634/},
  doi       = {10.18653/v1/W19-4634},
  pages     = {264--268},
  abstract  = {Our submission to the MADAR shared task on Arabic dialect identification employed a language modeling technique called Prediction by Partial Matching, an ensemble of neural architectures, and sources of additional data for training word embeddings and auxiliary language models. We found several of these techniques provided small boosts in performance, though a simple character-level language model was a strong baseline, and a lower-order LM achieved best performance on Subtask 2. Interestingly, word embeddings provided no consistent benefit, and ensembling struggled to outperform the best component submodel. This suggests the variety of architectures are learning redundant information, and future work may focus on encouraging decorrelated learning.},
}
Markdown (Informal)
[JHU System Description for the MADAR Arabic Dialect Identification Shared Task](https://aclanthology.org/W19-4634/) (Lippincott et al., WANLP 2019)
ACL