@inproceedings{edman-etal-2021-importance,
title = "The Importance of Context in Very Low Resource Language Modeling",
author = "Edman, Lukas and
Toral, Antonio and
van Noord, Gertjan",
editor = "Bandyopadhyay, Sivaji and
Devi, Sobha Lalitha and
Bhattacharyya, Pushpak",
booktitle = "Proceedings of the 18th International Conference on Natural Language Processing (ICON)",
month = dec,
year = "2021",
address = "National Institute of Technology Silchar, Silchar, India",
publisher = "NLP Association of India (NLPAI)",
    url = "https://aclanthology.org/2021.icon-main.12/",
pages = "86--92",
abstract = "This paper investigates very low resource language model pretraining, when less than 100 thousand sentences are available. We find that, in very low-resource scenarios, statistical n-gram language models outperform state-of-the-art neural models. Our experiments show that this is mainly due to the focus of the former on a local context. As such, we introduce three methods to improve a neural model{'}s performance in the low-resource setting, finding that limiting the model{'}s self-attention is the most effective one, improving on downstream tasks such as NLI and POS tagging by up to 5{\%} for the languages we test on: English, Hindi, and Turkish."
}
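The abstract's most effective method, limiting the model's self-attention, is commonly realized as a local (windowed) attention mask. The sketch below is a minimal illustration of that general idea under stated assumptions, not the authors' implementation: the function name `local_attention`, the `window` size, and the tensor shapes are all illustrative.

```python
# Minimal sketch of windowed ("limited") self-attention, assuming a
# simple banded mask; not the method from Edman et al. (2021) verbatim.
import torch
import torch.nn.functional as F

def local_attention(q, k, v, window: int = 4):
    """Scaled dot-product attention where each position may only
    attend to tokens within `window` steps of itself."""
    seq_len = q.size(-2)
    scores = q @ k.transpose(-2, -1) / q.size(-1) ** 0.5
    # Band mask: True where |i - j| <= window, i.e. inside the local context.
    idx = torch.arange(seq_len)
    band = (idx[None, :] - idx[:, None]).abs() <= window
    # Positions outside the band get -inf, so softmax zeroes them out.
    scores = scores.masked_fill(~band, float("-inf"))
    return F.softmax(scores, dim=-1) @ v

# Toy usage: batch of 1, sequence of 10, hidden dimension 8.
q = k = v = torch.randn(1, 10, 8)
out = local_attention(q, k, v, window=2)
print(out.shape)  # torch.Size([1, 10, 8])
```

Masking positions outside the window before the softmax means each token only mixes information from its 2 × window + 1 nearest neighbors, mimicking the local context an n-gram model relies on.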
Markdown (Informal)
[The Importance of Context in Very Low Resource Language Modeling](https://aclanthology.org/2021.icon-main.12/) (Edman et al., ICON 2021)