@inproceedings{wang-steinert-threlkeld-2023-evaluating,
title = "Evaluating Transformer`s Ability to Learn Mildly Context-Sensitive Languages",
author = "Wang, Shunjie and
Steinert-Threlkeld, Shane",
editor = "Belinkov, Yonatan and
Hao, Sophie and
Jumelet, Jaap and
Kim, Najoung and
McCarthy, Arya and
Mohebbi, Hosein",
booktitle = "Proceedings of the 6th BlackboxNLP Workshop: Analyzing and Interpreting Neural Networks for NLP",
month = dec,
year = "2023",
address = "Singapore",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/jlcl-multiple-ingestion/2023.blackboxnlp-1.21/",
doi = "10.18653/v1/2023.blackboxnlp-1.21",
pages = "271--283",
abstract = "Despite the fact that Transformers perform well in NLP tasks, recent studies suggest that self-attention is theoretically limited in learning even some regular and context-free languages. These findings motivated us to think about their implications in modeling natural language, which is hypothesized to be mildly context-sensitive. We test the Transformer`s ability to learn mildly context-sensitive languages of varying complexities, and find that they generalize well to unseen in-distribution data, but their ability to extrapolate to longer strings is worse than that of LSTMs. Our analyses show that the learned self-attention patterns and representations modeled dependency relations and demonstrated counting behavior, which may have helped the models solve the languages."
}
[Evaluating Transformer’s Ability to Learn Mildly Context-Sensitive Languages](https://preview.aclanthology.org/jlcl-multiple-ingestion/2023.blackboxnlp-1.21/) (Wang & Steinert-Threlkeld, BlackboxNLP 2023)