% Journal article in TACL, volume 10 (2022). The abstract writes the formal languages in LaTeX math mode.
@article{lan-etal-2022-minimum,
  title     = {Minimum Description Length Recurrent Neural Networks},
  author    = {Lan, Nur and
               Geyer, Michal and
               Chemla, Emmanuel and
               Katzir, Roni},
  editor    = {Roark, Brian and
               Nenkova, Ani},
  journal   = {Transactions of the Association for Computational Linguistics},
  volume    = {10},
  year      = {2022},
  address   = {Cambridge, MA},
  publisher = {MIT Press},
  url       = {https://aclanthology.org/2022.tacl-1.45/},
  doi       = {10.1162/tacl_a_00489},
  pages     = {785--799},
  abstract  = {We train neural networks to optimize a Minimum Description Length score, that is, to balance between the complexity of the network and its accuracy at a task. We show that networks optimizing this objective function master tasks involving memory challenges and go beyond context-free languages. These learners master languages such as $a^n b^n$, $a^n b^n c^n$, $a^n b^{2n}$, $a^n b^m c^{n+m}$, and they perform addition. Moreover, they often do so with 100{\%} accuracy. The networks are small, and their inner workings are transparent. We thus provide formal proofs that their perfect accuracy holds not only on a given test set, but for any input sequence. To our knowledge, no other connectionist model has been shown to capture the underlying grammars for these languages in full generality.},
}
% Markdown (Informal)
% [Minimum Description Length Recurrent Neural Networks](https://aclanthology.org/2022.tacl-1.45/) (Lan et al., TACL 2022)
% ACL