% Conference paper in the ACL 2017 proceedings (Volume 1: Long Papers).
% The url field uses the canonical ACL Anthology address for paper P17-1031
% instead of a temporary preview-build host; the doi field is the persistent identifier.
@inproceedings{bollmann-etal-2017-learning,
  title     = {Learning attention for historical text normalization by learning to pronounce},
  author    = {Bollmann, Marcel and
               Bingel, Joachim and
               S{\o}gaard, Anders},
  editor    = {Barzilay, Regina and
               Kan, Min-Yen},
  booktitle = {Proceedings of the 55th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)},
  month     = jul,
  year      = {2017},
  address   = {Vancouver, Canada},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/P17-1031/},
  doi       = {10.18653/v1/P17-1031},
  pages     = {332--344},
  abstract  = {Automated processing of historical texts often relies on pre-normalization to modern word forms. Training encoder-decoder architectures to solve such problems typically requires a lot of training data, which is not available for the named task. We address this problem by using several novel encoder-decoder architectures, including a multi-task learning (MTL) architecture using a grapheme-to-phoneme dictionary as auxiliary data, pushing the state-of-the-art by an absolute 2{\%} increase in performance. We analyze the induced models across 44 different texts from Early New High German. Interestingly, we observe that, as previously conjectured, multi-task learning can learn to focus attention during decoding, in ways remarkably similar to recently proposed attention mechanisms. This, we believe, is an important step toward understanding how MTL works.},
}
Markdown (Informal)
[Learning attention for historical text normalization by learning to pronounce](https://aclanthology.org/P17-1031/) (Bollmann et al., ACL 2017)
ACL