@inproceedings{goodman-etal-2020-teaforn,
title = "{T}ea{F}or{N}: Teacher-Forcing with N-grams",
author = "Goodman, Sebastian and
Ding, Nan and
Soricut, Radu",
editor = "Webber, Bonnie and
Cohn, Trevor and
He, Yulan and
Liu, Yang",
booktitle = "Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP)",
month = nov,
year = "2020",
address = "Online",
publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2020.emnlp-main.702/",
doi = "10.18653/v1/2020.emnlp-main.702",
pages = "8704--8717",
abstract = "Sequence generation models trained with teacher-forcing suffer from issues related to exposure bias and lack of differentiability across timesteps. Our proposed method, Teacher-Forcing with N-grams (TeaForN), addresses both these problems directly, through the use of a stack of N decoders trained to decode along a secondary time axis that allows model-parameter updates based on N prediction steps. TeaForN can be used with a wide class of decoder architectures and requires minimal modifications from a standard teacher-forcing setup. Empirically, we show that TeaForN boosts generation quality on one Machine Translation benchmark, WMT 2014 English-French, and two News Summarization benchmarks, CNN/Dailymail and Gigaword."
}
Markdown (Informal):
[TeaForN: Teacher-Forcing with N-grams](https://aclanthology.org/2020.emnlp-main.702/) (Goodman et al., EMNLP 2020)

ACL:
Sebastian Goodman, Nan Ding, and Radu Soricut. 2020. TeaForN: Teacher-Forcing with N-grams. In Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP), pages 8704–8717, Online. Association for Computational Linguistics.
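The abstract describes training with a stack of N decoders that decode along a secondary time axis, so that each parameter update reflects N prediction steps rather than one. The sketch below is an illustrative reading of that idea, not the authors' implementation: it reuses a single GRU decoder for all N levels (the paper uses a stack of N decoders in a full seq2seq setup), and feeds each level the expected embedding of the previous level's soft predictions. All names here (`TeaForNDecoder`, `n_steps`) are hypothetical.

```python
import torch
import torch.nn as nn
import torch.nn.functional as F

class TeaForNDecoder(nn.Module):
    """Minimal sketch of an N-step teacher-forcing loss in the spirit of TeaForN."""

    def __init__(self, vocab_size, hidden_size, n_steps=2):
        super().__init__()
        self.embed = nn.Embedding(vocab_size, hidden_size)
        self.rnn = nn.GRU(hidden_size, hidden_size, batch_first=True)
        self.out = nn.Linear(hidden_size, vocab_size)
        self.n_steps = n_steps  # N in "Teacher-Forcing with N-grams"

    def forward(self, gold):
        # gold: (batch, T) token ids. Level 1 is standard teacher forcing:
        # inputs are gold prefixes, targets are the next gold tokens.
        inputs = self.embed(gold[:, :-1])            # (B, T-1, H)
        loss = 0.0
        for n in range(self.n_steps):
            hidden, _ = self.rnn(inputs)             # run one decoding level
            logits = self.out(hidden)                # predict tokens n+1 ahead
            targets = gold[:, n + 1:]
            loss = loss + F.cross_entropy(
                logits.reshape(-1, logits.size(-1)),
                targets.reshape(-1),
            )
            # Secondary time axis: the next level consumes the expected
            # embedding of this level's predictions instead of gold tokens,
            # dropping the last position so targets stay one step further ahead.
            probs = logits.softmax(dim=-1)           # (B, T-1-n, V)
            inputs = (probs @ self.embed.weight)[:, :-1]
        return loss / self.n_steps
```

A quick usage check under these assumptions: the loss at level 1 is the usual cross-entropy, and each further level is differentiable through the soft predictions, which is how gradients can flow across N prediction steps.

```python
dec = TeaForNDecoder(vocab_size=1000, hidden_size=64, n_steps=3)
gold = torch.randint(0, 1000, (8, 20))   # a batch of 8 gold sequences
loss = dec(gold)
loss.backward()                          # updates reflect 3 prediction steps
```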