@inproceedings{parshakova-etal-2019-global,
title = "Global Autoregressive Models for Data-Efficient Sequence Learning",
author = "Parshakova, Tetiana and
Andreoli, Jean-Marc and
Dymetman, Marc",
editor = "Bansal, Mohit and
Villavicencio, Aline",
booktitle = "Proceedings of the 23rd Conference on Computational Natural Language Learning (CoNLL)",
month = nov,
year = "2019",
address = "Hong Kong, China",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/fix-sig-urls/K19-1084/",
doi = "10.18653/v1/K19-1084",
pages = "900--909",
abstract = "Standard autoregressive seq2seq models are easily trained by max-likelihood, but tend to show poor results under small-data conditions. We introduce a class of seq2seq models, GAMs (Global Autoregressive Models), which combine an autoregressive component with a log-linear component, allowing the use of global \textit{a priori} features to compensate for lack of data. We train these models in two steps. In the first step, we obtain an \textit{unnormalized} GAM that maximizes the likelihood of the data, but is improper for fast inference or evaluation. In the second step, we use this GAM to train (by distillation) a second autoregressive model that approximates the \textit{normalized} distribution associated with the GAM, and can be used for fast inference and evaluation. Our experiments focus on language modelling under synthetic conditions and show a strong perplexity reduction of using the second autoregressive model over the standard one."
}
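
The abstract describes a GAM as an autoregressive component reweighted by a log-linear component over global features. The toy Python sketch below is not from the paper; all function names, the stand-in autoregressive model, and the example feature set are illustrative assumptions. It only shows the unnormalized form log P_lambda(x) = log r(x) + lambda . phi(x) that the first training step works with (normalization and the distillation step are omitted).

```python
import math

# Minimal sketch (assumed, not the authors' code) of a Global Autoregressive Model (GAM):
# an autoregressive base model r(x) reweighted by a log-linear factor over global
# features phi(x), giving an unnormalized score proportional to r(x) * exp(lambda . phi(x)).

def gam_unnormalized_logprob(sequence, ar_logprob, features, lambdas):
    """Unnormalized GAM log-score: log r(x) + lambda . phi(x)."""
    return ar_logprob(sequence) + sum(l * f for l, f in zip(lambdas, features(sequence)))

# Hypothetical toy components, for illustration only.
def ar_logprob(seq):
    # Stand-in for the autoregressive component's log-likelihood of the sequence.
    return -0.5 * len(seq)

def features(seq):
    # Example global a priori features: sequence length and number of distinct tokens.
    return [len(seq), len(set(seq))]

lambdas = [0.1, -0.2]
score = gam_unnormalized_logprob(list("abba"), ar_logprob, features, lambdas)
print(math.exp(score))  # unnormalized probability mass assigned to the sequence
```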