@inproceedings{mihaylova-etal-2020-understanding,
title = "Understanding the Mechanics of {SPIGOT}: Surrogate Gradients for Latent Structure Learning",
author = "Mihaylova, Tsvetomila and
Niculae, Vlad and
Martins, Andr{\'e} F. T.",
editor = "Webber, Bonnie and
Cohn, Trevor and
He, Yulan and
Liu, Yang",
booktitle = "Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP)",
month = nov,
year = "2020",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/jlcl-multiple-ingestion/2020.emnlp-main.171/",
doi = "10.18653/v1/2020.emnlp-main.171",
pages = "2186--2202",
abstract = "Latent structure models are a powerful tool for modeling language data: they can mitigate the error propagation and annotation bottleneck in pipeline systems, while simultaneously uncovering linguistic insights about the data. One challenge with end-to-end training of these models is the argmax operation, which has null gradient. In this paper, we focus on surrogate gradients, a popular strategy to deal with this problem. We explore latent structure learning through the angle of pulling back the downstream learning objective. In this paradigm, we discover a principled motivation for both the straight-through estimator (STE) as well as the recently-proposed SPIGOT {--} a variant of STE for structured models. Our perspective leads to new algorithms in the same family. We empirically compare the known and the novel pulled-back estimators against the popular alternatives, yielding new insight for practitioners and revealing intriguing failure cases."
}
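To connect the abstract's terminology to code, here is a minimal, illustrative PyTorch sketch of the straight-through estimator (STE) it mentions: the forward pass takes the discrete argmax (whose true gradient is null) and the backward pass treats that operation as the identity. This is an assumed sketch for orientation only, not the paper's implementation; as the abstract notes, SPIGOT is a variant of this idea for structured models.

```python
import torch
import torch.nn.functional as F

def ste_argmax(scores: torch.Tensor) -> torch.Tensor:
    """Straight-through argmax: discrete forward pass, identity backward pass.

    Illustrative sketch only; the function name and shapes are assumptions,
    not the paper's code.
    """
    # Hard one-hot prediction; on its own, argmax has a null gradient.
    hard = F.one_hot(scores.argmax(dim=-1), num_classes=scores.size(-1)).to(scores.dtype)
    # Forward value equals `hard`, but the gradient w.r.t. `scores` is the
    # identity, so the downstream loss gradient is pulled back to the scores.
    return scores + (hard - scores).detach()

# Usage: gradients reach the scores even though the output is discrete.
scores = torch.randn(3, 5, requires_grad=True)
loss = ste_argmax(scores).pow(2).sum()
loss.backward()
print(scores.grad.shape)  # torch.Size([3, 5])
```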