% Workshop paper, ACL Anthology ID W17-4304 (Sharaf and Daume III, 2017).
% The url field uses the canonical aclanthology.org address instead of a
% branch-preview (staging) host; the doi field is the stable identifier.
% NOTE(review): "III" is kept inside the surname part of the second author,
% as in the Anthology export; switch to the "Last, Jr, First" form only if
% the target style should treat it as a generational suffix -- confirm.
@inproceedings{sharaf-daume-iii-2017-structured,
  title     = {Structured Prediction via Learning to Search under Bandit Feedback},
  author    = {Sharaf, Amr and
               Daum{\'e} III, Hal},
  editor    = {Chang, Kai-Wei and
               Chang, Ming-Wei and
               Srikumar, Vivek and
               Rush, Alexander M.},
  booktitle = {Proceedings of the 2nd Workshop on Structured Prediction for Natural Language Processing},
  month     = sep,
  year      = {2017},
  address   = {Copenhagen, Denmark},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/W17-4304/},
  doi       = {10.18653/v1/W17-4304},
  pages     = {17--26},
  abstract  = {We present an algorithm for structured prediction under online bandit feedback. The learner repeatedly predicts a sequence of actions, generating a structured output. It then observes feedback for that output and no others. We consider two cases: a pure bandit setting in which it only observes a loss, and more fine-grained feedback in which it observes a loss for every action. We find that the fine-grained feedback is necessary for strong empirical performance, because it allows for a robust variance-reduction strategy. We empirically compare a number of different algorithms and exploration methods and show the efficacy of BLS on sequence labeling and dependency parsing tasks.},
}
Markdown (Informal)
[Structured Prediction via Learning to Search under Bandit Feedback](https://aclanthology.org/W17-4304/) (Sharaf & Daumé III, 2017)
ACL