@inproceedings{nguyen-etal-2017-reinforcement,
title = "Reinforcement Learning for Bandit Neural Machine Translation with Simulated Human Feedback",
author = "Nguyen, Khanh and
Daum{\'e} III, Hal and
Boyd-Graber, Jordan",
editor = "Palmer, Martha and
Hwa, Rebecca and
Riedel, Sebastian",
booktitle = "Proceedings of the 2017 Conference on Empirical Methods in Natural Language Processing",
month = sep,
year = "2017",
address = "Copenhagen, Denmark",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/jlcl-multiple-ingestion/D17-1153/",
doi = "10.18653/v1/D17-1153",
pages = "1464--1474",
abstract = "Machine translation is a natural candidate problem for reinforcement learning from human feedback: users provide quick, dirty ratings on candidate translations to guide a system to improve. Yet, current neural machine translation training focuses on expensive human-generated reference translations. We describe a reinforcement learning algorithm that improves neural machine translation systems from simulated human feedback. Our algorithm combines the advantage actor-critic algorithm (Mnih et al., 2016) with the attention-based neural encoder-decoder architecture (Luong et al., 2015). This algorithm (a) is well-designed for problems with a large action space and delayed rewards, (b) effectively optimizes traditional corpus-level machine translation metrics, and (c) is robust to skewed, high-variance, granular feedback modeled after actual human behaviors."
}
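
As a rough illustration of the training setup the abstract describes, the sketch below performs one advantage actor-critic update for bandit NMT. This is not the authors' code: the model sizes, the BOS index, and the simulated_rating function are toy assumptions standing in for the attention-based encoder-decoder of Luong et al. (2015) and the paper's human-feedback simulator. The key points it shows are that the policy samples a translation (rather than decoding greedily), the only learning signal is one scalar rating per sentence, and the critic's value estimate serves as the baseline in the policy gradient.

    # Illustrative sketch only: one advantage actor-critic update from
    # simulated bandit feedback. All sizes and the reward model are toy
    # placeholders, not the paper's configuration.
    import torch
    import torch.nn as nn
    import torch.nn.functional as F

    VOCAB, HID, MAX_LEN = 100, 64, 10  # toy sizes (assumptions)

    class TinySeq2Seq(nn.Module):
        """Minimal encoder-decoder policy with an actor head (next-token
        logits) and a critic head (expected reward)."""
        def __init__(self):
            super().__init__()
            self.src_emb = nn.Embedding(VOCAB, HID)
            self.tgt_emb = nn.Embedding(VOCAB, HID)
            self.encoder = nn.GRU(HID, HID, batch_first=True)
            self.decoder = nn.GRUCell(HID, HID)
            self.out = nn.Linear(HID, VOCAB)   # actor: next-token logits
            self.value = nn.Linear(HID, 1)     # critic: expected reward

        def forward(self, src):
            _, h = self.encoder(self.src_emb(src))
            h = h.squeeze(0)
            tok = torch.zeros(src.size(0), dtype=torch.long)  # BOS = 0 (assumption)
            log_probs, values, tokens = [], [], []
            for _ in range(MAX_LEN):
                h = self.decoder(self.tgt_emb(tok), h)
                dist = torch.distributions.Categorical(logits=self.out(h))
                tok = dist.sample()                    # sample, don't argmax
                log_probs.append(dist.log_prob(tok))
                values.append(self.value(h).squeeze(-1))
                tokens.append(tok)
            return (torch.stack(tokens, 1),
                    torch.stack(log_probs, 1),
                    torch.stack(values, 1))

    def simulated_rating(hyp, ref):
        """Stand-in for simulated human feedback: a noisy, coarsely
        quantized per-sentence score (here, token overlap plus noise)."""
        overlap = (hyp == ref).float().mean(dim=1)
        noisy = overlap + 0.1 * torch.randn_like(overlap)
        return (noisy * 5).round() / 5                 # granular 0.2-step ratings

    model = TinySeq2Seq()
    opt = torch.optim.Adam(model.parameters(), lr=1e-3)

    src = torch.randint(1, VOCAB, (8, MAX_LEN))        # fake source batch
    ref = torch.randint(1, VOCAB, (8, MAX_LEN))        # references seen only by the simulator

    hyp, logp, val = model(src)
    reward = simulated_rating(hyp, ref).unsqueeze(1)   # one scalar per sentence
    advantage = reward - val.detach()                  # critic as baseline
    actor_loss = -(advantage * logp).mean()            # policy-gradient term
    critic_loss = F.mse_loss(val, reward.expand_as(val))
    opt.zero_grad()
    (actor_loss + critic_loss).backward()
    opt.step()

A full system would run many such updates over a bandit-structured corpus and evaluate with corpus-level MT metrics, as the paper reports; this sketch only isolates the shape of a single update.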