% Findings of EMNLP 2021 paper. The url field uses the canonical ACL Anthology
% address for the paper (same Anthology ID as in the DOI), not a preview build.
@inproceedings{ghosh-srivastava-2021-mapping-language,
  title     = {Mapping Language to Programs using Multiple Reward Components with Inverse Reinforcement Learning},
  author    = {Ghosh, Sayan and
               Srivastava, Shashank},
  editor    = {Moens, Marie-Francine and
               Huang, Xuanjing and
               Specia, Lucia and
               Yih, Scott Wen-tau},
  booktitle = {Findings of the Association for Computational Linguistics: {EMNLP} 2021},
  month     = nov,
  year      = {2021},
  address   = {Punta Cana, Dominican Republic},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2021.findings-emnlp.125/},
  doi       = {10.18653/v1/2021.findings-emnlp.125},
  pages     = {1449--1462},
  abstract  = {Mapping natural language instructions to programs that computers can process is a fundamental challenge. Existing approaches focus on likelihood-based training or using reinforcement learning to fine-tune models based on a single reward. In this paper, we pose program generation from language as Inverse Reinforcement Learning. We introduce several interpretable reward components and jointly learn (1) a reward function that linearly combines them, and (2) a policy for program generation. Fine-tuning with our approach achieves significantly better performance than competitive methods using Reinforcement Learning (RL). On the VirtualHome framework, we get improvements of up to 9.0{\%} on the Longest Common Subsequence metric and 14.7{\%} on recall-based metrics over previous work on this framework (Puig et al., 2018). The approach is data-efficient, showing larger gains in performance in the low-data regime. Generated programs are also preferred by human evaluators over an RL-based approach, and rated higher on relevance, completeness, and human-likeness.},
}
Markdown (Informal)
[Mapping Language to Programs using Multiple Reward Components with Inverse Reinforcement Learning](https://aclanthology.org/2021.findings-emnlp.125/) (Ghosh & Srivastava, Findings 2021)
ACL