@inproceedings{han-etal-2024-reward,
title = "Reward Engineering for Generating Semi-structured Explanation",
author = "Han, Jiuzhou and
Buntine, Wray and
Shareghi, Ehsan",
editor = "Graham, Yvette and
Purver, Matthew",
booktitle = "Findings of the Association for Computational Linguistics: EACL 2024",
month = mar,
year = "2024",
address = "St. Julian{'}s, Malta",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/ingest_wac_2008/2024.findings-eacl.41/",
pages = "589--602",
abstract = "Semi-structured explanation depicts the implicit process of a reasoner with an explicit representation. This explanation highlights how available information in a specific query is utilised and supplemented with information a reasoner produces from its internal weights towards generating an answer. Despite the recent improvements in generative capabilities of language models, producing structured explanations to verify a model`s true reasoning capabilities remains a challenge. This issue is particularly pronounced for not-so-large LMs (e.g., FLAN-T5-XXL). In this work, we first underscore the limitations of supervised fine-tuning (SFT) in tackling this challenge, and then introduce a carefully crafted reward engineering method in reinforcement learning (RL) to better address this problem. We investigate multiple reward aggregation methods and provide a detailed discussion which sheds light on the promising potential of RL for future research. Our proposed method on two semi-structured explanation generation benchmarks (ExplaGraph and COPA-SSE) achieves new state-of-the-art results."
}
Markdown (Informal)
[Reward Engineering for Generating Semi-structured Explanation](https://preview.aclanthology.org/ingest_wac_2008/2024.findings-eacl.41/) (Han et al., Findings 2024)
ACL