% Conference-paper record for "Prompt Optimization for Relation Extraction using
% Reinforcement Learning" (Findings of ACL 2026); layout looks like an
% ACL Anthology export.
% NOTE(review): the url field points at a preview.aclanthology.org ingest build,
% which is presumably temporary -- confirm and switch to the canonical Anthology
% URL (or a DOI) once the paper is published.
% NOTE(review): address holds the conference location, not the publisher's
% city; kept exactly as exported -- confirm against the target bibliography style.
@inproceedings{liu-etal-2026-prompt-optimization,
  title     = {Prompt Optimization for Relation Extraction using Reinforcement Learning},
  author    = {Liu, Ying and
               Shuai, Dong and
               Zibo, Cui and
               Ye, TengQi and
               Wu, Gang},
  editor    = {Liakata, Maria and
               Moreira, Viviane P. and
               Zhang, Jiajun and
               Jurgens, David},
  booktitle = {Findings of the {A}ssociation for {C}omputational {L}inguistics: {ACL} 2026},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, United States},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-acl/2026.findings-acl.2100/},
  pages     = {42321--42334},
  isbn      = {979-8-89176-395-1},
  abstract  = {Relation extraction is a fundamental task in information extraction. Still, existing supervised approaches rely heavily on large-scale annotated data, limiting their applicability in domain-specific and low-resource scenarios. Prompt-based methods with large language models provide a parameter-efficient alternative; however, their performance is susceptible to prompt design, which often requires extensive domain expertise and heuristic trial-and-error. We propose REPO, a reinforcement learning-based automated prompt optimization framework for domain relation extraction. REPO formulates prompt construction as a structured, sequential decision-making problem, optimizing prompt quality through interaction with a black-box LLM. To enable efficient and stable optimization, we introduce a two-stage framework comprising an initial prompt-construction stage that generates semantically grounded candidates and a DRL-based refinement stage that iteratively improves prompts within a constrained, domain-aware action space. We further design a composite evaluation metric that integrates extraction accuracy and semantic consistency to serve as a dense reward signal. Extensive experiments on multiple relation extraction datasets across medical, financial, legal, and news domains demonstrate that REPO consistently outperforms existing prompt-based methods and supervised baselines. Ablation studies further confirm the effectiveness and robustness of the proposed DRL-based prompt optimization strategy. Our code is available at \url{https://github.com/dddong2-star/REPO}.},
}
Markdown (Informal)
[Prompt Optimization for Relation Extraction using Reinforcement Learning](https://preview.aclanthology.org/ingest-acl/2026.findings-acl.2100/) (Liu et al., Findings 2026)
ACL