% Findings of EMNLP 2024 paper; ACL Anthology ID 2024.findings-emnlp.115.
% The url field uses the canonical aclanthology.org address for that ID instead
% of a preview.aclanthology.org staging link whose path names an ingestion branch.
% NOTE(review): the last author is stored with family name "Cam-Tu" and given
% name "Nguyen"; this may be inverted -- confirm against the paper before changing.
@inproceedings{wang-etal-2024-reward,
  title     = {Reward Difference Optimization For Sample Reweighting In Offline {RLHF}},
  author    = {Wang, Shiqi and Zhang, Zhengze and Zhao, Rui and Tan, Fei and Cam-Tu, Nguyen},
  editor    = {Al-Onaizan, Yaser and Bansal, Mohit and Chen, Yun-Nung},
  booktitle = {Findings of the Association for Computational Linguistics: EMNLP 2024},
  month     = nov,
  year      = {2024},
  address   = {Miami, Florida, USA},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2024.findings-emnlp.115/},
  doi       = {10.18653/v1/2024.findings-emnlp.115},
  pages     = {2109--2123},
}