% EMNLP 2024 main-conference paper. The url field uses the permanent ACL Anthology
% address (same Anthology ID as the DOI suffix) rather than a temporary preview build.
@inproceedings{chen-etal-2024-improving-discriminative,
  author    = {Chen, Lu and Zheng, Rui and Wang, Binghai and Jin, Senjie and Huang, Caishuang and Ye, Junjie and Zhang, Zhihao and Zhou, Yuhao and Xi, Zhiheng and Gui, Tao and Zhang, Qi and Huang, Xuanjing},
  title     = {Improving Discriminative Capability of Reward Models in {RLHF} Using Contrastive Learning},
  editor    = {Al-Onaizan, Yaser and Bansal, Mohit and Chen, Yun-Nung},
  booktitle = {Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing},
  month     = nov,
  year      = {2024},
  address   = {Miami, Florida, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {15270--15283},
  doi       = {10.18653/v1/2024.emnlp-main.852},
  url       = {https://aclanthology.org/2024.emnlp-main.852/},
}