% Conference paper in the CCL 2024 proceedings (Volume 1: Main Conference).
% Person names are stored as "Family, Given". The citation key is kept as-is
% so existing citations still resolve, even though "hang" is the first
% author's given name rather than the family name.
@inproceedings{hang-etal-2024-prior,
  title     = {Prior Constraints-based Reward Model Training for Aligning Large Language Models},
  author    = {Zhou, Hang and Wang, Chenglong and Hu, Yimin and Xiao, Tong and Zhang, Chunliang and Zhu, Jingbo},
  editor    = {Sun, Maosong and Liang, Jiye and Han, Xianpei and Liu, Zhiyuan and He, Yulan},
  booktitle = {Proceedings of the 23rd Chinese National Conference on Computational Linguistics (Volume 1: Main Conference)},
  month     = jul,
  year      = {2024},
  address   = {Taiyuan, China},
  publisher = {Chinese Information Processing Society of China},
  url       = {https://aclanthology.org/2024.ccl-1.107/},
  pages     = {1395--1407},
  language  = {eng}
}