% Conference paper in the EMNLP 2024 proceedings (ACL Anthology ID 2024.emnlp-main.1190).
% The url field uses the canonical aclanthology.org address, not a preview-branch build.
@inproceedings{flet-berliac-etal-2024-contrastive,
  author    = {Flet-Berliac, Yannis and Grinsztajn, Nathan and Strub, Florian and Choi, Eugene and Wu, Bill and Cremer, Chris and Ahmadian, Arash and Chandak, Yash and Azar, Mohammad Gheshlaghi and Pietquin, Olivier and Geist, Matthieu},
  title     = {Contrastive Policy Gradient: Aligning {LLM}s on sequence-level scores in a supervised-friendly fashion},
  editor    = {Al-Onaizan, Yaser and Bansal, Mohit and Chen, Yun-Nung},
  booktitle = {Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing},
  month     = nov,
  year      = {2024},
  address   = {Miami, Florida, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {21353--21370},
  doi       = {10.18653/v1/2024.emnlp-main.1190},
  url       = {https://aclanthology.org/2024.emnlp-main.1190/},
}