% EMNLP 2024 main-conference paper (ACL Anthology ID 2024.emnlp-main.1190).
% The url field uses the canonical aclanthology.org address rather than a
% preview-branch build, so that it matches the Anthology ID in the doi field.
@inproceedings{flet-berliac-etal-2024-contrastive,
    title = "Contrastive Policy Gradient: Aligning {LLM}s on sequence-level scores in a supervised-friendly fashion",
    author = "Flet-Berliac, Yannis  and
      Grinsztajn, Nathan  and
      Strub, Florian  and
      Choi, Eugene  and
      Wu, Bill  and
      Cremer, Chris  and
      Ahmadian, Arash  and
      Chandak, Yash  and
      Azar, Mohammad Gheshlaghi  and
      Pietquin, Olivier  and
      Geist, Matthieu",
    editor = "Al-Onaizan, Yaser  and
      Bansal, Mohit  and
      Chen, Yun-Nung",
    booktitle = "Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing",
    month = nov,
    year = "2024",
    address = "Miami, Florida, USA",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2024.emnlp-main.1190/",
    doi = "10.18653/v1/2024.emnlp-main.1190",
    pages = "21353--21370"
}