% Findings of ACL 2024 paper. The url field uses the permanent ACL Anthology
% landing page (same paper ID as the DOI) instead of a temporary preview-branch host.
@inproceedings{rita-etal-2024-countering,
    title     = {Countering Reward Over-Optimization in {LLM} with Demonstration-Guided Reinforcement Learning},
    author    = {Rita, Mathieu and
                 Strub, Florian and
                 Chaabouni, Rahma and
                 Michel, Paul and
                 Dupoux, Emmanuel and
                 Pietquin, Olivier},
    editor    = {Ku, Lun-Wei and
                 Martins, Andre and
                 Srikumar, Vivek},
    booktitle = {Findings of the Association for Computational Linguistics: ACL 2024},
    month     = aug,
    year      = {2024},
    address   = {Bangkok, Thailand},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2024.findings-acl.740/},
    doi       = {10.18653/v1/2024.findings-acl.740},
    pages     = {12447--12472},
}