% Findings of EMNLP 2024 paper, ACL Anthology ID 2024.findings-emnlp.478.
% The url field uses the canonical Anthology address (not a preview-branch link),
% and the title braces the whole token "Inverse-Q*" so styles keep its casing.
@inproceedings{xia-etal-2024-inverse,
    title = {{Inverse-Q*}: Token Level Reinforcement Learning for Aligning Large Language Models Without Preference Data},
    author = {Xia, Han  and
      Gao, Songyang  and
      Ge, Qiming  and
      Xi, Zhiheng  and
      Zhang, Qi  and
      Huang, Xuanjing},
    editor = {Al-Onaizan, Yaser  and
      Bansal, Mohit  and
      Chen, Yun-Nung},
    booktitle = {Findings of the Association for Computational Linguistics: EMNLP 2024},
    month = nov,
    year = {2024},
    address = {Miami, Florida, USA},
    publisher = {Association for Computational Linguistics},
    url = {https://aclanthology.org/2024.findings-emnlp.478/},
    doi = {10.18653/v1/2024.findings-emnlp.478},
    pages = {8178--8188},
}