% Kim and Lee, Findings of NAACL 2024. The url field uses the canonical
% ACL Anthology address (same Anthology ID as the DOI suffix), not a preview build.
@inproceedings{kim-lee-2024-adversarial,
    title     = {Adversarial {DPO}: Harnessing Harmful Data for Reducing Toxicity with Minimal Impact on Coherence and Evasiveness in Dialogue Agents},
    author    = {Kim, San and
                 Lee, Gary},
    editor    = {Duh, Kevin and
                 Gomez, Helena and
                 Bethard, Steven},
    booktitle = {Findings of the Association for Computational Linguistics: NAACL 2024},
    month     = jun,
    year      = {2024},
    address   = {Mexico City, Mexico},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2024.findings-naacl.118/},
    doi       = {10.18653/v1/2024.findings-naacl.118},
    pages     = {1821--1835},
}