% Kim and Lee, Findings of NAACL 2024 (pp. 1821--1835).
% The url field uses the canonical ACL Anthology address, whose ID matches the
% DOI suffix, rather than a preview-branch link that is not a stable target.
@inproceedings{kim-lee-2024-adversarial,
  author    = {Kim, San and Lee, Gary},
  title     = {Adversarial {DPO}: Harnessing Harmful Data for Reducing Toxicity with Minimal Impact on Coherence and Evasiveness in Dialogue Agents},
  editor    = {Duh, Kevin and Gomez, Helena and Bethard, Steven},
  booktitle = {Findings of the Association for Computational Linguistics: NAACL 2024},
  month     = jun,
  year      = {2024},
  address   = {Mexico City, Mexico},
  publisher = {Association for Computational Linguistics},
  pages     = {1821--1835},
  doi       = {10.18653/v1/2024.findings-naacl.118},
  url       = {https://aclanthology.org/2024.findings-naacl.118/},
}