% TrustNLP 2024 workshop paper; url is the canonical ACL Anthology landing page (not a preview/staging build).
@inproceedings{lal-etal-2024-automated,
    title = "Automated Adversarial Discovery for Safety Classifiers",
    author = "Lal, Yash Kumar  and
      Lahoti, Preethi  and
      Sinha, Aradhana  and
      Qin, Yao  and
      Balashankar, Ananth",
    editor = "Ovalle, Anaelia  and
      Chang, Kai-Wei  and
      Cao, Yang Trista  and
      Mehrabi, Ninareh  and
      Zhao, Jieyu  and
      Galstyan, Aram  and
      Dhamala, Jwala  and
      Kumar, Anoop  and
      Gupta, Rahul",
    booktitle = "Proceedings of the 4th Workshop on Trustworthy Natural Language Processing (TrustNLP 2024)",
    month = jun,
    year = "2024",
    address = "Mexico City, Mexico",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2024.trustnlp-1.2/",
    doi = "10.18653/v1/2024.trustnlp-1.2",
    pages = "13--26"
}