% Lin et al., EMNLP 2024 proceedings paper (ACL Anthology ID 2024.emnlp-main.401).
% The url field uses the main aclanthology.org host rather than a
% preview.aclanthology.org branch build; the doi is the preferred identifier.
@inproceedings{lin-etal-2024-towards-understanding,
  author    = {Lin, Yuping and He, Pengfei and Xu, Han and Xing, Yue and Yamada, Makoto and Liu, Hui and Tang, Jiliang},
  title     = {Towards Understanding Jailbreak Attacks in {LLM}s: A Representation Space Analysis},
  editor    = {Al-Onaizan, Yaser and Bansal, Mohit and Chen, Yun-Nung},
  booktitle = {Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing},
  month     = nov,
  year      = {2024},
  address   = {Miami, Florida, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {7067--7085},
  doi       = {10.18653/v1/2024.emnlp-main.401},
  url       = {https://aclanthology.org/2024.emnlp-main.401/},
}