% Workshop paper: "Deactivating Refusal Triggers" (TrustNLP 2026, ACL Anthology export).
% Editor surname corrected from "Galystan" to "Galstyan".
% NOTE(review): the url field points at an Anthology preview/ingest build, which is
% presumably temporary. Confirm and swap in the canonical aclanthology.org URL
% (and add a doi, if one is assigned) once the volume is published.
@inproceedings{xue-etal-2026-deactivating,
  author    = {Xue, Zhiyu and
               Qi, Zimo and
               Liu, Guangliang and
               Chen, Bocheng and
               Pedarsani, Ramtin},
  title     = {Deactivating Refusal Triggers: Understanding and Mitigating Overrefusal in Safety Alignment},
  editor    = {Chang, Kai-Wei and
               Mehrabi, Ninareh and
               Krishna, Satyapriya and
               Das, Anubrata and
               Dhamala, Jwala and
               Cao, Yang Trista and
               Kumarage, Tharindu and
               Ramakrishna, Anil and
               Christodoulopoulos, Christos and
               Wan, Yixin and
               Galstyan, Aram and
               Kumar, Anoop and
               Gupta, Rahul},
  booktitle = {Proceedings of the 6th Workshop on Trustworthy {NLP} ({TrustNLP} 2026)},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-acl-workshops/2026.trustnlp-main.26/},
  pages     = {402--412},
  isbn      = {979-8-89176-418-7},
}