@proceedings{trustnlp-2026-main,
    title = "Proceedings of the 6th Workshop on Trustworthy {NLP} ({T}rust{NLP} 2026)",
    editor = "Chang, Kai-Wei  and
      Mehrabi, Ninareh  and
      Krishna, Satyapriya  and
      Das, Anubrata  and
      Dhamala, Jwala  and
      Cao, Yang Trista  and
      Kumarage, Tharindu  and
      Ramakrishna, Anil  and
      Christodoulopoulos, Christos  and
      Wan, Yixin  and
      Galstyan, Aram  and
      Kumar, Anoop  and
      Gupta, Rahul",
    month = jul,
    year = "2026",
    address = "San Diego, California",
    publisher = "Association for Computational Linguistics",
    url = "https://preview.aclanthology.org/ingest-acl-workshops/2026.trustnlp-main.0/",
    ISBN = "979-8-89176-418-7"
}
@inproceedings{wang-etal-2026-evaluating-cross,
    title = "Evaluating Cross-Lingual Behavior and Consistency of Multimodal Large Language Models",
    author = "Wang, Hao  and
      Huang, Pinzhi  and
      Kawahara, Daisuke",
    editor = "Chang, Kai-Wei  and
      Mehrabi, Ninareh  and
      Krishna, Satyapriya  and
      Das, Anubrata  and
      Dhamala, Jwala  and
      Cao, Yang Trista  and
      Kumarage, Tharindu  and
      Ramakrishna, Anil  and
      Christodoulopoulos, Christos  and
      Wan, Yixin  and
      Galstyan, Aram  and
      Kumar, Anoop  and
      Gupta, Rahul",
    booktitle = "Proceedings of the 6th Workshop on Trustworthy {NLP} ({T}rust{NLP} 2026)",
    month = jul,
    year = "2026",
    address = "San Diego, California",
    publisher = "Association for Computational Linguistics",
    url = "https://preview.aclanthology.org/ingest-acl-workshops/2026.trustnlp-main.1/",
    pages = "1--20",
    ISBN = "979-8-89176-418-7"
}
@inproceedings{wang-etal-2026-compressed,
    title = "Through a Compressed Lens: Investigating The Impact of Quantization on Factual Knowledge Recall",
    author = {Wang, Qianli  and
      Wang, Mingyang  and
      Feldhus, Nils  and
      Ostermann, Simon  and
      Cao, Yuan  and
      Schuetze, Hinrich  and
      M{\"o}ller, Sebastian  and
      Schmitt, Vera},
    editor = "Chang, Kai-Wei  and
      Mehrabi, Ninareh  and
      Krishna, Satyapriya  and
      Das, Anubrata  and
      Dhamala, Jwala  and
      Cao, Yang Trista  and
      Kumarage, Tharindu  and
      Ramakrishna, Anil  and
      Christodoulopoulos, Christos  and
      Wan, Yixin  and
      Galstyan, Aram  and
      Kumar, Anoop  and
      Gupta, Rahul",
    booktitle = "Proceedings of the 6th Workshop on Trustworthy {NLP} ({T}rust{NLP} 2026)",
    month = jul,
    year = "2026",
    address = "San Diego, California",
    publisher = "Association for Computational Linguistics",
    url = "https://preview.aclanthology.org/ingest-acl-workshops/2026.trustnlp-main.2/",
    pages = "21--39",
    ISBN = "979-8-89176-418-7"
}
@inproceedings{chen-etal-2026-uncertainty-aware-proxy,
    title = "Uncertainty-Aware Proxy Attribute Reasoning for Reliable Media Bias Detection",
    author = "Chen, Chin-Po  and
      Li, Jeng-Lin  and
      Chang, Ming-Ching",
    editor = "Chang, Kai-Wei  and
      Mehrabi, Ninareh  and
      Krishna, Satyapriya  and
      Das, Anubrata  and
      Dhamala, Jwala  and
      Cao, Yang Trista  and
      Kumarage, Tharindu  and
      Ramakrishna, Anil  and
      Christodoulopoulos, Christos  and
      Wan, Yixin  and
      Galstyan, Aram  and
      Kumar, Anoop  and
      Gupta, Rahul",
    booktitle = "Proceedings of the 6th Workshop on Trustworthy {NLP} ({T}rust{NLP} 2026)",
    month = jul,
    year = "2026",
    address = "San Diego, California",
    publisher = "Association for Computational Linguistics",
    url = "https://preview.aclanthology.org/ingest-acl-workshops/2026.trustnlp-main.4/",
    pages = "40--63",
    ISBN = "979-8-89176-418-7"
}
@inproceedings{topol-2026-quantifying,
    title = "Quantifying {LLM} Safety Degradation Under Repeated Attacks Using Survival Analysis",
    author = "Topol, Zvi",
    editor = "Chang, Kai-Wei  and
      Mehrabi, Ninareh  and
      Krishna, Satyapriya  and
      Das, Anubrata  and
      Dhamala, Jwala  and
      Cao, Yang Trista  and
      Kumarage, Tharindu  and
      Ramakrishna, Anil  and
      Christodoulopoulos, Christos  and
      Wan, Yixin  and
      Galstyan, Aram  and
      Kumar, Anoop  and
      Gupta, Rahul",
    booktitle = "Proceedings of the 6th Workshop on Trustworthy {NLP} ({T}rust{NLP} 2026)",
    month = jul,
    year = "2026",
    address = "San Diego, California",
    publisher = "Association for Computational Linguistics",
    url = "https://preview.aclanthology.org/ingest-acl-workshops/2026.trustnlp-main.5/",
    pages = "64--72",
    ISBN = "979-8-89176-418-7"
}
@inproceedings{liu-sharma-2026-claimclaire,
    title = "{C}laim{CLAIRE}: A Trust-Aware Multi-Component Fact-Checking Agent for Open-World Claims",
    author = "Liu, Xinman  and
      Sharma, Mayank",
    editor = "Chang, Kai-Wei  and
      Mehrabi, Ninareh  and
      Krishna, Satyapriya  and
      Das, Anubrata  and
      Dhamala, Jwala  and
      Cao, Yang Trista  and
      Kumarage, Tharindu  and
      Ramakrishna, Anil  and
      Christodoulopoulos, Christos  and
      Wan, Yixin  and
      Galstyan, Aram  and
      Kumar, Anoop  and
      Gupta, Rahul",
    booktitle = "Proceedings of the 6th Workshop on Trustworthy {NLP} ({T}rust{NLP} 2026)",
    month = jul,
    year = "2026",
    address = "San Diego, California",
    publisher = "Association for Computational Linguistics",
    url = "https://preview.aclanthology.org/ingest-acl-workshops/2026.trustnlp-main.6/",
    pages = "73--91",
    ISBN = "979-8-89176-418-7"
}
@inproceedings{contro-etal-2026-chatbotmanip,
    title = "{C}hatbot{M}anip: a Dataset to Facilitate Evaluation and Oversight of Manipulative Chatbot Behaviour",
    author = "Contro, Jack Luigi Henry  and
      Deol, Simrat  and
      Brandao, Martim  and
      He, Yulan",
    editor = "Chang, Kai-Wei  and
      Mehrabi, Ninareh  and
      Krishna, Satyapriya  and
      Das, Anubrata  and
      Dhamala, Jwala  and
      Cao, Yang Trista  and
      Kumarage, Tharindu  and
      Ramakrishna, Anil  and
      Christodoulopoulos, Christos  and
      Wan, Yixin  and
      Galstyan, Aram  and
      Kumar, Anoop  and
      Gupta, Rahul",
    booktitle = "Proceedings of the 6th Workshop on Trustworthy {NLP} ({T}rust{NLP} 2026)",
    month = jul,
    year = "2026",
    address = "San Diego, California",
    publisher = "Association for Computational Linguistics",
    url = "https://preview.aclanthology.org/ingest-acl-workshops/2026.trustnlp-main.7/",
    pages = "92--107",
    ISBN = "979-8-89176-418-7"
}
@inproceedings{du-etal-2026-controllable,
    title = "Controllable {P}areto Trade-off between Fairness and Accuracy",
    author = "Du, Yongkang  and
      Zhao, Jieyu  and
      Yang, Yijun  and
      Zhou, Tianyi",
    editor = "Chang, Kai-Wei  and
      Mehrabi, Ninareh  and
      Krishna, Satyapriya  and
      Das, Anubrata  and
      Dhamala, Jwala  and
      Cao, Yang Trista  and
      Kumarage, Tharindu  and
      Ramakrishna, Anil  and
      Christodoulopoulos, Christos  and
      Wan, Yixin  and
      Galstyan, Aram  and
      Kumar, Anoop  and
      Gupta, Rahul",
    booktitle = "Proceedings of the 6th Workshop on Trustworthy {NLP} ({T}rust{NLP} 2026)",
    month = jul,
    year = "2026",
    address = "San Diego, California",
    publisher = "Association for Computational Linguistics",
    url = "https://preview.aclanthology.org/ingest-acl-workshops/2026.trustnlp-main.8/",
    pages = "108--120",
    ISBN = "979-8-89176-418-7"
}
@inproceedings{abdelwahab-etal-2026-thinking,
    title = "What are They Thinking? Delineation, Probing, and Tracking of Concepts in {LLM}s",
    author = "Abdelwahab, Mohamed  and
      Collins, Michelle Yu  and
      Chen, Sihan  and
      Zhao, Yi Cheng  and
      Mahmood, Zafarullah  and
      Zhu, Jiading  and
      Ali, Soliman  and
      Rose, Jonathan",
    editor = "Chang, Kai-Wei  and
      Mehrabi, Ninareh  and
      Krishna, Satyapriya  and
      Das, Anubrata  and
      Dhamala, Jwala  and
      Cao, Yang Trista  and
      Kumarage, Tharindu  and
      Ramakrishna, Anil  and
      Christodoulopoulos, Christos  and
      Wan, Yixin  and
      Galstyan, Aram  and
      Kumar, Anoop  and
      Gupta, Rahul",
    booktitle = "Proceedings of the 6th Workshop on Trustworthy {NLP} ({T}rust{NLP} 2026)",
    month = jul,
    year = "2026",
    address = "San Diego, California",
    publisher = "Association for Computational Linguistics",
    url = "https://preview.aclanthology.org/ingest-acl-workshops/2026.trustnlp-main.9/",
    pages = "121--179",
    ISBN = "979-8-89176-418-7"
}
@inproceedings{bakman-etal-2026-hair,
    title = "Hair-Trigger Alignment: Black-Box Evaluation Cannot Guarantee Post-Update Alignment",
    author = "Bakman, Yavuz Faruk  and
      Yaldiz, Duygu Nur  and
      Avestimehr, Salman  and
      Karimireddy, Sai Praneeth",
    editor = "Chang, Kai-Wei  and
      Mehrabi, Ninareh  and
      Krishna, Satyapriya  and
      Das, Anubrata  and
      Dhamala, Jwala  and
      Cao, Yang Trista  and
      Kumarage, Tharindu  and
      Ramakrishna, Anil  and
      Christodoulopoulos, Christos  and
      Wan, Yixin  and
      Galstyan, Aram  and
      Kumar, Anoop  and
      Gupta, Rahul",
    booktitle = "Proceedings of the 6th Workshop on Trustworthy {NLP} ({T}rust{NLP} 2026)",
    month = jul,
    year = "2026",
    address = "San Diego, California",
    publisher = "Association for Computational Linguistics",
    url = "https://preview.aclanthology.org/ingest-acl-workshops/2026.trustnlp-main.10/",
    pages = "180--203",
    ISBN = "979-8-89176-418-7"
}
@inproceedings{stringham-etal-2026-teaching,
    title = "Teaching People {LLM}{'}s Errors and Getting it Right",
    author = "Stringham, Nathan  and
      Hashemi Chaleshtori, Fateme  and
      Yan, Xinyuan  and
      Xu, Zhichao  and
      Wang, Bei  and
      Marasovic, Ana",
    editor = "Chang, Kai-Wei  and
      Mehrabi, Ninareh  and
      Krishna, Satyapriya  and
      Das, Anubrata  and
      Dhamala, Jwala  and
      Cao, Yang Trista  and
      Kumarage, Tharindu  and
      Ramakrishna, Anil  and
      Christodoulopoulos, Christos  and
      Wan, Yixin  and
      Galstyan, Aram  and
      Kumar, Anoop  and
      Gupta, Rahul",
    booktitle = "Proceedings of the 6th Workshop on Trustworthy {NLP} ({T}rust{NLP} 2026)",
    month = jul,
    year = "2026",
    address = "San Diego, California",
    publisher = "Association for Computational Linguistics",
    url = "https://preview.aclanthology.org/ingest-acl-workshops/2026.trustnlp-main.11/",
    pages = "204--226",
    ISBN = "979-8-89176-418-7"
}
@inproceedings{sahoo-etal-2026-linear,
    title = "Linear Probes Detect Task Format, Not Reasoning Mode in Language Model Hidden States",
    author = "Sahoo, Subramanyam  and
      Jain, Vinija  and
      Chadha, Aman  and
      Chaudhary, Divya",
    editor = "Chang, Kai-Wei  and
      Mehrabi, Ninareh  and
      Krishna, Satyapriya  and
      Das, Anubrata  and
      Dhamala, Jwala  and
      Cao, Yang Trista  and
      Kumarage, Tharindu  and
      Ramakrishna, Anil  and
      Christodoulopoulos, Christos  and
      Wan, Yixin  and
      Galstyan, Aram  and
      Kumar, Anoop  and
      Gupta, Rahul",
    booktitle = "Proceedings of the 6th Workshop on Trustworthy {NLP} ({T}rust{NLP} 2026)",
    month = jul,
    year = "2026",
    address = "San Diego, California",
    publisher = "Association for Computational Linguistics",
    url = "https://preview.aclanthology.org/ingest-acl-workshops/2026.trustnlp-main.12/",
    pages = "227--239",
    ISBN = "979-8-89176-418-7"
}
@inproceedings{lee-etal-2026-kolegalqa,
    title = "{K}o{L}egal{QA}: A {K}orean Legal {QA} Dataset for Trustworthy and Explanation-Grounded Legal {AI}",
    author = "Lee, Yongtae  and
      Lee, Surin  and
      Kim, Sumin  and
      Rahman, S M Wahidur  and
      Lee, Heung-No",
    editor = "Chang, Kai-Wei  and
      Mehrabi, Ninareh  and
      Krishna, Satyapriya  and
      Das, Anubrata  and
      Dhamala, Jwala  and
      Cao, Yang Trista  and
      Kumarage, Tharindu  and
      Ramakrishna, Anil  and
      Christodoulopoulos, Christos  and
      Wan, Yixin  and
      Galstyan, Aram  and
      Kumar, Anoop  and
      Gupta, Rahul",
    booktitle = "Proceedings of the 6th Workshop on Trustworthy {NLP} ({T}rust{NLP} 2026)",
    month = jul,
    year = "2026",
    address = "San Diego, California",
    publisher = "Association for Computational Linguistics",
    url = "https://preview.aclanthology.org/ingest-acl-workshops/2026.trustnlp-main.13/",
    pages = "240--255",
    ISBN = "979-8-89176-418-7"
}
@inproceedings{namboothiri-2026-authorization,
    title = "Authorization-First Retrieval: Enforcing Least Privilege in Multi-Agent {RAG} Systems",
    author = "Namboothiri, Rohith",
    editor = "Chang, Kai-Wei  and
      Mehrabi, Ninareh  and
      Krishna, Satyapriya  and
      Das, Anubrata  and
      Dhamala, Jwala  and
      Cao, Yang Trista  and
      Kumarage, Tharindu  and
      Ramakrishna, Anil  and
      Christodoulopoulos, Christos  and
      Wan, Yixin  and
      Galstyan, Aram  and
      Kumar, Anoop  and
      Gupta, Rahul",
    booktitle = "Proceedings of the 6th Workshop on Trustworthy {NLP} ({T}rust{NLP} 2026)",
    month = jul,
    year = "2026",
    address = "San Diego, California",
    publisher = "Association for Computational Linguistics",
    url = "https://preview.aclanthology.org/ingest-acl-workshops/2026.trustnlp-main.15/",
    pages = "256--271",
    ISBN = "979-8-89176-418-7"
}
@inproceedings{nakka-etal-2026-pii,
    title = "{PII} Jailbreaking in {LLM}s via Activation Steering Reveals Personal Information Leakage",
    author = "Nakka, Krishna Kanth  and
      Jiang, Xue  and
      Usynin, Dmitrii  and
      Zhou, Xuebing",
    editor = "Chang, Kai-Wei  and
      Mehrabi, Ninareh  and
      Krishna, Satyapriya  and
      Das, Anubrata  and
      Dhamala, Jwala  and
      Cao, Yang Trista  and
      Kumarage, Tharindu  and
      Ramakrishna, Anil  and
      Christodoulopoulos, Christos  and
      Wan, Yixin  and
      Galstyan, Aram  and
      Kumar, Anoop  and
      Gupta, Rahul",
    booktitle = "Proceedings of the 6th Workshop on Trustworthy {NLP} ({T}rust{NLP} 2026)",
    month = jul,
    year = "2026",
    address = "San Diego, California",
    publisher = "Association for Computational Linguistics",
    url = "https://preview.aclanthology.org/ingest-acl-workshops/2026.trustnlp-main.16/",
    pages = "272--286",
    ISBN = "979-8-89176-418-7"
}
@inproceedings{kim-2026-coercion,
    title = "Coercion Suppression Increases Preference Hallucinations via a Deceptive Bypass in {$K$}-Level Negotiation Agents",
    author = "Kim, Jihye",
    editor = "Chang, Kai-Wei  and
      Mehrabi, Ninareh  and
      Krishna, Satyapriya  and
      Das, Anubrata  and
      Dhamala, Jwala  and
      Cao, Yang Trista  and
      Kumarage, Tharindu  and
      Ramakrishna, Anil  and
      Christodoulopoulos, Christos  and
      Wan, Yixin  and
      Galstyan, Aram  and
      Kumar, Anoop  and
      Gupta, Rahul",
    booktitle = "Proceedings of the 6th Workshop on Trustworthy {NLP} ({T}rust{NLP} 2026)",
    month = jul,
    year = "2026",
    address = "San Diego, California",
    publisher = "Association for Computational Linguistics",
    url = "https://preview.aclanthology.org/ingest-acl-workshops/2026.trustnlp-main.17/",
    pages = "287--294",
    ISBN = "979-8-89176-418-7"
}
@inproceedings{rinki-etal-2026-purdah,
    title = "Purdah and Patriarchy: Evaluating and Mitigating {S}outh {A}sian Biases in Open-Ended Multilingual {LLM} Generations",
    author = "Rinki, Mamnuya  and
      Raj, Chahat  and
      Mukherjee, Anjishnu  and
      Zhu, Ziwei",
    editor = "Chang, Kai-Wei  and
      Mehrabi, Ninareh  and
      Krishna, Satyapriya  and
      Das, Anubrata  and
      Dhamala, Jwala  and
      Cao, Yang Trista  and
      Kumarage, Tharindu  and
      Ramakrishna, Anil  and
      Christodoulopoulos, Christos  and
      Wan, Yixin  and
      Galstyan, Aram  and
      Kumar, Anoop  and
      Gupta, Rahul",
    booktitle = "Proceedings of the 6th Workshop on Trustworthy {NLP} ({T}rust{NLP} 2026)",
    month = jul,
    year = "2026",
    address = "San Diego, California",
    publisher = "Association for Computational Linguistics",
    url = "https://preview.aclanthology.org/ingest-acl-workshops/2026.trustnlp-main.18/",
    pages = "295--315",
    ISBN = "979-8-89176-418-7"
}
@inproceedings{namboothiri-2026-ghost,
    title = "Ghost Context: Measuring Cross-Context Interference in Long-Context Language Models",
    author = "Namboothiri, Rohith",
    editor = "Chang, Kai-Wei  and
      Mehrabi, Ninareh  and
      Krishna, Satyapriya  and
      Das, Anubrata  and
      Dhamala, Jwala  and
      Cao, Yang Trista  and
      Kumarage, Tharindu  and
      Ramakrishna, Anil  and
      Christodoulopoulos, Christos  and
      Wan, Yixin  and
      Galstyan, Aram  and
      Kumar, Anoop  and
      Gupta, Rahul",
    booktitle = "Proceedings of the 6th Workshop on Trustworthy {NLP} ({T}rust{NLP} 2026)",
    month = jul,
    year = "2026",
    address = "San Diego, California",
    publisher = "Association for Computational Linguistics",
    url = "https://preview.aclanthology.org/ingest-acl-workshops/2026.trustnlp-main.19/",
    pages = "316--329",
    ISBN = "979-8-89176-418-7"
}
@inproceedings{halloran-2026-understanding,
    title = "Understanding the Effects of Safety Unalignment on Reasoning- and Instruction-Tuned Large Language Models",
    author = "Halloran, John Timothy",
    editor = "Chang, Kai-Wei  and
      Mehrabi, Ninareh  and
      Krishna, Satyapriya  and
      Das, Anubrata  and
      Dhamala, Jwala  and
      Cao, Yang Trista  and
      Kumarage, Tharindu  and
      Ramakrishna, Anil  and
      Christodoulopoulos, Christos  and
      Wan, Yixin  and
      Galstyan, Aram  and
      Kumar, Anoop  and
      Gupta, Rahul",
    booktitle = "Proceedings of the 6th Workshop on Trustworthy {NLP} ({T}rust{NLP} 2026)",
    month = jul,
    year = "2026",
    address = "San Diego, California",
    publisher = "Association for Computational Linguistics",
    url = "https://preview.aclanthology.org/ingest-acl-workshops/2026.trustnlp-main.20/",
    pages = "330--341",
    ISBN = "979-8-89176-418-7"
}
@inproceedings{lin-etal-2026-reactod,
    title = "{R}eac{TOD}: Bounded Neuro-Symbolic Agentic {NLU} for Zero-Shot Dialogue State Tracking",
    author = "Lin, Yanjun  and
      Xiao, Zimo  and
      Natarajan, Kartik  and
      Sankaranarayanan, Mahesh  and
      Nawanit, Niraj  and
      Parashar, Rakshit  and
      Zhang, Austin  and
      Konaraddi, Karthik  and
      Mote, Rishita  and
      Niu, Wei",
    editor = "Chang, Kai-Wei  and
      Mehrabi, Ninareh  and
      Krishna, Satyapriya  and
      Das, Anubrata  and
      Dhamala, Jwala  and
      Cao, Yang Trista  and
      Kumarage, Tharindu  and
      Ramakrishna, Anil  and
      Christodoulopoulos, Christos  and
      Wan, Yixin  and
      Galstyan, Aram  and
      Kumar, Anoop  and
      Gupta, Rahul",
    booktitle = "Proceedings of the 6th Workshop on Trustworthy {NLP} ({T}rust{NLP} 2026)",
    month = jul,
    year = "2026",
    address = "San Diego, California",
    publisher = "Association for Computational Linguistics",
    url = "https://preview.aclanthology.org/ingest-acl-workshops/2026.trustnlp-main.21/",
    pages = "342--352",
    ISBN = "979-8-89176-418-7"
}
@inproceedings{du-2026-geometric,
    title = "Geometric Deviation as an Unsupervised Pre-Generation Reliability Signal: Probing {LLM} Representations for Answerability",
    author = "Du, Yucheng",
    editor = "Chang, Kai-Wei  and
      Mehrabi, Ninareh  and
      Krishna, Satyapriya  and
      Das, Anubrata  and
      Dhamala, Jwala  and
      Cao, Yang Trista  and
      Kumarage, Tharindu  and
      Ramakrishna, Anil  and
      Christodoulopoulos, Christos  and
      Wan, Yixin  and
      Galstyan, Aram  and
      Kumar, Anoop  and
      Gupta, Rahul",
    booktitle = "Proceedings of the 6th Workshop on Trustworthy {NLP} ({T}rust{NLP} 2026)",
    month = jul,
    year = "2026",
    address = "San Diego, California",
    publisher = "Association for Computational Linguistics",
    url = "https://preview.aclanthology.org/ingest-acl-workshops/2026.trustnlp-main.22/",
    pages = "353--363",
    ISBN = "979-8-89176-418-7"
}
@inproceedings{al-ghussin-etal-2026-multilingual,
    title = "Multilingual Steering by Design: Multilingual Sparse Autoencoders and Principled Layer Selection",
    author = "Al Ghussin, Yusser  and
      Gurgurov, Daniil  and
      Baeumel, Tanja  and
      van Genabith, Josef  and
      Schramowski, Patrick  and
      Ostermann, Simon",
    editor = "Chang, Kai-Wei  and
      Mehrabi, Ninareh  and
      Krishna, Satyapriya  and
      Das, Anubrata  and
      Dhamala, Jwala  and
      Cao, Yang Trista  and
      Kumarage, Tharindu  and
      Ramakrishna, Anil  and
      Christodoulopoulos, Christos  and
      Wan, Yixin  and
      Galstyan, Aram  and
      Kumar, Anoop  and
      Gupta, Rahul",
    booktitle = "Proceedings of the 6th Workshop on Trustworthy {NLP} ({T}rust{NLP} 2026)",
    month = jul,
    year = "2026",
    address = "San Diego, California",
    publisher = "Association for Computational Linguistics",
    url = "https://preview.aclanthology.org/ingest-acl-workshops/2026.trustnlp-main.24/",
    pages = "364--401",
    ISBN = "979-8-89176-418-7"
}
@inproceedings{xue-etal-2026-deactivating,
    title = "Deactivating Refusal Triggers: Understanding and Mitigating Overrefusal in Safety Alignment",
    author = "Xue, Zhiyu  and
      Qi, Zimo  and
      Liu, Guangliang  and
      Chen, Bocheng  and
      Pedarsani, Ramtin",
    editor = "Chang, Kai-Wei  and
      Mehrabi, Ninareh  and
      Krishna, Satyapriya  and
      Das, Anubrata  and
      Dhamala, Jwala  and
      Cao, Yang Trista  and
      Kumarage, Tharindu  and
      Ramakrishna, Anil  and
      Christodoulopoulos, Christos  and
      Wan, Yixin  and
      Galstyan, Aram  and
      Kumar, Anoop  and
      Gupta, Rahul",
    booktitle = "Proceedings of the 6th Workshop on Trustworthy {NLP} ({T}rust{NLP} 2026)",
    month = jul,
    year = "2026",
    address = "San Diego, California",
    publisher = "Association for Computational Linguistics",
    url = "https://preview.aclanthology.org/ingest-acl-workshops/2026.trustnlp-main.26/",
    pages = "402--412",
    ISBN = "979-8-89176-418-7"
}
@inproceedings{garani-2026-systematic,
    title = "A Systematic Taxonomy of Failure Modes in Retrieval-Augmented Generation Systems",
    author = "Garani, Anupama",
    editor = "Chang, Kai-Wei  and
      Mehrabi, Ninareh  and
      Krishna, Satyapriya  and
      Das, Anubrata  and
      Dhamala, Jwala  and
      Cao, Yang Trista  and
      Kumarage, Tharindu  and
      Ramakrishna, Anil  and
      Christodoulopoulos, Christos  and
      Wan, Yixin  and
      Galstyan, Aram  and
      Kumar, Anoop  and
      Gupta, Rahul",
    booktitle = "Proceedings of the 6th Workshop on Trustworthy {NLP} ({T}rust{NLP} 2026)",
    month = jul,
    year = "2026",
    address = "San Diego, California",
    publisher = "Association for Computational Linguistics",
    url = "https://preview.aclanthology.org/ingest-acl-workshops/2026.trustnlp-main.27/",
    pages = "413--424",
    ISBN = "979-8-89176-418-7"
}
@inproceedings{huang-etal-2026-improving,
    title = "Improving the Faithfulness of {LLM}-based Abstractive Summarization with Span-level Unlikelihood Training",
    author = "Huang, Sicong  and
      Yan, Qianqi  and
      Wang, Shengze  and
      Lane, Ian",
    editor = "Chang, Kai-Wei  and
      Mehrabi, Ninareh  and
      Krishna, Satyapriya  and
      Das, Anubrata  and
      Dhamala, Jwala  and
      Cao, Yang Trista  and
      Kumarage, Tharindu  and
      Ramakrishna, Anil  and
      Christodoulopoulos, Christos  and
      Wan, Yixin  and
      Galstyan, Aram  and
      Kumar, Anoop  and
      Gupta, Rahul",
    booktitle = "Proceedings of the 6th Workshop on Trustworthy {NLP} ({T}rust{NLP} 2026)",
    month = jul,
    year = "2026",
    address = "San Diego, California",
    publisher = "Association for Computational Linguistics",
    url = "https://preview.aclanthology.org/ingest-acl-workshops/2026.trustnlp-main.28/",
    pages = "425--438",
    ISBN = "979-8-89176-418-7"
}
@inproceedings{kim-harris-2026-context,
    title = "Context Misleads {LLM}s: The Role of Context Filtering in Maintaining Safe Alignment of {LLM}s",
    author = "Kim, Jinhwa  and
      Harris, Ian",
    editor = "Chang, Kai-Wei  and
      Mehrabi, Ninareh  and
      Krishna, Satyapriya  and
      Das, Anubrata  and
      Dhamala, Jwala  and
      Cao, Yang Trista  and
      Kumarage, Tharindu  and
      Ramakrishna, Anil  and
      Christodoulopoulos, Christos  and
      Wan, Yixin  and
      Galstyan, Aram  and
      Kumar, Anoop  and
      Gupta, Rahul",
    booktitle = "Proceedings of the 6th Workshop on Trustworthy {NLP} ({T}rust{NLP} 2026)",
    month = jul,
    year = "2026",
    address = "San Diego, California",
    publisher = "Association for Computational Linguistics",
    url = "https://preview.aclanthology.org/ingest-acl-workshops/2026.trustnlp-main.29/",
    pages = "439--455",
    ISBN = "979-8-89176-418-7"
}
@inproceedings{yeh-etal-2026-lexical,
    title = "Lexical Familiarity Predicts Processing Depth for Nonliteral Language in Large Language Models",
    author = "Yeh, Lang-Ching  and
      Wang, Yu-Chieh  and
      Hsieh, Shu-Kai",
    editor = "Chang, Kai-Wei  and
      Mehrabi, Ninareh  and
      Krishna, Satyapriya  and
      Das, Anubrata  and
      Dhamala, Jwala  and
      Cao, Yang Trista  and
      Kumarage, Tharindu  and
      Ramakrishna, Anil  and
      Christodoulopoulos, Christos  and
      Wan, Yixin  and
      Galstyan, Aram  and
      Kumar, Anoop  and
      Gupta, Rahul",
    booktitle = "Proceedings of the 6th Workshop on Trustworthy {NLP} ({T}rust{NLP} 2026)",
    month = jul,
    year = "2026",
    address = "San Diego, California",
    publisher = "Association for Computational Linguistics",
    url = "https://preview.aclanthology.org/ingest-acl-workshops/2026.trustnlp-main.32/",
    pages = "456--470",
    ISBN = "979-8-89176-418-7"
}
@inproceedings{mittal-2026-forget,
    title = "Did You Forget What {I} Asked? Prospective Memory Failures in Large Language Models",
    author = "Mittal, Avni",
    editor = "Chang, Kai-Wei  and
      Mehrabi, Ninareh  and
      Krishna, Satyapriya  and
      Das, Anubrata  and
      Dhamala, Jwala  and
      Cao, Yang Trista  and
      Kumarage, Tharindu  and
      Ramakrishna, Anil  and
      Christodoulopoulos, Christos  and
      Wan, Yixin  and
      Galstyan, Aram  and
      Kumar, Anoop  and
      Gupta, Rahul",
    booktitle = "Proceedings of the 6th Workshop on Trustworthy {NLP} ({T}rust{NLP} 2026)",
    month = jul,
    year = "2026",
    address = "San Diego, California",
    publisher = "Association for Computational Linguistics",
    url = "https://preview.aclanthology.org/ingest-acl-workshops/2026.trustnlp-main.33/",
    pages = "471--488",
    ISBN = "979-8-89176-418-7"
}
@inproceedings{touchent-2026-dont,
    title = "Don{'}t Want Your {LLM} to Recommend Nuclear Strike? Try Asking It in {J}apanese",
    author = "Touchent, Rian",
    editor = "Chang, Kai-Wei  and
      Mehrabi, Ninareh  and
      Krishna, Satyapriya  and
      Das, Anubrata  and
      Dhamala, Jwala  and
      Cao, Yang Trista  and
      Kumarage, Tharindu  and
      Ramakrishna, Anil  and
      Christodoulopoulos, Christos  and
      Wan, Yixin  and
      Galstyan, Aram  and
      Kumar, Anoop  and
      Gupta, Rahul",
    booktitle = "Proceedings of the 6th Workshop on Trustworthy {NLP} ({T}rust{NLP} 2026)",
    month = jul,
    year = "2026",
    address = "San Diego, California",
    publisher = "Association for Computational Linguistics",
    url = "https://preview.aclanthology.org/ingest-acl-workshops/2026.trustnlp-main.35/",
    pages = "489--502",
    ISBN = "979-8-89176-418-7"
}
@inproceedings{zaghouani-2026-toward,
    title = "Toward Dialect-Aware Safety Evaluation for {A}rabic Large Language Models",
    author = "Zaghouani, Wajdi",
    editor = "Chang, Kai-Wei  and
      Mehrabi, Ninareh  and
      Krishna, Satyapriya  and
      Das, Anubrata  and
      Dhamala, Jwala  and
      Cao, Yang Trista  and
      Kumarage, Tharindu  and
      Ramakrishna, Anil  and
      Christodoulopoulos, Christos  and
      Wan, Yixin  and
      Galstyan, Aram  and
      Kumar, Anoop  and
      Gupta, Rahul",
    booktitle = "Proceedings of the 6th Workshop on Trustworthy {NLP} ({T}rust{NLP} 2026)",
    month = jul,
    year = "2026",
    address = "San Diego, California",
    publisher = "Association for Computational Linguistics",
    url = "https://preview.aclanthology.org/ingest-acl-workshops/2026.trustnlp-main.37/",
    pages = "503--514",
    ISBN = "979-8-89176-418-7"
}
@inproceedings{bugaud-2026-single,
    title = "Single-Layer Activation Edits Easily Corrupt Factual Recall but Rarely Repair It",
    author = "Bugaud, Zacharie",
    editor = "Chang, Kai-Wei  and
      Mehrabi, Ninareh  and
      Krishna, Satyapriya  and
      Das, Anubrata  and
      Dhamala, Jwala  and
      Cao, Yang Trista  and
      Kumarage, Tharindu  and
      Ramakrishna, Anil  and
      Christodoulopoulos, Christos  and
      Wan, Yixin  and
      Galstyan, Aram  and
      Kumar, Anoop  and
      Gupta, Rahul",
    booktitle = "Proceedings of the 6th Workshop on Trustworthy {NLP} ({T}rust{NLP} 2026)",
    month = jul,
    year = "2026",
    address = "San Diego, California",
    publisher = "Association for Computational Linguistics",
    url = "https://preview.aclanthology.org/ingest-acl-workshops/2026.trustnlp-main.38/",
    pages = "515--527",
    ISBN = "979-8-89176-418-7"
}
@inproceedings{su-etal-2026-truth,
    title = "Truth or Dare: Analyzing {LLM} Susceptibility to External Evidence of Varying Factuality",
    author = "Su, Han-Yu  and
      Chu, Kuan-Yu  and
      Li, Yung-Hui  and
      Ku, Lun-Wei",
    editor = "Chang, Kai-Wei  and
      Mehrabi, Ninareh  and
      Krishna, Satyapriya  and
      Das, Anubrata  and
      Dhamala, Jwala  and
      Cao, Yang Trista  and
      Kumarage, Tharindu  and
      Ramakrishna, Anil  and
      Christodoulopoulos, Christos  and
      Wan, Yixin  and
      Galstyan, Aram  and
      Kumar, Anoop  and
      Gupta, Rahul",
    booktitle = "Proceedings of the 6th Workshop on Trustworthy {NLP} ({T}rust{NLP} 2026)",
    month = jul,
    year = "2026",
    address = "San Diego, California",
    publisher = "Association for Computational Linguistics",
    url = "https://preview.aclanthology.org/ingest-acl-workshops/2026.trustnlp-main.39/",
    pages = "528--538",
    ISBN = "979-8-89176-418-7"
}
@inproceedings{zhao-li-2026-halo,
    title = "The Halo Effect and Language Takeover: Spatiotemporal Attention Decay Explains Vision-Language Model Failures in Simple Visual Counting",
    author = "Zhao, Haochen  and
      Li, Sujian",
    editor = "Chang, Kai-Wei  and
      Mehrabi, Ninareh  and
      Krishna, Satyapriya  and
      Das, Anubrata  and
      Dhamala, Jwala  and
      Cao, Yang Trista  and
      Kumarage, Tharindu  and
      Ramakrishna, Anil  and
      Christodoulopoulos, Christos  and
      Wan, Yixin  and
      Galstyan, Aram  and
      Kumar, Anoop  and
      Gupta, Rahul",
    booktitle = "Proceedings of the 6th Workshop on Trustworthy {NLP} ({T}rust{NLP} 2026)",
    month = jul,
    year = "2026",
    address = "San Diego, California",
    publisher = "Association for Computational Linguistics",
    url = "https://preview.aclanthology.org/ingest-acl-workshops/2026.trustnlp-main.40/",
    pages = "539--545",
    ISBN = "979-8-89176-418-7"
}
% TrustNLP 2026 paper, pp. 546--556. Editor surname corrected to "Galstyan" (was misspelled "Galystan").
@inproceedings{qu-etal-2026-chicago,
    title = "Why is ``{C}hicago'' Predictive of Deceptive Reviews? Using {LLM}s to Discover Language Phenomena from Lexical Cues",
    author = "Qu, Jiaming  and
      Guo, Mengtian  and
      Wang, Yue",
    editor = "Chang, Kai-Wei  and
      Mehrabi, Ninareh  and
      Krishna, Satyapriya  and
      Das, Anubrata  and
      Dhamala, Jwala  and
      Cao, Yang Trista  and
      Kumarage, Tharindu  and
      Ramakrishna, Anil  and
      Christodoulopoulos, Christos  and
      Wan, Yixin  and
      Galstyan, Aram  and
      Kumar, Anoop  and
      Gupta, Rahul",
    booktitle = "Proceedings of the 6th Workshop on Trustworthy {NLP} ({T}rust{NLP} 2026)",
    month = jul,
    year = "2026",
    address = "San Diego, California",
    publisher = "Association for Computational Linguistics",
    url = "https://preview.aclanthology.org/ingest-acl-workshops/2026.trustnlp-main.41/",
    pages = "546--556",
    ISBN = "979-8-89176-418-7"
}
% TrustNLP 2026 paper, pp. 557--562. Editor surname corrected to "Galstyan" (was misspelled "Galystan").
@inproceedings{bugaud-2026-domain,
    title = "Domain-Dependent Safety Behavior in Open-Weight {LLM}s: An Empirical Study Across Seven Ethical Domains",
    author = "Bugaud, Zacharie",
    editor = "Chang, Kai-Wei  and
      Mehrabi, Ninareh  and
      Krishna, Satyapriya  and
      Das, Anubrata  and
      Dhamala, Jwala  and
      Cao, Yang Trista  and
      Kumarage, Tharindu  and
      Ramakrishna, Anil  and
      Christodoulopoulos, Christos  and
      Wan, Yixin  and
      Galstyan, Aram  and
      Kumar, Anoop  and
      Gupta, Rahul",
    booktitle = "Proceedings of the 6th Workshop on Trustworthy {NLP} ({T}rust{NLP} 2026)",
    month = jul,
    year = "2026",
    address = "San Diego, California",
    publisher = "Association for Computational Linguistics",
    url = "https://preview.aclanthology.org/ingest-acl-workshops/2026.trustnlp-main.42/",
    pages = "557--562",
    ISBN = "979-8-89176-418-7"
}
% TrustNLP 2026 paper, pp. 563--583. Editor surname corrected to "Galstyan" (was misspelled "Galystan").
@inproceedings{brandl-eberle-2026-systematic,
    title = "A Systematic Comparison between Extractive Self-Explanations and Human Rationales in Text Classification",
    author = "Brandl, Stephanie  and
      Eberle, Oliver",
    editor = "Chang, Kai-Wei  and
      Mehrabi, Ninareh  and
      Krishna, Satyapriya  and
      Das, Anubrata  and
      Dhamala, Jwala  and
      Cao, Yang Trista  and
      Kumarage, Tharindu  and
      Ramakrishna, Anil  and
      Christodoulopoulos, Christos  and
      Wan, Yixin  and
      Galstyan, Aram  and
      Kumar, Anoop  and
      Gupta, Rahul",
    booktitle = "Proceedings of the 6th Workshop on Trustworthy {NLP} ({T}rust{NLP} 2026)",
    month = jul,
    year = "2026",
    address = "San Diego, California",
    publisher = "Association for Computational Linguistics",
    url = "https://preview.aclanthology.org/ingest-acl-workshops/2026.trustnlp-main.44/",
    pages = "563--583",
    ISBN = "979-8-89176-418-7"
}
% TrustNLP 2026 paper, pp. 584--599. Editor surname corrected to "Galstyan" (was misspelled "Galystan").
@inproceedings{hegazy-etal-2026-guiding,
    title = "Guiding Giants: Lightweight Controllers for Weighted Activation Steering in {LLM}s",
    author = "Hegazy, Amr  and
      Elhoushi, Mostafa  and
      Alanwar, Amr",
    editor = "Chang, Kai-Wei  and
      Mehrabi, Ninareh  and
      Krishna, Satyapriya  and
      Das, Anubrata  and
      Dhamala, Jwala  and
      Cao, Yang Trista  and
      Kumarage, Tharindu  and
      Ramakrishna, Anil  and
      Christodoulopoulos, Christos  and
      Wan, Yixin  and
      Galstyan, Aram  and
      Kumar, Anoop  and
      Gupta, Rahul",
    booktitle = "Proceedings of the 6th Workshop on Trustworthy {NLP} ({T}rust{NLP} 2026)",
    month = jul,
    year = "2026",
    address = "San Diego, California",
    publisher = "Association for Computational Linguistics",
    url = "https://preview.aclanthology.org/ingest-acl-workshops/2026.trustnlp-main.46/",
    pages = "584--599",
    ISBN = "979-8-89176-418-7"
}
% TrustNLP 2026 paper, pp. 600--617. Editor surname corrected to "Galstyan" (was misspelled "Galystan").
@inproceedings{mohammad-bayazit-2026-surgellm,
    title = "{SURGELLM}: Rethinking Multi-Task Evaluation through Task-Aware Feature Gating with Class-Balanced Normalization",
    author = "Mohammad, Noor Islam S.  and
      Bayazit, Ulug",
    editor = "Chang, Kai-Wei  and
      Mehrabi, Ninareh  and
      Krishna, Satyapriya  and
      Das, Anubrata  and
      Dhamala, Jwala  and
      Cao, Yang Trista  and
      Kumarage, Tharindu  and
      Ramakrishna, Anil  and
      Christodoulopoulos, Christos  and
      Wan, Yixin  and
      Galstyan, Aram  and
      Kumar, Anoop  and
      Gupta, Rahul",
    booktitle = "Proceedings of the 6th Workshop on Trustworthy {NLP} ({T}rust{NLP} 2026)",
    month = jul,
    year = "2026",
    address = "San Diego, California",
    publisher = "Association for Computational Linguistics",
    url = "https://preview.aclanthology.org/ingest-acl-workshops/2026.trustnlp-main.47/",
    pages = "600--617",
    ISBN = "979-8-89176-418-7"
}
% TrustNLP 2026 paper, pp. 618--636. Editor surname corrected to "Galstyan" (was misspelled "Galystan").
@inproceedings{arif-etal-2026-grain,
    title = "With a Grain of {SALT}: Are {LLM}s Fair Across Social Dimensions?",
    author = "Arif, Samee  and
      Khan, Zohaib  and
      Butt, Maaidah Kaleem  and
      Rashid, Muhammad Suhaib  and
      Raza, Agha Ali  and
      Athar, Awais",
    editor = "Chang, Kai-Wei  and
      Mehrabi, Ninareh  and
      Krishna, Satyapriya  and
      Das, Anubrata  and
      Dhamala, Jwala  and
      Cao, Yang Trista  and
      Kumarage, Tharindu  and
      Ramakrishna, Anil  and
      Christodoulopoulos, Christos  and
      Wan, Yixin  and
      Galstyan, Aram  and
      Kumar, Anoop  and
      Gupta, Rahul",
    booktitle = "Proceedings of the 6th Workshop on Trustworthy {NLP} ({T}rust{NLP} 2026)",
    month = jul,
    year = "2026",
    address = "San Diego, California",
    publisher = "Association for Computational Linguistics",
    url = "https://preview.aclanthology.org/ingest-acl-workshops/2026.trustnlp-main.48/",
    pages = "618--636",
    ISBN = "979-8-89176-418-7"
}
% TrustNLP 2026 paper, pp. 637--652. Editor surname corrected to "Galstyan" (was misspelled "Galystan").
@inproceedings{sermsri-panboonyuen-2026-gatekd,
    title = "{G}ate{KD}: Confidence-Gated Closed-Loop Distillation for Robust Reasoning",
    author = "Sermsri, Kasidit  and
      Panboonyuen, Teerapong",
    editor = "Chang, Kai-Wei  and
      Mehrabi, Ninareh  and
      Krishna, Satyapriya  and
      Das, Anubrata  and
      Dhamala, Jwala  and
      Cao, Yang Trista  and
      Kumarage, Tharindu  and
      Ramakrishna, Anil  and
      Christodoulopoulos, Christos  and
      Wan, Yixin  and
      Galstyan, Aram  and
      Kumar, Anoop  and
      Gupta, Rahul",
    booktitle = "Proceedings of the 6th Workshop on Trustworthy {NLP} ({T}rust{NLP} 2026)",
    month = jul,
    year = "2026",
    address = "San Diego, California",
    publisher = "Association for Computational Linguistics",
    url = "https://preview.aclanthology.org/ingest-acl-workshops/2026.trustnlp-main.49/",
    pages = "637--652",
    ISBN = "979-8-89176-418-7"
}
% TrustNLP 2026 paper, pp. 653--662. Editor surname corrected to "Galstyan" (was misspelled "Galystan").
@inproceedings{ratnakar-vats-2026-geometry,
    title = "The Geometry of Refusal: Linear Instability in Safety-Aligned {LLM}s",
    author = "Ratnakar, Shivam  and
      Vats, Kartikeya",
    editor = "Chang, Kai-Wei  and
      Mehrabi, Ninareh  and
      Krishna, Satyapriya  and
      Das, Anubrata  and
      Dhamala, Jwala  and
      Cao, Yang Trista  and
      Kumarage, Tharindu  and
      Ramakrishna, Anil  and
      Christodoulopoulos, Christos  and
      Wan, Yixin  and
      Galstyan, Aram  and
      Kumar, Anoop  and
      Gupta, Rahul",
    booktitle = "Proceedings of the 6th Workshop on Trustworthy {NLP} ({T}rust{NLP} 2026)",
    month = jul,
    year = "2026",
    address = "San Diego, California",
    publisher = "Association for Computational Linguistics",
    url = "https://preview.aclanthology.org/ingest-acl-workshops/2026.trustnlp-main.51/",
    pages = "653--662",
    ISBN = "979-8-89176-418-7"
}
% TrustNLP 2026 paper, pp. 663--677. Editor surname corrected to "Galstyan" (was misspelled "Galystan").
@inproceedings{kim-kim-2026-conservative,
    title = "The Conservative {AI}: Diagnosing Hold Bias and Reliability Limits in Persona-Based Monetary Policy Simulation",
    author = "Kim, Giyong  and
      Kim, Sojung",
    editor = "Chang, Kai-Wei  and
      Mehrabi, Ninareh  and
      Krishna, Satyapriya  and
      Das, Anubrata  and
      Dhamala, Jwala  and
      Cao, Yang Trista  and
      Kumarage, Tharindu  and
      Ramakrishna, Anil  and
      Christodoulopoulos, Christos  and
      Wan, Yixin  and
      Galstyan, Aram  and
      Kumar, Anoop  and
      Gupta, Rahul",
    booktitle = "Proceedings of the 6th Workshop on Trustworthy {NLP} ({T}rust{NLP} 2026)",
    month = jul,
    year = "2026",
    address = "San Diego, California",
    publisher = "Association for Computational Linguistics",
    url = "https://preview.aclanthology.org/ingest-acl-workshops/2026.trustnlp-main.52/",
    pages = "663--677",
    ISBN = "979-8-89176-418-7"
}