% Whole-volume record for the 8th BlackboxNLP Workshop proceedings (Anthology ID 2025.blackboxnlp-1.0).
@proceedings{blackboxnlp-ws-2025-1,
    title = {Proceedings of the 8th BlackboxNLP Workshop: Analyzing and Interpreting Neural Networks for NLP},
    editor = {Belinkov, Yonatan and Mueller, Aaron and Kim, Najoung and Mohebbi, Hosein and Chen, Hanjie and Arad, Dana and Sarti, Gabriele},
    month = nov,
    year = {2025},
    address = {Suzhou, China},
    publisher = {Association for Computational Linguistics},
    url = {https://aclanthology.org/2025.blackboxnlp-1.0/},
    doi = {10.18653/v1/2025.blackboxnlp-1.0},
    isbn = {979-8-89176-346-3}
}
% Anthology ID 2025.blackboxnlp-1.1 (pages 1--15): CE-Bench benchmark paper.
% The tool name is braced as a whole word so sentence-casing styles keep "CE-Bench".
@inproceedings{gulko-etal-2025-ce,
    title = {{CE-Bench}: Towards a Reliable Contrastive Evaluation Benchmark of Interpretability of Sparse Autoencoders},
    author = {Gulko, Alex and Peng, Yusen and Kumar, Sachin},
    editor = {Belinkov, Yonatan and Mueller, Aaron and Kim, Najoung and Mohebbi, Hosein and Chen, Hanjie and Arad, Dana and Sarti, Gabriele},
    booktitle = {Proceedings of the 8th BlackboxNLP Workshop: Analyzing and Interpreting Neural Networks for NLP},
    month = nov,
    year = {2025},
    address = {Suzhou, China},
    publisher = {Association for Computational Linguistics},
    url = {https://aclanthology.org/2025.blackboxnlp-1.1/},
    doi = {10.18653/v1/2025.blackboxnlp-1.1},
    pages = {1--15},
    isbn = {979-8-89176-346-3}
}
% Anthology ID 2025.blackboxnlp-1.2 (pages 16--47): cross-lingual backdoor attacks in multilingual LLMs.
@inproceedings{beniwal-etal-2025-char,
    title = {Char-mander Use m{B}ackdoor! A Study of Cross-lingual Backdoor Attacks in Multilingual {LLM}s},
    author = {Beniwal, Himanshu and Panda, Sailesh and Srivibhav, Birudugadda and Singh, Mayank},
    editor = {Belinkov, Yonatan and Mueller, Aaron and Kim, Najoung and Mohebbi, Hosein and Chen, Hanjie and Arad, Dana and Sarti, Gabriele},
    booktitle = {Proceedings of the 8th BlackboxNLP Workshop: Analyzing and Interpreting Neural Networks for NLP},
    month = nov,
    year = {2025},
    address = {Suzhou, China},
    publisher = {Association for Computational Linguistics},
    url = {https://aclanthology.org/2025.blackboxnlp-1.2/},
    doi = {10.18653/v1/2025.blackboxnlp-1.2},
    pages = {16--47},
    isbn = {979-8-89176-346-3}
}
% Anthology ID 2025.blackboxnlp-1.3 (pages 48--68): qualitative study of machine-generated prompts.
@inproceedings{rakotonirina-etal-2025-evil,
    title = {Evil twins are not that evil: Qualitative insights into machine-generated prompts},
    author = {Rakotonirina, Nathana{\"e}l Carraz and Kervadec, Corentin and Franzon, Francesca and Baroni, Marco},
    editor = {Belinkov, Yonatan and Mueller, Aaron and Kim, Najoung and Mohebbi, Hosein and Chen, Hanjie and Arad, Dana and Sarti, Gabriele},
    booktitle = {Proceedings of the 8th BlackboxNLP Workshop: Analyzing and Interpreting Neural Networks for NLP},
    month = nov,
    year = {2025},
    address = {Suzhou, China},
    publisher = {Association for Computational Linguistics},
    url = {https://aclanthology.org/2025.blackboxnlp-1.3/},
    doi = {10.18653/v1/2025.blackboxnlp-1.3},
    pages = {48--68},
    isbn = {979-8-89176-346-3}
}
% Anthology ID 2025.blackboxnlp-1.4 (pages 69--78): steering prepositional phrases in Gemma-2.
% The model name is braced so sentence-casing styles do not lowercase it.
@inproceedings{arnold-grobner-2025-steering,
    title = {Steering Prepositional Phrases in Language Models: A Case of with-headed Adjectival and Adverbial Complements in {Gemma-2}},
    author = {Arnold, Stefan and Gr{\"o}bner, Rene},
    editor = {Belinkov, Yonatan and Mueller, Aaron and Kim, Najoung and Mohebbi, Hosein and Chen, Hanjie and Arad, Dana and Sarti, Gabriele},
    booktitle = {Proceedings of the 8th BlackboxNLP Workshop: Analyzing and Interpreting Neural Networks for NLP},
    month = nov,
    year = {2025},
    address = {Suzhou, China},
    publisher = {Association for Computational Linguistics},
    url = {https://aclanthology.org/2025.blackboxnlp-1.4/},
    doi = {10.18653/v1/2025.blackboxnlp-1.4},
    pages = {69--78},
    isbn = {979-8-89176-346-3}
}
% Anthology ID 2025.blackboxnlp-1.5 (pages 79--108): bias of LLM evaluators under pairwise comparison.
@inproceedings{jeong-etal-2025-comparative,
    title = {The Comparative Trap: Pairwise Comparisons Amplifies Biased Preferences of {LLM} Evaluators},
    author = {Jeong, Hawon and Park, ChaeHun and Hong, Jimin and Lee, Hojoon and Choo, Jaegul},
    editor = {Belinkov, Yonatan and Mueller, Aaron and Kim, Najoung and Mohebbi, Hosein and Chen, Hanjie and Arad, Dana and Sarti, Gabriele},
    booktitle = {Proceedings of the 8th BlackboxNLP Workshop: Analyzing and Interpreting Neural Networks for NLP},
    month = nov,
    year = {2025},
    address = {Suzhou, China},
    publisher = {Association for Computational Linguistics},
    url = {https://aclanthology.org/2025.blackboxnlp-1.5/},
    doi = {10.18653/v1/2025.blackboxnlp-1.5},
    pages = {79--108},
    isbn = {979-8-89176-346-3}
}
% Anthology ID 2025.blackboxnlp-1.6 (pages 109--136): outlier dimensions and frequent tokens.
@inproceedings{macocco-etal-2025-nuisance,
    title = {Not a nuisance but a useful heuristic: Outlier dimensions favor frequent tokens in language models},
    author = {Macocco, Iuri and Graichen, Nora and Boleda, Gemma and Baroni, Marco},
    editor = {Belinkov, Yonatan and Mueller, Aaron and Kim, Najoung and Mohebbi, Hosein and Chen, Hanjie and Arad, Dana and Sarti, Gabriele},
    booktitle = {Proceedings of the 8th BlackboxNLP Workshop: Analyzing and Interpreting Neural Networks for NLP},
    month = nov,
    year = {2025},
    address = {Suzhou, China},
    publisher = {Association for Computational Linguistics},
    url = {https://aclanthology.org/2025.blackboxnlp-1.6/},
    doi = {10.18653/v1/2025.blackboxnlp-1.6},
    pages = {109--136},
    isbn = {979-8-89176-346-3}
}
% Anthology ID 2025.blackboxnlp-1.7 (pages 137--148): language dominance in multilingual LLMs.
@inproceedings{shani-basirat-2025-language,
    title = {Language Dominance in Multilingual Large Language Models},
    author = {Shani, Nadav and Basirat, Ali},
    editor = {Belinkov, Yonatan and Mueller, Aaron and Kim, Najoung and Mohebbi, Hosein and Chen, Hanjie and Arad, Dana and Sarti, Gabriele},
    booktitle = {Proceedings of the 8th BlackboxNLP Workshop: Analyzing and Interpreting Neural Networks for NLP},
    month = nov,
    year = {2025},
    address = {Suzhou, China},
    publisher = {Association for Computational Linguistics},
    url = {https://aclanthology.org/2025.blackboxnlp-1.7/},
    doi = {10.18653/v1/2025.blackboxnlp-1.7},
    pages = {137--148},
    isbn = {979-8-89176-346-3}
}
% Anthology ID 2025.blackboxnlp-1.8 (pages 149--162): survey on concept descriptions for language models.
@inproceedings{feldhus-kopf-2025-interpreting,
    title = {Interpreting Language Models Through Concept Descriptions: A Survey},
    author = {Feldhus, Nils and Kopf, Laura},
    editor = {Belinkov, Yonatan and Mueller, Aaron and Kim, Najoung and Mohebbi, Hosein and Chen, Hanjie and Arad, Dana and Sarti, Gabriele},
    booktitle = {Proceedings of the 8th BlackboxNLP Workshop: Analyzing and Interpreting Neural Networks for NLP},
    month = nov,
    year = {2025},
    address = {Suzhou, China},
    publisher = {Association for Computational Linguistics},
    url = {https://aclanthology.org/2025.blackboxnlp-1.8/},
    doi = {10.18653/v1/2025.blackboxnlp-1.8},
    pages = {149--162},
    isbn = {979-8-89176-346-3}
}
% Anthology ID 2025.blackboxnlp-1.9 (pages 163--175): ReLoRA and learning dynamics of small language models.
@inproceedings{weiss-etal-2025-investigating,
    title = {Investigating {R}e{L}o{RA}: Effects on the Learning Dynamics of Small Language Models},
    author = {Weiss, Yuval and Africa, David Demitri and Buttery, Paula and Diehl Martinez, Richard},
    editor = {Belinkov, Yonatan and Mueller, Aaron and Kim, Najoung and Mohebbi, Hosein and Chen, Hanjie and Arad, Dana and Sarti, Gabriele},
    booktitle = {Proceedings of the 8th BlackboxNLP Workshop: Analyzing and Interpreting Neural Networks for NLP},
    month = nov,
    year = {2025},
    address = {Suzhou, China},
    publisher = {Association for Computational Linguistics},
    url = {https://aclanthology.org/2025.blackboxnlp-1.9/},
    doi = {10.18653/v1/2025.blackboxnlp-1.9},
    pages = {163--175},
    isbn = {979-8-89176-346-3}
}
% Anthology ID 2025.blackboxnlp-1.10 (pages 176--188): LRP versus leave-one-out in Transformers.
@inproceedings{you-etal-2025-lrp,
    title = {When {LRP} Diverges from Leave-One-Out in Transformers},
    author = {You, Weiqiu and Zeng, Siqi and Tsai, Yao-Hung Hubert and Yamada, Makoto and Zhao, Han},
    editor = {Belinkov, Yonatan and Mueller, Aaron and Kim, Najoung and Mohebbi, Hosein and Chen, Hanjie and Arad, Dana and Sarti, Gabriele},
    booktitle = {Proceedings of the 8th BlackboxNLP Workshop: Analyzing and Interpreting Neural Networks for NLP},
    month = nov,
    year = {2025},
    address = {Suzhou, China},
    publisher = {Association for Computational Linguistics},
    url = {https://aclanthology.org/2025.blackboxnlp-1.10/},
    doi = {10.18653/v1/2025.blackboxnlp-1.10},
    pages = {176--188},
    isbn = {979-8-89176-346-3}
}
% Anthology ID 2025.blackboxnlp-1.11 (pages 189--205): side effects of rank-one knowledge editing.
@inproceedings{takahashi-etal-2025-understanding,
    title = {Understanding the Side Effects of Rank-One Knowledge Editing},
    author = {Takahashi, Ryosuke and Kamoda, Go and Heinzerling, Benjamin and Sakaguchi, Keisuke and Inui, Kentaro},
    editor = {Belinkov, Yonatan and Mueller, Aaron and Kim, Najoung and Mohebbi, Hosein and Chen, Hanjie and Arad, Dana and Sarti, Gabriele},
    booktitle = {Proceedings of the 8th BlackboxNLP Workshop: Analyzing and Interpreting Neural Networks for NLP},
    month = nov,
    year = {2025},
    address = {Suzhou, China},
    publisher = {Association for Computational Linguistics},
    url = {https://aclanthology.org/2025.blackboxnlp-1.11/},
    doi = {10.18653/v1/2025.blackboxnlp-1.11},
    pages = {189--205},
    isbn = {979-8-89176-346-3}
}
% Anthology ID 2025.blackboxnlp-1.12 (pages 206--225): convergence in multi-agent LLM annotation.
@inproceedings{parfenova-etal-2025-emergent,
    title = {Emergent Convergence in Multi-Agent {LLM} Annotation},
    author = {Parfenova, Angelina and Denzler, Alexander and Pfeffer, J{\"u}rgen},
    editor = {Belinkov, Yonatan and Mueller, Aaron and Kim, Najoung and Mohebbi, Hosein and Chen, Hanjie and Arad, Dana and Sarti, Gabriele},
    booktitle = {Proceedings of the 8th BlackboxNLP Workshop: Analyzing and Interpreting Neural Networks for NLP},
    month = nov,
    year = {2025},
    address = {Suzhou, China},
    publisher = {Association for Computational Linguistics},
    url = {https://aclanthology.org/2025.blackboxnlp-1.12/},
    doi = {10.18653/v1/2025.blackboxnlp-1.12},
    pages = {206--225},
    isbn = {979-8-89176-346-3}
}
% Anthology ID 2025.blackboxnlp-1.13 (pages 226--238): PrivacyScalpel, LLM privacy via sparse-autoencoder feature intervention.
% NOTE(review): "Razi, Muhammad Reza Ar" may be a mis-split compound surname ("Ar Razi") -- confirm against the paper.
@inproceedings{frikha-etal-2025-privacyscalpel,
    title = {{P}rivacy{S}calpel: Enhancing {LLM} Privacy via Interpretable Feature Intervention with Sparse Autoencoders},
    author = {Frikha, Ahmed and Razi, Muhammad Reza Ar and Nakka, Krishna Kanth and Mendes, Ricardo and Jiang, Xue and Zhou, Xuebing},
    editor = {Belinkov, Yonatan and Mueller, Aaron and Kim, Najoung and Mohebbi, Hosein and Chen, Hanjie and Arad, Dana and Sarti, Gabriele},
    booktitle = {Proceedings of the 8th BlackboxNLP Workshop: Analyzing and Interpreting Neural Networks for NLP},
    month = nov,
    year = {2025},
    address = {Suzhou, China},
    publisher = {Association for Computational Linguistics},
    url = {https://aclanthology.org/2025.blackboxnlp-1.13/},
    doi = {10.18653/v1/2025.blackboxnlp-1.13},
    pages = {226--238},
    isbn = {979-8-89176-346-3}
}
% Anthology ID 2025.blackboxnlp-1.14 (pages 239--249): Circuit-Tracer library for finding feature circuits.
% The library name is braced so sentence-casing styles do not turn it into "Circuit-tracer".
@inproceedings{hanna-etal-2025-circuit,
    title = {{Circuit-Tracer}: A New Library for Finding Feature Circuits},
    author = {Hanna, Michael and Piotrowski, Mateusz and Lindsey, Jack and Ameisen, Emmanuel},
    editor = {Belinkov, Yonatan and Mueller, Aaron and Kim, Najoung and Mohebbi, Hosein and Chen, Hanjie and Arad, Dana and Sarti, Gabriele},
    booktitle = {Proceedings of the 8th BlackboxNLP Workshop: Analyzing and Interpreting Neural Networks for NLP},
    month = nov,
    year = {2025},
    address = {Suzhou, China},
    publisher = {Association for Computational Linguistics},
    url = {https://aclanthology.org/2025.blackboxnlp-1.14/},
    doi = {10.18653/v1/2025.blackboxnlp-1.14},
    pages = {239--249},
    isbn = {979-8-89176-346-3}
}
% Anthology ID 2025.blackboxnlp-1.15 (pages 250--262): why multi-operand addition is hard for LLMs.
% The second author is entered in "von Last, First" form so the particle "van" is parsed as part of the surname.
@inproceedings{baeumel-etal-2025-lookahead,
    title = {The Lookahead Limitation: Why Multi-Operand Addition is Hard for {LLM}s},
    author = {Baeumel, Tanja and van Genabith, Josef and Ostermann, Simon},
    editor = {Belinkov, Yonatan and Mueller, Aaron and Kim, Najoung and Mohebbi, Hosein and Chen, Hanjie and Arad, Dana and Sarti, Gabriele},
    booktitle = {Proceedings of the 8th BlackboxNLP Workshop: Analyzing and Interpreting Neural Networks for NLP},
    month = nov,
    year = {2025},
    address = {Suzhou, China},
    publisher = {Association for Computational Linguistics},
    url = {https://aclanthology.org/2025.blackboxnlp-1.15/},
    doi = {10.18653/v1/2025.blackboxnlp-1.15},
    pages = {250--262},
    isbn = {979-8-89176-346-3}
}
% Anthology ID 2025.blackboxnlp-1.16 (pages 263--275): LLMs and ambiguous plural reference.
% NOTE(review): "Anh, Dang Thi Thao" may have family and given names in the wrong slots -- confirm against the paper.
@inproceedings{anh-etal-2025-llms,
    title = {Can {LLM}s Detect Ambiguous Plural Reference? An Analysis of Split-Antecedent and Mereological Reference},
    author = {Anh, Dang Thi Thao and Nouwen, Rick and Poesio, Massimo},
    editor = {Belinkov, Yonatan and Mueller, Aaron and Kim, Najoung and Mohebbi, Hosein and Chen, Hanjie and Arad, Dana and Sarti, Gabriele},
    booktitle = {Proceedings of the 8th BlackboxNLP Workshop: Analyzing and Interpreting Neural Networks for NLP},
    month = nov,
    year = {2025},
    address = {Suzhou, China},
    publisher = {Association for Computational Linguistics},
    url = {https://aclanthology.org/2025.blackboxnlp-1.16/},
    doi = {10.18653/v1/2025.blackboxnlp-1.16},
    pages = {263--275},
    isbn = {979-8-89176-346-3}
}
% Anthology ID 2025.blackboxnlp-1.17 (pages 276--294): benchmark of normative reasoning in LLMs.
@inproceedings{ozeki-etal-2025-normative,
    title = {Normative Reasoning in Large Language Models: A Comparative Benchmark from Logical and Modal Perspectives},
    author = {Ozeki, Kentaro and Ando, Risako and Morishita, Takanobu and Abe, Hirohiko and Mineshima, Koji and Okada, Mitsuhiro},
    editor = {Belinkov, Yonatan and Mueller, Aaron and Kim, Najoung and Mohebbi, Hosein and Chen, Hanjie and Arad, Dana and Sarti, Gabriele},
    booktitle = {Proceedings of the 8th BlackboxNLP Workshop: Analyzing and Interpreting Neural Networks for NLP},
    month = nov,
    year = {2025},
    address = {Suzhou, China},
    publisher = {Association for Computational Linguistics},
    url = {https://aclanthology.org/2025.blackboxnlp-1.17/},
    doi = {10.18653/v1/2025.blackboxnlp-1.17},
    pages = {276--294},
    isbn = {979-8-89176-346-3}
}
% Anthology ID 2025.blackboxnlp-1.18 (pages 295--306): theorem-proving-based evaluation of neural semantic parsing.
@inproceedings{funakura-etal-2025-theorem,
    title = {A Theorem-Proving-Based Evaluation of Neural Semantic Parsing},
    author = {Funakura, Hayate and Kim, Hyunsoo and Mineshima, Koji},
    editor = {Belinkov, Yonatan and Mueller, Aaron and Kim, Najoung and Mohebbi, Hosein and Chen, Hanjie and Arad, Dana and Sarti, Gabriele},
    booktitle = {Proceedings of the 8th BlackboxNLP Workshop: Analyzing and Interpreting Neural Networks for NLP},
    month = nov,
    year = {2025},
    address = {Suzhou, China},
    publisher = {Association for Computational Linguistics},
    url = {https://aclanthology.org/2025.blackboxnlp-1.18/},
    doi = {10.18653/v1/2025.blackboxnlp-1.18},
    pages = {295--306},
    isbn = {979-8-89176-346-3}
}
% Anthology ID 2025.blackboxnlp-1.19 (pages 307--316): pipeline for assessing model-merging methods.
@inproceedings{sigrist-waldis-2025-pipeline,
    title = {A Pipeline to Assess Merging Methods via Behavior and Internals},
    author = {Sigrist, Yutaro and Waldis, Andreas},
    editor = {Belinkov, Yonatan and Mueller, Aaron and Kim, Najoung and Mohebbi, Hosein and Chen, Hanjie and Arad, Dana and Sarti, Gabriele},
    booktitle = {Proceedings of the 8th BlackboxNLP Workshop: Analyzing and Interpreting Neural Networks for NLP},
    month = nov,
    year = {2025},
    address = {Suzhou, China},
    publisher = {Association for Computational Linguistics},
    url = {https://aclanthology.org/2025.blackboxnlp-1.19/},
    doi = {10.18653/v1/2025.blackboxnlp-1.19},
    pages = {307--316},
    isbn = {979-8-89176-346-3}
}
% Anthology ID 2025.blackboxnlp-1.20 (pages 317--329): Chinese classifier prediction from BERT to LLMs.
% NOTE(review): "Jieshun, You" may have family and given names swapped -- confirm against the paper.
@inproceedings{zhang-etal-2025-bert,
    title = {From {BERT} to {LLM}s: Comparing and Understanding {C}hinese Classifier Prediction in Language Models},
    author = {Zhang, Ziqi and Ma, Jianfei and Chersoni, Emmanuele and Jieshun, You and Feng, Zhaoxin},
    editor = {Belinkov, Yonatan and Mueller, Aaron and Kim, Najoung and Mohebbi, Hosein and Chen, Hanjie and Arad, Dana and Sarti, Gabriele},
    booktitle = {Proceedings of the 8th BlackboxNLP Workshop: Analyzing and Interpreting Neural Networks for NLP},
    month = nov,
    year = {2025},
    address = {Suzhou, China},
    publisher = {Association for Computational Linguistics},
    url = {https://aclanthology.org/2025.blackboxnlp-1.20/},
    doi = {10.18653/v1/2025.blackboxnlp-1.20},
    pages = {317--329},
    isbn = {979-8-89176-346-3}
}
% Anthology ID 2025.blackboxnlp-1.21 (pages 330--357): mechanistic fine-tuning for in-context learning.
@inproceedings{cho-etal-2025-mechanistic,
    title = {Mechanistic Fine-tuning for In-context Learning},
    author = {Cho, Hakaze and Luo, Peng and Kato, Mariko and Kaenbyou, Rin and Inoue, Naoya},
    editor = {Belinkov, Yonatan and Mueller, Aaron and Kim, Najoung and Mohebbi, Hosein and Chen, Hanjie and Arad, Dana and Sarti, Gabriele},
    booktitle = {Proceedings of the 8th BlackboxNLP Workshop: Analyzing and Interpreting Neural Networks for NLP},
    month = nov,
    year = {2025},
    address = {Suzhou, China},
    publisher = {Association for Computational Linguistics},
    url = {https://aclanthology.org/2025.blackboxnlp-1.21/},
    doi = {10.18653/v1/2025.blackboxnlp-1.21},
    pages = {330--357},
    isbn = {979-8-89176-346-3}
}
% Anthology ID 2025.blackboxnlp-1.22 (pages 358--397): type (mis)prediction in CodeLLMs via activation steering.
@inproceedings{lucchetti-guha-2025-understanding,
    title = {Understanding How {C}ode{LLM}s (Mis)Predict Types with Activation Steering},
    author = {Lucchetti, Francesca and Guha, Arjun},
    editor = {Belinkov, Yonatan and Mueller, Aaron and Kim, Najoung and Mohebbi, Hosein and Chen, Hanjie and Arad, Dana and Sarti, Gabriele},
    booktitle = {Proceedings of the 8th BlackboxNLP Workshop: Analyzing and Interpreting Neural Networks for NLP},
    month = nov,
    year = {2025},
    address = {Suzhou, China},
    publisher = {Association for Computational Linguistics},
    url = {https://aclanthology.org/2025.blackboxnlp-1.22/},
    doi = {10.18653/v1/2025.blackboxnlp-1.22},
    pages = {358--397},
    isbn = {979-8-89176-346-3}
}
% Anthology ID 2025.blackboxnlp-1.23 (pages 398--414): contrastive explanations for speech-to-text models.
@inproceedings{conti-etal-2025-unheard,
    title = {The Unheard Alternative: Contrastive Explanations for Speech-to-Text Models},
    author = {Conti, Lina and Fucci, Dennis and Gaido, Marco and Negri, Matteo and Wisniewski, Guillaume and Bentivogli, Luisa},
    editor = {Belinkov, Yonatan and Mueller, Aaron and Kim, Najoung and Mohebbi, Hosein and Chen, Hanjie and Arad, Dana and Sarti, Gabriele},
    booktitle = {Proceedings of the 8th BlackboxNLP Workshop: Analyzing and Interpreting Neural Networks for NLP},
    month = nov,
    year = {2025},
    address = {Suzhou, China},
    publisher = {Association for Computational Linguistics},
    url = {https://aclanthology.org/2025.blackboxnlp-1.23/},
    doi = {10.18653/v1/2025.blackboxnlp-1.23},
    pages = {398--414},
    isbn = {979-8-89176-346-3}
}
% Anthology ID 2025.blackboxnlp-1.24 (pages 415--432): multi-dimensional evaluation of LLM world perception.
@inproceedings{li-etal-2025-exploring-large,
    title = {Exploring Large Language Models' World Perception: A Multi-Dimensional Evaluation through Data Distribution},
    author = {Li, Zhi and Yang, Jing and Liu, Ying},
    editor = {Belinkov, Yonatan and Mueller, Aaron and Kim, Najoung and Mohebbi, Hosein and Chen, Hanjie and Arad, Dana and Sarti, Gabriele},
    booktitle = {Proceedings of the 8th BlackboxNLP Workshop: Analyzing and Interpreting Neural Networks for NLP},
    month = nov,
    year = {2025},
    address = {Suzhou, China},
    publisher = {Association for Computational Linguistics},
    url = {https://aclanthology.org/2025.blackboxnlp-1.24/},
    doi = {10.18653/v1/2025.blackboxnlp-1.24},
    pages = {415--432},
    isbn = {979-8-89176-346-3}
}
% Anthology ID 2025.blackboxnlp-1.25 (pages 433--451): entity representations in auto-regressive LLMs.
@inproceedings{morand-etal-2025-representations,
    title = {On the Representations of Entities in Auto-regressive Large Language Models},
    author = {Morand, Victor and Mothe, Josiane and Piwowarski, Benjamin},
    editor = {Belinkov, Yonatan and Mueller, Aaron and Kim, Najoung and Mohebbi, Hosein and Chen, Hanjie and Arad, Dana and Sarti, Gabriele},
    booktitle = {Proceedings of the 8th BlackboxNLP Workshop: Analyzing and Interpreting Neural Networks for NLP},
    month = nov,
    year = {2025},
    address = {Suzhou, China},
    publisher = {Association for Computational Linguistics},
    url = {https://aclanthology.org/2025.blackboxnlp-1.25/},
    doi = {10.18653/v1/2025.blackboxnlp-1.25},
    pages = {433--451},
    isbn = {979-8-89176-346-3}
}
% Anthology ID 2025.blackboxnlp-1.26 (pages 452--466): language-neuron intervention and non-target language output.
@inproceedings{xie-etal-2025-language,
    title = {Can Language Neuron Intervention Reduce Non-Target Language Output?},
    author = {Xie, Suchun and Kim, Hwichan and Sasaki, Shota and Yamada, Kosuke and Suzuki, Jun},
    editor = {Belinkov, Yonatan and Mueller, Aaron and Kim, Najoung and Mohebbi, Hosein and Chen, Hanjie and Arad, Dana and Sarti, Gabriele},
    booktitle = {Proceedings of the 8th BlackboxNLP Workshop: Analyzing and Interpreting Neural Networks for NLP},
    month = nov,
    year = {2025},
    address = {Suzhou, China},
    publisher = {Association for Computational Linguistics},
    url = {https://aclanthology.org/2025.blackboxnlp-1.26/},
    doi = {10.18653/v1/2025.blackboxnlp-1.26},
    pages = {452--466},
    isbn = {979-8-89176-346-3}
}
% Anthology ID 2025.blackboxnlp-1.27 (pages 467--479): fine-grained manipulation of arithmetic neurons.
@inproceedings{du-etal-2025-fine,
    title = {Fine-Grained Manipulation of Arithmetic Neurons},
    author = {Du, Wenyu and Zheng, Rui and Luo, Tongxu and Chung, Stephen and Fu, Jie},
    editor = {Belinkov, Yonatan and Mueller, Aaron and Kim, Najoung and Mohebbi, Hosein and Chen, Hanjie and Arad, Dana and Sarti, Gabriele},
    booktitle = {Proceedings of the 8th BlackboxNLP Workshop: Analyzing and Interpreting Neural Networks for NLP},
    month = nov,
    year = {2025},
    address = {Suzhou, China},
    publisher = {Association for Computational Linguistics},
    url = {https://aclanthology.org/2025.blackboxnlp-1.27/},
    doi = {10.18653/v1/2025.blackboxnlp-1.27},
    pages = {467--479},
    isbn = {979-8-89176-346-3}
}
% Anthology ID 2025.blackboxnlp-1.28 (pages 480--520): prompt features behind LLM jailbreaks.
@inproceedings{kirch-etal-2025-features,
    title = {What Features in Prompts Jailbreak {LLM}s? Investigating the Mechanisms Behind Attacks},
    author = {Kirch, Nathalie Maria and Weisser, Constantin Niko and Field, Severin and Yannakoudakis, Helen and Casper, Stephen},
    editor = {Belinkov, Yonatan and Mueller, Aaron and Kim, Najoung and Mohebbi, Hosein and Chen, Hanjie and Arad, Dana and Sarti, Gabriele},
    booktitle = {Proceedings of the 8th BlackboxNLP Workshop: Analyzing and Interpreting Neural Networks for NLP},
    month = nov,
    year = {2025},
    address = {Suzhou, China},
    publisher = {Association for Computational Linguistics},
    url = {https://aclanthology.org/2025.blackboxnlp-1.28/},
    doi = {10.18653/v1/2025.blackboxnlp-1.28},
    pages = {480--520},
    isbn = {979-8-89176-346-3}
}
% Anthology ID 2025.blackboxnlp-1.29 (pages 521--527): MIB shared-task paper on edge selection for circuit faithfulness.
@inproceedings{nikankin-etal-2025-blackboxnlp,
    title = {{B}lackbox{NLP}-2025 {MIB} Shared Task: Improving Circuit Faithfulness via Better Edge Selection},
    author = {Nikankin, Yaniv and Arad, Dana and Itzhak, Itay and Reusch, Anja and Simhi, Adi and Kesten, Gal and Belinkov, Yonatan},
    editor = {Belinkov, Yonatan and Mueller, Aaron and Kim, Najoung and Mohebbi, Hosein and Chen, Hanjie and Arad, Dana and Sarti, Gabriele},
    booktitle = {Proceedings of the 8th BlackboxNLP Workshop: Analyzing and Interpreting Neural Networks for NLP},
    month = nov,
    year = {2025},
    address = {Suzhou, China},
    publisher = {Association for Computational Linguistics},
    url = {https://aclanthology.org/2025.blackboxnlp-1.29/},
    doi = {10.18653/v1/2025.blackboxnlp-1.29},
    pages = {521--527},
    isbn = {979-8-89176-346-3}
}
% Anthology ID 2025.blackboxnlp-1.30 (pages 528--536): MIB shared-task paper on IPE (isolating path effects).
@inproceedings{brunello-etal-2025-blackboxnlp,
    title = {{B}lackbox{NLP}-2025 {MIB} Shared Task: {IPE}: Isolating Path Effects for Improving Latent Circuit Identification},
    author = {Brunello, Nicol{\`o} and Cerutti, Andrea and Sassella, Andrea and Carman, Mark James},
    editor = {Belinkov, Yonatan and Mueller, Aaron and Kim, Najoung and Mohebbi, Hosein and Chen, Hanjie and Arad, Dana and Sarti, Gabriele},
    booktitle = {Proceedings of the 8th BlackboxNLP Workshop: Analyzing and Interpreting Neural Networks for NLP},
    month = nov,
    year = {2025},
    address = {Suzhou, China},
    publisher = {Association for Computational Linguistics},
    url = {https://aclanthology.org/2025.blackboxnlp-1.30/},
    doi = {10.18653/v1/2025.blackboxnlp-1.30},
    pages = {528--536},
    isbn = {979-8-89176-346-3}
}
% Anthology ID 2025.blackboxnlp-1.31 (pages 537--542): MIB shared-task paper on ensemble strategies for circuit localization.
@inproceedings{mondorf-etal-2025-blackboxnlp,
    title = {{B}lackbox{NLP}-2025 {MIB} Shared Task: Exploring Ensemble Strategies for Circuit Localization Methods},
    author = {Mondorf, Philipp and Wang, Mingyang and Gerstner, Sebastian and Hakimi, Ahmad Dawar and Liu, Yihong and Veloso, Leonor and Zhou, Shijia and Schuetze, Hinrich and Plank, Barbara},
    editor = {Belinkov, Yonatan and Mueller, Aaron and Kim, Najoung and Mohebbi, Hosein and Chen, Hanjie and Arad, Dana and Sarti, Gabriele},
    booktitle = {Proceedings of the 8th BlackboxNLP Workshop: Analyzing and Interpreting Neural Networks for NLP},
    month = nov,
    year = {2025},
    address = {Suzhou, China},
    publisher = {Association for Computational Linguistics},
    url = {https://aclanthology.org/2025.blackboxnlp-1.31/},
    doi = {10.18653/v1/2025.blackboxnlp-1.31},
    pages = {537--542},
    isbn = {979-8-89176-346-3}
}
% Anthology ID 2025.blackboxnlp-1.32 (pages 543--552): findings paper of the BlackboxNLP 2025 shared task.
@inproceedings{arad-etal-2025-findings,
    title = {Findings of the {B}lackbox{NLP} 2025 Shared Task: Localizing Circuits and Causal Variables in Language Models},
    author = {Arad, Dana and Belinkov, Yonatan and Chen, Hanjie and Kim, Najoung and Mohebbi, Hosein and Mueller, Aaron and Sarti, Gabriele and Tutek, Martin},
    editor = {Belinkov, Yonatan and Mueller, Aaron and Kim, Najoung and Mohebbi, Hosein and Chen, Hanjie and Arad, Dana and Sarti, Gabriele},
    booktitle = {Proceedings of the 8th BlackboxNLP Workshop: Analyzing and Interpreting Neural Networks for NLP},
    month = nov,
    year = {2025},
    address = {Suzhou, China},
    publisher = {Association for Computational Linguistics},
    url = {https://aclanthology.org/2025.blackboxnlp-1.32/},
    doi = {10.18653/v1/2025.blackboxnlp-1.32},
    pages = {543--552},
    isbn = {979-8-89176-346-3}
}
