% Volume entry (ACL Anthology ID 2024.blackboxnlp-1.0); the url and doi both derive from this ID.
@proceedings{blackboxnlp-2024-1,
    title = "Proceedings of the 7th BlackboxNLP Workshop: Analyzing and Interpreting Neural Networks for NLP",
    editor = "Belinkov, Yonatan  and
      Kim, Najoung  and
      Jumelet, Jaap  and
      Mohebbi, Hosein  and
      Mueller, Aaron  and
      Chen, Hanjie",
    month = nov,
    year = "2024",
    address = "Miami, Florida, US",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2024.blackboxnlp-1.0/",
    doi = "10.18653/v1/2024.blackboxnlp-1.0"
}
% ACL Anthology ID 2024.blackboxnlp-1.1; the url and doi both derive from this ID.
@inproceedings{lymperopoulos-etal-2024-optimal,
    title = "Optimal and efficient text counterfactuals using Graph Neural Networks",
    author = "Lymperopoulos, Dimitris  and
      Lymperaiou, Maria  and
      Filandrianos, Giorgos  and
      Stamou, Giorgos",
    editor = "Belinkov, Yonatan  and
      Kim, Najoung  and
      Jumelet, Jaap  and
      Mohebbi, Hosein  and
      Mueller, Aaron  and
      Chen, Hanjie",
    booktitle = "Proceedings of the 7th BlackboxNLP Workshop: Analyzing and Interpreting Neural Networks for NLP",
    month = nov,
    year = "2024",
    address = "Miami, Florida, US",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2024.blackboxnlp-1.1/",
    doi = "10.18653/v1/2024.blackboxnlp-1.1",
    pages = "1--14"
}
% ACL Anthology ID 2024.blackboxnlp-1.2; the url and doi both derive from this ID.
@inproceedings{arnold-etal-2024-routing,
    title = "Routing in Sparsely-gated Language Models responds to Context",
    author = "Arnold, Stefan  and
      Fietta, Marian  and
      Yesilbas, Dilara",
    editor = "Belinkov, Yonatan  and
      Kim, Najoung  and
      Jumelet, Jaap  and
      Mohebbi, Hosein  and
      Mueller, Aaron  and
      Chen, Hanjie",
    booktitle = "Proceedings of the 7th BlackboxNLP Workshop: Analyzing and Interpreting Neural Networks for NLP",
    month = nov,
    year = "2024",
    address = "Miami, Florida, US",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2024.blackboxnlp-1.2/",
    doi = "10.18653/v1/2024.blackboxnlp-1.2",
    pages = "15--22"
}
% ACL Anthology ID 2024.blackboxnlp-1.3; the url and doi both derive from this ID.
@inproceedings{nastase-merlo-2024-identifiable,
    title = "Are there identifiable structural parts in the sentence embedding whole?",
    author = "Nastase, Vivi  and
      Merlo, Paola",
    editor = "Belinkov, Yonatan  and
      Kim, Najoung  and
      Jumelet, Jaap  and
      Mohebbi, Hosein  and
      Mueller, Aaron  and
      Chen, Hanjie",
    booktitle = "Proceedings of the 7th BlackboxNLP Workshop: Analyzing and Interpreting Neural Networks for NLP",
    month = nov,
    year = "2024",
    address = "Miami, Florida, US",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2024.blackboxnlp-1.3/",
    doi = "10.18653/v1/2024.blackboxnlp-1.3",
    pages = "23--42"
}
% ACL Anthology ID 2024.blackboxnlp-1.4; the url and doi both derive from this ID.
@inproceedings{leybzon-kervadec-2024-learning,
    title = "Learning, Forgetting, Remembering: Insights From Tracking {LLM} Memorization During Training",
    author = "Leybzon, Danny D.  and
      Kervadec, Corentin",
    editor = "Belinkov, Yonatan  and
      Kim, Najoung  and
      Jumelet, Jaap  and
      Mohebbi, Hosein  and
      Mueller, Aaron  and
      Chen, Hanjie",
    booktitle = "Proceedings of the 7th BlackboxNLP Workshop: Analyzing and Interpreting Neural Networks for NLP",
    month = nov,
    year = "2024",
    address = "Miami, Florida, US",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2024.blackboxnlp-1.4/",
    doi = "10.18653/v1/2024.blackboxnlp-1.4",
    pages = "43--57"
}
% ACL Anthology ID 2024.blackboxnlp-1.5; the url and doi both derive from this ID.
% NOTE(review): Hollinsworth is listed first so the author order agrees with the
% citation key; confirm the order against the published paper.
@inproceedings{hollinsworth-etal-2024-language,
    title = "Language Models Linearly Represent Sentiment",
    author = "Hollinsworth, Oskar J.  and
      Tigges, Curt  and
      Geiger, Atticus  and
      Nanda, Neel",
    editor = "Belinkov, Yonatan  and
      Kim, Najoung  and
      Jumelet, Jaap  and
      Mohebbi, Hosein  and
      Mueller, Aaron  and
      Chen, Hanjie",
    booktitle = "Proceedings of the 7th BlackboxNLP Workshop: Analyzing and Interpreting Neural Networks for NLP",
    month = nov,
    year = "2024",
    address = "Miami, Florida, US",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2024.blackboxnlp-1.5/",
    doi = "10.18653/v1/2024.blackboxnlp-1.5",
    pages = "58--87"
}
% ACL Anthology ID 2024.blackboxnlp-1.6; the url and doi both derive from this ID.
@inproceedings{ji-etal-2024-llm,
    title = "{LLM} Internal States Reveal Hallucination Risk Faced With a Query",
    author = "Ji, Ziwei  and
      Chen, Delong  and
      Ishii, Etsuko  and
      Cahyawijaya, Samuel  and
      Bang, Yejin  and
      Wilie, Bryan  and
      Fung, Pascale",
    editor = "Belinkov, Yonatan  and
      Kim, Najoung  and
      Jumelet, Jaap  and
      Mohebbi, Hosein  and
      Mueller, Aaron  and
      Chen, Hanjie",
    booktitle = "Proceedings of the 7th BlackboxNLP Workshop: Analyzing and Interpreting Neural Networks for NLP",
    month = nov,
    year = "2024",
    address = "Miami, Florida, US",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2024.blackboxnlp-1.6/",
    doi = "10.18653/v1/2024.blackboxnlp-1.6",
    pages = "88--104"
}
% ACL Anthology ID 2024.blackboxnlp-1.7; the url and doi both derive from this ID.
@inproceedings{koulakos-etal-2024-enhancing,
    title = "Enhancing adversarial robustness in Natural Language Inference using explanations",
    author = "Koulakos, Alexandros  and
      Lymperaiou, Maria  and
      Filandrianos, Giorgos  and
      Stamou, Giorgos",
    editor = "Belinkov, Yonatan  and
      Kim, Najoung  and
      Jumelet, Jaap  and
      Mohebbi, Hosein  and
      Mueller, Aaron  and
      Chen, Hanjie",
    booktitle = "Proceedings of the 7th BlackboxNLP Workshop: Analyzing and Interpreting Neural Networks for NLP",
    month = nov,
    year = "2024",
    address = "Miami, Florida, US",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2024.blackboxnlp-1.7/",
    doi = "10.18653/v1/2024.blackboxnlp-1.7",
    pages = "105--117"
}
% ACL Anthology ID 2024.blackboxnlp-1.8; the url and doi both derive from this ID.
% The coined name in the title is braced as a whole word to keep its casing.
@inproceedings{goldfarb-tarrant-etal-2024-multicontrievers,
    title = "{MultiContrievers}: Analysis of Dense Retrieval Representations",
    author = "Goldfarb-Tarrant, Seraphina  and
      Rodriguez, Pedro  and
      Dwivedi-Yu, Jane  and
      Lewis, Patrick",
    editor = "Belinkov, Yonatan  and
      Kim, Najoung  and
      Jumelet, Jaap  and
      Mohebbi, Hosein  and
      Mueller, Aaron  and
      Chen, Hanjie",
    booktitle = "Proceedings of the 7th BlackboxNLP Workshop: Analyzing and Interpreting Neural Networks for NLP",
    month = nov,
    year = "2024",
    address = "Miami, Florida, US",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2024.blackboxnlp-1.8/",
    doi = "10.18653/v1/2024.blackboxnlp-1.8",
    pages = "118--139"
}
% ACL Anthology ID 2024.blackboxnlp-1.9; the url and doi both derive from this ID.
@inproceedings{armengol-estape-etal-2024-statically,
    title = "Can We Statically Locate Knowledge in Large Language Models? Financial Domain and Toxicity Reduction Case Studies",
    author = "Armengol-Estap{\'e}, Jordi  and
      Li, Lingyu  and
      Gehrmann, Sebastian  and
      Gopal, Achintya  and
      Rosenberg, David S.  and
      Mann, Gideon S.  and
      Dredze, Mark",
    editor = "Belinkov, Yonatan  and
      Kim, Najoung  and
      Jumelet, Jaap  and
      Mohebbi, Hosein  and
      Mueller, Aaron  and
      Chen, Hanjie",
    booktitle = "Proceedings of the 7th BlackboxNLP Workshop: Analyzing and Interpreting Neural Networks for NLP",
    month = nov,
    year = "2024",
    address = "Miami, Florida, US",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2024.blackboxnlp-1.9/",
    doi = "10.18653/v1/2024.blackboxnlp-1.9",
    pages = "140--176"
}
% ACL Anthology ID 2024.blackboxnlp-1.10; the url and doi both derive from this ID.
@inproceedings{artzy-schwartz-2024-attend,
    title = "Attend First, Consolidate Later: On the Importance of Attention in Different {LLM} Layers",
    author = "Artzy, Amit Ben  and
      Schwartz, Roy",
    editor = "Belinkov, Yonatan  and
      Kim, Najoung  and
      Jumelet, Jaap  and
      Mohebbi, Hosein  and
      Mueller, Aaron  and
      Chen, Hanjie",
    booktitle = "Proceedings of the 7th BlackboxNLP Workshop: Analyzing and Interpreting Neural Networks for NLP",
    month = nov,
    year = "2024",
    address = "Miami, Florida, US",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2024.blackboxnlp-1.10/",
    doi = "10.18653/v1/2024.blackboxnlp-1.10",
    pages = "177--184"
}
% ACL Anthology ID 2024.blackboxnlp-1.11; the url and doi both derive from this ID.
@inproceedings{gupta-etal-2024-enhancing,
    title = "Enhancing Question Answering on Charts Through Effective Pre-training Tasks",
    author = "Gupta, Ashim  and
      Gupta, Vivek  and
      Zhang, Shuo  and
      He, Yujie  and
      Zhang, Ning  and
      Shah, Shalin",
    editor = "Belinkov, Yonatan  and
      Kim, Najoung  and
      Jumelet, Jaap  and
      Mohebbi, Hosein  and
      Mueller, Aaron  and
      Chen, Hanjie",
    booktitle = "Proceedings of the 7th BlackboxNLP Workshop: Analyzing and Interpreting Neural Networks for NLP",
    month = nov,
    year = "2024",
    address = "Miami, Florida, US",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2024.blackboxnlp-1.11/",
    doi = "10.18653/v1/2024.blackboxnlp-1.11",
    pages = "185--192"
}
% ACL Anthology ID 2024.blackboxnlp-1.12; the url and doi both derive from this ID.
@inproceedings{manna-sett-2024-faithfulness,
    title = "Faithfulness and the Notion of Adversarial Sensitivity in {NLP} Explanations",
    author = "Manna, Supriya  and
      Sett, Niladri",
    editor = "Belinkov, Yonatan  and
      Kim, Najoung  and
      Jumelet, Jaap  and
      Mohebbi, Hosein  and
      Mueller, Aaron  and
      Chen, Hanjie",
    booktitle = "Proceedings of the 7th BlackboxNLP Workshop: Analyzing and Interpreting Neural Networks for NLP",
    month = nov,
    year = "2024",
    address = "Miami, Florida, US",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2024.blackboxnlp-1.12/",
    doi = "10.18653/v1/2024.blackboxnlp-1.12",
    pages = "193--206"
}
% ACL Anthology ID 2024.blackboxnlp-1.13; the url and doi both derive from this ID.
% The proper noun in the title is braced as a whole word to keep its capital.
@inproceedings{chen-etal-2024-transformers,
    title = "Transformers Learn Transition Dynamics when Trained to Predict {Markov} Decision Processes",
    author = "Chen, Yuxi  and
      Ma, Suwei  and
      Dear, Tony  and
      Chen, Xu",
    editor = "Belinkov, Yonatan  and
      Kim, Najoung  and
      Jumelet, Jaap  and
      Mohebbi, Hosein  and
      Mueller, Aaron  and
      Chen, Hanjie",
    booktitle = "Proceedings of the 7th BlackboxNLP Workshop: Analyzing and Interpreting Neural Networks for NLP",
    month = nov,
    year = "2024",
    address = "Miami, Florida, US",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2024.blackboxnlp-1.13/",
    doi = "10.18653/v1/2024.blackboxnlp-1.13",
    pages = "207--216"
}
% ACL Anthology ID 2024.blackboxnlp-1.14; the url and doi both derive from this ID.
% The author field is brace-delimited because it contains the \" accent command.
@inproceedings{bolliger-etal-2024-alignment,
    title = "On the alignment of {LM} language generation and human language comprehension",
    author = {Bolliger, Lena Sophia  and
      Haller, Patrick  and
      J{\"a}ger, Lena Ann},
    editor = "Belinkov, Yonatan  and
      Kim, Najoung  and
      Jumelet, Jaap  and
      Mohebbi, Hosein  and
      Mueller, Aaron  and
      Chen, Hanjie",
    booktitle = "Proceedings of the 7th BlackboxNLP Workshop: Analyzing and Interpreting Neural Networks for NLP",
    month = nov,
    year = "2024",
    address = "Miami, Florida, US",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2024.blackboxnlp-1.14/",
    doi = "10.18653/v1/2024.blackboxnlp-1.14",
    pages = "217--231"
}
% ACL Anthology ID 2024.blackboxnlp-1.15; the url and doi both derive from this ID.
% The model name in the title is braced as one unit to keep its casing.
@inproceedings{janiak-etal-2024-adversarial,
    title = "An Adversarial Example for Direct Logit Attribution: Memory Management in {GELU-4L}",
    author = "Janiak, Jett  and
      Rager, Can  and
      Dao, James  and
      Lau, Yeu-Tong",
    editor = "Belinkov, Yonatan  and
      Kim, Najoung  and
      Jumelet, Jaap  and
      Mohebbi, Hosein  and
      Mueller, Aaron  and
      Chen, Hanjie",
    booktitle = "Proceedings of the 7th BlackboxNLP Workshop: Analyzing and Interpreting Neural Networks for NLP",
    month = nov,
    year = "2024",
    address = "Miami, Florida, US",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2024.blackboxnlp-1.15/",
    doi = "10.18653/v1/2024.blackboxnlp-1.15",
    pages = "232--237"
}
% ACL Anthology ID 2024.blackboxnlp-1.16; the url and doi both derive from this ID.
% The model name in the title is braced so sentence-casing styles keep its capital.
@inproceedings{a-shams-etal-2024-uncovering,
    title = "Uncovering Syllable Constituents in the Self-Attention-Based Speech Representations of {Whisper}",
    author = "A Shams, Erfan  and
      Gessinger, Iona  and
      Carson-Berndsen, Julie",
    editor = "Belinkov, Yonatan  and
      Kim, Najoung  and
      Jumelet, Jaap  and
      Mohebbi, Hosein  and
      Mueller, Aaron  and
      Chen, Hanjie",
    booktitle = "Proceedings of the 7th BlackboxNLP Workshop: Analyzing and Interpreting Neural Networks for NLP",
    month = nov,
    year = "2024",
    address = "Miami, Florida, US",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2024.blackboxnlp-1.16/",
    doi = "10.18653/v1/2024.blackboxnlp-1.16",
    pages = "238--247"
}
% ACL Anthology ID 2024.blackboxnlp-1.17; the url and doi both derive from this ID.
@inproceedings{csordas-etal-2024-recurrent,
    title = "Recurrent Neural Networks Learn to Store and Generate Sequences using Non-Linear Representations",
    author = "Csord{\'a}s, R{\'o}bert  and
      Potts, Christopher  and
      Manning, Christopher D.  and
      Geiger, Atticus",
    editor = "Belinkov, Yonatan  and
      Kim, Najoung  and
      Jumelet, Jaap  and
      Mohebbi, Hosein  and
      Mueller, Aaron  and
      Chen, Hanjie",
    booktitle = "Proceedings of the 7th BlackboxNLP Workshop: Analyzing and Interpreting Neural Networks for NLP",
    month = nov,
    year = "2024",
    address = "Miami, Florida, US",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2024.blackboxnlp-1.17/",
    doi = "10.18653/v1/2024.blackboxnlp-1.17",
    pages = "248--262"
}
% ACL Anthology ID 2024.blackboxnlp-1.18; the url and doi both derive from this ID.
@inproceedings{kauf-etal-2024-log,
    title = "Log Probabilities Are a Reliable Estimate of Semantic Plausibility in Base and Instruction-Tuned Language Models",
    author = "Kauf, Carina  and
      Chersoni, Emmanuele  and
      Lenci, Alessandro  and
      Fedorenko, Evelina  and
      Ivanova, Anna A.",
    editor = "Belinkov, Yonatan  and
      Kim, Najoung  and
      Jumelet, Jaap  and
      Mohebbi, Hosein  and
      Mueller, Aaron  and
      Chen, Hanjie",
    booktitle = "Proceedings of the 7th BlackboxNLP Workshop: Analyzing and Interpreting Neural Networks for NLP",
    month = nov,
    year = "2024",
    address = "Miami, Florida, US",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2024.blackboxnlp-1.18/",
    doi = "10.18653/v1/2024.blackboxnlp-1.18",
    pages = "263--277"
}
% ACL Anthology ID 2024.blackboxnlp-1.19; the url and doi both derive from this ID.
% Product names in the title are braced so sentence-casing styles keep their capitals.
@inproceedings{lieberum-etal-2024-gemma,
    title = "{Gemma Scope}: Open Sparse Autoencoders Everywhere All At Once on {Gemma} 2",
    author = "Lieberum, Tom  and
      Rajamanoharan, Senthooran  and
      Conmy, Arthur  and
      Smith, Lewis  and
      Sonnerat, Nicolas  and
      Varma, Vikrant  and
      Kramar, Janos  and
      Dragan, Anca  and
      Shah, Rohin  and
      Nanda, Neel",
    editor = "Belinkov, Yonatan  and
      Kim, Najoung  and
      Jumelet, Jaap  and
      Mohebbi, Hosein  and
      Mueller, Aaron  and
      Chen, Hanjie",
    booktitle = "Proceedings of the 7th BlackboxNLP Workshop: Analyzing and Interpreting Neural Networks for NLP",
    month = nov,
    year = "2024",
    address = "Miami, Florida, US",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2024.blackboxnlp-1.19/",
    doi = "10.18653/v1/2024.blackboxnlp-1.19",
    pages = "278--300"
}
% ACL Anthology ID 2024.blackboxnlp-1.20; the url and doi both derive from this ID.
@inproceedings{gupta-etal-2024-self,
    title = "Self-Assessment Tests are Unreliable Measures of {LLM} Personality",
    author = "Gupta, Akshat  and
      Song, Xiaoyang  and
      Anumanchipalli, Gopala",
    editor = "Belinkov, Yonatan  and
      Kim, Najoung  and
      Jumelet, Jaap  and
      Mohebbi, Hosein  and
      Mueller, Aaron  and
      Chen, Hanjie",
    booktitle = "Proceedings of the 7th BlackboxNLP Workshop: Analyzing and Interpreting Neural Networks for NLP",
    month = nov,
    year = "2024",
    address = "Miami, Florida, US",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2024.blackboxnlp-1.20/",
    doi = "10.18653/v1/2024.blackboxnlp-1.20",
    pages = "301--314"
}
% ACL Anthology ID 2024.blackboxnlp-1.21; the url and doi both derive from this ID.
@inproceedings{amirzadeh-etal-2024-language,
    title = "How Language Models Prioritize Contextual Grammatical Cues?",
    author = "Amirzadeh, Hamidreza  and
      Alishahi, Afra  and
      Mohebbi, Hosein",
    editor = "Belinkov, Yonatan  and
      Kim, Najoung  and
      Jumelet, Jaap  and
      Mohebbi, Hosein  and
      Mueller, Aaron  and
      Chen, Hanjie",
    booktitle = "Proceedings of the 7th BlackboxNLP Workshop: Analyzing and Interpreting Neural Networks for NLP",
    month = nov,
    year = "2024",
    address = "Miami, Florida, US",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2024.blackboxnlp-1.21/",
    doi = "10.18653/v1/2024.blackboxnlp-1.21",
    pages = "315--336"
}
% ACL Anthology ID 2024.blackboxnlp-1.22; the url and doi both derive from this ID.
@inproceedings{mcdougall-etal-2024-copy,
    title = "Copy Suppression: Comprehensively Understanding a Motif in Language Model Attention Heads",
    author = "McDougall, Callum Stuart  and
      Conmy, Arthur  and
      Rushing, Cody  and
      McGrath, Thomas  and
      Nanda, Neel",
    editor = "Belinkov, Yonatan  and
      Kim, Najoung  and
      Jumelet, Jaap  and
      Mohebbi, Hosein  and
      Mueller, Aaron  and
      Chen, Hanjie",
    booktitle = "Proceedings of the 7th BlackboxNLP Workshop: Analyzing and Interpreting Neural Networks for NLP",
    month = nov,
    year = "2024",
    address = "Miami, Florida, US",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2024.blackboxnlp-1.22/",
    doi = "10.18653/v1/2024.blackboxnlp-1.22",
    pages = "337--363"
}
% ACL Anthology ID 2024.blackboxnlp-1.23; the url and doi both derive from this ID.
% The coined name in the title is braced as a whole word to keep its casing.
@inproceedings{mohammadi-etal-2024-welldunn,
    title = "{WellDunn}: On the Robustness and Explainability of Language Models and Large Language Models in Identifying Wellness Dimensions",
    author = "Mohammadi, Seyedali  and
      Raff, Edward  and
      Malekar, Jinendra  and
      Palit, Vedant  and
      Ferraro, Francis  and
      Gaur, Manas",
    editor = "Belinkov, Yonatan  and
      Kim, Najoung  and
      Jumelet, Jaap  and
      Mohebbi, Hosein  and
      Mueller, Aaron  and
      Chen, Hanjie",
    booktitle = "Proceedings of the 7th BlackboxNLP Workshop: Analyzing and Interpreting Neural Networks for NLP",
    month = nov,
    year = "2024",
    address = "Miami, Florida, US",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2024.blackboxnlp-1.23/",
    doi = "10.18653/v1/2024.blackboxnlp-1.23",
    pages = "364--388"
}
% ACL Anthology ID 2024.blackboxnlp-1.24; the url and doi both derive from this ID.
@inproceedings{chiang-lee-2024-metadata,
    title = "Do Metadata and Appearance of the Retrieved Webpages Affect {LLM}'s Reasoning in Retrieval-Augmented Generation?",
    author = "Chiang, Cheng-Han  and
      Lee, Hung-yi",
    editor = "Belinkov, Yonatan  and
      Kim, Najoung  and
      Jumelet, Jaap  and
      Mohebbi, Hosein  and
      Mueller, Aaron  and
      Chen, Hanjie",
    booktitle = "Proceedings of the 7th BlackboxNLP Workshop: Analyzing and Interpreting Neural Networks for NLP",
    month = nov,
    year = "2024",
    address = "Miami, Florida, US",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2024.blackboxnlp-1.24/",
    doi = "10.18653/v1/2024.blackboxnlp-1.24",
    pages = "389--406"
}
% ACL Anthology ID 2024.blackboxnlp-1.25; the url and doi both derive from this ID.
@inproceedings{syed-etal-2024-attribution,
    title = "Attribution Patching Outperforms Automated Circuit Discovery",
    author = "Syed, Aaquib  and
      Rager, Can  and
      Conmy, Arthur",
    editor = "Belinkov, Yonatan  and
      Kim, Najoung  and
      Jumelet, Jaap  and
      Mohebbi, Hosein  and
      Mueller, Aaron  and
      Chen, Hanjie",
    booktitle = "Proceedings of the 7th BlackboxNLP Workshop: Analyzing and Interpreting Neural Networks for NLP",
    month = nov,
    year = "2024",
    address = "Miami, Florida, US",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2024.blackboxnlp-1.25/",
    doi = "10.18653/v1/2024.blackboxnlp-1.25",
    pages = "407--416"
}
% ACL Anthology ID 2024.blackboxnlp-1.26; the url and doi both derive from this ID.
@inproceedings{hasan-etal-2024-pruning,
    title = "Pruning for Protection: Increasing Jailbreak Resistance in Aligned {LLM}s Without Fine-Tuning",
    author = "Hasan, Adib  and
      Rugina, Ileana  and
      Wang, Alex",
    editor = "Belinkov, Yonatan  and
      Kim, Najoung  and
      Jumelet, Jaap  and
      Mohebbi, Hosein  and
      Mueller, Aaron  and
      Chen, Hanjie",
    booktitle = "Proceedings of the 7th BlackboxNLP Workshop: Analyzing and Interpreting Neural Networks for NLP",
    month = nov,
    year = "2024",
    address = "Miami, Florida, US",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2024.blackboxnlp-1.26/",
    doi = "10.18653/v1/2024.blackboxnlp-1.26",
    pages = "417--430"
}
% ACL Anthology ID 2024.blackboxnlp-1.27; the url and doi both derive from this ID.
% The coined name in the title is braced as a whole word to keep its casing.
@inproceedings{xie-etal-2024-ivra,
    title = "{IvRA}: A Framework to Enhance Attention-Based Explanations for Language Models with Interpretability-Driven Training",
    author = "Xie, Sean  and
      Vosoughi, Soroush  and
      Hassanpour, Saeed",
    editor = "Belinkov, Yonatan  and
      Kim, Najoung  and
      Jumelet, Jaap  and
      Mohebbi, Hosein  and
      Mueller, Aaron  and
      Chen, Hanjie",
    booktitle = "Proceedings of the 7th BlackboxNLP Workshop: Analyzing and Interpreting Neural Networks for NLP",
    month = nov,
    year = "2024",
    address = "Miami, Florida, US",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2024.blackboxnlp-1.27/",
    doi = "10.18653/v1/2024.blackboxnlp-1.27",
    pages = "431--451"
}
% ACL Anthology ID 2024.blackboxnlp-1.28; the url and doi both derive from this ID.
@inproceedings{kamahi-yaghoobzadeh-2024-counterfactuals,
    title = "Counterfactuals As a Means for Evaluating Faithfulness of Attribution Methods in Autoregressive Language Models",
    author = "Kamahi, Sepehr  and
      Yaghoobzadeh, Yadollah",
    editor = "Belinkov, Yonatan  and
      Kim, Najoung  and
      Jumelet, Jaap  and
      Mohebbi, Hosein  and
      Mueller, Aaron  and
      Chen, Hanjie",
    booktitle = "Proceedings of the 7th BlackboxNLP Workshop: Analyzing and Interpreting Neural Networks for NLP",
    month = nov,
    year = "2024",
    address = "Miami, Florida, US",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2024.blackboxnlp-1.28/",
    doi = "10.18653/v1/2024.blackboxnlp-1.28",
    pages = "452--468"
}
% ACL Anthology ID 2024.blackboxnlp-1.29; the url and doi both derive from this ID.
@inproceedings{zhang-etal-2024-investigating,
    title = "Investigating Layer Importance in Large Language Models",
    author = "Zhang, Yang  and
      Dong, Yanfei  and
      Kawaguchi, Kenji",
    editor = "Belinkov, Yonatan  and
      Kim, Najoung  and
      Jumelet, Jaap  and
      Mohebbi, Hosein  and
      Mueller, Aaron  and
      Chen, Hanjie",
    booktitle = "Proceedings of the 7th BlackboxNLP Workshop: Analyzing and Interpreting Neural Networks for NLP",
    month = nov,
    year = "2024",
    address = "Miami, Florida, US",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2024.blackboxnlp-1.29/",
    doi = "10.18653/v1/2024.blackboxnlp-1.29",
    pages = "469--479"
}
% ACL Anthology ID 2024.blackboxnlp-1.30; the url and doi both derive from this ID.
@inproceedings{saphra-wiegreffe-2024-mechanistic,
    title = "Mechanistic?",
    author = "Saphra, Naomi  and
      Wiegreffe, Sarah",
    editor = "Belinkov, Yonatan  and
      Kim, Najoung  and
      Jumelet, Jaap  and
      Mohebbi, Hosein  and
      Mueller, Aaron  and
      Chen, Hanjie",
    booktitle = "Proceedings of the 7th BlackboxNLP Workshop: Analyzing and Interpreting Neural Networks for NLP",
    month = nov,
    year = "2024",
    address = "Miami, Florida, US",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2024.blackboxnlp-1.30/",
    doi = "10.18653/v1/2024.blackboxnlp-1.30",
    pages = "480--498"
}
% ACL Anthology ID 2024.blackboxnlp-1.31; the url and doi both derive from this ID.
@inproceedings{sakai-etal-2024-toward,
    title = "Toward the Evaluation of Large Language Models Considering Score Variance across Instruction Templates",
    author = "Sakai, Yusuke  and
      Nohejl, Adam  and
      Hang, Jiangnan  and
      Kamigaito, Hidetaka  and
      Watanabe, Taro",
    editor = "Belinkov, Yonatan  and
      Kim, Najoung  and
      Jumelet, Jaap  and
      Mohebbi, Hosein  and
      Mueller, Aaron  and
      Chen, Hanjie",
    booktitle = "Proceedings of the 7th BlackboxNLP Workshop: Analyzing and Interpreting Neural Networks for NLP",
    month = nov,
    year = "2024",
    address = "Miami, Florida, US",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2024.blackboxnlp-1.31/",
    doi = "10.18653/v1/2024.blackboxnlp-1.31",
    pages = "499--529"
}
% ACL Anthology ID 2024.blackboxnlp-1.32; the url and doi both derive from this ID.
@inproceedings{ghilardi-etal-2024-accelerating,
    title = "Accelerating Sparse Autoencoder Training via Layer-Wise Transfer Learning in Large Language Models",
    author = "Ghilardi, Davide  and
      Belotti, Federico  and
      Molinari, Marco  and
      Lim, Jaehyuk",
    editor = "Belinkov, Yonatan  and
      Kim, Najoung  and
      Jumelet, Jaap  and
      Mohebbi, Hosein  and
      Mueller, Aaron  and
      Chen, Hanjie",
    booktitle = "Proceedings of the 7th BlackboxNLP Workshop: Analyzing and Interpreting Neural Networks for NLP",
    month = nov,
    year = "2024",
    address = "Miami, Florida, US",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2024.blackboxnlp-1.32/",
    doi = "10.18653/v1/2024.blackboxnlp-1.32",
    pages = "530--550"
}
% ACL Anthology ID 2024.blackboxnlp-1.33; the url and doi both derive from this ID.
@inproceedings{su-etal-2024-wrapper,
    title = "Wrapper Boxes for Faithful Attribution of Model Predictions to Training Data",
    author = "Su, Yiheng  and
      Li, Junyi Jessy  and
      Lease, Matthew",
    editor = "Belinkov, Yonatan  and
      Kim, Najoung  and
      Jumelet, Jaap  and
      Mohebbi, Hosein  and
      Mueller, Aaron  and
      Chen, Hanjie",
    booktitle = "Proceedings of the 7th BlackboxNLP Workshop: Analyzing and Interpreting Neural Networks for NLP",
    month = nov,
    year = "2024",
    address = "Miami, Florida, US",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2024.blackboxnlp-1.33/",
    doi = "10.18653/v1/2024.blackboxnlp-1.33",
    pages = "551--576"
}
% ACL Anthology ID 2024.blackboxnlp-1.34; the url and doi both derive from this ID.
@inproceedings{scalena-etal-2024-multi,
    title = "Multi-property Steering of Large Language Models with Dynamic Activation Composition",
    author = "Scalena, Daniel  and
      Sarti, Gabriele  and
      Nissim, Malvina",
    editor = "Belinkov, Yonatan  and
      Kim, Najoung  and
      Jumelet, Jaap  and
      Mohebbi, Hosein  and
      Mueller, Aaron  and
      Chen, Hanjie",
    booktitle = "Proceedings of the 7th BlackboxNLP Workshop: Analyzing and Interpreting Neural Networks for NLP",
    month = nov,
    year = "2024",
    address = "Miami, Florida, US",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2024.blackboxnlp-1.34/",
    doi = "10.18653/v1/2024.blackboxnlp-1.34",
    pages = "577--603"
}
% ACL Anthology ID 2024.blackboxnlp-1.35; the url and doi both derive from this ID.
@inproceedings{tighidet-etal-2024-probing,
    title = "Probing Language Models on Their Knowledge Source",
    author = "Tighidet, Zineddine  and
      Mei, Jiali  and
      Piwowarski, Benjamin  and
      Gallinari, Patrick",
    editor = "Belinkov, Yonatan  and
      Kim, Najoung  and
      Jumelet, Jaap  and
      Mohebbi, Hosein  and
      Mueller, Aaron  and
      Chen, Hanjie",
    booktitle = "Proceedings of the 7th BlackboxNLP Workshop: Analyzing and Interpreting Neural Networks for NLP",
    month = nov,
    year = "2024",
    address = "Miami, Florida, US",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2024.blackboxnlp-1.35/",
    doi = "10.18653/v1/2024.blackboxnlp-1.35",
    pages = "604--614"
}
