% CheckMIABench (Wang, Wang, Li, Neel), ACL 2026 short paper, pp. 364--370.
% NOTE(review): the url field points at the Anthology preview/ingest host;
% presumably it should be swapped for the canonical Anthology URL once the
% volume is published -- confirm before relying on it.
@inproceedings{wang-etal-2026-checkmiabench,
  title     = {{CheckMIABench}: Firm Foundations For Membership Inference Attacks on Language Models},
  author    = {Wang, Jeffrey George and
               Wang, Jason and
               Li, Marvin and
               Neel, Seth},
  editor    = {Liakata, Maria and
               Moreira, Viviane P. and
               Zhang, Jiajun and
               Jurgens, David},
  booktitle = {Proceedings of the 64th Annual Meeting of the {Association} for {Computational} {Linguistics} (Volume 2: Short Papers)},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, United States},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-acl/2026.acl-short.30/},
  pages     = {364--370},
  isbn      = {979-8-89176-391-3},
  abstract  = {Membership inference attacks (MIAs) are a canonical way to assess a machine learning model{'}s privacy properties. Although several attempts have been made to evaluate MIAs on language models, the extant literature has suffered numerous difficulties in constructing clean evaluations to test new techniques. In particular, subtle distribution shifts between member and non-member sets can undermine the statistical validity of MIAs; recent work has underscored this by showing that ``blind'' methods with no access to the underlying model can perform far better than published methods on the same benchmarks. This paper constructs a benchmark for principled evaluation of MIAs against LLMs, by leveraging the insight that training data before and after a fixed point during training are drawn from the same distribution. Therefore, all open-source models with intermediate checkpoints and public training data can be converted into MIA testbeds. We apply our framework to a half-dozen published attacks on the Pythia and OLMo family of models, from 70M to 7B parameters. To facilitate further privacy research, we open-source a modular library for designing and implementing attacks in this setting: https://github.com/safr-ai-lab/pandora{\_}llm.},
}
Markdown (Informal)
[CheckMIABench: Firm Foundations For Membership Inference Attacks on Language Models](https://preview.aclanthology.org/ingest-acl/2026.acl-short.30/) (Wang et al., ACL 2026)
ACL