@comment{ACL 2026 long paper (Pu et al.) on generalizable AI-text detection.
  NOTE(review): the url field targets an ACL Anthology preview/ingest build;
  confirm the permanent address and update it once the volume is live.}
@inproceedings{pu-etal-2026-breaking,
  title     = {Breaking the Generator Barrier: Disentangled Representation for Generalizable {AI}-Text Detection},
  author    = {Pu, Xiao and
               Cheng, Zepeng and
               Yuan, Lin and
               Wu, Yu and
               Bi, Xiuli},
  editor    = {Liakata, Maria and
               Moreira, Viviane P. and
               Zhang, Jiajun and
               Jurgens, David},
  booktitle = {Proceedings of the 64th Annual Meeting of the {Association} for {Computational} {Linguistics} (Volume 1: Long Papers)},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, United States},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-acl/2026.acl-long.120/},
  pages     = {2586--2598},
  isbn      = {979-8-89176-390-6},
  abstract  = {As large language models (LLMs) generate text that increasingly resembles human writing, the subtle cues that distinguish AI-generated content from human-written content become increasingly challenging to capture. Reliance on generator-specific artifacts is inherently unstable, since new models emerge rapidly and reduce the robustness of such shortcuts. This generalizes unseen generators as a central and challenging problem for AI-text detection. To tackle this challenge, we propose a progressively structured framework that disentangles AI-detection semantics from generator-aware artifacts. This is achieved through a compact latent encoding that encourages semantic minimality, followed by perturbation-based regularization to reduce residual entanglement, and finally a discriminative adaptation stage that aligns representations with task objectives. Experiments on MAGE benchmark, covering 20 representative LLMs across 7 categories, demonstrate consistent improvements over state-of-the-art methods, achieving up to 24.2{\%} accuracy gain and 26.2{\%} $F_1$ improvement. Notably, performance continues to improve as the diversity of training generators increases, confirming strong scalability and generalization in open-set scenarios. Our source code will be publicly available at https://github.com/PuXiao06/DRGD.},
}
Markdown (Informal)
[Breaking the Generator Barrier: Disentangled Representation for Generalizable AI-Text Detection](https://preview.aclanthology.org/ingest-acl/2026.acl-long.120/) (Pu et al., ACL 2026)
ACL