% SemEval-2026 Task 13 system description paper (UIT-AMMC team), ACL Anthology export.
% Case protection uses whole-word braces so sentence-casing styles keep acronyms and
% proper nouns intact without splitting words internally.
% NOTE(review): the url field points at an ACL Anthology preview/ingest build host;
% confirm whether it should be replaced by the canonical permalink once the volume is published.
@inproceedings{pham-etal-2026-uit,
  title     = {{UIT}-{AMMC} at {SemEval}-2026 Task 13: Exploiting Structural Formatting Signatures for Robust {AI}-Generated Code Detection},
  author    = {Pham, Cuong and
               Nguyen, Minh and
               Le, Minh and
               Nguyen, An and
               Nguyen, Chinh},
  editor    = {Kochmar, Ekaterina and
               Ghosh, Debanjan and
               North, Kai and
               Komachi, Mamoru},
  booktitle = {Proceedings of the 20th {International} {Workshop} on {Semantic} {Evaluation} (2026)},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, USA},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-acl-workshops/2026.semeval-1.60/},
  pages     = {418--425},
  isbn      = {979-8-89176-414-9},
  abstract  = {We participated in Subtask A with our Structure-Aware Contrastive Cascade, a multi-stage architecture designed to distinguish between human-authored and machine-generated code by integrating generative reasoning with explicit structural linguistic features. Our system focuses on exploiting structural formatting signatures that frequently emerge in AI-generated code as a byproduct of post-training alignment and readability optimization. The pipeline utilizes a Qwen-2.5-Coder 14B model fine-tuned via QLoRA, incorporating stochastic data augmentation techniques to ensure robustness across unseen programming languages. Final classification is achieved through a late-fusion mechanism that combines contrastive probability scores with statistical metrics of code presentation density. For samples exhibiting high epistemic uncertainty, we implement a multi-agent adversarial debate step to refine the final verdict. This approach enabled our system to achieve a Macro F1 score of 0.802, ranking 3rd on the official leaderboard.},
}
Markdown (Informal)
[UIT-AMMC at SemEval-2026 Task 13: Exploiting Structural Formatting Signatures for Robust AI-Generated Code Detection](https://preview.aclanthology.org/ingest-acl-workshops/2026.semeval-1.60/) (Pham et al., SemEval 2026)
ACL