% SemEval-2026 Task 13 system paper on machine-generated code detection (Santhanam, Sarkar, Sharma).
% NOTE(review): the url field targets a preview.aclanthology.org ingest branch -- confirm the permanent link before relying on it.
@inproceedings{santhanam-etal-2026-stylometry,
  title     = {Stylometry at {SemEval}-2026 Task 13: Clustered Stylometric Modeling for Machine-Generated Code Detection},
  author    = {Santhanam, Sruthi and
               Sarkar, Parthib and
               Sharma, Yashvardhan},
  editor    = {Kochmar, Ekaterina and
               Ghosh, Debanjan and
               North, Kai and
               Komachi, Mamoru},
  booktitle = {Proceedings of the 20th {International} {Workshop} on {Semantic} {Evaluation} (2026)},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, USA},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-acl-workshops/2026.semeval-1.172/},
  pages     = {1319--1325},
  isbn      = {979-8-89176-414-9},
  abstract  = {Machine-generated code detection is examined under out-of-distribution conditions where robust generalization is required. A hybrid feature representation is used in which code snippets are encoded through character-level TF{--}IDF patterns together with explicit structural indicators capturing properties such as verbosity and formatting behavior. Variability across generators is handled through clustering-based expert specialization, and predictions are produced using an ensemble of logistic regression and Na{\"i}ve Bayes models with calibrated thresholds. Experimental results show that the proposed approach performs competitively despite relying on simple linear classifiers. The findings suggest that persistent structural patterns in code provide reliable cross-domain signals for identifying machine-generated programs.},
}
Markdown (Informal)
[Stylometry at SemEval-2026 Task 13: Clustered Stylometric Modeling for Machine-Generated Code Detection](https://preview.aclanthology.org/ingest-acl-workshops/2026.semeval-1.172/) (Santhanam et al., SemEval 2026)
ACL