% SemEval-2026 Task 13 system description paper (record exported from the ACL Anthology).
% NOTE(review): the url field points at a preview.aclanthology.org ingest build, which may
%   not be a permanent address -- confirm the canonical link once the volume is published.
% NOTE(review): the address field holds the event location as exported -- confirm whether
%   the target bibliography style expects the publisher's city there instead.
@inproceedings{sukhotin-yu-2026-hyperparameteromens,
    title = "{HyperparameterOmens} at {SemEval}-2026 Task 13: Various approaches to detecting machine-generated code",
    author = "Sukhotin, Dmitry and
      Yu, How",
    editor = "Kochmar, Ekaterina and
      Ghosh, Debanjan and
      North, Kai and
      Komachi, Mamoru",
    booktitle = "Proceedings of the 20th {International} {Workshop} on {Semantic} {Evaluation} (2026)",
    month = jul,
    year = "2026",
    address = "San Diego, California, USA",
    publisher = "Association for Computational Linguistics",
    url = "https://preview.aclanthology.org/ingest-acl-workshops/2026.semeval-1.177/",
    pages = "1372--1378",
    isbn = "979-8-89176-414-9",
    abstract = "We present our systems for SemEval-2026 Task 13, built on the Droid resource suite and benchmark setting. For Subtask A (binary classification of human-written vs. machine-generated code), lexical baselines such as TF{--}IDF and character n-grams transferred poorly from the LeetCode training distribution to the production-code evaluation split. After correcting pipeline errors that obscured true performance and selecting stable AST features under domain shift, our final system uses 5 uncorrelated features and achieves 0.57 macro F1 on the public test set. For Subtask C (4-way authorship classification of human, AI, hybrid, and adversarial) lexical baselines performed poorly under a significant vocabulary shift. Deep semantic models proved more promising, and a per-class weighted ensemble which included these models achieved 0.57 macro F1 on the public test set."
}
Markdown (Informal)
[HyperparameterOmens at SemEval-2026 Task 13: Various approaches to detecting machine-generated code](https://preview.aclanthology.org/ingest-acl-workshops/2026.semeval-1.177/) (Sukhotin & Yu, SemEval 2026)
ACL