% SemEval-2026 Task 13 system description paper (single author), as exported
% from the ACL Anthology.
% NOTE(review): the url field points at the Anthology preview/ingest host;
% confirm whether it should be replaced by the canonical aclanthology.org
% address (and a doi added) once the volume is published.
% The address field holds the venue location, per ACL Anthology convention.
@inproceedings{habib-2026-konigsberg,
    title     = {K{\"o}nigsberg at {SemEval}-2026 Task 13: Beyond Language Models: A Low-Resource Feature-Driven and Data-Flow Embedding Approach for Machine-Generated Code Detection},
    author    = {Habib, Shahir},
    editor    = {Kochmar, Ekaterina and
                 Ghosh, Debanjan and
                 North, Kai and
                 Komachi, Mamoru},
    booktitle = {Proceedings of the 20th {International} {Workshop} on {Semantic} {Evaluation} (2026)},
    month     = jul,
    year      = {2026},
    address   = {San Diego, California, USA},
    publisher = {Association for Computational Linguistics},
    url       = {https://preview.aclanthology.org/ingest-acl-workshops/2026.semeval-1.169/},
    pages     = {1288--1297},
    isbn      = {979-8-89176-414-9},
    abstract  = {The rise of Large Language Models (LLMs) has increased the need for reliable detection of machine-generated code. This paper presents a low-resource, hybrid detection framework developed for SemEval-2026 Task 13, designed to operate efficiently without the computational overhead of end-to-end fine-tuning of large models. Our approach combines (i) comprehensive feature extraction pipeline that calculates interpretable software metrics capturing stylistic and structural properties of code, and (ii) we leverage the semantic capabilities of GraphCodeBERT by extracting frozen embeddings from its pre-trained encoder to model semantic and data-flow information while preserving generalizability. This fusion enables efficient detection of machine-generated code across multiple programming languages (Python, C++, Java, and Go) and improves robustness under out-of-distribution settings. This feature-driven fusion offers a competitive, computation-efficient alternative to purely LLM-based fully fine-tuned models, achieving an F1-score of 38.26.},
}
Markdown (Informal)
[Königsberg at SemEval-2026 Task 13: Beyond Language Models: A Low-Resource Feature-Driven and Data-Flow Embedding Approach for Machine-Generated Code Detection](https://preview.aclanthology.org/ingest-acl-workshops/2026.semeval-1.169/) (Habib, SemEval 2026)
ACL