@comment{
  System-description paper for SemEval-2026 Task 13 (team Codexa), exported from the ACL Anthology.
  NOTE(review): the url field points at a preview/ingest host; confirm whether it should be replaced
  by the canonical Anthology address once the volume is published.
}
@inproceedings{dervisoglu-site-2026-codexa,
  title     = {Codexa at {SemEval}-2026 Task 13: Loss Engineering and Diverse Ensemble Strategies for Multi-Class Code Authorship Attribution},
  author    = {Dervi{\c{s}}o{\u{g}}lu, An{\i}l and
               Site, Atakan},
  editor    = {Kochmar, Ekaterina and
               Ghosh, Debanjan and
               North, Kai and
               Komachi, Mamoru},
  booktitle = {Proceedings of the 20th {International} {Workshop} on {Semantic} {Evaluation} (2026)},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, USA},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-acl-workshops/2026.semeval-1.441/},
  pages     = {3602--3607},
  isbn      = {979-8-89176-414-9},
  abstract  = {We describe our system for SemEval-2026 Task 13, Subtask B: code classification into 11 categories (human-written or generated by one of 10 LLM families). The task presents extreme class imbalance and distribution shift across multiple generators provided in the dataset (31 in training, 59 in test, with 36 unseen). On that focus, we approached with two components: (1) UniXcoder as the encoder with Label-Distribution-Aware Margin (LDAM) loss for handling class imbalance, which provides a +7{\%} absolute improvement over the cross-entropy baseline; and (2) a diverse ensemble of 12 models trained with different objectives and architectures which is detailed in the appendix, combined with hard voting. Our system achieves 41.28{\%} Macro F1 on the official test set. We find that loss engineering and ensemble diversity matter more than domain adaptation techniques, which consistently degraded test performance.},
}
Markdown (Informal)
[Codexa at SemEval-2026 Task 13: Loss Engineering and Diverse Ensemble Strategies for Multi-Class Code Authorship Attribution](https://preview.aclanthology.org/ingest-acl-workshops/2026.semeval-1.441/) (Dervişoğlu & Site, SemEval 2026)
ACL