% Workshop paper record for the WikiFirst corpus paper (LaTeCH-CLfL 2026 proceedings).
% Case-sensitive words are protected with whole-word braces so sentence-casing
% styles keep them intact without breaking kerning or hyphenation.
% NOTE(review): the url field points at the Anthology preview/ingest host;
% confirm the canonical address once the volume is published.
@inproceedings{nguyen-etal-2026-wikifirst-genre,
  author    = {Nguyen, Dung and
               Sat, G. {\c{C}}a{\u{g}}atay and
               Pyshkin, Evgeny and
               Blake, John},
  title     = {{WikiFirst}: A Genre-Fixed, Content-controlled Corpus for Evaluating Content Effects in Authorship Analysis},
  editor    = {Alves, Diego and
               Bizzoni, Yuri and
               Degaetano-Ortlieb, Stefania and
               Kazantseva, Anna and
               Pagel, Janis and
               Szpakowicz, Stan},
  booktitle = {Proceedings of the 10th Joint {SIGHUM} Workshop on Computational Linguistics for Cultural Heritage, Social Sciences, Humanities and Literature 2026},
  month     = mar,
  year      = {2026},
  address   = {Rabat, Morocco},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-eacl/2026.latechclfl-1.31/},
  pages     = {323--327},
  isbn      = {979-8-89176-373-9},
  abstract  = {This paper presents the design and construction of WikiFirst, a corpus for investigating the impact of content variation on authorship similarity under a fixed genre. Prior work has investigated individual authorial style and impact of genre. However, the role of content has remained underexplored due to the lack of suitable data. We address this gap by constructing a Wikipedia-based corpus consisting exclusively of first revisions authored by non-anonymous editors, thereby ensuring high authorship certainty while maintaining a stable encyclopaedic genre.},
}
Markdown (Informal)
[WikiFirst: A Genre-Fixed, Content-controlled Corpus for Evaluating Content Effects in Authorship Analysis](https://preview.aclanthology.org/ingest-eacl/2026.latechclfl-1.31/) (Nguyen et al., LaTeCH-CLfL 2026)
ACL