@inproceedings{kim-etal-2025-leveraging,
title = "Leveraging Multilingual Training for Authorship Representation: Enhancing Generalization across Languages and Domains",
author = "Kim, Junghwan and
Zhang, Haotian and
Jurgens, David",
editor = "Christodoulopoulos, Christos and
Chakraborty, Tanmoy and
Rose, Carolyn and
Peng, Violet",
booktitle = "Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing",
month = nov,
year = "2025",
address = "Suzhou, China",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/ingest-emnlp/2025.emnlp-main.1766/",
pages = "34855--34880",
ISBN = "979-8-89176-332-6",
abstract = "Authorship representation (AR) learning, which models an author{'}s unique writing style, has demonstrated strong performance in authorship attribution tasks. However, prior research has primarily focused on monolingual settings{---}mostly in English{---}leaving the potential benefits of multilingual AR models underexplored. We introduce a novel method for multilingual AR learning that incorporates two key innovations: probabilistic content masking, which encourages the model to focus on stylistically indicative words rather than content-specific words, and language-aware batching, which improves contrastive learning by reducing cross-lingual interference. Our model is trained on over 4.5 million authors across 36 languages and 13 domains. It consistently outperforms monolingual baselines in 21 out of 22 non-English languages, achieving an average Recall@8 improvement of 4.85{\%}, with a maximum gain of 15.91{\%} in a single language. Furthermore, it exhibits stronger cross-lingual and cross-domain generalization compared to a monolingual model trained solely on English. Our analysis confirms the effectiveness of both proposed techniques, highlighting their critical roles in the model{'}s improved performance."
}