% UNLP 2025 workshop paper; ACL Anthology ID 2025.unlp-1.16.
% The url field points at the canonical Anthology page for that ID.
@inproceedings{nahurna-romanyshyn-2025-gender,
  author    = {Nahurna, Olha and Romanyshyn, Mariana},
  title     = {Gender Swapping as a Data Augmentation Technique: Developing Gender-Balanced Datasets for {Ukrainian} Language Processing},
  editor    = {Romanyshyn, Mariana},
  booktitle = {Proceedings of the Fourth Ukrainian Natural Language Processing Workshop ({UNLP} 2025)},
  month     = jul,
  year      = {2025},
  address   = {Vienna, Austria (online)},
  publisher = {Association for Computational Linguistics},
  pages     = {147--161},
  isbn      = {979-8-89176-269-5},
  url       = {https://aclanthology.org/2025.unlp-1.16/},
  abstract  = {This paper presents a pipeline for generating gender-balanced datasets through sentence-level gender swapping, addressing the gender-imbalance issue in Ukrainian texts. We select sentences with gender-marked entities, focusing on job titles, generate their inverted alternatives using LLMs and human-in-the-loop, and fine-tune Aya-101 on the resulting dataset for the task of gender swapping. Additionally, we train a Named Entity Recognition (NER) model on gender-balanced data, demonstrating its ability to better recognize gendered entities. The findings unveil the potential of gender-balanced datasets to enhance model robustness and support more fair language processing. Finally, we make a gender-swapped version of NER-UK~2.0 and the fine-tuned Aya-101 model available for download and further research.},
}
Markdown (Informal)
[Gender Swapping as a Data Augmentation Technique: Developing Gender-Balanced Datasets for Ukrainian Language Processing](https://aclanthology.org/2025.unlp-1.16/) (Nahurna & Romanyshyn, UNLP 2025)
ACL