% Workshop paper (LTEDI 2023) on POS-tagging of the Swedish gender-neutral pronoun "hen".
% NOTE(review): the title spelling "enhence" looks intentional (a play on "hen"); do not "correct" it.
@inproceedings{bjorklund-devinney-2023-computer,
    title     = {Computer, enhence: {POS}-tagging improvements for nonbinary pronoun use in {Swedish}},
    author    = {Bj{\"o}rklund, Henrik and
                 Devinney, Hannah},
    editor    = {Chakravarthi, Bharathi R. and
                 Bharathi, B. and
                 Griffith, Josephine and
                 Bali, Kalika and
                 Buitelaar, Paul},
    booktitle = {Proceedings of the Third Workshop on Language Technology for Equality, Diversity and Inclusion},
    month     = sep,
    year      = {2023},
    address   = {Varna, Bulgaria},
    publisher = {INCOMA Ltd., Shoumen, Bulgaria},
    url       = {https://aclanthology.org/2023.ltedi-1.8/},
    pages     = {54--61},
    abstract  = {Part of Speech (POS) taggers for Swedish routinely fail for the third person gender-neutral pronoun ``hen'', despite the fact that it has been a well-established part of the Swedish language since at least 2014. In addition to simply being a form of gender bias, this failure can have negative effects on other tasks relying on POS information. We demonstrate the usefulness of semi-synthetic augmented datasets in a case study, retraining a POS tagger to correctly recognize ``hen'' as a personal pronoun. We evaluate our retrained models for both tag accuracy and on a downstream task (dependency parsing) in a classical NLP pipeline. Our results show that adding such data works to correct for the disparity in performance. The accuracy rate for identifying ``hen'' as a pronoun can be brought up to acceptable levels with only minor adjustments to the tagger{'}s vocabulary files. Performance parity to gendered pronouns can be reached after retraining with only a few hundred examples. This increase in POS tag accuracy also results in improvements for dependency parsing sentences containing hen.}
}
Markdown (Informal)
[Computer, enhence: POS-tagging improvements for nonbinary pronoun use in Swedish](https://aclanthology.org/2023.ltedi-1.8/) (Björklund & Devinney, LTEDI 2023)
ACL