BibTeX
@inproceedings{munro-morrison-2020-detecting,
    title = "Detecting Independent Pronoun Bias with Partially-Synthetic Data Generation",
    author = "Munro, Robert and
      Morrison, Alex (Carmen)",
    booktitle = "Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP)",
    month = nov,
    year = "2020",
    address = "Online",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2020.emnlp-main.157",
    doi = "10.18653/v1/2020.emnlp-main.157",
    pages = "2011--2017",
    abstract = "We report that state-of-the-art parsers consistently failed to identify {``}hers{''} and {``}theirs{''} as pronouns but identified the masculine equivalent {``}his{''}. We find that the same biases exist in recent language models like BERT. While some of the bias comes from known sources, like training data with gender imbalances, we find that the bias is {\_}amplified{\_} in the language models and that linguistic differences between English pronouns that are not inherently biased can become biases in some machine learning models. We introduce a new technique for measuring bias in models, using Bayesian approximations to generate partially-synthetic data from the model itself.",
}
MODS XML
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="munro-morrison-2020-detecting">
    <titleInfo>
        <title>Detecting Independent Pronoun Bias with Partially-Synthetic Data Generation</title>
    </titleInfo>
    <name type="personal">
        <namePart type="given">Robert</namePart>
        <namePart type="family">Munro</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <name type="personal">
        <namePart type="given">Alex</namePart>
        <namePart type="given">(Carmen)</namePart>
        <namePart type="family">Morrison</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <originInfo>
        <dateIssued>2020-11</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
        <titleInfo>
            <title>Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP)</title>
        </titleInfo>
        <originInfo>
            <publisher>Association for Computational Linguistics</publisher>
            <place>
                <placeTerm type="text">Online</placeTerm>
            </place>
        </originInfo>
        <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>We report that state-of-the-art parsers consistently failed to identify “hers” and “theirs” as pronouns but identified the masculine equivalent “his”. We find that the same biases exist in recent language models like BERT. While some of the bias comes from known sources, like training data with gender imbalances, we find that the bias is _amplified_ in the language models and that linguistic differences between English pronouns that are not inherently biased can become biases in some machine learning models. We introduce a new technique for measuring bias in models, using Bayesian approximations to generate partially-synthetic data from the model itself.</abstract>
    <identifier type="citekey">munro-morrison-2020-detecting</identifier>
    <identifier type="doi">10.18653/v1/2020.emnlp-main.157</identifier>
    <location>
        <url>https://aclanthology.org/2020.emnlp-main.157</url>
    </location>
    <part>
        <date>2020-11</date>
        <extent unit="page">
            <start>2011</start>
            <end>2017</end>
        </extent>
    </part>
</mods>
</modsCollection>
Endnote
%0 Conference Proceedings
%T Detecting Independent Pronoun Bias with Partially-Synthetic Data Generation
%A Munro, Robert
%A Morrison, Alex (Carmen)
%S Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP)
%D 2020
%8 nov
%I Association for Computational Linguistics
%C Online
%F munro-morrison-2020-detecting
%X We report that state-of-the-art parsers consistently failed to identify “hers” and “theirs” as pronouns but identified the masculine equivalent “his”. We find that the same biases exist in recent language models like BERT. While some of the bias comes from known sources, like training data with gender imbalances, we find that the bias is _amplified_ in the language models and that linguistic differences between English pronouns that are not inherently biased can become biases in some machine learning models. We introduce a new technique for measuring bias in models, using Bayesian approximations to generate partially-synthetic data from the model itself.
%R 10.18653/v1/2020.emnlp-main.157
%U https://aclanthology.org/2020.emnlp-main.157
%U https://doi.org/10.18653/v1/2020.emnlp-main.157
%P 2011-2017
Markdown (Informal)
[Detecting Independent Pronoun Bias with Partially-Synthetic Data Generation](https://aclanthology.org/2020.emnlp-main.157) (Munro & Morrison, EMNLP 2020)
ACL
Robert Munro and Alex (Carmen) Morrison. 2020. Detecting Independent Pronoun Bias with Partially-Synthetic Data Generation. In Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP), pages 2011–2017, Online. Association for Computational Linguistics.