@inproceedings{rozen-etal-2019-diversify,
title = "Diversify Your Datasets: Analyzing Generalization via Controlled Variance in Adversarial Datasets",
author = "Rozen, Ohad and
Shwartz, Vered and
Aharoni, Roee and
Dagan, Ido",
booktitle = "Proceedings of the 23rd Conference on Computational Natural Language Learning (CoNLL)",
month = nov,
year = "2019",
address = "Hong Kong, China",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/K19-1019",
doi = "10.18653/v1/K19-1019",
pages = "196--205",
abstract = "Phenomenon-specific {``}adversarial{''} datasets have been recently designed to perform targeted stress-tests for particular inference types. Recent work (Liu et al., 2019a) proposed that such datasets can be utilized for training NLI and other types of models, often allowing to learn the phenomenon in focus and improve on the challenge dataset, indicating a {``}blind spot{''} in the original training data. Yet, although a model can improve in such a training process, it might still be vulnerable to other challenge datasets targeting the same phenomenon but drawn from a different distribution, such as having a different syntactic complexity level. In this work, we extend this method to drive conclusions about a model{'}s ability to learn and generalize a target phenomenon rather than to {``}learn{''} a dataset, by controlling additional aspects in the adversarial datasets. We demonstrate our approach on two inference phenomena {--} dative alternation and numerical reasoning, elaborating, and in some cases contradicting, the results of Liu et al.. Our methodology enables building better challenge datasets for creating more robust models, and may yield better model understanding and subsequent overarching improvements.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="rozen-etal-2019-diversify">
<titleInfo>
<title>Diversify Your Datasets: Analyzing Generalization via Controlled Variance in Adversarial Datasets</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ohad</namePart>
<namePart type="family">Rozen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Vered</namePart>
<namePart type="family">Shwartz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Roee</namePart>
<namePart type="family">Aharoni</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ido</namePart>
<namePart type="family">Dagan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2019-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 23rd Conference on Computational Natural Language Learning (CoNLL)</title>
</titleInfo>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Hong Kong, China</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Phenomenon-specific “adversarial” datasets have recently been designed to perform targeted stress-tests for particular inference types. Recent work (Liu et al., 2019a) proposed that such datasets can be utilized for training NLI and other types of models, often allowing models to learn the phenomenon in focus and improve on the challenge dataset, indicating a “blind spot” in the original training data. Yet, although a model can improve in such a training process, it might still be vulnerable to other challenge datasets targeting the same phenomenon but drawn from a different distribution, such as one with a different syntactic complexity level. In this work, we extend this method to draw conclusions about a model’s ability to learn and generalize a target phenomenon rather than to “learn” a dataset, by controlling additional aspects in the adversarial datasets. We demonstrate our approach on two inference phenomena – dative alternation and numerical reasoning – elaborating on, and in some cases contradicting, the results of Liu et al. Our methodology enables building better challenge datasets for creating more robust models, and may yield better model understanding and subsequent overarching improvements.</abstract>
<identifier type="citekey">rozen-etal-2019-diversify</identifier>
<identifier type="doi">10.18653/v1/K19-1019</identifier>
<location>
<url>https://aclanthology.org/K19-1019</url>
</location>
<part>
<date>2019-11</date>
<extent unit="page">
<start>196</start>
<end>205</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Diversify Your Datasets: Analyzing Generalization via Controlled Variance in Adversarial Datasets
%A Rozen, Ohad
%A Shwartz, Vered
%A Aharoni, Roee
%A Dagan, Ido
%S Proceedings of the 23rd Conference on Computational Natural Language Learning (CoNLL)
%D 2019
%8 nov
%I Association for Computational Linguistics
%C Hong Kong, China
%F rozen-etal-2019-diversify
%X Phenomenon-specific “adversarial” datasets have recently been designed to perform targeted stress-tests for particular inference types. Recent work (Liu et al., 2019a) proposed that such datasets can be utilized for training NLI and other types of models, often allowing models to learn the phenomenon in focus and improve on the challenge dataset, indicating a “blind spot” in the original training data. Yet, although a model can improve in such a training process, it might still be vulnerable to other challenge datasets targeting the same phenomenon but drawn from a different distribution, such as one with a different syntactic complexity level. In this work, we extend this method to draw conclusions about a model’s ability to learn and generalize a target phenomenon rather than to “learn” a dataset, by controlling additional aspects in the adversarial datasets. We demonstrate our approach on two inference phenomena – dative alternation and numerical reasoning – elaborating on, and in some cases contradicting, the results of Liu et al. Our methodology enables building better challenge datasets for creating more robust models, and may yield better model understanding and subsequent overarching improvements.
%R 10.18653/v1/K19-1019
%U https://aclanthology.org/K19-1019
%U https://doi.org/10.18653/v1/K19-1019
%P 196-205
Markdown (Informal)
[Diversify Your Datasets: Analyzing Generalization via Controlled Variance in Adversarial Datasets](https://aclanthology.org/K19-1019) (Rozen et al., CoNLL 2019)
ACL
Ohad Rozen, Vered Shwartz, Roee Aharoni, and Ido Dagan. 2019. Diversify Your Datasets: Analyzing Generalization via Controlled Variance in Adversarial Datasets. In Proceedings of the 23rd Conference on Computational Natural Language Learning (CoNLL), pages 196–205, Hong Kong, China. Association for Computational Linguistics.