@inproceedings{stahlhut-2019-interactive,
title = "Interactive Evidence Detection: train state-of-the-art model out-of-domain or simple model interactively?",
author = "Stahlhut, Chris",
booktitle = "Proceedings of the Second Workshop on Fact Extraction and VERification (FEVER)",
month = nov,
year = "2019",
address = "Hong Kong, China",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/D19-6613",
doi = "10.18653/v1/D19-6613",
pages = "79--89",
abstract = "Finding evidence is of vital importance in research as well as fact checking and an evidence detection method would be useful in speeding up this process. However, when addressing a new topic there is no training data and there are two approaches to get started. One could use large amounts of out-of-domain data to train a state-of-the-art method, or to use the small data that a person creates while working on the topic. In this paper, we address this problem in two steps. First, by simulating users who read source documents and label sentences they can use as evidence, thereby creating small amounts of training data for an interactively trained evidence detection model; and second, by comparing such an interactively trained model against a pre-trained model that has been trained on large out-of-domain data. We found that an interactively trained model not only often out-performs a state-of-the-art model but also requires significantly lower amounts of computational resources. Therefore, especially when computational resources are scarce, e.g. no GPU available, training a smaller model on the fly is preferable to training a well generalising but resource hungry out-of-domain model.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="stahlhut-2019-interactive">
    <titleInfo>
      <title>Interactive Evidence Detection: train state-of-the-art model out-of-domain or simple model interactively?</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Chris</namePart>
      <namePart type="family">Stahlhut</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2019-11</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the Second Workshop on Fact Extraction and VERification (FEVER)</title>
      </titleInfo>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Hong Kong, China</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Finding evidence is of vital importance in research as well as in fact checking, and an evidence detection method would be useful for speeding up this process. However, when addressing a new topic there is no training data, and there are two ways to get started: one could use large amounts of out-of-domain data to train a state-of-the-art method, or use the small amount of data that a person creates while working on the topic. In this paper, we address this problem in two steps: first, by simulating users who read source documents and label sentences they can use as evidence, thereby creating small amounts of training data for an interactively trained evidence detection model; and second, by comparing such an interactively trained model against a pre-trained model that has been trained on large out-of-domain data. We found that an interactively trained model not only often outperforms a state-of-the-art model but also requires significantly fewer computational resources. Therefore, especially when computational resources are scarce, e.g. when no GPU is available, training a smaller model on the fly is preferable to training a well-generalising but resource-hungry out-of-domain model.</abstract>
    <identifier type="citekey">stahlhut-2019-interactive</identifier>
    <identifier type="doi">10.18653/v1/D19-6613</identifier>
    <location>
      <url>https://aclanthology.org/D19-6613</url>
    </location>
    <part>
      <date>2019-11</date>
      <extent unit="page">
        <start>79</start>
        <end>89</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Interactive Evidence Detection: train state-of-the-art model out-of-domain or simple model interactively?
%A Stahlhut, Chris
%S Proceedings of the Second Workshop on Fact Extraction and VERification (FEVER)
%D 2019
%8 November
%I Association for Computational Linguistics
%C Hong Kong, China
%F stahlhut-2019-interactive
%X Finding evidence is of vital importance in research as well as in fact checking, and an evidence detection method would be useful for speeding up this process. However, when addressing a new topic there is no training data, and there are two ways to get started: one could use large amounts of out-of-domain data to train a state-of-the-art method, or use the small amount of data that a person creates while working on the topic. In this paper, we address this problem in two steps: first, by simulating users who read source documents and label sentences they can use as evidence, thereby creating small amounts of training data for an interactively trained evidence detection model; and second, by comparing such an interactively trained model against a pre-trained model that has been trained on large out-of-domain data. We found that an interactively trained model not only often outperforms a state-of-the-art model but also requires significantly fewer computational resources. Therefore, especially when computational resources are scarce, e.g. when no GPU is available, training a smaller model on the fly is preferable to training a well-generalising but resource-hungry out-of-domain model.
%R 10.18653/v1/D19-6613
%U https://aclanthology.org/D19-6613
%U https://doi.org/10.18653/v1/D19-6613
%P 79-89
Markdown (Informal)
[Interactive Evidence Detection: train state-of-the-art model out-of-domain or simple model interactively?](https://aclanthology.org/D19-6613) (Stahlhut, EMNLP 2019)
ACL
Chris Stahlhut. 2019. [Interactive Evidence Detection: train state-of-the-art model out-of-domain or simple model interactively?](https://aclanthology.org/D19-6613). In *Proceedings of the Second Workshop on Fact Extraction and VERification (FEVER)*, pages 79–89, Hong Kong, China. Association for Computational Linguistics.
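
The abstract above describes an interactive loop in which a (simulated) user reads a document sentence by sentence, labels each sentence as evidence or not, and a lightweight model is updated immediately after every label. The sketch below illustrates that loop under stated assumptions: the model choice (scikit-learn's `SGDClassifier` with `partial_fit` over a `HashingVectorizer`) and the toy data are illustrative stand-ins, not the paper's actual implementation.

```python
# Minimal sketch of interactive evidence detection with a simulated user.
# Assumption: a simple linear model updated incrementally on CPU, matching
# the abstract's point that no GPU is needed for on-the-fly training.
from sklearn.feature_extraction.text import HashingVectorizer
from sklearn.linear_model import SGDClassifier

# Toy stand-in for a source document: (sentence, gold evidence label) pairs.
# In the simulation, gold labels are "revealed" one sentence at a time, as if
# a user were reading the document and marking usable evidence.
document = [
    ("The study reports a 12% reduction in error rate.", 1),
    ("The weather was pleasant during the conference.", 0),
    ("Table 3 shows the model outperforms the baseline.", 1),
    ("We thank the anonymous reviewers.", 0),
]

vectorizer = HashingVectorizer(n_features=2**16)  # stateless, needs no fitting
model = SGDClassifier(loss="log_loss")            # small, CPU-friendly model

for sentence, gold_label in document:
    x = vectorizer.transform([sentence])
    # Once the model has been updated at least once, predict before the user
    # labels; tracking these predictions over time is how an interactively
    # trained model would be compared against a fixed pre-trained one.
    if hasattr(model, "classes_"):
        print(f"predicted={model.predict(x)[0]} gold={gold_label}: {sentence!r}")
    # The simulated user supplies the label; the model is updated on the fly.
    model.partial_fit(x, [gold_label], classes=[0, 1])
```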