@inproceedings{ellendorff-etal-2014-using,
title = "Using Large Biomedical Databases as Gold Annotations for Automatic Relation Extraction",
author = "Ellendorff, Tilia and
Rinaldi, Fabio and
Clematide, Simon",
booktitle = "Proceedings of the Ninth International Conference on Language Resources and Evaluation ({LREC}'14)",
month = may,
year = "2014",
address = "Reykjavik, Iceland",
publisher = "European Language Resources Association (ELRA)",
url = "http://www.lrec-conf.org/proceedings/lrec2014/pdf/1156_Paper.pdf",
pages = "3736--3741",
abstract = "We show how to use large biomedical databases in order to obtain a gold standard for training a machine learning system over a corpus of biomedical text. As an example we use the Comparative Toxicogenomics Database (CTD) and describe by means of a short case study how the obtained data can be applied. We explain how we exploit the structure of the database for compiling training material and a testset. Using a Naive Bayes document classification approach based on words, stem bigrams and MeSH descriptors we achieve a macro-average F-score of 61{\%} on a subset of 8 action terms. This outperforms a baseline system based on a lookup of stemmed keywords by more than 20{\%}. Furthermore, we present directions of future work, taking the described system as a vantage point. Future work will be aiming towards a weakly supervised system capable of discovering complete biomedical interactions and events.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="ellendorff-etal-2014-using">
<titleInfo>
<title>Using Large Biomedical Databases as Gold Annotations for Automatic Relation Extraction</title>
</titleInfo>
<name type="personal">
<namePart type="given">Tilia</namePart>
<namePart type="family">Ellendorff</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Fabio</namePart>
<namePart type="family">Rinaldi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Simon</namePart>
<namePart type="family">Clematide</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2014-may</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Ninth International Conference on Language Resources and Evaluation (LREC’14)</title>
</titleInfo>
<originInfo>
<publisher>European Language Resources Association (ELRA)</publisher>
<place>
<placeTerm type="text">Reykjavik, Iceland</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>We show how to use large biomedical databases in order to obtain a gold standard for training a machine learning system over a corpus of biomedical text. As an example we use the Comparative Toxicogenomics Database (CTD) and describe by means of a short case study how the obtained data can be applied. We explain how we exploit the structure of the database for compiling training material and a testset. Using a Naive Bayes document classification approach based on words, stem bigrams and MeSH descriptors we achieve a macro-average F-score of 61% on a subset of 8 action terms. This outperforms a baseline system based on a lookup of stemmed keywords by more than 20%. Furthermore, we present directions of future work, taking the described system as a vantage point. Future work will be aiming towards a weakly supervised system capable of discovering complete biomedical interactions and events.</abstract>
<identifier type="citekey">ellendorff-etal-2014-using</identifier>
<location>
<url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/1156_Paper.pdf</url>
</location>
<part>
<date>2014-may</date>
<extent unit="page">
<start>3736</start>
<end>3741</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Using Large Biomedical Databases as Gold Annotations for Automatic Relation Extraction
%A Ellendorff, Tilia
%A Rinaldi, Fabio
%A Clematide, Simon
%S Proceedings of the Ninth International Conference on Language Resources and Evaluation (LREC’14)
%D 2014
%8 may
%I European Language Resources Association (ELRA)
%C Reykjavik, Iceland
%F ellendorff-etal-2014-using
%X We show how to use large biomedical databases in order to obtain a gold standard for training a machine learning system over a corpus of biomedical text. As an example we use the Comparative Toxicogenomics Database (CTD) and describe by means of a short case study how the obtained data can be applied. We explain how we exploit the structure of the database for compiling training material and a testset. Using a Naive Bayes document classification approach based on words, stem bigrams and MeSH descriptors we achieve a macro-average F-score of 61% on a subset of 8 action terms. This outperforms a baseline system based on a lookup of stemmed keywords by more than 20%. Furthermore, we present directions of future work, taking the described system as a vantage point. Future work will be aiming towards a weakly supervised system capable of discovering complete biomedical interactions and events.
%U http://www.lrec-conf.org/proceedings/lrec2014/pdf/1156_Paper.pdf
%P 3736-3741
Markdown (Informal)
[Using Large Biomedical Databases as Gold Annotations for Automatic Relation Extraction](http://www.lrec-conf.org/proceedings/lrec2014/pdf/1156_Paper.pdf) (Ellendorff et al., LREC 2014)
ACL