% Ellendorff, Rinaldi and Clematide: LREC 2014 conference paper (Anthology ID L14-1110).
% Text outside an entry is ignored by BibTeX, so this header acts as a comment.
@inproceedings{ellendorff-etal-2014-using,
    title     = {Using Large Biomedical Databases as Gold Annotations for Automatic Relation Extraction},
    author    = {Ellendorff, Tilia and
                 Rinaldi, Fabio and
                 Clematide, Simon},
    editor    = {Calzolari, Nicoletta and
                 Choukri, Khalid and
                 Declerck, Thierry and
                 Loftsson, Hrafn and
                 Maegaard, Bente and
                 Mariani, Joseph and
                 Moreno, Asuncion and
                 Odijk, Jan and
                 Piperidis, Stelios},
    booktitle = {Proceedings of the Ninth International Conference on Language Resources and Evaluation ({LREC}'14)},
    month     = may,
    year      = {2014},
    address   = {Reykjavik, Iceland},
    publisher = {European Language Resources Association (ELRA)},
    url       = {https://aclanthology.org/L14-1110/},
    pages     = {3736--3741},
    abstract  = {We show how to use large biomedical databases in order to obtain a gold standard for training a machine learning system over a corpus of biomedical text. As an example we use the Comparative Toxicogenomics Database (CTD) and describe by means of a short case study how the obtained data can be applied. We explain how we exploit the structure of the database for compiling training material and a testset. Using a Naive Bayes document classification approach based on words, stem bigrams and MeSH descriptors we achieve a macro-average F-score of 61{\%} on a subset of 8 action terms. This outperforms a baseline system based on a lookup of stemmed keywords by more than 20{\%}. Furthermore, we present directions of future work, taking the described system as a vantage point. Future work will be aiming towards a weakly supervised system capable of discovering complete biomedical interactions and events.},
}
Markdown (Informal)
[Using Large Biomedical Databases as Gold Annotations for Automatic Relation Extraction](https://aclanthology.org/L14-1110/) (Ellendorff et al., LREC 2014)
ACL