@inproceedings{basaldella-collier-2019-bioreddit,
title = "{B}io{R}eddit: Word Embeddings for User-Generated Biomedical {NLP}",
author = "Basaldella, Marco and
Collier, Nigel",
booktitle = "Proceedings of the Tenth International Workshop on Health Text Mining and Information Analysis (LOUHI 2019)",
month = nov,
year = "2019",
address = "Hong Kong",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/D19-6205",
doi = "10.18653/v1/D19-6205",
pages = "34--38",
abstract = "Word embeddings, in their different shapes and iterations, have changed the natural language processing research landscape in the last years. The biomedical text processing field is no stranger to this revolution; however, scholars in the field largely trained their embeddings on scientific documents only, even when working on user-generated data. In this paper we show how training embeddings from a corpus collected from user-generated text from medical forums heavily influences the performance on downstream tasks, outperforming embeddings trained both on general purpose data or on scientific papers when applied on user-generated content.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="basaldella-collier-2019-bioreddit">
<titleInfo>
<title>BioReddit: Word Embeddings for User-Generated Biomedical NLP</title>
</titleInfo>
<name type="personal">
<namePart type="given">Marco</namePart>
<namePart type="family">Basaldella</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nigel</namePart>
<namePart type="family">Collier</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2019-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Tenth International Workshop on Health Text Mining and Information Analysis (LOUHI 2019)</title>
</titleInfo>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Hong Kong</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Word embeddings, in their different shapes and iterations, have changed the natural language processing research landscape in the last years. The biomedical text processing field is no stranger to this revolution; however, scholars in the field largely trained their embeddings on scientific documents only, even when working on user-generated data. In this paper we show how training embeddings from a corpus collected from user-generated text from medical forums heavily influences the performance on downstream tasks, outperforming embeddings trained both on general purpose data or on scientific papers when applied on user-generated content.</abstract>
<identifier type="citekey">basaldella-collier-2019-bioreddit</identifier>
<identifier type="doi">10.18653/v1/D19-6205</identifier>
<location>
<url>https://aclanthology.org/D19-6205</url>
</location>
<part>
<date>2019-11</date>
<extent unit="page">
<start>34</start>
<end>38</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T BioReddit: Word Embeddings for User-Generated Biomedical NLP
%A Basaldella, Marco
%A Collier, Nigel
%S Proceedings of the Tenth International Workshop on Health Text Mining and Information Analysis (LOUHI 2019)
%D 2019
%8 nov
%I Association for Computational Linguistics
%C Hong Kong
%F basaldella-collier-2019-bioreddit
%X Word embeddings, in their different shapes and iterations, have changed the natural language processing research landscape in the last years. The biomedical text processing field is no stranger to this revolution; however, scholars in the field largely trained their embeddings on scientific documents only, even when working on user-generated data. In this paper we show how training embeddings from a corpus collected from user-generated text from medical forums heavily influences the performance on downstream tasks, outperforming embeddings trained both on general purpose data or on scientific papers when applied on user-generated content.
%R 10.18653/v1/D19-6205
%U https://aclanthology.org/D19-6205
%U https://doi.org/10.18653/v1/D19-6205
%P 34-38
Markdown (Informal)
[BioReddit: Word Embeddings for User-Generated Biomedical NLP](https://aclanthology.org/D19-6205) (Basaldella & Collier, LOUHI 2019)
ACL