@inproceedings{bianchi-etal-2021-pre,
title = "Pre-training is a Hot Topic: Contextualized Document Embeddings Improve Topic Coherence",
author = "Bianchi, Federico and
Terragni, Silvia and
Hovy, Dirk",
booktitle = "Proceedings of the 59th Annual Meeting of the Association for Computational Linguistics and the 11th International Joint Conference on Natural Language Processing (Volume 2: Short Papers)",
month = aug,
year = "2021",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2021.acl-short.96",
doi = "10.18653/v1/2021.acl-short.96",
pages = "759--766",
abstract = "Topic models extract groups of words from documents, whose interpretation as a topic hopefully allows for a better understanding of the data. However, the resulting word groups are often not coherent, making them harder to interpret. Recently, neural topic models have shown improvements in overall coherence. Concurrently, contextual embeddings have advanced the state of the art of neural models in general. In this paper, we combine contextualized representations with neural topic models. We find that our approach produces more meaningful and coherent topics than traditional bag-of-words topic models and recent neural models. Our results indicate that future improvements in language models will translate into better topic models.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="bianchi-etal-2021-pre">
<titleInfo>
<title>Pre-training is a Hot Topic: Contextualized Document Embeddings Improve Topic Coherence</title>
</titleInfo>
<name type="personal">
<namePart type="given">Federico</namePart>
<namePart type="family">Bianchi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Silvia</namePart>
<namePart type="family">Terragni</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dirk</namePart>
<namePart type="family">Hovy</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2021-aug</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 59th Annual Meeting of the Association for Computational Linguistics and the 11th International Joint Conference on Natural Language Processing (Volume 2: Short Papers)</title>
</titleInfo>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Online</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Topic models extract groups of words from documents, whose interpretation as a topic hopefully allows for a better understanding of the data. However, the resulting word groups are often not coherent, making them harder to interpret. Recently, neural topic models have shown improvements in overall coherence. Concurrently, contextual embeddings have advanced the state of the art of neural models in general. In this paper, we combine contextualized representations with neural topic models. We find that our approach produces more meaningful and coherent topics than traditional bag-of-words topic models and recent neural models. Our results indicate that future improvements in language models will translate into better topic models.</abstract>
<identifier type="citekey">bianchi-etal-2021-pre</identifier>
<identifier type="doi">10.18653/v1/2021.acl-short.96</identifier>
<location>
<url>https://aclanthology.org/2021.acl-short.96</url>
</location>
<part>
<date>2021-aug</date>
<extent unit="page">
<start>759</start>
<end>766</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Pre-training is a Hot Topic: Contextualized Document Embeddings Improve Topic Coherence
%A Bianchi, Federico
%A Terragni, Silvia
%A Hovy, Dirk
%S Proceedings of the 59th Annual Meeting of the Association for Computational Linguistics and the 11th International Joint Conference on Natural Language Processing (Volume 2: Short Papers)
%D 2021
%8 aug
%I Association for Computational Linguistics
%C Online
%F bianchi-etal-2021-pre
%X Topic models extract groups of words from documents, whose interpretation as a topic hopefully allows for a better understanding of the data. However, the resulting word groups are often not coherent, making them harder to interpret. Recently, neural topic models have shown improvements in overall coherence. Concurrently, contextual embeddings have advanced the state of the art of neural models in general. In this paper, we combine contextualized representations with neural topic models. We find that our approach produces more meaningful and coherent topics than traditional bag-of-words topic models and recent neural models. Our results indicate that future improvements in language models will translate into better topic models.
%R 10.18653/v1/2021.acl-short.96
%U https://aclanthology.org/2021.acl-short.96
%U https://doi.org/10.18653/v1/2021.acl-short.96
%P 759-766
Markdown (Informal)
[Pre-training is a Hot Topic: Contextualized Document Embeddings Improve Topic Coherence](https://aclanthology.org/2021.acl-short.96) (Bianchi et al., ACL 2021)
ACL