@inproceedings{hennig-wilson-2020-diachronic,
title = "Diachronic Embeddings for People in the News",
author = "Hennig, Felix and
Wilson, Steven",
booktitle = "Proceedings of the Fourth Workshop on Natural Language Processing and Computational Social Science",
month = nov,
year = "2020",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2020.nlpcss-1.19",
doi = "10.18653/v1/2020.nlpcss-1.19",
pages = "173--183",
abstract = "Previous English-language diachronic change models based on word embeddings have typically used single tokens to represent entities, including names of people. This leads to issues with both ambiguity (resulting in one embedding representing several distinct and unrelated people) and unlinked references (leading to several distinct embeddings which represent the same person). In this paper, we show that using named entity recognition and heuristic name linking steps before training a diachronic embedding model leads to more accurate representations of references to people, as compared to the token-only baseline. In a large news corpus of articles from The Guardian, we provide examples of several types of analysis that can be performed using these new embeddings. Further, we show that real world events and context changes can be detected using our proposed model.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="hennig-wilson-2020-diachronic">
<titleInfo>
<title>Diachronic Embeddings for People in the News</title>
</titleInfo>
<name type="personal">
<namePart type="given">Felix</namePart>
<namePart type="family">Hennig</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Steven</namePart>
<namePart type="family">Wilson</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-nov</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Fourth Workshop on Natural Language Processing and Computational Social Science</title>
</titleInfo>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Online</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Previous English-language diachronic change models based on word embeddings have typically used single tokens to represent entities, including names of people. This leads to issues with both ambiguity (resulting in one embedding representing several distinct and unrelated people) and unlinked references (leading to several distinct embeddings which represent the same person). In this paper, we show that using named entity recognition and heuristic name linking steps before training a diachronic embedding model leads to more accurate representations of references to people, as compared to the token-only baseline. In a large news corpus of articles from The Guardian, we provide examples of several types of analysis that can be performed using these new embeddings. Further, we show that real world events and context changes can be detected using our proposed model.</abstract>
<identifier type="citekey">hennig-wilson-2020-diachronic</identifier>
<identifier type="doi">10.18653/v1/2020.nlpcss-1.19</identifier>
<location>
<url>https://aclanthology.org/2020.nlpcss-1.19</url>
</location>
<part>
<date>2020-nov</date>
<extent unit="page">
<start>173</start>
<end>183</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Diachronic Embeddings for People in the News
%A Hennig, Felix
%A Wilson, Steven
%S Proceedings of the Fourth Workshop on Natural Language Processing and Computational Social Science
%D 2020
%8 nov
%I Association for Computational Linguistics
%C Online
%F hennig-wilson-2020-diachronic
%X Previous English-language diachronic change models based on word embeddings have typically used single tokens to represent entities, including names of people. This leads to issues with both ambiguity (resulting in one embedding representing several distinct and unrelated people) and unlinked references (leading to several distinct embeddings which represent the same person). In this paper, we show that using named entity recognition and heuristic name linking steps before training a diachronic embedding model leads to more accurate representations of references to people, as compared to the token-only baseline. In a large news corpus of articles from The Guardian, we provide examples of several types of analysis that can be performed using these new embeddings. Further, we show that real world events and context changes can be detected using our proposed model.
%R 10.18653/v1/2020.nlpcss-1.19
%U https://aclanthology.org/2020.nlpcss-1.19
%U https://doi.org/10.18653/v1/2020.nlpcss-1.19
%P 173-183
Markdown (Informal)
[Diachronic Embeddings for People in the News](https://aclanthology.org/2020.nlpcss-1.19) (Hennig & Wilson, NLP+CSS 2020)
ACL
- Felix Hennig and Steven Wilson. 2020. Diachronic Embeddings for People in the News. In Proceedings of the Fourth Workshop on Natural Language Processing and Computational Social Science, pages 173–183, Online. Association for Computational Linguistics.