% Conference paper: Hamilton, Hicke, Mimno and Wilkens (2025), in the NLP4DH 2025 proceedings.
% The url field uses the canonical ACL Anthology address; the preview/staging
% host (preview.aclanthology.org/<branch>/) is not a stable citation target.
% The doi field remains the preferred persistent identifier.
@inproceedings{hamilton-etal-2025-city,
    title     = {A City of Millions: Mapping Literary Social Networks At Scale},
    author    = {Hamilton, Sil and
                 Hicke, Rebecca and
                 Mimno, David and
                 Wilkens, Matthew},
    editor    = {H{\"a}m{\"a}l{\"a}inen, Mika and
                 {\"O}hman, Emily and
                 Bizzoni, Yuri and
                 Miyagawa, So and
                 Alnajjar, Khalid},
    booktitle = {Proceedings of the 5th International Conference on Natural Language Processing for Digital Humanities},
    month     = may,
    year      = {2025},
    address   = {Albuquerque, USA},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2025.nlp4dh-1.46/},
    doi       = {10.18653/v1/2025.nlp4dh-1.46},
    pages     = {543--549},
    isbn      = {979-8-89176-234-3},
    abstract  = {We release 70,509 high-quality social networks extracted from multilingual fiction and nonfiction narratives. We additionally provide metadata for {\textasciitilde}30,000 of these texts (73{\%} nonfiction and 27{\%} fiction) written between 1800 and 1999 in 58 languages. This dataset provides information on historical social worlds at an unprecedented scale, including data for 2,510,021 individuals in 2,805,482 pair-wise relationships annotated for affinity and relationship type. We achieve this scale by automating previously manual methods of extracting social networks; specifically, we adapt an existing annotation task as a language model prompt, ensuring consistency at scale with the use of structured output. This dataset serves as a unique resource for humanities and social science research by providing data on cognitive models of social realities.}
}
Markdown (Informal)
[A City of Millions: Mapping Literary Social Networks At Scale](https://aclanthology.org/2025.nlp4dh-1.46/) (Hamilton et al., NLP4DH 2025)
ACL
- Sil Hamilton, Rebecca Hicke, David Mimno, and Matthew Wilkens. 2025. A City of Millions: Mapping Literary Social Networks At Scale. In Proceedings of the 5th International Conference on Natural Language Processing for Digital Humanities, pages 543–549, Albuquerque, USA. Association for Computational Linguistics.