@comment{ACL Anthology record 2022.findings-acl.52: conference paper in Findings of ACL 2022. The url field uses the canonical Anthology address.}
@inproceedings{soni-etal-2022-human,
  title     = {Human Language Modeling},
  author    = {Soni, Nikita and
               Matero, Matthew and
               Balasubramanian, Niranjan and
               Schwartz, H. Andrew},
  editor    = {Muresan, Smaranda and
               Nakov, Preslav and
               Villavicencio, Aline},
  booktitle = {Findings of the Association for Computational Linguistics: ACL 2022},
  month     = may,
  year      = {2022},
  address   = {Dublin, Ireland},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2022.findings-acl.52/},
  doi       = {10.18653/v1/2022.findings-acl.52},
  pages     = {622--636},
  abstract  = {Natural language is generated by people, yet traditional language modeling views words or documents as if generated independently. Here, we propose human language modeling (HuLM), a hierarchical extension to the language modeling problem whereby a human-level exists to connect sequences of documents (e.g. social media messages) and capture the notion that human language is moderated by changing human states. We introduce, HaRT, a large-scale transformer model for solving HuLM, pre-trained on approximately 100,000 social media users, and demonstrate its effectiveness in terms of both language modeling (perplexity) for social media and fine-tuning for 4 downstream tasks spanning document- and user-levels. Results on all tasks meet or surpass the current state-of-the-art.},
}
Markdown (Informal)
[Human Language Modeling](https://aclanthology.org/2022.findings-acl.52/) (Soni et al., Findings 2022)
ACL
- Nikita Soni, Matthew Matero, Niranjan Balasubramanian, and H. Andrew Schwartz. 2022. Human Language Modeling. In Findings of the Association for Computational Linguistics: ACL 2022, pages 622–636, Dublin, Ireland. Association for Computational Linguistics.