% Conference paper from the LREC 2026 "main" volume (see the url field).
% Typed as inproceedings rather than article: the record carries proceedings
% editors, a publisher and an address, which standard article styles do not
% print, and the venue name belongs in booktitle rather than journal.
% The former volume value "main" is an anthology volume label (it matches the
% "lrec-main" segment of the url), not a bound-volume number, so it is kept in
% the non-printing anthology-volume field instead of being rendered.
% NOTE(review): url points at a preview/ingest host -- confirm the permanent
% address once the proceedings are published.
% NOTE(review): address looks like the conference location rather than the
% publisher's city -- confirm it is what the target style should print.
@inproceedings{verma-mehler-2026-predicting,
  author           = {Verma, Bhuvanesh and
                      Mehler, Alexander},
  title            = {Predicting Topic (Co-)Occurrence Using Topic Networks Built from the {Project} {Gutenberg} Corpus},
  editor           = {Piperidis, Stelios and
                      Bel, N{\'u}ria and
                      van den Heuvel, Henk and
                      Ide, Nancy and
                      Krek, Simon and
                      Toral, Antonio},
  booktitle        = {International Conference on Language Resources and Evaluation},
  month            = may,
  year             = {2026},
  address          = {Palma de Mallorca, Spain},
  publisher        = {ELRA Language Resource Association},
  pages            = {860--869},
  url              = {https://preview.aclanthology.org/ingest-lrec/2026.lrec-main.65/},
  anthology-volume = {main},
  abstract         = {Although temporal topic modeling has been widely applied to scientific and legal texts, literary corpora have largely been overlooked in this regard. To address this issue, we analyze topic evolution in a subset of the Project Gutenberg (PG) corpus. We model this subset as a sequence of topic networks that capture the emergence, persistence, and interaction of thematic structures over decades. Using supervised topic representations, we predict nodes (topics) and edges (topic pairings) to forecast future topics and their co-occurrence. Our experiments demonstrate moderate to strong temporal persistence in topic connectivity patterns across three topic systems, with ROC-AUC and AP values consistently above 0.85. We find that the temporal span of topic networks significantly impacts predictive performance: longer spans improve the stability and recall of topic presence, while shorter spans better capture evolving topic relationships. Overall, our findings demonstrate the predictability of topics in literary texts over time.},
}
Markdown (Informal)
[Predicting Topic (Co-)Occurrence Using Topic Networks Built from the Project Gutenberg Corpus](https://preview.aclanthology.org/ingest-lrec/2026.lrec-main.65/) (Verma & Mehler, LREC 2026)
ACL