@comment{
  Conference paper in the ACL 2026 System Demonstrations volume (Volume 3).
  Case-sensitive words in title and booktitle are protected with whole-word
  braces so that sentence-casing styles keep their capitals.
  NOTE(review): the url field points at the Anthology preview/ingest host;
  presumably it should be replaced by the canonical aclanthology.org address
  once the volume is published -- confirm before release.
}
@inproceedings{baker-kadiyala-2026-system,
    title     = {A System for Dynamically Tracking Content Moderation on {Reddit}},
    author    = {Baker, George Arthur and
                 Kadiyala, Bharadwaj},
    editor    = {Durrett, Greg and
                 Jian, Ping},
    booktitle = {Proceedings of the 64th Annual Meeting of the {Association} for {Computational} {Linguistics} (Volume 3: System Demonstrations)},
    month     = jul,
    year      = {2026},
    address   = {San Diego, California, United States},
    publisher = {Association for Computational Linguistics},
    url       = {https://preview.aclanthology.org/ingest-acl/2026.acl-demo.42/},
    pages     = {428--435},
    isbn      = {979-8-89176-392-0},
    abstract  = {Recent work in natural language processing, human-computer interaction, and computational social science takes interest in the study of decentralized content moderation, in which individual communities largely determine their own norms, rules, and enforcement thereof. A key challenge to this body of work is that, once moderated, content and related variables become difficult or impossible to recover; previous work often relied on 3rd-party historical data sources, but recent world events, legal disputes, and policy shifts have significantly disrupted these services, practically disabling their research use-cases. As a result, in order to conduct new research and reproduce previous results, researchers must record content as it{'}s created, and monitor variables of interest over time. In this paper we present and publicly release a software system for the dynamic monitoring of Reddit posts, communities, and moderation actions, to enable scalable and reproducible research on decentralized platform governance and content moderation. To the authors' knowledge, at the time of publication this system is the only available solution for general-purpose, real-time, policy-compliant longitudinal data collection on Reddit. Furthermore, the system{'}s integration with the official Reddit API enables the collection of authentication-gated data such as community engagement metrics and moderation team information, which was unavailable in previous historical data sources.},
}
Markdown (Informal)
[A System for Dynamically Tracking Content Moderation on Reddit](https://preview.aclanthology.org/ingest-acl/2026.acl-demo.42/) (Baker & Kadiyala, ACL 2026)
ACL
- George Arthur Baker and Bharadwaj Kadiyala. 2026. A System for Dynamically Tracking Content Moderation on Reddit. In Proceedings of the 64th Annual Meeting of the Association for Computational Linguistics (Volume 3: System Demonstrations), pages 428–435, San Diego, California, United States. Association for Computational Linguistics.