% Conference paper from the EMNLP 2018 System Demonstrations volume (ACL Anthology ID D18-2014).
% The url field holds the canonical ACL Anthology address, not a temporary preview-branch build.
@inproceedings{wiedemann-etal-2018-multilingual,
  title     = {A Multilingual Information Extraction Pipeline for Investigative Journalism},
  author    = {Wiedemann, Gregor and
               Yimam, Seid Muhie and
               Biemann, Chris},
  editor    = {Blanco, Eduardo and
               Lu, Wei},
  booktitle = {Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing: System Demonstrations},
  month     = nov,
  year      = {2018},
  address   = {Brussels, Belgium},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/D18-2014/},
  doi       = {10.18653/v1/D18-2014},
  pages     = {78--83},
  abstract  = {We introduce an advanced information extraction pipeline to automatically process very large collections of unstructured textual data for the purpose of investigative journalism. The pipeline serves as a new input processor for the upcoming major release of our New/s/leak 2.0 software, which we develop in cooperation with a large German news organization. The use case is that journalists receive a large collection of files up to several Gigabytes containing unknown contents. Collections may originate either from official disclosures of documents, e.g. Freedom of Information Act requests, or unofficial data leaks.},
}
Markdown (Informal)
[A Multilingual Information Extraction Pipeline for Investigative Journalism](https://aclanthology.org/D18-2014/) (Wiedemann et al., EMNLP 2018)
ACL