% Klang and Nugues, "Docria": paper in the proceedings of the 22nd Nordic
% Conference on Computational Linguistics (ACL Anthology ID W19-6148).
% - The month range is joined with an ASCII "--" so the entry stays safe for
%   classic 8-bit BibTeX; the style turns it into an en dash.
% - The url targets the canonical Anthology record rather than a temporary
%   preview/staging branch.
% - The abstract is kept verbatim as published.
@inproceedings{klang-nugues-2019-docria,
  author    = {Klang, Marcus and Nugues, Pierre},
  title     = {{Docria}: Processing and Storing Linguistic Data with {Wikipedia}},
  editor    = {Hartmann, Mareike and Plank, Barbara},
  booktitle = {Proceedings of the 22nd Nordic Conference on Computational Linguistics},
  month     = sep # "--" # oct,
  year      = {2019},
  address   = {Turku, Finland},
  publisher = {Link{\"o}ping University Electronic Press},
  pages     = {400--405},
  url       = {https://aclanthology.org/W19-6148/},
  abstract  = {The availability of user-generated content has increased significantly over time. Wikipedia is one example of a corpora which spans a huge range of topics and is freely available. Storing and processing these corpora requires flexible documents models as they may contain malicious and incorrect data. Docria is a library which attempts to address this issue by providing a solution which can be used with small to large corpora, from laptops using Python interactively in a Jupyter notebook to clusters running map-reduce frameworks with optimized compiled code. Docria is available as open-source code.},
}
Markdown (Informal)
[Docria: Processing and Storing Linguistic Data with Wikipedia](https://aclanthology.org/W19-6148/) (Klang & Nugues, NoDaLiDa 2019)
ACL