% LREC 2008 conference paper (Anthology ID L08-1587, per the url field).
% Braces in title/booktitle protect words whose capitalisation must survive style recasing.
@inproceedings{halacsy-etal-2008-parallel,
  title     = {Parallel Creation of {Gigaword} Corpora for Medium Density Languages -- an Interim Report},
  author    = {Hal{\'a}csy, P{\'e}ter and
               Kornai, Andr{\'a}s and
               N{\'e}meth, P{\'e}ter and
               Varga, D{\'a}niel},
  editor    = {Calzolari, Nicoletta and
               Choukri, Khalid and
               Maegaard, Bente and
               Mariani, Joseph and
               Odijk, Jan and
               Piperidis, Stelios and
               Tapias, Daniel},
  booktitle = {Proceedings of the Sixth International Conference on Language Resources and Evaluation ({LREC}'08)},
  month     = may,
  year      = {2008},
  address   = {Marrakech, Morocco},
  publisher = {European Language Resources Association (ELRA)},
  url       = {https://preview.aclanthology.org/ingest_wac_2008/L08-1587/},
  abstract  = {For increased speed in developing gigaword language resources for medium resource density languages we integrated several FOSS tools in the HUN* toolkit. While the speed and efficiency of the resulting pipeline has surpassed our expectations, our experience in developing LDC-style resource packages for Uzbek and Kurdish makes clear that neither the data collection nor the subsequent processing stages can be fully automated.}
}
Markdown (Informal)
[Parallel Creation of Gigaword Corpora for Medium Density Languages - an Interim Report](https://preview.aclanthology.org/ingest_wac_2008/L08-1587/) (Halácsy et al., LREC 2008)
ACL