% VarDial 2020 workshop paper (ACL Anthology ID 2020.vardial-1.7).
% The compound language name in the title is braced as a whole word so that
% sentence-casing styles keep the capital letters of both proper nouns.
@inproceedings{ahmadi-2020-building,
  title     = {Building a Corpus for the {Zaza--Gorani} Language Family},
  author    = {Ahmadi, Sina},
  editor    = {Zampieri, Marcos and
               Nakov, Preslav and
               Ljube{\v{s}}i{\'c}, Nikola and
               Tiedemann, J{\"o}rg and
               Scherrer, Yves},
  booktitle = {Proceedings of the 7th Workshop on {NLP} for Similar Languages, Varieties and Dialects},
  month     = dec,
  year      = {2020},
  address   = {Barcelona, Spain (Online)},
  publisher = {International Committee on Computational Linguistics (ICCL)},
  url       = {https://aclanthology.org/2020.vardial-1.7/},
  pages     = {70--78},
  abstract  = {Thanks to the growth of local communities and various news websites along with the increasing accessibility of the Web, some of the endangered and less-resourced languages have a chance to revive in the information era. Therefore, the Web is considered a huge resource that can be used to extract language corpora which enable researchers to carry out various studies in linguistics and language technology. The Zaza{--}Gorani language family is a linguistic subgroup of the Northwestern Iranian languages for which there is no significant corpus available. Motivated to create one, in this paper we present our endeavour to collect a corpus in Zazaki and Gorani languages containing over 1.6M and 194k word tokens, respectively. This corpus is publicly available.},
}
Markdown (Informal)
[Building a Corpus for the Zaza–Gorani Language Family](https://aclanthology.org/2020.vardial-1.7/) (Ahmadi, VarDial 2020)
ACL