% VarDial 2020 workshop paper, ACL Anthology ID 2020.vardial-1.7.
% The language-family name is braced as a whole so sentence-casing styles keep its capitals.
@inproceedings{ahmadi-2020-building,
    title = {Building a Corpus for the {Zaza--Gorani} Language Family},
    author = {Ahmadi, Sina},
    editor = {Zampieri, Marcos  and
      Nakov, Preslav  and
      Ljube{\v{s}}i{\'c}, Nikola  and
      Tiedemann, J{\"o}rg  and
      Scherrer, Yves},
    booktitle = {Proceedings of the 7th Workshop on {NLP} for Similar Languages, Varieties and Dialects},
    month = dec,
    year = {2020},
    address = {Barcelona, Spain (Online)},
    publisher = {International Committee on Computational Linguistics (ICCL)},
    url = {https://aclanthology.org/2020.vardial-1.7/},
    pages = {70--78},
    abstract = {Thanks to the growth of local communities and various news websites along with the increasing accessibility of the Web, some of the endangered and less-resourced languages have a chance to revive in the information era. Therefore, the Web is considered a huge resource that can be used to extract language corpora which enable researchers to carry out various studies in linguistics and language technology. The Zaza{--}Gorani language family is a linguistic subgroup of the Northwestern Iranian languages for which there is no significant corpus available. Motivated to create one, in this paper we present our endeavour to collect a corpus in Zazaki and Gorani languages containing over 1.6M and 194k word tokens, respectively. This corpus is publicly available.}
}
Markdown (Informal)
[Building a Corpus for the Zaza–Gorani Language Family](https://aclanthology.org/2020.vardial-1.7/) (Ahmadi, VarDial 2020)
ACL