% Milajevs (2017), BUCC workshop paper, ACL Anthology ID W17-2505.
% The url field uses the canonical Anthology address rather than a preview-branch build;
% the doi field remains the preferred persistent identifier.
@inproceedings{milajevs-2017-toward,
  author    = {Milajevs, Dmitrijs},
  title     = {Toward a Comparable Corpus of {Latvian}, {Russian} and {English} Tweets},
  editor    = {Sharoff, Serge and
               Zweigenbaum, Pierre and
               Rapp, Reinhard},
  booktitle = {Proceedings of the 10th Workshop on Building and Using Comparable Corpora},
  month     = aug,
  year      = {2017},
  address   = {Vancouver, Canada},
  publisher = {Association for Computational Linguistics},
  pages     = {26--30},
  doi       = {10.18653/v1/W17-2505},
  url       = {https://aclanthology.org/W17-2505/},
  abstract  = {Twitter has become a rich source for linguistic data. Here, a possibility of building a trilingual Latvian-Russian-English corpus of tweets from Riga, Latvia is investigated. Such a corpus, once constructed, might be of great use for multiple purposes including training machine translation models, examining cross-lingual phenomena and studying the population of Riga. This pilot study shows that it is feasible to build such a resource by collecting and analysing a pilot corpus, which is made publicly available and can be used to construct a large comparable corpus.},
}
Markdown (Informal)
[Toward a Comparable Corpus of Latvian, Russian and English Tweets](https://aclanthology.org/W17-2505/) (Milajevs, BUCC 2017)
ACL