@inproceedings{roy-etal-2020-topic,
    title = "A Topic-Aligned Multilingual Corpus of {W}ikipedia Articles for Studying Information Asymmetry in Low Resource Languages",
    author = "Roy, Dwaipayan  and
      Bhatia, Sumit  and
      Jain, Prateek",
    editor = "Calzolari, Nicoletta  and
      B{\'e}chet, Fr{\'e}d{\'e}ric  and
      Blache, Philippe  and
      Choukri, Khalid  and
      Cieri, Christopher  and
      Declerck, Thierry  and
      Goggi, Sara  and
      Isahara, Hitoshi  and
      Maegaard, Bente  and
      Mariani, Joseph  and
      Mazo, H{\'e}l{\`e}ne  and
      Moreno, Asuncion  and
      Odijk, Jan  and
      Piperidis, Stelios",
    booktitle = "Proceedings of the Twelfth Language Resources and Evaluation Conference",
    month = may,
    year = "2020",
    address = "Marseille, France",
    publisher = "European Language Resources Association",
    url = "https://preview.aclanthology.org/ingest-emnlp/2020.lrec-1.289/",
    pages = "2373--2380",
    language = "eng",
    ISBN = "979-10-95546-34-4",
    abstract = "Wikipedia is the largest web-based open encyclopedia covering more than three hundred languages. However, different language editions of Wikipedia differ significantly in terms of their information coverage. We present a systematic comparison of information coverage in English Wikipedia (most exhaustive) and Wikipedias in eight other widely spoken languages (Arabic, German, Hindi, Korean, Portuguese, Russian, Spanish and Turkish). We analyze the content present in the respective Wikipedias in terms of the coverage of topics as well as the depth of coverage of topics included in these Wikipedias. Our analysis quantifies and provides useful insights about the information gap that exists between different language editions of Wikipedia and offers a roadmap for the IR community to bridge this gap."
}Markdown (Informal)
[A Topic-Aligned Multilingual Corpus of Wikipedia Articles for Studying Information Asymmetry in Low Resource Languages](https://preview.aclanthology.org/ingest-emnlp/2020.lrec-1.289/) (Roy et al., LREC 2020)
ACL