% Workshop paper (BUCC 2021) introducing the EM Manipuri-English comparable corpus.
% The url field uses the canonical ACL Anthology address, not a preview-branch link.
@inproceedings{huidrom-etal-2021-em,
    title = "{EM} Corpus: a comparable corpus for a less-resourced language pair {Manipuri}-{English}",
    author = "Huidrom, Rudali and
      Lepage, Yves and
      Khomdram, Khogendra",
    editor = "Rapp, Reinhard and
      Sharoff, Serge and
      Zweigenbaum, Pierre",
    booktitle = "Proceedings of the 14th Workshop on Building and Using Comparable Corpora (BUCC 2021)",
    month = sep,
    year = "2021",
    address = "Online (Virtual Mode)",
    publisher = "INCOMA Ltd.",
    url = "https://aclanthology.org/2021.bucc-1.8/",
    pages = "60--67",
    abstract = "In this paper, we introduce a sentence-level comparable text corpus crawled and created for the less-resourced language pair, Manipuri(mni) and English (eng). Our monolingual corpora comprise 1.88 million Manipuri sentences and 1.45 million English sentences, and our parallel corpus comprises 124,975 Manipuri-English sentence pairs. These data were crawled and collected over a year from August 2020 to March 2021 from a local newspaper website called {\textquoteleft}The Sangai Express.' The resources reported in this paper are made available to help the low-resourced languages community for MT/NLP tasks."
}
Markdown (Informal)
[EM Corpus: a comparable corpus for a less-resourced language pair Manipuri-English](https://aclanthology.org/2021.bucc-1.8/) (Huidrom et al., BUCC 2021)
ACL