% LREC 2022 paper introducing the Magahi-Hindi-English (MHE) code-mixed corpus
% for similar language identification.
% The url field uses the canonical ACL Anthology address, not a branch-preview build.
@inproceedings{rani-etal-2022-mhe,
  title     = {{MHE}: Code-Mixed Corpora for Similar Language Identification},
  author    = {Rani, Priya and
               McCrae, John P. and
               Fransen, Theodorus},
  editor    = {Calzolari, Nicoletta and
               B{\'e}chet, Fr{\'e}d{\'e}ric and
               Blache, Philippe and
               Choukri, Khalid and
               Cieri, Christopher and
               Declerck, Thierry and
               Goggi, Sara and
               Isahara, Hitoshi and
               Maegaard, Bente and
               Mariani, Joseph and
               Mazo, H{\'e}l{\`e}ne and
               Odijk, Jan and
               Piperidis, Stelios},
  booktitle = {Proceedings of the Thirteenth Language Resources and Evaluation Conference},
  month     = jun,
  year      = {2022},
  address   = {Marseille, France},
  publisher = {European Language Resources Association},
  url       = {https://aclanthology.org/2022.lrec-1.366/},
  pages     = {3425--3433},
  abstract  = {This paper introduces a new Magahi-Hindi-English (MHE) code-mixed data-set for similar language identification (SMLID), where Magahi is a less-resourced minority language. This corpus provides a language id at two levels: word and sentence. This data-set is the first Magahi-Hindi-English code-mixed data-set for similar language identification task. Furthermore, we will discuss the complexity of the data-set and provide a few baselines for the language identification task.},
}
Markdown (Informal)
[MHE: Code-Mixed Corpora for Similar Language Identification](https://aclanthology.org/2022.lrec-1.366/) (Rani et al., LREC 2022)
ACL