@comment{
  lui-etal-2014-automatic: journal article in TACL, volume 2 (2014).
  The url field holds the permanent ACL Anthology address for Q14-1003
  instead of a branch-preview build; the doi is stored bare, without a
  resolver prefix, so styles can format the link themselves.
}
@article{lui-etal-2014-automatic,
  author    = {Lui, Marco and
               Lau, Jey Han and
               Baldwin, Timothy},
  title     = {Automatic Detection and Language Identification of Multilingual Documents},
  editor    = {Lin, Dekang and
               Collins, Michael and
               Lee, Lillian},
  journal   = {Transactions of the Association for Computational Linguistics},
  volume    = {2},
  year      = {2014},
  pages     = {27--40},
  address   = {Cambridge, MA},
  publisher = {MIT Press},
  doi       = {10.1162/tacl_a_00163},
  url       = {https://aclanthology.org/Q14-1003/},
  abstract  = {Language identification is the task of automatically detecting the language(s) present in a document based on the content of the document. In this work, we address the problem of detecting documents that contain text from more than one language (multilingual documents). We introduce a method that is able to detect that a document is multilingual, identify the languages present, and estimate their relative proportions. We demonstrate the effectiveness of our method over synthetic data, as well as real-world multilingual documents collected from the web.},
}
Markdown (Informal)
[Automatic Detection and Language Identification of Multilingual Documents](https://aclanthology.org/Q14-1003/) (Lui et al., TACL 2014)
ACL