% LREC 2020 conference paper by Sato and Heffernan on character-based dialect clustering.
% The url field holds the canonical ACL Anthology address for Anthology ID 2020.lrec-1.124.
@inproceedings{sato-heffernan-2020-dialect,
    title = "Dialect Clustering with Character-Based Metrics: in Search of the Boundary of Language and Dialect",
    author = "Sato, Yo and
      Heffernan, Kevin",
    editor = "Calzolari, Nicoletta and
      B{\'e}chet, Fr{\'e}d{\'e}ric and
      Blache, Philippe and
      Choukri, Khalid and
      Cieri, Christopher and
      Declerck, Thierry and
      Goggi, Sara and
      Isahara, Hitoshi and
      Maegaard, Bente and
      Mariani, Joseph and
      Mazo, H{\'e}l{\`e}ne and
      Moreno, Asuncion and
      Odijk, Jan and
      Piperidis, Stelios",
    booktitle = "Proceedings of the Twelfth Language Resources and Evaluation Conference",
    month = may,
    year = "2020",
    address = "Marseille, France",
    publisher = "European Language Resources Association",
    url = "https://aclanthology.org/2020.lrec-1.124/",
    pages = "985--990",
    language = "eng",
    ISBN = "979-10-95546-34-4",
    abstract = "We present in this work a universal, character-based method for representing sentences so that one can thereby calculate the distance between any two sentence pair. With a small alphabet, it can function as a proxy of phonemes, and as one of its main uses, we carry out dialect clustering: cluster a dialect/sub-language mixed corpus into sub-groups and see if they coincide with the conventional boundaries of dialects and sub-languages. By using data with multiple Japanese dialects and multiple Slavic languages, we report how well each group clusters, in a manner to partially respond to the question of what separates languages from dialects."
}
Markdown (Informal)
[Dialect Clustering with Character-Based Metrics: in Search of the Boundary of Language and Dialect](https://aclanthology.org/2020.lrec-1.124/) (Sato & Heffernan, LREC 2020)
ACL