@inproceedings{sato-heffernan-2020-dialect,
title = "Dialect Clustering with Character-Based Metrics: in Search of the Boundary of Language and Dialect",
author = "Sato, Yo and
Heffernan, Kevin",
booktitle = "Proceedings of the 12th Language Resources and Evaluation Conference",
month = may,
year = "2020",
address = "Marseille, France",
publisher = "European Language Resources Association",
url = "https://aclanthology.org/2020.lrec-1.124",
pages = "985--990",
abstract = "We present in this work a universal, character-based method for representing sentences so that one can thereby calculate the distance between any two sentence pair. With a small alphabet, it can function as a proxy of phonemes, and as one of its main uses, we carry out dialect clustering: cluster a dialect/sub-language mixed corpus into sub-groups and see if they coincide with the conventional boundaries of dialects and sub-languages. By using data with multiple Japanese dialects and multiple Slavic languages, we report how well each group clusters, in a manner to partially respond to the question of what separates languages from dialects.",
language = "English",
ISBN = "979-10-95546-34-4",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="sato-heffernan-2020-dialect">
<titleInfo>
<title>Dialect Clustering with Character-Based Metrics: in Search of the Boundary of Language and Dialect</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yo</namePart>
<namePart type="family">Sato</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kevin</namePart>
<namePart type="family">Heffernan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-may</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<language>
<languageTerm type="text">English</languageTerm>
<languageTerm type="code" authority="iso639-2b">eng</languageTerm>
</language>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 12th Language Resources and Evaluation Conference</title>
</titleInfo>
<originInfo>
<publisher>European Language Resources Association</publisher>
<place>
<placeTerm type="text">Marseille, France</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-10-95546-34-4</identifier>
</relatedItem>
<abstract>We present in this work a universal, character-based method for representing sentences so that one can thereby calculate the distance between any two sentence pair. With a small alphabet, it can function as a proxy of phonemes, and as one of its main uses, we carry out dialect clustering: cluster a dialect/sub-language mixed corpus into sub-groups and see if they coincide with the conventional boundaries of dialects and sub-languages. By using data with multiple Japanese dialects and multiple Slavic languages, we report how well each group clusters, in a manner to partially respond to the question of what separates languages from dialects.</abstract>
<identifier type="citekey">sato-heffernan-2020-dialect</identifier>
<location>
<url>https://aclanthology.org/2020.lrec-1.124</url>
</location>
<part>
<date>2020-may</date>
<extent unit="page">
<start>985</start>
<end>990</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Dialect Clustering with Character-Based Metrics: in Search of the Boundary of Language and Dialect
%A Sato, Yo
%A Heffernan, Kevin
%S Proceedings of the 12th Language Resources and Evaluation Conference
%D 2020
%8 may
%I European Language Resources Association
%C Marseille, France
%@ 979-10-95546-34-4
%G English
%F sato-heffernan-2020-dialect
%X We present in this work a universal, character-based method for representing sentences so that one can thereby calculate the distance between any two sentence pair. With a small alphabet, it can function as a proxy of phonemes, and as one of its main uses, we carry out dialect clustering: cluster a dialect/sub-language mixed corpus into sub-groups and see if they coincide with the conventional boundaries of dialects and sub-languages. By using data with multiple Japanese dialects and multiple Slavic languages, we report how well each group clusters, in a manner to partially respond to the question of what separates languages from dialects.
%U https://aclanthology.org/2020.lrec-1.124
%P 985-990
Markdown (Informal)
[Dialect Clustering with Character-Based Metrics: in Search of the Boundary of Language and Dialect](https://aclanthology.org/2020.lrec-1.124) (Sato & Heffernan, LREC 2020)
ACL