% Conference paper from the ACL 2020 proceedings.
% Field order: author, title, venue, date, publisher details, pages, identifiers, abstract.
@inproceedings{erdmann-etal-2020-paradigm,
    author    = {Erdmann, Alexander and Elsner, Micha and Wu, Shijie and Cotterell, Ryan and Habash, Nizar},
    title     = {The Paradigm Discovery Problem},
    booktitle = {Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics},
    year      = {2020},
    month     = jul,
    address   = {Online},
    publisher = {Association for Computational Linguistics},
    pages     = {7778--7790},
    doi       = {10.18653/v1/2020.acl-main.695},
    url       = {https://aclanthology.org/2020.acl-main.695},
    abstract  = {This work treats the paradigm discovery problem (PDP), the task of learning an inflectional morphological system from unannotated sentences. We formalize the PDP and develop evaluation metrics for judging systems. Using currently available resources, we construct datasets for the task. We also devise a heuristic benchmark for the PDP and report empirical results on five diverse languages. Our benchmark system first makes use of word embeddings and string similarity to cluster forms by cell and by paradigm. Then, we bootstrap a neural transducer on top of the clustered data to predict words to realize the empty paradigm slots. An error analysis of our system suggests clustering by cell across different inflection classes is the most pressing challenge for future work.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 collection holding a single record for one conference paper. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="erdmann-etal-2020-paradigm">
    <!-- Title of the paper itself -->
    <titleInfo>
      <title>The Paradigm Discovery Problem</title>
    </titleInfo>
    <!-- Authors: one personal name element each, all with the "author" role -->
    <name type="personal">
      <namePart type="given">Alexander</namePart>
      <namePart type="family">Erdmann</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Micha</namePart>
      <namePart type="family">Elsner</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Shijie</namePart>
      <namePart type="family">Wu</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Ryan</namePart>
      <namePart type="family">Cotterell</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Nizar</namePart>
      <namePart type="family">Habash</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <!-- Issue date of the paper (year and month abbreviation, no encoding declared) -->
    <originInfo>
      <dateIssued>2020-jul</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume that contains this paper -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics</title>
      </titleInfo>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Online</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <!-- Abstract kept on a single line so no whitespace is added to its text content -->
    <abstract>This work treats the paradigm discovery problem (PDP), the task of learning an inflectional morphological system from unannotated sentences. We formalize the PDP and develop evaluation metrics for judging systems. Using currently available resources, we construct datasets for the task. We also devise a heuristic benchmark for the PDP and report empirical results on five diverse languages. Our benchmark system first makes use of word embeddings and string similarity to cluster forms by cell and by paradigm. Then, we bootstrap a neural transducer on top of the clustered data to predict words to realize the empty paradigm slots. An error analysis of our system suggests clustering by cell across different inflection classes is the most pressing challenge for future work.</abstract>
    <!-- Identifiers: citation key and DOI, followed by the landing-page URL -->
    <identifier type="citekey">erdmann-etal-2020-paradigm</identifier>
    <identifier type="doi">10.18653/v1/2020.acl-main.695</identifier>
    <location>
      <url>https://aclanthology.org/2020.acl-main.695</url>
    </location>
    <!-- Date and page extent (first and last page) -->
    <part>
      <date>2020-jul</date>
      <extent unit="page">
        <start>7778</start>
        <end>7790</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T The Paradigm Discovery Problem
%A Erdmann, Alexander
%A Elsner, Micha
%A Wu, Shijie
%A Cotterell, Ryan
%A Habash, Nizar
%S Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics
%D 2020
%8 jul
%I Association for Computational Linguistics
%C Online
%F erdmann-etal-2020-paradigm
%X This work treats the paradigm discovery problem (PDP), the task of learning an inflectional morphological system from unannotated sentences. We formalize the PDP and develop evaluation metrics for judging systems. Using currently available resources, we construct datasets for the task. We also devise a heuristic benchmark for the PDP and report empirical results on five diverse languages. Our benchmark system first makes use of word embeddings and string similarity to cluster forms by cell and by paradigm. Then, we bootstrap a neural transducer on top of the clustered data to predict words to realize the empty paradigm slots. An error analysis of our system suggests clustering by cell across different inflection classes is the most pressing challenge for future work.
%R 10.18653/v1/2020.acl-main.695
%U https://aclanthology.org/2020.acl-main.695
%U https://doi.org/10.18653/v1/2020.acl-main.695
%P 7778-7790
Markdown (Informal)
[The Paradigm Discovery Problem](https://aclanthology.org/2020.acl-main.695) (Erdmann et al., ACL 2020)
ACL
- Alexander Erdmann, Micha Elsner, Shijie Wu, Ryan Cotterell, and Nizar Habash. 2020. The Paradigm Discovery Problem. In Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics, pages 7778–7790, Online. Association for Computational Linguistics.