@inproceedings{dmitrieva-tiedemann-2021-creating,
title = "Creating an Aligned {R}ussian Text Simplification Dataset from Language Learner Data",
author = {Dmitrieva, Anna and
Tiedemann, J{\"o}rg},
booktitle = "Proceedings of the 8th Workshop on Balto-Slavic Natural Language Processing",
month = apr,
year = "2021",
address = "Kyiv, Ukraine",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2021.bsnlp-1.8",
pages = "73--79",
abstract = "Parallel language corpora where regular texts are aligned with their simplified versions can be used in both natural language processing and theoretical linguistic studies. They are essential for the task of automatic text simplification, but can also provide valuable insights into the characteristics that make texts more accessible and reveal strategies that human experts use to simplify texts. Today, there exist a few parallel datasets for English and Simple English, but many other languages lack such data. In this paper we describe our work on creating an aligned Russian-Simple Russian dataset composed of Russian literature texts adapted for learners of Russian as a foreign language. This will be the first parallel dataset in this domain, and one of the first Simple Russian datasets in general.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="dmitrieva-tiedemann-2021-creating">
<titleInfo>
<title>Creating an Aligned Russian Text Simplification Dataset from Language Learner Data</title>
</titleInfo>
<name type="personal">
<namePart type="given">Anna</namePart>
<namePart type="family">Dmitrieva</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jörg</namePart>
<namePart type="family">Tiedemann</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2021-04</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 8th Workshop on Balto-Slavic Natural Language Processing</title>
</titleInfo>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Kyiv, Ukraine</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Parallel language corpora where regular texts are aligned with their simplified versions can be used in both natural language processing and theoretical linguistic studies. They are essential for the task of automatic text simplification, but can also provide valuable insights into the characteristics that make texts more accessible and reveal strategies that human experts use to simplify texts. Today, there exist a few parallel datasets for English and Simple English, but many other languages lack such data. In this paper we describe our work on creating an aligned Russian-Simple Russian dataset composed of Russian literature texts adapted for learners of Russian as a foreign language. This will be the first parallel dataset in this domain, and one of the first Simple Russian datasets in general.</abstract>
<identifier type="citekey">dmitrieva-tiedemann-2021-creating</identifier>
<location>
<url>https://aclanthology.org/2021.bsnlp-1.8</url>
</location>
<part>
<date>2021-04</date>
<extent unit="page">
<start>73</start>
<end>79</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Creating an Aligned Russian Text Simplification Dataset from Language Learner Data
%A Dmitrieva, Anna
%A Tiedemann, Jörg
%S Proceedings of the 8th Workshop on Balto-Slavic Natural Language Processing
%D 2021
%8 apr
%I Association for Computational Linguistics
%C Kyiv, Ukraine
%F dmitrieva-tiedemann-2021-creating
%X Parallel language corpora where regular texts are aligned with their simplified versions can be used in both natural language processing and theoretical linguistic studies. They are essential for the task of automatic text simplification, but can also provide valuable insights into the characteristics that make texts more accessible and reveal strategies that human experts use to simplify texts. Today, there exist a few parallel datasets for English and Simple English, but many other languages lack such data. In this paper we describe our work on creating an aligned Russian-Simple Russian dataset composed of Russian literature texts adapted for learners of Russian as a foreign language. This will be the first parallel dataset in this domain, and one of the first Simple Russian datasets in general.
%U https://aclanthology.org/2021.bsnlp-1.8
%P 73-79
Markdown (Informal)
[Creating an Aligned Russian Text Simplification Dataset from Language Learner Data](https://aclanthology.org/2021.bsnlp-1.8) (Dmitrieva & Tiedemann, BSNLP 2021)
ACL