@comment{
  Workshop paper from READI 2022 (held within LREC 2022), ACL Anthology ID 2022.readi-1.4.
  The url field uses the canonical Anthology address rather than a temporary preview build.
}
@inproceedings{hauser-etal-2022-multilingual,
  title     = {A Multilingual Simplified Language News Corpus},
  author    = {Hauser, Renate and
               Vamvas, Jannis and
               Ebling, Sarah and
               Volk, Martin},
  editor    = {Wilkens, Rodrigo and
               Alfter, David and
               Cardon, R{\'e}mi and
               Gala, N{\'u}ria},
  booktitle = {Proceedings of the 2nd Workshop on Tools and Resources to Empower People with {REAding} {DIfficulties} ({READI}) within the 13th Language Resources and Evaluation Conference},
  month     = jun,
  year      = {2022},
  address   = {Marseille, France},
  publisher = {European Language Resources Association},
  url       = {https://aclanthology.org/2022.readi-1.4/},
  pages     = {25--30},
  abstract  = {Simplified language news articles are being offered by specialized web portals in several countries. The thousands of articles that have been published over the years are a valuable resource for natural language processing, especially for efforts towards automatic text simplification. In this paper, we present SNIML, a large multilingual corpus of news in simplified language. The corpus contains 13k simplified news articles written in one of six languages: Finnish, French, Italian, Swedish, English, and German. All articles are shared under open licenses that permit academic use. The level of text simplification varies depending on the news portal. We believe that even though SNIML is not a parallel corpus, it can be useful as a complement to the more homogeneous but often smaller corpora of news in the simplified variety of one language that are currently in use.},
}
Markdown (Informal)
[A Multilingual Simplified Language News Corpus](https://aclanthology.org/2022.readi-1.4/) (Hauser et al., READI 2022)
ACL
- Renate Hauser, Jannis Vamvas, Sarah Ebling, and Martin Volk. 2022. A Multilingual Simplified Language News Corpus. In Proceedings of the 2nd Workshop on Tools and Resources to Empower People with REAding DIfficulties (READI) within the 13th Language Resources and Evaluation Conference, pages 25–30, Marseille, France. European Language Resources Association.