% RANLP 2021 conference paper (ACL Anthology ID 2021.ranlp-1.106).
% The url field uses the canonical Anthology address; a branch-preview
% address would stop resolving once that preview is taken down.
@inproceedings{marszalek-kowalewska-2021-impact,
  title     = {The Impact of Text Normalization on Multiword Expressions Discovery in {Persian}},
  author    = {Marsza{\l}ek-Kowalewska, Katarzyna},
  editor    = {Mitkov, Ruslan and Angelova, Galia},
  booktitle = {Proceedings of the International Conference on Recent Advances in Natural Language Processing ({RANLP} 2021)},
  month     = sep,
  year      = {2021},
  address   = {Held Online},
  publisher = {INCOMA Ltd.},
  url       = {https://aclanthology.org/2021.ranlp-1.106/},
  pages     = {929--939},
  abstract  = {This paper evaluates normalization procedures of Persian text for a downstream NLP task - multiword expressions (MWEs) discovery. We discuss the challenges the Persian language poses for NLP and evaluate open-source tools that try to address these difficulties. The best-performing tool is later used in the main task - MWEs discovery. In order to discover MWEs, we use association measures and a subpart of the MirasText corpus. The results show that an F-score is 26{\%} higher in the case of normalized input data.},
}
Markdown (Informal)
[The Impact of Text Normalization on Multiword Expressions Discovery in Persian](https://preview.aclanthology.org/Add-Cong-Liu-Florida-Atlantic-University-author-id/2021.ranlp-1.106/) (Marszałek-Kowalewska, RANLP 2021)
ACL