@inproceedings{davis-2012-tajik,
title = "{T}ajik-{F}arsi {P}ersian Transliteration Using Statistical Machine Translation",
author = "Davis, Chris Irwin",
editor = "Calzolari, Nicoletta and
Choukri, Khalid and
Declerck, Thierry and
Do{\u{g}}an, Mehmet U{\u{g}}ur and
Maegaard, Bente and
Mariani, Joseph and
Moreno, Asuncion and
Odijk, Jan and
Piperidis, Stelios",
booktitle = "Proceedings of the Eighth International Conference on Language Resources and Evaluation ({LREC}`12)",
month = may,
year = "2012",
address = "Istanbul, Turkey",
publisher = "European Language Resources Association (ELRA)",
url = "https://preview.aclanthology.org/jlcl-multiple-ingestion/L12-1601/",
pages = "3988--3995",
abstract = "Tajik Persian is a dialect of Persian spoken primarily in Tajikistan and written with a modified Cyrillic alphabet. Iranian Persian, or Farsi, as it is natively called, is the lingua franca of Iran and is written with the Persian alphabet, a modified Arabic script. Although the spoken versions of Tajik and Farsi are mutually intelligible to educated speakers of both languages, the difference between the writing systems constitutes a barrier to text compatibility between the two languages. This paper presents a system to transliterate text between these two different Persian dialects that use incompatible writing systems. The system also serves as a mechanism to facilitate sharing of computational linguistic resources between the two languages. This is relevant because of the disparity in resources for Tajik versus Farsi."
}
Markdown (Informal)
[Tajik-Farsi Persian Transliteration Using Statistical Machine Translation](https://preview.aclanthology.org/jlcl-multiple-ingestion/L12-1601/) (Davis, LREC 2012)
ACL