@inproceedings{mohseni-minaei-bidgoli-2010-persian,
title = "A {P}ersian Part-Of-Speech Tagger Based on Morphological Analysis",
author = "Mohseni, Mahdi and
Minaei-bidgoli, Behrouz",
booktitle = "Proceedings of the Seventh International Conference on Language Resources and Evaluation ({LREC}'10)",
month = may,
year = "2010",
address = "Valletta, Malta",
publisher = "European Language Resources Association (ELRA)",
url = "http://www.lrec-conf.org/proceedings/lrec2010/pdf/107_Paper.pdf",
abstract = "This paper describes a method based on morphological analysis of words for a Persian Part-Of-Speech (POS) tagging system. This is a main part of a process for expanding a large Persian corpus called Peyekare (or Textual Corpus of Persian Language). Peykare is arranged into two parts: annotated and unannotated parts. We use the annotated part in order to create an automatic morphological analyzer, a main segment of the system. Morphosyntactic features of Persian words cause two problems: the number of tags is increased in the corpus (586 tags) and the form of the words is changed. This high number of tags debilitates any taggers to work efficiently. From other side the change of word forms reduces the frequency of words with the same lemma; and the number of words belonging to a specific tag reduces as well. This problem also has a bad effect on statistical taggers. The morphological analyzer by removing the problems helps the tagger to cover a large number of tags in the corpus. Using a Markov tagger the method is evaluated on the corpus. The experiments show the efficiency of the method in Persian POS tagging.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="mohseni-minaei-bidgoli-2010-persian">
<titleInfo>
<title>A Persian Part-Of-Speech Tagger Based on Morphological Analysis</title>
</titleInfo>
<name type="personal">
<namePart type="given">Mahdi</namePart>
<namePart type="family">Mohseni</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Behrouz</namePart>
<namePart type="family">Minaei-bidgoli</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2010-may</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Seventh International Conference on Language Resources and Evaluation (LREC’10)</title>
</titleInfo>
<originInfo>
<publisher>European Language Resources Association (ELRA)</publisher>
<place>
<placeTerm type="text">Valletta, Malta</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This paper describes a method based on morphological analysis of words for a Persian Part-Of-Speech (POS) tagging system. This is a main part of a process for expanding a large Persian corpus called Peyekare (or Textual Corpus of Persian Language). Peykare is arranged into two parts: annotated and unannotated parts. We use the annotated part in order to create an automatic morphological analyzer, a main segment of the system. Morphosyntactic features of Persian words cause two problems: the number of tags is increased in the corpus (586 tags) and the form of the words is changed. This high number of tags debilitates any taggers to work efficiently. From other side the change of word forms reduces the frequency of words with the same lemma; and the number of words belonging to a specific tag reduces as well. This problem also has a bad effect on statistical taggers. The morphological analyzer by removing the problems helps the tagger to cover a large number of tags in the corpus. Using a Markov tagger the method is evaluated on the corpus. The experiments show the efficiency of the method in Persian POS tagging.</abstract>
<identifier type="citekey">mohseni-minaei-bidgoli-2010-persian</identifier>
<location>
<url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/107_Paper.pdf</url>
</location>
<part>
<date>2010-may</date>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T A Persian Part-Of-Speech Tagger Based on Morphological Analysis
%A Mohseni, Mahdi
%A Minaei-bidgoli, Behrouz
%S Proceedings of the Seventh International Conference on Language Resources and Evaluation (LREC’10)
%D 2010
%8 may
%I European Language Resources Association (ELRA)
%C Valletta, Malta
%F mohseni-minaei-bidgoli-2010-persian
%X This paper describes a method based on morphological analysis of words for a Persian Part-Of-Speech (POS) tagging system. This is a main part of a process for expanding a large Persian corpus called Peyekare (or Textual Corpus of Persian Language). Peykare is arranged into two parts: annotated and unannotated parts. We use the annotated part in order to create an automatic morphological analyzer, a main segment of the system. Morphosyntactic features of Persian words cause two problems: the number of tags is increased in the corpus (586 tags) and the form of the words is changed. This high number of tags debilitates any taggers to work efficiently. From other side the change of word forms reduces the frequency of words with the same lemma; and the number of words belonging to a specific tag reduces as well. This problem also has a bad effect on statistical taggers. The morphological analyzer by removing the problems helps the tagger to cover a large number of tags in the corpus. Using a Markov tagger the method is evaluated on the corpus. The experiments show the efficiency of the method in Persian POS tagging.
%U http://www.lrec-conf.org/proceedings/lrec2010/pdf/107_Paper.pdf
Markdown (Informal)
[A Persian Part-Of-Speech Tagger Based on Morphological Analysis](http://www.lrec-conf.org/proceedings/lrec2010/pdf/107_Paper.pdf) (Mohseni & Minaei-bidgoli, LREC 2010)
ACL