@inproceedings{das-etal-2020-rule,
title = "A Rule Based Lightweight {B}engali Stemmer",
author = "Das, Souvick and
Pandit, Rajat and
Naskar, Sudip Kumar",
booktitle = "Proceedings of the 17th International Conference on Natural Language Processing (ICON)",
month = dec,
year = "2020",
address = "Indian Institute of Technology Patna, Patna, India",
publisher = "NLP Association of India (NLPAI)",
url = "https://aclanthology.org/2020.icon-main.55",
pages = "400--408",
abstract = "In the field of Natural Language Processing (NLP) the process of stemming plays a significant role. Stemmer transforms an inflected word to its root form. Stemmer significantly increases the efficiency of Information Retrieval (IR) systems. It is a very basic yet fundamental text pre-processing task widely used in many NLP tasks. Several important works on stemming have been carried out by researchers in English and other major languages. In this paper, we study and review existing works on stemming in Bengali and other Indian languages. Finally, we propose a rule based approach that explores Bengali morphology and leverages WordNet to achieve better accuracy. Our algorithm produced stemming accuracy of 98.86{\%} for Nouns and 99.75{\%} for Verbs.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="das-etal-2020-rule">
<titleInfo>
<title>A Rule Based Lightweight Bengali Stemmer</title>
</titleInfo>
<name type="personal">
<namePart type="given">Souvick</namePart>
<namePart type="family">Das</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rajat</namePart>
<namePart type="family">Pandit</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sudip</namePart>
<namePart type="given">Kumar</namePart>
<namePart type="family">Naskar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-dec</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 17th International Conference on Natural Language Processing (ICON)</title>
</titleInfo>
<originInfo>
<publisher>NLP Association of India (NLPAI)</publisher>
<place>
<placeTerm type="text">Indian Institute of Technology Patna, Patna, India</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In the field of Natural Language Processing (NLP) the process of stemming plays a significant role. Stemmer transforms an inflected word to its root form. Stemmer significantly increases the efficiency of Information Retrieval (IR) systems. It is a very basic yet fundamental text pre-processing task widely used in many NLP tasks. Several important works on stemming have been carried out by researchers in English and other major languages. In this paper, we study and review existing works on stemming in Bengali and other Indian languages. Finally, we propose a rule based approach that explores Bengali morphology and leverages WordNet to achieve better accuracy. Our algorithm produced stemming accuracy of 98.86% for Nouns and 99.75% for Verbs.</abstract>
<identifier type="citekey">das-etal-2020-rule</identifier>
<location>
<url>https://aclanthology.org/2020.icon-main.55</url>
</location>
<part>
<date>2020-dec</date>
<extent unit="page">
<start>400</start>
<end>408</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T A Rule Based Lightweight Bengali Stemmer
%A Das, Souvick
%A Pandit, Rajat
%A Naskar, Sudip Kumar
%S Proceedings of the 17th International Conference on Natural Language Processing (ICON)
%D 2020
%8 dec
%I NLP Association of India (NLPAI)
%C Indian Institute of Technology Patna, Patna, India
%F das-etal-2020-rule
%X In the field of Natural Language Processing (NLP) the process of stemming plays a significant role. Stemmer transforms an inflected word to its root form. Stemmer significantly increases the efficiency of Information Retrieval (IR) systems. It is a very basic yet fundamental text pre-processing task widely used in many NLP tasks. Several important works on stemming have been carried out by researchers in English and other major languages. In this paper, we study and review existing works on stemming in Bengali and other Indian languages. Finally, we propose a rule based approach that explores Bengali morphology and leverages WordNet to achieve better accuracy. Our algorithm produced stemming accuracy of 98.86% for Nouns and 99.75% for Verbs.
%U https://aclanthology.org/2020.icon-main.55
%P 400-408
Markdown (Informal)
[A Rule Based Lightweight Bengali Stemmer](https://aclanthology.org/2020.icon-main.55) (Das et al., ICON 2020)
ACL
- Souvick Das, Rajat Pandit, and Sudip Kumar Naskar. 2020. A Rule Based Lightweight Bengali Stemmer. In Proceedings of the 17th International Conference on Natural Language Processing (ICON), pages 400–408, Indian Institute of Technology Patna, Patna, India. NLP Association of India (NLPAI).