% Conference paper (shared-task system description) from the ICON 2020
% TechDOfication proceedings. Mixed-case names in the title and booktitle are
% brace-protected as whole words so sentence-casing styles keep their
% capitalisation. The abstract is kept verbatim as published.
@inproceedings{sharif-etal-2020-techtexc,
    title     = {{TechTexC}: Classification of Technical Texts using Convolution and Bidirectional Long Short Term Memory Network},
    author    = {Sharif, Omar and
                 Hossain, Eftekhar and
                 Hoque, Mohammed Moshiul},
    booktitle = {Proceedings of the 17th International Conference on Natural Language Processing ({ICON}): {TechDOfication} 2020 Shared Task},
    month     = dec,
    year      = {2020},
    address   = {Patna, India},
    publisher = {NLP Association of India (NLPAI)},
    url       = {https://aclanthology.org/2020.icon-techdofication.8},
    pages     = {35--39},
    abstract  = {This paper illustrates the details description of technical text classification system and its results that developed as a part of participation in the shared task TechDofication 2020. The shared task consists of two sub-tasks: (i) first task identify the coarse-grained technical domain of given text in a specified language and (ii) the second task classify a text of computer science domain into fine-grained sub-domains. A classification system (called {`}TechTexC{'}) is developed to perform the classification task using three techniques: convolution neural network (CNN), bidirectional long short term memory (BiLSTM) network, and combined CNN with BiLSTM. Results show that CNN with BiLSTM model outperforms the other techniques concerning task-1 of sub-tasks (a, b, c and g) and task-2a. This combined model obtained f1 scores of 82.63 (sub-task a), 81.95 (sub-task b), 82.39 (sub-task c), 84.37 (sub-task g), and 67.44 (task-2a) on the development dataset. Moreover, in the case of test set, the combined CNN with BiLSTM approach achieved that higher accuracy for the subtasks 1a (70.76{\%}), 1b (79.97{\%}), 1c (65.45{\%}), 1g (49.23{\%}) and 2a (70.14{\%}).},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS record for the conference paper with citekey sharif-etal-2020-techtexc. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="sharif-etal-2020-techtexc">
    <titleInfo>
      <title>TechTexC: Classification of Technical Texts using Convolution and Bidirectional Long Short Term Memory Network</title>
    </titleInfo>
    <!-- Authors, in publication order. -->
    <name type="personal">
      <namePart type="given">Omar</namePart>
      <namePart type="family">Sharif</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Eftekhar</namePart>
      <namePart type="family">Hossain</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Mohammed</namePart>
      <namePart type="given">Moshiul</namePart>
      <namePart type="family">Hoque</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <!-- Numeric year-month (December 2020) instead of a month-name abbreviation. -->
      <dateIssued>2020-12</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume this paper appears in. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 17th International Conference on Natural Language Processing (ICON): TechDOfication 2020 Shared Task</title>
      </titleInfo>
      <originInfo>
        <publisher>NLP Association of India (NLPAI)</publisher>
        <place>
          <placeTerm type="text">Patna, India</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <!-- Abstract text is kept verbatim. -->
    <abstract>This paper illustrates the details description of technical text classification system and its results that developed as a part of participation in the shared task TechDofication 2020. The shared task consists of two sub-tasks: (i) first task identify the coarse-grained technical domain of given text in a specified language and (ii) the second task classify a text of computer science domain into fine-grained sub-domains. A classification system (called ‘TechTexC’) is developed to perform the classification task using three techniques: convolution neural network (CNN), bidirectional long short term memory (BiLSTM) network, and combined CNN with BiLSTM. Results show that CNN with BiLSTM model outperforms the other techniques concerning task-1 of sub-tasks (a, b, c and g) and task-2a. This combined model obtained f1 scores of 82.63 (sub-task a), 81.95 (sub-task b), 82.39 (sub-task c), 84.37 (sub-task g), and 67.44 (task-2a) on the development dataset. Moreover, in the case of test set, the combined CNN with BiLSTM approach achieved that higher accuracy for the subtasks 1a (70.76%), 1b (79.97%), 1c (65.45%), 1g (49.23%) and 2a (70.14%).</abstract>
    <identifier type="citekey">sharif-etal-2020-techtexc</identifier>
    <location>
      <url>https://aclanthology.org/2020.icon-techdofication.8</url>
    </location>
    <!-- Position within the host item: issue date and page range. -->
    <part>
      <date>2020-12</date>
      <extent unit="page">
        <start>35</start>
        <end>39</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T TechTexC: Classification of Technical Texts using Convolution and Bidirectional Long Short Term Memory Network
%A Sharif, Omar
%A Hossain, Eftekhar
%A Hoque, Mohammed Moshiul
%S Proceedings of the 17th International Conference on Natural Language Processing (ICON): TechDOfication 2020 Shared Task
%D 2020
%8 December
%I NLP Association of India (NLPAI)
%C Patna, India
%F sharif-etal-2020-techtexc
%X This paper illustrates the details description of technical text classification system and its results that developed as a part of participation in the shared task TechDofication 2020. The shared task consists of two sub-tasks: (i) first task identify the coarse-grained technical domain of given text in a specified language and (ii) the second task classify a text of computer science domain into fine-grained sub-domains. A classification system (called ‘TechTexC’) is developed to perform the classification task using three techniques: convolution neural network (CNN), bidirectional long short term memory (BiLSTM) network, and combined CNN with BiLSTM. Results show that CNN with BiLSTM model outperforms the other techniques concerning task-1 of sub-tasks (a, b, c and g) and task-2a. This combined model obtained f1 scores of 82.63 (sub-task a), 81.95 (sub-task b), 82.39 (sub-task c), 84.37 (sub-task g), and 67.44 (task-2a) on the development dataset. Moreover, in the case of test set, the combined CNN with BiLSTM approach achieved that higher accuracy for the subtasks 1a (70.76%), 1b (79.97%), 1c (65.45%), 1g (49.23%) and 2a (70.14%).
%U https://aclanthology.org/2020.icon-techdofication.8
%P 35-39
Markdown (Informal)
[TechTexC: Classification of Technical Texts using Convolution and Bidirectional Long Short Term Memory Network](https://aclanthology.org/2020.icon-techdofication.8) (Sharif et al., ICON 2020)
ACL