@inproceedings{altammami-etal-2020-constructing,
title = "Constructing a Bilingual Hadith Corpus Using a Segmentation Tool",
author = "Altammami, Shatha and
Atwell, Eric and
Alsalka, Ammar",
booktitle = "Proceedings of the 12th Language Resources and Evaluation Conference",
month = may,
year = "2020",
address = "Marseille, France",
publisher = "European Language Resources Association",
url = "https://aclanthology.org/2020.lrec-1.415",
pages = "3390--3398",
abstract = "This article describes the process of gathering and constructing a bilingual parallel corpus of Islamic Hadith, which is the set of narratives reporting different aspects of the prophet Muhammad{'}s life. The corpus data is gathered from the six canonical Hadith collections using a custom segmentation tool that automatically segments and annotates the two Hadith components with 92{\%} accuracy. This Hadith segmenter minimises the costs of language resource creation and produces consistent results independently from previous knowledge and experiences that usually influence human annotators. The corpus includes more than 10M tokens and will be freely available via the LREC repository.",
language = "English",
ISBN = "979-10-95546-34-4",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="altammami-etal-2020-constructing">
<titleInfo>
<title>Constructing a Bilingual Hadith Corpus Using a Segmentation Tool</title>
</titleInfo>
<name type="personal">
<namePart type="given">Shatha</namePart>
<namePart type="family">Altammami</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Eric</namePart>
<namePart type="family">Atwell</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ammar</namePart>
<namePart type="family">Alsalka</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-may</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<language>
<languageTerm type="text">English</languageTerm>
<languageTerm type="code" authority="iso639-2b">eng</languageTerm>
</language>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 12th Language Resources and Evaluation Conference</title>
</titleInfo>
<originInfo>
<publisher>European Language Resources Association</publisher>
<place>
<placeTerm type="text">Marseille, France</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-10-95546-34-4</identifier>
</relatedItem>
<abstract>This article describes the process of gathering and constructing a bilingual parallel corpus of Islamic Hadith, which is the set of narratives reporting different aspects of the prophet Muhammad’s life. The corpus data is gathered from the six canonical Hadith collections using a custom segmentation tool that automatically segments and annotates the two Hadith components with 92% accuracy. This Hadith segmenter minimises the costs of language resource creation and produces consistent results independently from previous knowledge and experiences that usually influence human annotators. The corpus includes more than 10M tokens and will be freely available via the LREC repository.</abstract>
<identifier type="citekey">altammami-etal-2020-constructing</identifier>
<location>
<url>https://aclanthology.org/2020.lrec-1.415</url>
</location>
<part>
<date>2020-may</date>
<extent unit="page">
<start>3390</start>
<end>3398</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Constructing a Bilingual Hadith Corpus Using a Segmentation Tool
%A Altammami, Shatha
%A Atwell, Eric
%A Alsalka, Ammar
%S Proceedings of the 12th Language Resources and Evaluation Conference
%D 2020
%8 may
%I European Language Resources Association
%C Marseille, France
%@ 979-10-95546-34-4
%G English
%F altammami-etal-2020-constructing
%X This article describes the process of gathering and constructing a bilingual parallel corpus of Islamic Hadith, which is the set of narratives reporting different aspects of the prophet Muhammad’s life. The corpus data is gathered from the six canonical Hadith collections using a custom segmentation tool that automatically segments and annotates the two Hadith components with 92% accuracy. This Hadith segmenter minimises the costs of language resource creation and produces consistent results independently from previous knowledge and experiences that usually influence human annotators. The corpus includes more than 10M tokens and will be freely available via the LREC repository.
%U https://aclanthology.org/2020.lrec-1.415
%P 3390-3398
Markdown (Informal)
[Constructing a Bilingual Hadith Corpus Using a Segmentation Tool](https://aclanthology.org/2020.lrec-1.415) (Altammami et al., LREC 2020)
ACL