@inproceedings{mdhaffar-etal-2020-multimodal,
title = "A Multimodal Educational Corpus of Oral Courses: Annotation, Analysis and Case Study",
author = "Mdhaffar, Salima and
Est{\`e}ve, Yannick and
Laurent, Antoine and
Hernandez, Nicolas and
Dufour, Richard and
Charlet, Delphine and
Damnati, Geraldine and
Quiniou, Solen and
Camelin, Nathalie",
booktitle = "Proceedings of the 12th Language Resources and Evaluation Conference",
month = may,
year = "2020",
address = "Marseille, France",
publisher = "European Language Resources Association",
url = "https://aclanthology.org/2020.lrec-1.529",
pages = "4293--4301",
abstract = "This corpus is part of the PASTEL (Performing Automated Speech Transcription for Enhancing Learning) project aiming to explore the potential of synchronous speech transcription and application in specific teaching situations. It includes 10 hours of different lectures, manually transcribed and segmented. The main interest of this corpus lies in its multimodal aspect: in addition to speech, the courses were filmed and the written presentation supports (slides) are made available. The dataset may then serve researches in multiple fields, from speech and language to image and video processing. The dataset will be freely available to the research community. In this paper, we first describe in details the annotation protocol, including a detailed analysis of the manually labeled data. Then, we propose some possible use cases of the corpus with baseline results. The use cases concern scientific fields from both speech and text processing, with language model adaptation, thematic segmentation and transcription to slide alignment.",
language = "English",
ISBN = "979-10-95546-34-4",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="mdhaffar-etal-2020-multimodal">
<titleInfo>
<title>A Multimodal Educational Corpus of Oral Courses: Annotation, Analysis and Case Study</title>
</titleInfo>
<name type="personal">
<namePart type="given">Salima</namePart>
<namePart type="family">Mdhaffar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yannick</namePart>
<namePart type="family">Estève</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Antoine</namePart>
<namePart type="family">Laurent</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nicolas</namePart>
<namePart type="family">Hernandez</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Richard</namePart>
<namePart type="family">Dufour</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Delphine</namePart>
<namePart type="family">Charlet</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Geraldine</namePart>
<namePart type="family">Damnati</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Solen</namePart>
<namePart type="family">Quiniou</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nathalie</namePart>
<namePart type="family">Camelin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-may</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<language>
<languageTerm type="text">English</languageTerm>
<languageTerm type="code" authority="iso639-2b">eng</languageTerm>
</language>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 12th Language Resources and Evaluation Conference</title>
</titleInfo>
<originInfo>
<publisher>European Language Resources Association</publisher>
<place>
<placeTerm type="text">Marseille, France</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-10-95546-34-4</identifier>
</relatedItem>
<abstract>This corpus is part of the PASTEL (Performing Automated Speech Transcription for Enhancing Learning) project aiming to explore the potential of synchronous speech transcription and application in specific teaching situations. It includes 10 hours of different lectures, manually transcribed and segmented. The main interest of this corpus lies in its multimodal aspect: in addition to speech, the courses were filmed and the written presentation supports (slides) are made available. The dataset may then serve researches in multiple fields, from speech and language to image and video processing. The dataset will be freely available to the research community. In this paper, we first describe in details the annotation protocol, including a detailed analysis of the manually labeled data. Then, we propose some possible use cases of the corpus with baseline results. The use cases concern scientific fields from both speech and text processing, with language model adaptation, thematic segmentation and transcription to slide alignment.</abstract>
<identifier type="citekey">mdhaffar-etal-2020-multimodal</identifier>
<location>
<url>https://aclanthology.org/2020.lrec-1.529</url>
</location>
<part>
<date>2020-may</date>
<extent unit="page">
<start>4293</start>
<end>4301</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T A Multimodal Educational Corpus of Oral Courses: Annotation, Analysis and Case Study
%A Mdhaffar, Salima
%A Estève, Yannick
%A Laurent, Antoine
%A Hernandez, Nicolas
%A Dufour, Richard
%A Charlet, Delphine
%A Damnati, Geraldine
%A Quiniou, Solen
%A Camelin, Nathalie
%S Proceedings of the 12th Language Resources and Evaluation Conference
%D 2020
%8 may
%I European Language Resources Association
%C Marseille, France
%@ 979-10-95546-34-4
%G English
%F mdhaffar-etal-2020-multimodal
%X This corpus is part of the PASTEL (Performing Automated Speech Transcription for Enhancing Learning) project aiming to explore the potential of synchronous speech transcription and application in specific teaching situations. It includes 10 hours of different lectures, manually transcribed and segmented. The main interest of this corpus lies in its multimodal aspect: in addition to speech, the courses were filmed and the written presentation supports (slides) are made available. The dataset may then serve researches in multiple fields, from speech and language to image and video processing. The dataset will be freely available to the research community. In this paper, we first describe in details the annotation protocol, including a detailed analysis of the manually labeled data. Then, we propose some possible use cases of the corpus with baseline results. The use cases concern scientific fields from both speech and text processing, with language model adaptation, thematic segmentation and transcription to slide alignment.
%U https://aclanthology.org/2020.lrec-1.529
%P 4293-4301
Markdown (Informal)
[A Multimodal Educational Corpus of Oral Courses: Annotation, Analysis and Case Study](https://aclanthology.org/2020.lrec-1.529) (Mdhaffar et al., LREC 2020)
ACL
- Salima Mdhaffar, Yannick Estève, Antoine Laurent, Nicolas Hernandez, Richard Dufour, Delphine Charlet, Geraldine Damnati, Solen Quiniou, and Nathalie Camelin. 2020. A Multimodal Educational Corpus of Oral Courses: Annotation, Analysis and Case Study. In Proceedings of the 12th Language Resources and Evaluation Conference, pages 4293–4301, Marseille, France. European Language Resources Association.