% Workshop paper introducing the BehanceMT corpus; ACL Anthology ID 2022.tu-1.4.
% The url field uses the canonical aclanthology.org address, not a preview-branch mirror.
% {BehanceMT} is braced as a whole word so sentence-casing styles keep its capitalisation.
@inproceedings{nguyen-etal-2022-behancemt,
  title     = {{BehanceMT}: A Machine Translation Corpus for Livestreaming Video Transcripts},
  author    = {Nguyen, Minh Van and
               Dernoncourt, Franck and
               Nguyen, Thien},
  editor    = {Dernoncourt, Franck and
               Nguyen, Thien Huu and
               Lai, Viet Dac and
               Veyseh, Amir Pouran Ben and
               Bui, Trung H. and
               Yoon, David Seunghyun},
  booktitle = {Proceedings of the First Workshop On Transcript Understanding},
  month     = oct,
  year      = {2022},
  address   = {Gyeongju, South Korea},
  publisher = {International Conference on Computational Linguistics},
  url       = {https://aclanthology.org/2022.tu-1.4/},
  pages     = {30--33},
  abstract  = {Machine translation (MT) is an important task in natural language processing, which aims to translate a sentence in a source language to another sentence with the same/similar semantics in a target language. Despite the huge effort on building MT systems for different language pairs, most previous work focuses on formal-language settings, where text to be translated come from written sources such as books and news articles. As a result, such MT systems could fail to translate livestreaming video transcripts, where text is often shorter and might be grammatically incorrect. To overcome this issue, we introduce a novel MT corpus - BehanceMT for livestreaming video transcript translation. Our corpus contains parallel transcripts for 3 language pairs, where English is the source language and Spanish, Chinese, and Arabic are the target languages. Experimental results show that finetuning a pretrained MT model on BehanceMT significantly improves the performance of the model in translating video transcripts across 3 language pairs. In addition, the finetuned MT model outperforms GoogleTranslate in 2 out of 3 language pairs, further demonstrating the usefulness of our proposed dataset for video transcript translation. BehanceMT will be publicly released upon the acceptance of the paper.},
}
Markdown (Informal)
[BehanceMT: A Machine Translation Corpus for Livestreaming Video Transcripts](https://aclanthology.org/2022.tu-1.4/) (Nguyen et al., TU 2022)
ACL