@inproceedings{liu-etal-2020-multistage,
  title     = {Multistage Fusion with Forget Gate for Multimodal Summarization in Open-Domain Videos},
  author    = {Liu, Nayu and
               Sun, Xian and
               Yu, Hongfeng and
               Zhang, Wenkai and
               Xu, Guangluan},
  editor    = {Webber, Bonnie and
               Cohn, Trevor and
               He, Yulan and
               Liu, Yang},
  booktitle = {Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing ({EMNLP})},
  month     = nov,
  year      = {2020},
  address   = {Online},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2020.emnlp-main.144/},
  doi       = {10.18653/v1/2020.emnlp-main.144},
  pages     = {1834--1845},
  abstract  = {Multimodal summarization for open-domain videos is an emerging task, aiming to generate a summary from multisource information (video, audio, transcript). Despite the success of recent multiencoder-decoder frameworks on this task, existing methods lack fine-grained multimodality interactions of multisource inputs. Besides, unlike other multimodal tasks, this task has longer multimodal sequences with more redundancy and noise. To address these two issues, we propose a multistage fusion network with the fusion forget gate module, which builds upon this approach by modeling fine-grained interactions between the modalities through a multistep fusion schema and controlling the flow of redundant information between multimodal long sequences via a forgetting module. Experimental results on the How2 dataset show that our proposed model achieves a new state-of-the-art performance. Comprehensive analysis empirically verifies the effectiveness of our fusion schema and forgetting module on multiple encoder-decoder architectures. Specially, when using high noise ASR transcripts (WER{\ensuremath{>}}30{\%}), our model still achieves performance close to the ground-truth transcript model, which reduces manual annotation cost.},
}
Markdown (Informal)
[Multistage Fusion with Forget Gate for Multimodal Summarization in Open-Domain Videos](https://preview.aclanthology.org/fix-sig-urls/2020.emnlp-main.144/) (Liu et al., EMNLP 2020)
ACL