% ACL Anthology entry 2021.maiworkshop-1.5 — Third Workshop on Multimodal
% Artificial Intelligence (NAACL 2021). Fixed: url previously pointed to an
% ephemeral preview mirror (preview.aclanthology.org/jlcl-multiple-ingestion/);
% now uses the canonical Anthology URL, matching the DOI.
@inproceedings{zeng-2021-multi,
    title = "Multi Task Learning based Framework for Multimodal Classification",
    author = "Zeng, Danting",
    editor = "Zadeh, Amir  and
      Morency, Louis-Philippe  and
      Liang, Paul Pu  and
      Ross, Candace  and
      Salakhutdinov, Ruslan  and
      Poria, Soujanya  and
      Cambria, Erik  and
      Shi, Kelly",
    booktitle = "Proceedings of the Third Workshop on Multimodal Artificial Intelligence",
    month = jun,
    year = "2021",
    address = "Mexico City, Mexico",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2021.maiworkshop-1.5/",
    doi = "10.18653/v1/2021.maiworkshop-1.5",
    pages = "30--35",
    abstract = "Large-scale multi-modal classification aim to distinguish between different multi-modal data, and it has drawn dramatically attentions since last decade. In this paper, we propose a multi-task learning-based framework for the multimodal classification task, which consists of two branches: multi-modal autoencoder branch and attention-based multi-modal modeling branch. Multi-modal autoencoder can receive multi-modal features and obtain the interactive information which called multi-modal encoder feature, and use this feature to reconstitute all the input data. Besides, multi-modal encoder feature can be used to enrich the raw dataset, and improve the performance of downstream tasks (such as classification task). As for attention-based multimodal modeling branch, we first employ attention mechanism to make the model focused on important features, then we use the multi-modal encoder feature to enrich the input information, achieve a better performance. We conduct extensive experiments on different dataset, the results demonstrate the effectiveness of proposed framework."
}
Markdown (Informal)
[Multi Task Learning based Framework for Multimodal Classification](https://aclanthology.org/2021.maiworkshop-1.5/) (Zeng, maiworkshop 2021)
ACL