@comment{Findings of NAACL 2022 paper proposing LIDE (few-shot image classification with language descriptions). The url field uses the permanent ACL Anthology address.}
@inproceedings{nishida-etal-2022-improving,
  title     = {Improving Few-Shot Image Classification Using Machine- and User-Generated Natural Language Descriptions},
  author    = {Nishida, Kosuke and
               Nishida, Kyosuke and
               Nishioka, Shuichi},
  editor    = {Carpuat, Marine and
               de Marneffe, Marie-Catherine and
               Meza Ruiz, Ivan Vladimir},
  booktitle = {Findings of the Association for Computational Linguistics: NAACL 2022},
  month     = jul,
  year      = {2022},
  address   = {Seattle, United States},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2022.findings-naacl.106/},
  doi       = {10.18653/v1/2022.findings-naacl.106},
  pages     = {1421--1430},
  abstract  = {Humans can obtain the knowledge of novel visual concepts from language descriptions, and we thus use the few-shot image classification task to investigate whether a machine learning model can have this capability. Our proposed model, LIDE (Learning from Image and DEscription), has a text decoder to generate the descriptions and a text encoder to obtain the text representations of machine- or user-generated descriptions. We confirmed that LIDE with machine-generated descriptions outperformed baseline models. Moreover, the performance was improved further with high-quality user-generated descriptions. The generated descriptions can be viewed as the explanations of the model's predictions, and we observed that such explanations were consistent with prediction results. We also investigated why the language description improves the few-shot image classification performance by comparing the image representations and the text representations in the feature spaces.},
}
Markdown (Informal)
[Improving Few-Shot Image Classification Using Machine- and User-Generated Natural Language Descriptions](https://aclanthology.org/2022.findings-naacl.106/) (Nishida et al., Findings 2022)
ACL