@inproceedings{yan-etal-2021-l2c,
  % Brace the whole acronym ({L2C}) rather than single letters ({L}2{C}) so
  % sentence-casing styles keep it intact without breaking kerning.
  title     = {{L2C}: Describing Visual Differences Needs Semantic Understanding of Individuals},
  author    = {Yan, An and
               Wang, Xin and
               Fu, Tsu-Jui and
               Wang, William Yang},
  % "J{\"o}rg" restores the umlaut (Jörg Tiedemann) as a BibTeX special
  % character so sorting/labeling work under classic BibTeX.
  editor    = {Merlo, Paola and
               Tiedemann, J{\"o}rg and
               Tsarfaty, Reut},
  booktitle = {Proceedings of the 16th Conference of the European Chapter of the Association for Computational Linguistics: Main Volume},
  month     = apr,
  year      = {2021},
  address   = {Online},
  publisher = {Association for Computational Linguistics},
  % Canonical Anthology URL; the original pointed at a temporary preview
  % mirror (preview.aclanthology.org/add-emnlp-2024-awards/...) that may rot.
  url       = {https://aclanthology.org/2021.eacl-main.196/},
  doi       = {10.18653/v1/2021.eacl-main.196},
  pages     = {2315--2320},
  % Abstract math de-garbled: I{\_}1 / -{\ensuremath{>}} W{\_}1,2 restored to
  % proper math mode ($I_1$, $I_2 \rightarrow W_{1,2}$).
  abstract  = {Recent advances in language and vision push forward the research of captioning a single image to describing visual differences between image pairs. Suppose there are two images, $I_1$ and $I_2$, and the task is to generate a description $W_{1,2}$ comparing them, existing methods directly model $I_1, I_2 \rightarrow W_{1,2}$ mapping without the semantic understanding of individuals. In this paper, we introduce a Learning-to-Compare (L2C) model, which learns to understand the semantic structures of these two images and compare them while learning to describe each one. We demonstrate that L2C benefits from a comparison between explicit semantic representations and single-image captions, and generalizes better on the new testing image pairs. It outperforms the baseline on both automatic evaluation and human evaluation for the Birds-to-Words dataset.},
}
Markdown (Informal)
[L2C: Describing Visual Differences Needs Semantic Understanding of Individuals](https://aclanthology.org/2021.eacl-main.196/) (Yan et al., EACL 2021)
ACL