% ACL Anthology entry for MML 2022 paper #1; editor name corrected ("Cheng" -> "Chang", i.e. Kai-Wei Chang).
@inproceedings{jung-etal-2022-language,
  title     = {Language-agnostic Semantic Consistent Text-to-Image Generation},
  author    = {Jung, SeongJun and
               Choi, Woo Suk and
               Choi, Seongho and
               Zhang, Byoung-Tak},
  editor    = {Bugliarello, Emanuele and
               Chang, Kai-Wei and
               Elliott, Desmond and
               Gella, Spandana and
               Kamath, Aishwarya and
               Li, Liunian Harold and
               Liu, Fangyu and
               Pfeiffer, Jonas and
               Ponti, Edoardo Maria and
               Srinivasan, Krishna and
               Vuli{\'c}, Ivan and
               Yang, Yinfei and
               Yin, Da},
  booktitle = {Proceedings of the Workshop on Multilingual Multimodal Learning},
  month     = may,
  year      = {2022},
  address   = {Dublin, Ireland and Online},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2022.mml-1.1},
  doi       = {10.18653/v1/2022.mml-1.1},
  pages     = {1--5},
  abstract  = {Recent GAN-based text-to-image generation models have advanced that they can generate photo-realistic images matching semantically with descriptions. However, research on multi-lingual text-to-image generation has not been carried out yet much. There are two problems when constructing a multilingual text-to-image generation model: 1) language imbalance issue in text-to-image paired datasets and 2) generating images that have the same meaning but are semantically inconsistent with each other in texts expressed in different languages. To this end, we propose a Language-agnostic Semantic Consistent Generative Adversarial Network (LaSC-GAN) for text-to-image generation, which can generate semantically consistent images via language-agnostic text encoder and Siamese mechanism. Experiments on relatively low-resource language text-image datasets show that the model has comparable generation quality as images generated by high-resource language text, and generates semantically consistent images for texts with the same meaning even in different languages.},
}
@comment{
Markdown (Informal)
[Language-agnostic Semantic Consistent Text-to-Image Generation](https://aclanthology.org/2022.mml-1.1) (Jung et al., MML 2022)
ACL
}