% EMNLP 2025 main-conference paper (ACL Anthology ID 2025.emnlp-main.349).
@inproceedings{zhang-wan-2025-r,
  author    = {Zhang, Huixuan and
               Wan, Xiaojun},
  title     = {{R-Bind}: Unified Enhancement of Attribute and Relation Binding in Text-to-Image Diffusion Models},
  editor    = {Christodoulopoulos, Christos and
               Chakraborty, Tanmoy and
               Rose, Carolyn and
               Peng, Violet},
  booktitle = {Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing},
  month     = nov,
  year      = {2025},
  address   = {Suzhou, China},
  publisher = {Association for Computational Linguistics},
  pages     = {6867--6881},
  isbn      = {979-8-89176-332-6},
  url       = {https://aclanthology.org/2025.emnlp-main.349/},
  abstract  = {Text-to-image models frequently fail to achieve perfect alignment with textual prompts, particularly in maintaining proper semantic binding between semantic elements in the given prompt. Existing approaches typically require costly retraining or focus on only correctly generating the attributes of entities (entity-attribute binding), ignoring the cruciality of correctly generating the relations between entities (entity-relation-entity binding), resulting in unsatisfactory semantic binding performance. In this work, we propose a novel training-free method R-Bind that simultaneously improves both entity-attribute and entity-relation-entity binding. Our method introduces three inference-time optimization losses that adjust attention maps during generation. Comprehensive evaluations across multiple datasets demonstrate our approach's effectiveness, validity, and flexibility in enhancing semantic binding without additional training.},
}
Markdown (Informal)
[R-Bind: Unified Enhancement of Attribute and Relation Binding in Text-to-Image Diffusion Models](https://aclanthology.org/2025.emnlp-main.349/) (Zhang & Wan, EMNLP 2025)
ACL