@comment{
  Conference paper from the ACL 2025 main proceedings (Volume 1: Long Papers).
  NOTE(review): the url field below points at a "preview ... ingestion-acl-25"
  host, which looks like a staging address -- confirm the canonical URL (and add
  a doi field if one exists) before relying on this record.
  The abstract is stored as plain LaTeX text: no Markdown markup, since BibTeX
  styles would print such markers literally.
}
@inproceedings{luo-xi-2025-enhancing,
  author    = {Luo, Hongliang and
               Xi, Wei},
  title     = {Enhancing User-Controlled Text-to-Image Generation with Layout-Aware Personalization},
  editor    = {Che, Wanxiang and
               Nabende, Joyce and
               Shutova, Ekaterina and
               Pilehvar, Mohammad Taher},
  booktitle = {Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)},
  month     = jul,
  year      = {2025},
  address   = {Vienna, Austria},
  publisher = {Association for Computational Linguistics},
  pages     = {32349--32364},
  isbn      = {979-8-89176-251-0},
  url       = {https://preview.aclanthology.org/ingestion-acl-25/2025.acl-long.1556/},
  abstract  = {Recent diffusion-based models have advanced text-to-image synthesis, yet struggle to preserve fine visual details and enable precise spatial control in personalized content. We propose LayoutFlex, a novel framework that combines a Perspective-Adaptive Feature Extraction system with a Spatial Control Mechanism. Our approach captures fine-grained details via cross-modal representation learning and attention refinement, while enabling precise subject placement through coordinate-aware attention and region-constrained optimization. Experiments show LayoutFlex outperforms prior methods in visual fidelity (DINO $\uparrow$10.8{\%}) and spatial accuracy (AP 43.1$\pm$1.2 vs. 19.3). LayoutFlex supports both single and multi-subject personalization, offering a powerful solution for controllable and coherent image generation in creative and interactive applications.},
}
Markdown (Informal)
[Enhancing User-Controlled Text-to-Image Generation with Layout-Aware Personalization](https://preview.aclanthology.org/ingestion-acl-25/2025.acl-long.1556/) (Luo & Xi, ACL 2025)
ACL