@inproceedings{wan-etal-2024-factuality,
title = "The Factuality Tax of Diversity-Intervened Text-to-Image Generation: Benchmark and Fact-Augmented Intervention",
author = "Wan, Yixin and
Wu, Di and
Wang, Haoran and
Chang, Kai-Wei",
editor = "Al-Onaizan, Yaser and
Bansal, Mohit and
Chen, Yun-Nung",
booktitle = "Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing",
month = nov,
year = "2024",
address = "Miami, Florida, USA",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/ingest_wac_2008/2024.emnlp-main.513/",
doi = "10.18653/v1/2024.emnlp-main.513",
pages = "9082--9100",
abstract = "Prompt-based {\textquotedblleft}diversity interventions{\textquotedblright} are commonly adopted to improve the diversity of Text-to-Image (T2I) models depicting individuals with various racial or gender traits. However, will this strategy result in nonfactual demographic distribution, especially when generating real historical figures? In this work, we propose **DemOgraphic FActualIty Representation (DoFaiR)**, a benchmark to systematically quantify the trade-off between using diversity interventions and preserving demographic factuality in T2I models. DoFaiR consists of 756 meticulously fact-checked test instances to reveal the factuality tax of various diversity prompts through an automated evidence-supported evaluation pipeline. Experiments on DoFaiR unveil that diversity-oriented instructions increase the number of different gender and racial groups in DALLE-3`s generations at the cost of historically inaccurate demographic distributions. To resolve this issue, we propose **Fact-Augmented Intervention** (FAI), which instructs a Large Language Model (LLM) to reflect on verbalized or retrieved factual information about gender and racial compositions of generation subjects in history, and incorporate it into the generation context of T2I models. By orienting model generations using the reflected historical truths, FAI significantly improves the demographic factuality under diversity interventions while preserving diversity."
}
Markdown (Informal)
[The Factuality Tax of Diversity-Intervened Text-to-Image Generation: Benchmark and Fact-Augmented Intervention](https://preview.aclanthology.org/ingest_wac_2008/2024.emnlp-main.513/) (Wan et al., EMNLP 2024)
ACL