{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,23]],"date-time":"2026-04-23T21:04:13Z","timestamp":1776978253919,"version":"3.51.4"},"reference-count":39,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","license":[{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/legalcode"}],"funder":[{"DOI":"10.13039\/100007836","name":"Open Fund of Hunan Provincial Key Laboratory of Intelli-gent Information Processing and Application for Hengyang Normal University","doi-asserted-by":"publisher","award":["IIPA20K01"],"award-info":[{"award-number":["IIPA20K01"]}],"id":[{"id":"10.13039\/100007836","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100008082","name":"Horizontal Research Project of Hengyang Normal University","doi-asserted-by":"publisher","award":["HXKY2022006"],"award-info":[{"award-number":["HXKY2022006"]}],"id":[{"id":"10.13039\/501100008082","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Access"],"published-print":{"date-parts":[[2026]]},"DOI":"10.1109\/access.2026.3683893","type":"journal-article","created":{"date-parts":[[2026,4,22]],"date-time":"2026-04-22T19:50:56Z","timestamp":1776887456000},"page":"59607-59623","source":"Crossref","is-referenced-by-count":0,"title":["FairCLIP: Bridging Visual Diversity and Linguistic Representation for Equitable Image Synthesis"],"prefix":"10.1109","volume":"14","author":[{"ORCID":"https:\/\/orcid.org\/0009-0009-4935-5607","authenticated-orcid":false,"given":"Xinxing","family":"Li","sequence":"first","affiliation":[{"name":"School of Computer Science and Technology, Hengyang Normal University, Hengyang, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-7830-8167","authenticated-orcid":false,"given":"Guangyong","family":"Zheng","sequence":"additional","affiliation":[{"name":"School of Computer Science and Technology, Hengyang Normal University, Hengyang, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-2791-5503","authenticated-orcid":false,"given":"Jiacheng","family":"Liu","sequence":"additional","affiliation":[{"name":"School of Computer Science and Technology, Hengyang Normal University, Hengyang, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-8389-5320","authenticated-orcid":false,"given":"Wenli","family":"Chen","sequence":"additional","affiliation":[{"name":"School of Information Science and Engineering, Hunan Normal University, Changsha, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref1","first-page":"214","article-title":"Generative adversarial text to image synthesis","volume-title":"Proc. Int. Conf. Mach. Learn. (ICML)","author":"Reed"},{"key":"ref2","first-page":"6840","article-title":"Denoising diffusion probabilistic models","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"33","author":"Ho"},{"key":"ref3","article-title":"Hierarchical text-conditional image generation with CLIP latents","author":"Ramesh","year":"2022","journal-title":"arXiv:2204.06125"},{"key":"ref4","article-title":"Photorealistic text-to-image diffusion models with deep language understanding","author":"Saharia","year":"2022","journal-title":"arXiv:2205.11487"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01764"},{"key":"ref6","article-title":"Microsoft COCO captions: Data collection and evaluation server","author":"Chen","year":"2015","journal-title":"arXiv:1504.00325"},{"key":"ref7","first-page":"13893","article-title":"Bias and generalization in deep generative models: An empirical study","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Zhao"},{"key":"ref8","article-title":"Imperfect ImaGANation: Implications of GANs exacerbating biases on facial data augmentation and snapchat selfie lenses","author":"Jain","year":"2020","journal-title":"arXiv:2001.09528"},{"key":"ref9","article-title":"Fair diffusion: Instructing text-to-image generation models on fairness","author":"Friedrich","year":"2023","journal-title":"arXiv:2302.10893"},{"key":"ref10","article-title":"Debiasing vision-language models via biased prompts","author":"Chuang","year":"2023","journal-title":"arXiv:2302.00070"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.emnlp-main.88"},{"key":"ref12","first-page":"1887","article-title":"Fair generative modeling via weak supervision","volume-title":"Proc. Int. Conf. Mach. Learn. (ICML)","volume":"1","author":"Choi"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/BigData.2018.8622525"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.52202\/068431-1629"},{"key":"ref15","article-title":"Evaluating CLIP: Towards characterization of broader capabilities and downstream implications","author":"Agarwal","year":"2021","journal-title":"arXiv:2108.02818"},{"key":"ref16","first-page":"8748","article-title":"Learning transferable visual models from natural language supervision","volume-title":"Proc. Int. Conf. Mach. Learn. (ICML)","author":"Radford"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01602"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"ref19","first-page":"19896","article-title":"CogView: Mastering text-to-image generation via transformers","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Ding"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00934"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00541"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00894"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00367"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01140"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01162"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2025.findings-emnlp.42"},{"key":"ref27","first-page":"18411","article-title":"FairGen: Enhancing fairness in text-to-image diffusion models via self-discovering latent directions","volume-title":"Proc. IEEE\/CVF Int. Conf. Comput. Vis. (ICCV)","author":"Jiang"},{"key":"ref28","first-page":"17567","article-title":"Fair generation without unfair distortions: Debiasing text-to-image generation with entanglement-free attention","volume-title":"Proc. IEEE\/CVF Int. Conf. Comput. Vis. (ICCV)","author":"Park"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19827-4_41"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.emnlp-main.243"},{"key":"ref31","article-title":"An image is worth one word: Personalizing text-to-image generation using textual inversion","author":"Gal","year":"2022","journal-title":"arXiv:2208.01618"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.02155"},{"key":"ref33","article-title":"Multi-concept customization of text-to-image diffusion","author":"Kumari","year":"2022","journal-title":"arXiv:2212.04488"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/tpami.2025.3646548"},{"key":"ref35","article-title":"Text to image generation and editing: A survey","author":"Yang","year":"2025","journal-title":"arXiv:2505.02527"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1007\/s10462-025-11424-2"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.425"},{"key":"ref38","first-page":"520","article-title":"FairFace: Face attribute dataset for balanced race, gender, and age","volume-title":"Proc. Winter Conf. Appl. Comput. Vis. (WACV)","author":"K\u00e4rkk\u00e4inen"},{"key":"ref39","article-title":"Hierarchical cross-modal alignment for attribute-aware text-to-image generation","volume-title":"Proc. IEEE\/CVF Conf. Comput. Vis. Pattern Recognit. (CVPR)","author":"Li"}],"container-title":["IEEE Access"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/6287639\/11323511\/11481190.pdf?arnumber=11481190","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,4,23]],"date-time":"2026-04-23T20:00:05Z","timestamp":1776974405000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11481190\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026]]},"references-count":39,"URL":"https:\/\/doi.org\/10.1109\/access.2026.3683893","relation":{},"ISSN":["2169-3536"],"issn-type":[{"value":"2169-3536","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026]]}}}