{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,8]],"date-time":"2025-10-08T00:31:12Z","timestamp":1759883472865,"version":"build-2065373602"},"reference-count":82,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"11","license":[{"start":{"date-parts":[[2025,11,1]],"date-time":"2025-11-01T00:00:00Z","timestamp":1761955200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2025,11,1]],"date-time":"2025-11-01T00:00:00Z","timestamp":1761955200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,11,1]],"date-time":"2025-11-01T00:00:00Z","timestamp":1761955200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"name":"MUR PNRR project FAIR","award":["PE00000013"],"award-info":[{"award-number":["PE00000013"]}]},{"DOI":"10.13039\/100031478","name":"NextGenerationEU","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100031478","id-type":"DOI","asserted-by":"publisher"}]},{"name":"EU Horizon projects ELIAS","award":["101120237"],"award-info":[{"award-number":["101120237"]}]},{"name":"ELLIOT","award":["10121439"],"award-info":[{"award-number":["10121439"]}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Pattern Anal. Mach. 
Intell."],"published-print":{"date-parts":[[2025,11]]},"DOI":"10.1109\/tpami.2025.3592901","type":"journal-article","created":{"date-parts":[[2025,7,25]],"date-time":"2025-07-25T17:56:43Z","timestamp":1753466203000},"page":"9863-9875","source":"Crossref","is-referenced-by-count":0,"title":["GradBias: Unveiling Word Influence on Bias in Text-to-Image Generative Models"],"prefix":"10.1109","volume":"47","author":[{"ORCID":"https:\/\/orcid.org\/0009-0002-5770-8723","authenticated-orcid":false,"given":"Moreno","family":"D\u2019Inc\u00e0","sequence":"first","affiliation":[{"name":"University of Trento, Trento, Italy"}]},{"given":"Elia","family":"Peruzzo","sequence":"additional","affiliation":[{"name":"University of Trento, Trento, Italy"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8595-9955","authenticated-orcid":false,"given":"Massimiliano","family":"Mancini","sequence":"additional","affiliation":[{"name":"University of Trento, Trento, Italy"}]},{"given":"Xingqian","family":"Xu","sequence":"additional","affiliation":[{"name":"SHI Labs @ Georgia Tech & UIUC & Picsart AI Research (PAIR), Bellevue, WA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2922-5663","authenticated-orcid":false,"given":"Humphrey","family":"Shi","sequence":"additional","affiliation":[{"name":"SHI Labs @ Georgia Tech & UIUC & Picsart AI Research (PAIR), Bellevue, WA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6597-7248","authenticated-orcid":false,"given":"Nicu","family":"Sebe","sequence":"additional","affiliation":[{"name":"University of Trento, Trento, Italy"}]}],"member":"263","reference":[{"key":"ref1","first-page":"36479","article-title":"Photorealistic text-to-image diffusion models with deep language understanding","volume-title":"Proc. Int. Conf. Neural Inf. Process. 
Syst.","author":"Saharia"},{"key":"ref2","first-page":"16784","article-title":"GLIDE: Towards photorealistic image generation and editing with text-guided diffusion models","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Nichol"},{"article-title":"Hierarchical text-conditional image generation with clip latents","year":"2022","author":"Ramesh","key":"ref3"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"ref5","first-page":"1862","article-title":"SDXL: Improving latent diffusion models for high-resolution image synthesis","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Podell"},{"key":"ref6","first-page":"16222","article-title":"Diffusion self-guidance for controllable image generation","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Epstein"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01764"},{"article-title":"Prompt-to-prompt image editing with cross attention control","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Hertz","key":"ref8"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00822"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.02155"},{"article-title":"An image is worth one word: Personalizing text-to-image generation using textual inversion","year":"2022","author":"Gal","key":"ref11"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01762"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00355"},{"key":"ref14","first-page":"13753","article-title":"Composer: Creative and controllable image synthesis with composable conditions","volume-title":"Proc. Int. Conf. Mach. 
Learn.","author":"Huang"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1145\/3514094.3534162"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1145\/3442381.3449950"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00367"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1007\/s43681-024-00531-5"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00283"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1145\/3194770.3194776"},{"article-title":"RepFair-GAN: Mitigating representation bias in GANs using gradient clipping","year":"2022","author":"Kenfack","key":"ref21"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/BigData.2018.8622525"},{"key":"ref23","first-page":"20673","article-title":"Learning from failure: De-biasing classifier from biased classifier","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Nam"},{"key":"ref24","first-page":"2798","article-title":"Intra-processing methods for debiasing neural networks","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Savani"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00894"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01010"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/WACV51458.2022.00395"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/WACV57701.2024.00463"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00637"},{"key":"ref30","first-page":"25458","article-title":"Finetuning text-to-image diffusion models for fairness","volume-title":"Proc. 12th Int. Conf. Learn. 
Representations","author":"Shen"},{"article-title":"Unbiased image synthesis via manifold-driven sampling in diffusion models","year":"2023","author":"Su","key":"ref31"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.naacl-long.353"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01162"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2011.5995347"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1609\/aies.v7i1.31657"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1016\/j.cviu.2022.103552"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01231-1_31"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01219-9_47"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D17-1323"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1145\/3593013.3594095"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1145\/3600211.3604711"},{"article-title":"On the opportunities and risks of foundation models","year":"2021","author":"Bommasani","key":"ref42"},{"article-title":"Language models are few-shot learners","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Brown","key":"ref43"},{"key":"ref44","first-page":"24824","article-title":"Chain-of-thought prompting elicits reasoning in large language models","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Wei"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.acl-long.357"},{"article-title":"LLaMA: Open and efficient foundation language models","year":"2023","author":"Touvron","key":"ref46"},{"key":"ref47","article-title":"DINOv2: Learning robust visual features without supervision","author":"Oquab","year":"2023","journal-title":"Trans. Mach. Learn. 
Res."},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00951"},{"article-title":"An image is worth 16x16 words: Transformers for image recognition at scale","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Dosovitskiy","key":"ref49"},{"key":"ref50","article-title":"ChatGPT asks, BLIP-2 answers: Automatic questioning towards enriched visual descriptions","author":"Zhu","year":"2023","journal-title":"Trans. Mach. Learn. Res."},{"key":"ref51","first-page":"34892","article-title":"Visual instruction tuning","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Liu"},{"key":"ref52","first-page":"8748","article-title":"Learning transferable visual models from natural language supervision","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Radford"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01436"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.01092"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.01866"},{"article-title":"Video chatcaptioner: Towards the enriched spatiotemporal descriptions","year":"2023","author":"Chen","key":"ref56"},{"article-title":"Bias-to-text: Debiasing unknown visual biases through language interpretation","year":"2023","author":"Kim","key":"ref57"},{"article-title":"Improving the fairness of deep generative models without retraining","year":"2021","author":"Tan","key":"ref58"},{"article-title":"Classifier-free diffusion guidance","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst. Workshop Deep Generative Models Downstream Appl.","author":"Ho","key":"ref59"},{"article-title":"Mitigating inappropriateness in image generation: Can there be value in reflecting the world\u2019s ugliness?","year":"2023","author":"Brack","key":"ref60"},{"key":"ref61","first-page":"25365","article-title":"SEGA: Instructing text-to-image models using semantic guidance","volume-title":"Proc. Int. Conf. 
Neural Inf. Process. Syst.","author":"Manuel"},{"key":"ref62","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.emnlp-main.634"},{"key":"ref63","doi-asserted-by":"publisher","DOI":"10.1145\/3637528.3671458"},{"article-title":"Word-level explanations for analyzing bias in text-to-image models","year":"2023","author":"Lin","key":"ref64"},{"key":"ref65","doi-asserted-by":"publisher","DOI":"10.1109\/WACV57701.2024.00840"},{"article-title":"Severity controlled text-to-image generative model bias manipulation","year":"2024","author":"Vice","key":"ref66"},{"article-title":"Selective annotation makes language models better few-shot learners","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Su","key":"ref67"},{"key":"ref68","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v31i1.11164"},{"key":"ref69","doi-asserted-by":"publisher","DOI":"10.2172\/4167340"},{"key":"ref70","first-page":"23318","article-title":"OFA: Unifying architectures, tasks, and modalities through a simple sequence-to-sequence learning framework","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Wang"},{"key":"ref71","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.emnlp-main.488"},{"key":"ref72","first-page":"12888","article-title":"BLIP: Bootstrapping language-image pre-training for unified vision-language understanding and generation","volume-title":"Proc. Int. Conf. Mach. 
Learn.","author":"Li"},{"key":"ref73","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.02484"},{"key":"ref74","doi-asserted-by":"publisher","DOI":"10.1109\/WACV48630.2021.00159"},{"key":"ref75","doi-asserted-by":"publisher","DOI":"10.1162\/tacl_a_00166"},{"key":"ref76","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-10602-1_48"},{"article-title":"Mistral 7B","year":"2023","author":"Team","key":"ref77"},{"article-title":"Un ministral, des ministraux","year":"2024","author":"Team","key":"ref78"},{"article-title":"Mistral small 3","year":"2025","author":"Team","key":"ref79"},{"key":"ref80","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.acl-long.229"},{"key":"ref81","doi-asserted-by":"publisher","DOI":"10.1162\/coli_a_00524"},{"key":"ref82","doi-asserted-by":"publisher","DOI":"10.1145\/3597307"}],"container-title":["IEEE Transactions on Pattern Analysis and Machine Intelligence"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/34\/11192800\/11097063.pdf?arnumber=11097063","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,10,7]],"date-time":"2025-10-07T17:41:20Z","timestamp":1759858880000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11097063\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,11]]},"references-count":82,"journal-issue":{"issue":"11"},"URL":"https:\/\/doi.org\/10.1109\/tpami.2025.3592901","relation":{},"ISSN":["0162-8828","2160-9292","1939-3539"],"issn-type":[{"type":"print","value":"0162-8828"},{"type":"electronic","value":"2160-9292"},{"type":"electronic","value":"1939-3539"}],"subject":[],"published":{"date-parts":[[2025,11]]}}}