{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,27]],"date-time":"2026-03-27T09:11:13Z","timestamp":1774602673850,"version":"3.50.1"},"reference-count":45,"publisher":"Springer Science and Business Media LLC","issue":"3","license":[{"start":{"date-parts":[[2026,2,6]],"date-time":"2026-02-06T00:00:00Z","timestamp":1770336000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2026,2,6]],"date-time":"2026-02-06T00:00:00Z","timestamp":1770336000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"name":"RIE2020 Industry Alignment Fund Industry Collaboration Projects (IAF-ICP) Funding Initiative","award":["I1901E0052"],"award-info":[{"award-number":["I1901E0052"]}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int J Comput Vis"],"published-print":{"date-parts":[[2026,3]]},"DOI":"10.1007\/s11263-025-02727-6","type":"journal-article","created":{"date-parts":[[2026,2,6]],"date-time":"2026-02-06T04:17:29Z","timestamp":1770351449000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["AITTI: Learning Adaptive Inclusive Token for Text-to-Image Generation"],"prefix":"10.1007","volume":"134","author":[{"given":"Xinyu","family":"Hou","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xiaoming","family":"Li","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5345-1591","authenticated-orcid":false,"given":"Chen Change","family":"Loy","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2026,2,6]]},"reference":[{"key":"2727_CR1","doi-asserted-by":"crossref","unstructured":"Bansal, H., Yin, D., Monajatipoor, M., & Chang, K. W. (2022). How well can text-to-image generative models understand ethical natural language interventions?.","DOI":"10.18653\/v1\/2022.emnlp-main.88"},{"key":"2727_CR2","unstructured":"Beaumont, R. (2022). Clip retrieval: Easily compute clip embeddings and build a clip retrieval system with them. https:\/\/github.com\/rom1504\/clip-retrieval."},{"key":"2727_CR3","doi-asserted-by":"crossref","unstructured":"Bianchi, F., Kalluri, P., Durmus, E., Ladhak, F., Cheng, M., Nozza, D., Hashimoto, T., Jurafsky, D., Zou, J., & Caliskan, A. (2023). Easily accessible text-to-image generation amplifies demographic stereotypes at large scale. In: ACM Conference on Fairness, Accountability, and Transparency.","DOI":"10.1145\/3593013.3594095"},{"key":"2727_CR4","unstructured":"BlackForestLabs (2023) FLUX. https:\/\/github.com\/black-forest-labs\/flux."},{"key":"2727_CR5","doi-asserted-by":"crossref","unstructured":"Chinchure, A., Shukla, P., Bhatt, G., Salij, K., Hosanagar, K., Sigal, L., & Turk, M. (2023). TIBET: Identifying and evaluating biases in text-to-image generative models. arXiv preprint arXiv: 2312.01261.","DOI":"10.1007\/978-3-031-72986-7_25"},{"key":"2727_CR6","doi-asserted-by":"crossref","unstructured":"Choi, Y., Park, J., Kim, H., Lee, J., & Park, S. (2024). Fair sampling in diffusion models through switching mechanism. In: Proceedings of the AAAI Conference on Artificial Intelligence.","DOI":"10.1609\/aaai.v38i20.30202"},{"key":"2727_CR7","doi-asserted-by":"crossref","unstructured":"Deng, J., Dong, W., Socher, R., Li, L. J., Li, K., & Fei-Fei, L. (2009). Imagenet: A large-scale hierarchical image database. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR).","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"2727_CR8","doi-asserted-by":"crossref","unstructured":"Deng, J., Guo, J., Ververas, E., Kotsia, I., & Zafeiriou, S. (2020). RetinaFace: Single-shot multi-level face localisation in the wild. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR).","DOI":"10.1109\/CVPR42600.2020.00525"},{"key":"2727_CR9","doi-asserted-by":"crossref","unstructured":"D\u2019Inc\u00e0, M., Peruzzo, E., Mancini, M., Xu, D., Goel, V., Xu, X., Wang, Z., Shi, H., & Sebe, N. (2024). Openbias: Open-set bias detection in text-to-image generative models. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR).","DOI":"10.1109\/CVPR52733.2024.01162"},{"key":"2727_CR10","unstructured":"Esser, P., Kulal, S., Blattmann, A., Entezari, R., M\u00fcller, J., Saini, H., Levi, Y., Lorenz, D., Sauer, A., Boesel, F., Podell, D., Dockhorn, T., English, Z., & Rombach, R. (2024). Scaling rectified flow transformers for high-resolution image synthesis. In: ICML."},{"key":"2727_CR11","unstructured":"Friedrich, F., Brack, M., Struppek, L., Hintersdorf, D., Schramowski, P., Luccioni, S., & Kersting, K. (2023). Fair Diffusion: Instructing text-to-image generation models on fairness. arXiv preprint arXiv: 2302.10893."},{"key":"2727_CR12","unstructured":"Gal, R., Alaluf, Y., Atzmon, Y., Patashnik, O., Bermano, A. H., Chechik, G., & Cohen-Or, D. (2022). An image is worth one word: Personalizing text-to-image generation using textual inversion. In: Proceedings of International Conference on Learning Representations (ICLR)."},{"key":"2727_CR13","doi-asserted-by":"crossref","unstructured":"Gandikota, R., Orgad, H., Belinkov, Y., Materzy\u0144ska, J., & Bau, D. (2024). Unified concept editing in diffusion models. In: Proceedings of the IEEE\/CVF Winter Conference on Applications of Computer Vision (WACV).","DOI":"10.1109\/WACV57701.2024.00503"},{"key":"2727_CR14","doi-asserted-by":"crossref","unstructured":"Ghosh, S., & Caliskan, A. (2023). \u2019person\u2019 == light-skinned, western man, and sexualization of women of color: Stereotypes in stable diffusion.","DOI":"10.18653\/v1\/2023.findings-emnlp.465"},{"issue":"6","key":"2727_CR15","doi-asserted-by":"publisher","first-page":"1464","DOI":"10.1037\/0022-3514.74.6.1464","volume":"74","author":"AG Greenwald","year":"1998","unstructured":"Greenwald, A. G., McGhee, D. E., & Schwartz, J. L. K. (1998). Measuring individual differences in implicit cognition: the implicit association test. Journal of personality and social psychology,74(6), 1464\u201380.","journal-title":"Journal of personality and social psychology"},{"key":"2727_CR16","unstructured":"Heusel, M., Ramsauer, H., Unterthiner, T., Nessler, B., & Hochreiter, S. (2017). Gans trained by a two time-scale update rule converge to a local nash equilibrium. In: Proceedings of Advances in Neural Information Processing Systems (NeurIPS)."},{"key":"2727_CR17","unstructured":"Ho, J., & Salimans, T. (2021). Classifier-free diffusion guidance. In: NeurIPS 2021 Workshop on Deep Generative Models and Downstream Applications."},{"key":"2727_CR18","doi-asserted-by":"crossref","unstructured":"Jha, A., Prabhakaran, V., Denton, R., Laszlo, S., Dave, S., Qadri, R., Reddy, C. K., & Dev, S. (2024). Beyond the surface: A global-scale analysis of visual stereotypes in text-to-image generation. arXiv preprint arXiv: 2401.06310.","DOI":"10.18653\/v1\/2024.acl-long.667"},{"key":"2727_CR19","unstructured":"Jiang, Y., Li, W., Zhang, Y., Cai, M., & Yue, X. (2024). Debiasdiff: Debiasing text-to-image diffusion models with self-discovering latent attribute directions. arXiv preprint arXiv:2412.18810."},{"key":"2727_CR20","doi-asserted-by":"crossref","unstructured":"Karkkainen, K., & Joo, J. (2021). FairFace: Face attribute dataset for balanced race, gender, and age for bias measurement and mitigation. In: Proceedings of the IEEE\/CVF Winter Conference on Applications of Computer Vision (WACV).","DOI":"10.1109\/WACV48630.2021.00159"},{"key":"2727_CR21","unstructured":"Kim, E., Kim, S., Shin, C., & Yoon, S. (2023). De-stereotyping text-to-image models through prompt tuning. In: ICML Workshop."},{"key":"2727_CR22","doi-asserted-by":"crossref","unstructured":"Kumari, N., Zhang, B., Zhang, R., Shechtman, E., & Zhu, J. Y. (2023). Multi-concept customization of text-to-image diffusion. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR).","DOI":"10.1109\/CVPR52729.2023.00192"},{"key":"2727_CR23","unstructured":"Li, J., Hu, L., Zhang, J., Zheng, T., Zhang, H., & Wang, D. (2023). Fair text-to-image diffusion via fair mapping. arXiv preprint arXiv: 2311.17695."},{"key":"2727_CR24","unstructured":"Loshchilov, I., & Hutter, F. (2019). Decoupled weight decay regularization. In: Proceedings of International Conference on Learning Representations (ICLR)."},{"key":"2727_CR25","doi-asserted-by":"crossref","unstructured":"Lyu, Y., Yang, Z., Niu, Y., Jiang, J., & Lo, D. (2025). Do existing testing tools really uncover gender bias in text-to-image models? arXiv preprint arXiv:2501.15775.","DOI":"10.1145\/3746027.3755748"},{"key":"2727_CR26","doi-asserted-by":"crossref","unstructured":"Orgad, H., Kawar, B., & Belinkov, Y. (2023). TIME: Editing implicit assumptions in text-to-image diffusion models. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV).","DOI":"10.1109\/ICCV51070.2023.00649"},{"key":"2727_CR27","doi-asserted-by":"crossref","unstructured":"Parihar, R., Bhat, A., Basu, A., Mallick, S., Kundu, J. N., & Babu, R. V. (2024). Balancing act: Distribution-guided debiasing in diffusion models. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR).","DOI":"10.1109\/CVPR52733.2024.00637"},{"key":"2727_CR28","unstructured":"Podell, D., English, Z., Lacey, K., Blattmann, A., Dockhorn, T., M\u00fcller, J., Penna, J., & Rombach, R. (2024). SDXL: Improving latent diffusion models for high-resolution image synthesis. In: Proceedings of International Conference on Learning Representations (ICLR)."},{"key":"2727_CR29","unstructured":"Radford, A., Kim, J. W., Hallacy, C., Ramesh, A., Goh, G., Agarwal, S., Sastry, G., Askell, A., Mishkin, P., Clark, J., and others. (2021). Learning transferable visual models from natural language supervision. In: Proceedings of International Conference on Machine Learning (ICML)."},{"key":"2727_CR30","doi-asserted-by":"crossref","unstructured":"Rombach, R., Blattmann, A., Lorenz, D., Esser, P., & Ommer, B. (2022). High-resolution image synthesis with latent diffusion models. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR).","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"2727_CR31","doi-asserted-by":"crossref","unstructured":"Ronneberger, O., Fischer, P., & Brox, T. (2015). U-Net: Convolutional networks for biomedical image segmentation.","DOI":"10.1007\/978-3-319-24574-4_28"},{"key":"2727_CR32","doi-asserted-by":"crossref","unstructured":"Ruiz, N., Li, Y., Jampani, V., Pritch, Y., Rubinstein, M., & Aberman, K. (2023). DreamBooth: Fine tuning text-to-image diffusion models for subject-driven generation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR).","DOI":"10.1109\/CVPR52729.2023.02155"},{"key":"2727_CR33","unstructured":"Runway, R. (2023). Mitigating stereotypical biases in text to image generative systems. arXiv preprint arXiv: 2310.06904."},{"key":"2727_CR34","unstructured":"Schuhmann, C., Vencu, R., Beaumont, R., Kaczmarczyk, R., Mullis, C., Katta, A., Coombes, T., Jitsev, J., & Komatsuzaki, A. (2021). LAION-400M: Open dataset of clip-filtered 400 million image-text pairs. In: NeurIPS Workshop."},{"key":"2727_CR35","unstructured":"Shen, X., Du, C., Pang, T., Lin, M., Wong, Y., & Kankanhalli. M. (2024). Finetuning text-to-image diffusion models for fairness. In: Proceedings of International Conference on Learning Representations (ICLR)."},{"key":"2727_CR36","doi-asserted-by":"publisher","first-page":"zmad045","DOI":"10.1093\/jcmc\/zmad045","volume":"29","author":"L Sun","year":"2024","unstructured":"Sun, L., Wei, M., Sun, Y., Suh, Y. J., Shen, L., & Yang, S. (2024). Smiling women pitching down: auditing representational and presentational gender biases in image-generative ai. Journal of Computer-Mediated Communication,29, zmad045.","journal-title":"Journal of Computer-Mediated Communication"},{"key":"2727_CR37","doi-asserted-by":"crossref","unstructured":"Teo, C., Abdollahzadeh, M., & Cheung, N. M. M. (2023). On measuring fairness in generative models. In: Proceedings of Advances in Neural Information Processing Systems (NeurIPS).","DOI":"10.52202\/075280-0467"},{"key":"2727_CR38","doi-asserted-by":"crossref","unstructured":"Teo, C., Abdollahzadeh, M., Ma, X., & Cheung, N. M. M. (2024). Fairqueue: Rethinking prompt learning for fair text-to-image generation. In: Proceedings of Advances in Neural Information Processing Systems (NeurIPS).","DOI":"10.52202\/079017-0720"},{"key":"2727_CR39","doi-asserted-by":"crossref","unstructured":"Wang, J., Liu, X. G., Di, Z., Liu, Y., & Wang, X. (2023). T2IAT: Measuring valence and stereotypical biases in text-to-image generation. In: Findings of the Association for Computational Linguistics.","DOI":"10.18653\/v1\/2023.findings-acl.160"},{"key":"2727_CR40","doi-asserted-by":"crossref","unstructured":"Wang, W., Bai, H., Huang, J. t., Wan, Y., Yuan, Y., Qiu, H., Peng, N., & Lyu, M. R. (2024). New job, new gender? measuring the social bias in image generation models. In: Proceedings of the ACM International Conference on Multimedia (ACM MM).","DOI":"10.1145\/3664647.3681433"},{"key":"2727_CR41","unstructured":"Yesiltepe, H., Akdemir, K., & Yanardag, P. (2024). Mist: Mitigating intersectional bias with disentangled cross-attention editing in text-to-image diffusion models. arXiv preprint arXiv:2403.19738."},{"key":"2727_CR42","doi-asserted-by":"crossref","unstructured":"Zhang, C., Chen, X., Chai, S., Wu, C. H., Lagun, D., Beeler, T., & De\u00a0la Torre, F. (2023). ITI-GEN: Inclusive text-to-image generation. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV).","DOI":"10.1109\/ICCV51070.2023.00367"},{"key":"2727_CR43","unstructured":"Zhou, J., Gao, J., Zhao, X., Yao, X., & Wei, X. (2024). Association of objects may engender stereotypes: Mitigating association-engendered stereotypes in text-to-image generation. In: Proceedings of Advances in Neural Information Processing Systems (NeurIPS)."},{"key":"2727_CR44","doi-asserted-by":"crossref","unstructured":"Zhou, K., Yang, J., Loy, C. C., & Liu, Z. (2022a). Conditional prompt learning for vision-language models. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR).","DOI":"10.1109\/CVPR52688.2022.01631"},{"key":"2727_CR45","doi-asserted-by":"crossref","unstructured":"Zhou, K., Yang, J., Loy, C. C., & Liu, Z. (2022b). Learning to prompt for vision-language models. In: International Journal of Computer Vision (IJCV).","DOI":"10.1007\/s11263-022-01653-1"}],"container-title":["International Journal of Computer Vision"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11263-025-02727-6.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11263-025-02727-6","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11263-025-02727-6.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,3,27]],"date-time":"2026-03-27T08:34:46Z","timestamp":1774600486000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11263-025-02727-6"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,2,6]]},"references-count":45,"journal-issue":{"issue":"3","published-print":{"date-parts":[[2026,3]]}},"alternative-id":["2727"],"URL":"https:\/\/doi.org\/10.1007\/s11263-025-02727-6","relation":{},"ISSN":["0920-5691","1573-1405"],"issn-type":[{"value":"0920-5691","type":"print"},{"value":"1573-1405","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026,2,6]]},"assertion":[{"value":"24 March 2025","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"25 December 2025","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"6 February 2026","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}],"article-number":"108"}}