{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,13]],"date-time":"2026-04-13T14:35:57Z","timestamp":1776090957497,"version":"3.50.1"},"publisher-location":"Cham","reference-count":65,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031732195","type":"print"},{"value":"9783031732201","type":"electronic"}],"license":[{"start":{"date-parts":[[2024,11,3]],"date-time":"2024-11-03T00:00:00Z","timestamp":1730592000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,11,3]],"date-time":"2024-11-03T00:00:00Z","timestamp":1730592000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-73220-1_6","type":"book-chapter","created":{"date-parts":[[2024,11,2]],"date-time":"2024-11-02T20:02:55Z","timestamp":1730577775000},"page":"92-111","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":4,"title":["TP2O: Creative Text Pair-to-Object Generation Using Balance Swap-Sampling"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-3716-671X","authenticated-orcid":false,"given":"Jun","family":"Li","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3328-1713","authenticated-orcid":false,"given":"Zedong","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"Jian","family":"Yang","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,11,3]]},"reference":[{"key":"6_CR1","doi-asserted-by":"crossref","unstructured":"Avrahami, O., et al.: Spatext: spatio-textual representation for controllable image generation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 18370\u201318380 (2023)","DOI":"10.1109\/CVPR52729.2023.01762"},{"key":"6_CR2","doi-asserted-by":"publisher","first-page":"347","DOI":"10.1016\/S0004-3702(98)00055-1","volume":"103","author":"MA Boden","year":"1998","unstructured":"Boden, M.A.: Creativity and artificial intelligence. Artif. Intell. 103, 347\u2013356 (1998)","journal-title":"Artif. Intell."},{"key":"6_CR3","doi-asserted-by":"crossref","unstructured":"Boden, M.A.: The Creative Mind-Myths and Mechanisms. Taylor & Francis e-Library (2004)","DOI":"10.4324\/9780203508527"},{"key":"6_CR4","unstructured":"Boutin, V., et al.: Diffusion models as artists: are we closing the gap between humans and machines? In: Proceedings of the International Conference on Machine Learning (ICML), pp. 2953\u20133002 (2023)"},{"key":"6_CR5","unstructured":"Boutin, V., Singhal, L., Thomas, X., Serre, T.: Diversity vs. recognizability: human-like generalization in one-shot generative models. In: Proceedings of Advances in Neural Information Processing Systems, pp. 20933\u201320946 (2022)"},{"key":"6_CR6","doi-asserted-by":"crossref","unstructured":"Brooks, T., Holynski, A., Efros, A.A.: Instructpix2pix: learning to follow image editing instructions. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 18392\u201318402 (2023)","DOI":"10.1109\/CVPR52729.2023.01764"},{"key":"6_CR7","doi-asserted-by":"crossref","unstructured":"Cetinic, E., She, J.: Understanding and creating art with AI: review and outlook. ACM Trans. Multimedia Comput. Commun. Appl. 18(2), Article 66 (2022)","DOI":"10.1145\/3475799"},{"issue":"4","key":"6_CR8","doi-asserted-by":"publisher","first-page":"148","DOI":"10.1145\/3592116","volume":"42","author":"H Chefer","year":"2023","unstructured":"Chefer, H., Alaluf, Y., Vinker, Y., Wolf, L., Cohen-Or, D.: Attend-and-excite: attention-based semantic guidance for text-to-image diffusion models. ACM Trans. Graph. 42(4), 148 (2023)","journal-title":"ACM Trans. Graph."},{"key":"6_CR9","doi-asserted-by":"crossref","unstructured":"Cintas, C., Das, P., Quanz, B., Tadesse, G.A., Speakman, S., Chen, P.Y.: Towards creativity characterization of generative models via group-based subset scanning. In: Proceedings of the International Joint Conference on Artificial Intelligence (IJCAI), pp. 4929\u20134935 (2022)","DOI":"10.24963\/ijcai.2022\/683"},{"key":"6_CR10","unstructured":"Cong, Y., Min, M.R., Li, L.E., Rosenhahn, B., Yang, M.Y.: Attribute-centric compositional text-to-image generation. arXiv:2301.01413 (2023)"},{"key":"6_CR11","unstructured":"Dai, Y., et al.: Harmonious group choreography with trajectory-controllable diffusion. arXiv preprint arXiv:2403.06189 (2024)"},{"key":"6_CR12","doi-asserted-by":"crossref","unstructured":"Das, P., Quanz, B., Chen, P.Y., wook Ahn, J., Shah, D.: Toward a neuro-inspired creative decoder. In: Proceedings of the International Joint Conference on Artificial Intelligence (IJCAI), pp. 2746\u20132753 (2020)","DOI":"10.24963\/ijcai.2020\/381"},{"issue":"4","key":"6_CR13","doi-asserted-by":"publisher","first-page":"85","DOI":"10.1109\/MSP.2022.3141365","volume":"39","author":"P Das","year":"2022","unstructured":"Das, P., Varshney, L.R.: Explaining artificial intelligence generation and creativity. IEEE Signal Process. Mag. 39(4), 85\u201395 (2022)","journal-title":"IEEE Signal Process. Mag."},{"key":"6_CR14","unstructured":"daspartho: MagicMix (2022). https:\/\/github.com\/daspartho\/MagicMix#magicmix"},{"key":"6_CR15","unstructured":"Du, X., Sun, Y., Zhu, X., Li, Y.: Dream the impossible: outlier imagination with diffusion models. In: Proceedings of Advances in Neural Information Processing Systems (2023)"},{"key":"6_CR16","unstructured":"Du, Y., Li, S., Mordatch, I.: Compositional visual generation with energy based models. In: Proceedings of the Advances in Neural Information Processing Systems (NeurIPS), pp. 6637\u20136647 (2020)"},{"key":"6_CR17","unstructured":"Elgammal, A., Liu, B., Elhoseiny, M., Mazzone, M.: Can: creative adversarial networks: generating \u201cart\u201d by learning about styles and deviating from style norms. In: International Conference on Computational Creativity (ICCC), pp. 96\u2013103 (2017)"},{"key":"6_CR18","unstructured":"Feng, W., et al.: Training-free structured diffusion guidance for compositional text-to-image synthesis. In: Proceedings of the International Conference on Learning Representations (ICLR) (2023)"},{"key":"6_CR19","doi-asserted-by":"crossref","unstructured":"Feng, Z., et al.: Ernie-vilg 2.0: improving text-to-image diffusion model with knowledge-enhanced mixture-of-denoising-experts. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 10135\u201310145 (2023)","DOI":"10.1109\/CVPR52729.2023.00977"},{"key":"6_CR20","unstructured":"Frans, K., Soros, L.B., Witkowski, O.: Clipdraw: exploring text-to-drawing synthesis through language-image encoders. In: Proceedings of the Advances in Neural Information Processing Systems (NeurIPS) (2022)"},{"key":"6_CR21","unstructured":"Gal, R., et al.: An image is worth one word: personalizing text-to-image generation using textual inversion. In: Proceedings of the International Conference on Learning Representations (ICLR) (2023)"},{"key":"6_CR22","unstructured":"Ge, S., Goswami, V., Zitnick, C.L., Parikh, D.: Creative sketch generation. In: Proceedings of the International Conference on Learning Representations (ICLR) (2021)"},{"key":"6_CR23","unstructured":"Goodfellow, I., et al.: Generative adversarial nets. In: Proceedings of the Advances in Neural Information Processing Systems (NeurIPS), pp. 2672\u20132680 (2014)"},{"key":"6_CR24","unstructured":"Hertz, A., Mokady, R., Tenenbaum, J., Aberman, K., Pritch, Y., Cohen-Or, D.: Prompt-to-prompt image editing with cross attention control. In: Proceedings of the International Conference on Learning Representations (ICLR) (2023)"},{"key":"6_CR25","unstructured":"Hinton, G., Srivastava, N., Swersky, K.: Neural networks for machine learning lecture 6A overview of mini-batch gradient descent. University of Toronto, Course-CSC321 (2012)"},{"issue":"2","key":"6_CR26","doi-asserted-by":"publisher","DOI":"10.1016\/j.chb.2022.107502","volume":"139","author":"J Hitsuwari","year":"2023","unstructured":"Hitsuwari, J., Ueda, Y., Yun, W., Nomura, M.: Does human-AI collaboration lead to more creative art? Aesthetic evaluation of human-made and AI-generated haiku poetry. Comput. Hum. Behav. 139(2), 107502 (2023)","journal-title":"Comput. Hum. Behav."},{"key":"6_CR27","unstructured":"Ho, J., Jain, A., Abbeel, P.: Denoising diffusion probabilistic models. In: Proceedings of Advances in Neural Information Processing Systems (NeurIPS), pp. 6840\u20136851 (2020)"},{"key":"6_CR28","unstructured":"Kawar, B., Elad, M., Ermon, S., Song, J.: Denoising diffusion restoration models. In: Proceedings of the Advances in Neural Information Processing Systems (NeurIPS) (2022)"},{"key":"6_CR29","unstructured":"Khemakhem, I., Kingma, D., Monti, R., Hyvarinen, A.: Variational autoencoders and nonlinear ICA: a unifying framework. In: Proceedings of International Conference on Artificial Intelligence and Statistics (AISTATS), pp. 2207\u20132217 (2020)"},{"key":"6_CR30","unstructured":"Kingma, D.P., Welling, M.: Auto-encoding variational bayes. In: Proceedings of the International Conference on Learning Representations (ICLR) (2014)"},{"key":"6_CR31","doi-asserted-by":"crossref","unstructured":"Kirillov, A., et al.: Segment anything. In: Proceedings of the IEEE International Conference on Computer Vision (ICCV) (2023)","DOI":"10.1109\/ICCV51070.2023.00371"},{"key":"6_CR32","unstructured":"Kirstain, Y., Polyak, A., Singer, U., Matiana, S., Penna, J., Levy, O.: Pick-a-pic: an open dataset of user preferences for text-to-image generation. arXiv preprint arXiv:2305.01569 (2023)"},{"key":"6_CR33","doi-asserted-by":"crossref","unstructured":"Kumari, N., Zhang, B., Zhang, R., Shechtman, E., Zhu, J.Y.: Multi-concept customization of text-to-image diffusion. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 1931\u20131941 (2023)","DOI":"10.1109\/CVPR52729.2023.00192"},{"key":"6_CR34","doi-asserted-by":"crossref","unstructured":"Li, Z., Min, M.R., Li, K., Xu, C.: Stylet2i: toward compositional and high-fidelity text-to-image synthesis. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 18197\u201318207 (2022)","DOI":"10.1109\/CVPR52688.2022.01766"},{"key":"6_CR35","doi-asserted-by":"crossref","unstructured":"Liao, W., Hu, K., Yang, M.Y., Rosenhahn, B.: Text to image generation with semantic-spatial aware GAN. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 18187\u201318196 (2022)","DOI":"10.1109\/CVPR52688.2022.01765"},{"key":"6_CR36","unstructured":"Liew, J.H., Yan, H., Zhou, D., Feng, J.: Magicmix: semantic mixing with diffusion models. arXiv:2210.16056 (2022). https:\/\/magicmix.github.io\/"},{"key":"6_CR37","unstructured":"Liu, L., Ren, Y., Lin, Z., Zhao, Z.: Pseudo numerical methods for diffusion models on manifolds. In: Proceedings of International Conference on Learning Representations (ICLR) (2022)"},{"key":"6_CR38","doi-asserted-by":"crossref","unstructured":"Liu, N., Li, S., Du, Y., Torralba, A., Tenenbaum, J.B.: Compositional visual generation with composable diffusion models. In: Proceedings of the European Conference Computer Vision (ECCV), pp. 423\u2013439 (2022)","DOI":"10.1007\/978-3-031-19790-1_26"},{"key":"6_CR39","unstructured":"Maher, M.L.: Evaluating creativity in humans, computers, and collectively intelligent systems. In: Proceedings of the 1st DESIRE Network Conference on Creativity and Innovation in Design, pp. 22\u201328 (2010)"},{"issue":"2","key":"6_CR40","volume":"144","author":"AH Nobari","year":"2021","unstructured":"Nobari, A.H., Chen, W., Ahmed, F.: Range-constrained generative adversarial network: design synthesis under constraints using conditional generative adversarial networks. J. Mech. Des. 144(2), 021708 (2021)","journal-title":"J. Mech. Des."},{"key":"6_CR41","unstructured":"Nobari, A.H., Rashad, M.F., Ahmed, F.: Creativegan: editing generative adversarial networks for creative design synthesis. In: Proceedings of ASME International Design Engineering Technical Conferences and Computers and Information in Engineering Conference (2021)"},{"key":"6_CR42","doi-asserted-by":"crossref","unstructured":"Orgad, H., Kawar, B., Belinkov, Y.: Editing implicit assumptions in text-to-image diffusion models. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV), pp. 7053\u20137061 (2023)","DOI":"10.1109\/ICCV51070.2023.00649"},{"key":"6_CR43","unstructured":"Park, D.H., Azadi, S., Liu, X., Darrell, T., Rohrbach, A.: Benchmark for compositional text-to-image synthesis. In: Proceedings of the Advances in Neural Information Processing Systems (NeurIPS) (2021)"},{"key":"6_CR44","unstructured":"Radford, A., et al.: Learning transferable visual models from natural language supervision. In: Proceedings of International Conference on Machine Learning (ICML), pp. 8748\u20138763 (2021)"},{"key":"6_CR45","unstructured":"Ramesh, A., Dhariwal, P., Nichol, A., Chu, C., Chen, M.: Hierarchical text-conditional image generation with clip latents. In: arXiv:2204.06125 (2022)"},{"key":"6_CR46","unstructured":"Ren, J., et al.: Out-of-distribution detection and selective generation for conditional language models. In: Proceedings of International Conference on Learning Representations (ICLR) (2023)"},{"key":"6_CR47","doi-asserted-by":"crossref","unstructured":"Rombach, R., Blattmann, A., Lorenz, D., Esser, P., Ommer, B.: High-resolution image synthesis with latent diffusion models. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 10684\u201310695 (2022)","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"6_CR48","doi-asserted-by":"crossref","unstructured":"Ruiz, N., Li, Y., Jampani, V., Pritch, Y., Rubinstein, M., Aberman, K.: Dreambooth: fine tuning text-to-image diffusion models for subject-driven generation. In: Proceedings of the Advances in Neural Information Processing Systems (NeurIPS) (2022)","DOI":"10.1109\/CVPR52729.2023.02155"},{"issue":"3","key":"6_CR49","doi-asserted-by":"publisher","first-page":"211","DOI":"10.1007\/s11263-015-0816-y","volume":"115","author":"O Russakovsky","year":"2015","unstructured":"Russakovsky, O., et al.: Imagenet large scale visual recognition challenge. Int. J. Comput. Vision (IJCV) 115(3), 211\u2013252 (2015)","journal-title":"Int. J. Comput. Vision (IJCV)"},{"key":"6_CR50","unstructured":"Saharia, C., et al.: Photorealistic text-to-image diffusion models with deep language understanding. In: Proceedings of the Advances in Neural Information Processing Systems (NeurIPS) (2022)"},{"key":"6_CR51","unstructured":"Schuhmann, C., et al.: Laion-5b: an open large-scale dataset for training next generation image-text models. In: Proceedings of Advances in Neural Information Processing Systems (NeurIPS): Datasets and Benchmarks Track, pp. 25278\u201325294 (2022)"},{"key":"6_CR52","first-page":"1","volume":"23","author":"X Shen","year":"2022","unstructured":"Shen, X., Liu, F., Dong, H., Lian, Q., Chen, Z., Zhang, T.: Weakly supervised disentangled generative causal representation learning. J. Mach. Learn. Res. 23, 1\u201355 (2022)","journal-title":"J. Mach. Learn. Res."},{"key":"6_CR53","unstructured":"Shen, Z., et al.: Towards out-of-distribution generalization: a survey. arXiv preprint arXiv:2108.13624 (2021)"},{"key":"6_CR54","unstructured":"Song, Y., Dhariwal, P., Chen, M., Sutskever, I.: Consistency models. In: Proceedings of International Conference on Machine Learning (ICML) (2023)"},{"key":"6_CR55","unstructured":"Tr\u00e4uble, F., et al.: On disentangled representations learned from correlated data. In: Proceedings of International Conference on Machine Learning (ICML), pp. 10401\u201310412 (2021)"},{"key":"6_CR56","doi-asserted-by":"crossref","unstructured":"Wang, R., Que, G., Chen, S., Li, X., Li, J., Yang, J.: Creative birds: self-supervised single-view 3D style transfer. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV), pp. 8775\u20138784 (2023)","DOI":"10.1109\/ICCV51070.2023.00806"},{"key":"6_CR57","unstructured":"Wu, X., et al.: Human preference score V2: a solid benchmark for evaluating human preferences of text-to-image synthesis. arXiv preprint arXiv:2306.09341 (2023)"},{"key":"6_CR58","unstructured":"Xu, R., Zhang, X., Shen, Z., Zhang, T., Cui, P.: A theoretical analysis on independence-driven importance weighting for covariate-shift generalization. In: Proceedings of International Conference on Machine Learning (ICML), pp. 24803\u201324829 (2022)"},{"key":"6_CR59","doi-asserted-by":"crossref","unstructured":"Ye, N., et al.: OOD-bench: quantifying and understanding two dimensions of out-of-distribution generalization. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 7947\u20137958 (2022)","DOI":"10.1109\/CVPR52688.2022.00779"},{"key":"6_CR60","unstructured":"Yu, J., et al.: Scaling autoregressive models for content-rich text-to-image generation. Trans. Mach. Learn. Res. (2022)"},{"key":"6_CR61","unstructured":"Zhang, Y., Zhou, D., Hooi, B., Wang, K., Feng, J.: Expanding small-scale datasets with guided imagination. In: Proceedings of Advances in Neural Information Processing Systems (2023)"},{"key":"6_CR62","unstructured":"Zhao, W., Bai, L., Rao, Y., Zhou, J., Lu, J.: UNIPC: a unified predictor-corrector framework for fast sampling of diffusion models. In: Proceedings of Advances in Neural Information Processing Systems (NeurIPS) (2023)"},{"key":"6_CR63","doi-asserted-by":"crossref","unstructured":"Zhou, K., Yang, Y., Hospedales, T., Xiang, T.: Deep domain-adversarial image generation for domain generalisation. In: Proceedings of AAAI Conference on Artificial Intelligence (AAAI), vol.\u00a034, pp. 13025\u201313032 (2020)","DOI":"10.1609\/aaai.v34i07.7003"},{"key":"6_CR64","doi-asserted-by":"crossref","unstructured":"Zhou, K., Yang, Y., Hospedales, T., Xiang, T.: Learning to generate novel domains for domain generalization. In: Proceedings of European Conference Computer Vision (ECCV), pp. 561\u2013578 (2020)","DOI":"10.1007\/978-3-030-58517-4_33"},{"key":"6_CR65","doi-asserted-by":"crossref","unstructured":"Zhu, M., Pan, P., Chen, W., Yang, Y.: DM-GAN: dynamic memory generative adversarial networks for text-to-image synthesis. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 5802\u20135810 (2019)","DOI":"10.1109\/CVPR.2019.00595"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2024"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-73220-1_6","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,11,2]],"date-time":"2024-11-02T20:04:30Z","timestamp":1730577870000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-73220-1_6"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,11,3]]},"ISBN":["9783031732195","9783031732201"],"references-count":65,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-73220-1_6","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,11,3]]},"assertion":[{"value":"3 November 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Milan","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 September 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4 October 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2024.ecva.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}