{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,14]],"date-time":"2026-04-14T15:57:43Z","timestamp":1776182263737,"version":"3.50.1"},"publisher-location":"Cham","reference-count":40,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031901669","type":"print"},{"value":"9783031901676","type":"electronic"}],"license":[{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-90167-6_28","type":"book-chapter","created":{"date-parts":[[2025,4,22]],"date-time":"2025-04-22T02:14:08Z","timestamp":1745288048000},"page":"417-433","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Steering Large Text-to-Image Model for\u00a0Kandinsky Synthesis Through Preference-Based Prompt Optimization"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-8726-6797","authenticated-orcid":false,"given":"Aven-Le","family":"Zhou","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0009-0005-5381-781X","authenticated-orcid":false,"given":"Wei","family":"Wu","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1976-7661","authenticated-orcid":false,"given":"Yu-Ao","family":"Wang","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3802-7535","authenticated-orcid":false,"given":"Kang","family":"Zhang","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,4,20]]},"reference":[{"key":"28_CR1","unstructured":"Bailey, J.: Why love generative art? (2018). https:\/\/www.artnome.com\/news\/2018\/8\/8\/why-love-generative-art"},{"key":"28_CR2","unstructured":"Cloneofsimo: Cloneofsimo\/lora: using low-rank adaptation to quickly fine-tune diffusion models. https:\/\/github.com\/cloneofsimo\/lora"},{"key":"28_CR3","doi-asserted-by":"publisher","first-page":"88","DOI":"10.1007\/978-3-031-19836-6_6","volume-title":"Computer Vision - ECCV 2022","author":"K Crowson","year":"2022","unstructured":"Crowson, K., Biderman, S., Kornis, D., Stander, D., Hallahan, E., Castricato, L., Raff, E.: Vqgan-clip: open domain image generation and editing with natural language guidance. In: Avidan, S., Brostow, G., Ciss\u00e9, M., Farinella, G.M., Hassner, T. (eds.) Computer Vision - ECCV 2022, pp. 88\u2013105. Springer Nature Switzerland, Cham (2022)"},{"key":"28_CR4","doi-asserted-by":"publisher","unstructured":"Feng, W., et al.: Training-free structured diffusion guidance for compositional text-to-image synthesis (2023). (arXiv:2212.05032) https:\/\/doi.org\/10.48550\/arXiv.2212.05032. http:\/\/arxiv.org\/abs\/2212.05032. arXiv:2212.05032","DOI":"10.48550\/arXiv.2212.05032"},{"key":"28_CR5","doi-asserted-by":"publisher","unstructured":"Galanter, P.: Generative Art Theory, pp. 146\u2013180. John Wiley and Sons, Ltd (2016). https:\/\/onlinelibrary.wiley.com\/doi\/abs\/10.1002\/9781118475249.ch5. https:\/\/doi.org\/10.1002\/9781118475249.ch5","DOI":"10.1002\/9781118475249.ch5"},{"key":"28_CR6","doi-asserted-by":"publisher","unstructured":"Grefenstette, J.J.: Genetic algorithms and machine learning. In: Proceedings of the Sixth Annual Conference on Computational learning theory - COLT \u201993, pp. 3-4. ACM Press, Santa Cruz, California, United States (1993). https:\/\/doi.org\/10.1145\/168304.168305. http:\/\/portal.acm.org\/citation.cfm?doid=168304.168305","DOI":"10.1145\/168304.168305"},{"key":"28_CR7","doi-asserted-by":"publisher","unstructured":"Hao, Y., Chi, Z., Dong, L., Wei, F.: Optimizing prompts for text-to-image generation (2023). (arXiv:2212.09611). https:\/\/doi.org\/10.48550\/arXiv.2212.09611. http:\/\/arxiv.org\/abs\/2212.09611. arXiv:2212.09611","DOI":"10.48550\/arXiv.2212.09611"},{"key":"28_CR8","doi-asserted-by":"crossref","unstructured":"Holland, J.H.: Adaptation in natural and artificial systems: an introductory analysis with applications to biology, control, and artificial intelligence. MIT Press (1992). google-Books-ID: 5EgGaBkwvWcC","DOI":"10.7551\/mitpress\/1090.001.0001"},{"key":"28_CR9","unstructured":"Hu, E.J., et al.: Lora: low-rank adaptation of large language models (2021). (arXiv:2106.09685). http:\/\/arxiv.org\/abs\/2106.09685. arXiv:2106.09685"},{"key":"28_CR10","volume-title":"Concerning the Spiritual in Art","author":"W Kandinsky","year":"1977","unstructured":"Kandinsky, W.: Concerning the Spiritual in Art. Dover Publications Inc (1977)"},{"key":"28_CR11","unstructured":"Kandinsky, W.: Point and Line to Plane. Courier Corporation (1979)"},{"key":"28_CR12","unstructured":"Kovach, B.: A box of chaos: the generative artist\u2019s toolkit (2018). https:\/\/www.youtube.com\/watch?v=kZNTozzsNqk"},{"key":"28_CR13","doi-asserted-by":"crossref","unstructured":"Kumari, N., Zhang, B., Zhang, R., Shechtman, E., Zhu, J.Y.: Multi-concept customization of text-to-image diffusion, pp. 1931\u20131941 (2023). https:\/\/openaccess.thecvf.com\/content\/CVPR2023\/html\/Kumari_Multi-Concept_Customization_of_Text-to-Image_Diffusion_CVPR_2023_paper.html","DOI":"10.1109\/CVPR52729.2023.00192"},{"key":"28_CR14","doi-asserted-by":"crossref","unstructured":"Lim, S.M., Sultan, A.B.M., Sulaiman, M.N., Mustapha, A., Leong, K.Y.: Crossover and mutation operators of genetic algorithms. Int. J. Mach. Learn. Comput. 7(1), 9\u201312 (2017). https:\/\/doi.org\/10.18178\/ijmlc.2017.7.1.611","DOI":"10.18178\/ijmlc.2017.7.1.611"},{"key":"28_CR15","unstructured":"Lin, Z., Ehsan, U., Agarwal, R., Dani, S., Vashishth, V., Riedl, M.: Beyond prompts: exploring the design space of mixed-initiative co-creativity systems (2023). (arXiv:2305.07465). http:\/\/arxiv.org\/abs\/2305.07465. arXiv:2305.07465"},{"key":"28_CR16","doi-asserted-by":"publisher","unstructured":"Liu, V., Chilton, L.B.: Design guidelines for prompt engineering text-to-image generative models. In: Proceedings of the 2022 CHI Conference on Human Factors in Computing Systems, pp. 1\u201323. CHI \u201922, Association for Computing Machinery, New York (2022). https:\/\/doi.org\/10.1145\/3491102.3501825. https:\/\/dl.acm.org\/doi\/10.1145\/3491102.3501825","DOI":"10.1145\/3491102.3501825"},{"key":"28_CR17","doi-asserted-by":"crossref","unstructured":"Martins, T., Cunha, J.M., Correia, J., Machado, P.: Towards the evolution of prompts with metaprompter. In: International Conference on Computational Intelligence in Music, Sound, Art and Design (Part of EvoStar), pp. 180\u2013195. Springer (2023)","DOI":"10.1007\/978-3-031-29956-8_12"},{"key":"28_CR18","unstructured":"McCormack, J.: Art, emergence, and the computational sublime"},{"issue":"2","key":"28_CR19","doi-asserted-by":"publisher","first-page":"135","DOI":"10.1162\/LEON_a_00533","volume":"47","author":"J McCormack","year":"2014","unstructured":"McCormack, J., Bown, O., Dorin, A., McCabe, J., Monro, G., Whitelaw, M.: Ten questions concerning generative computer art. Leonardo 47(2), 135\u2013141 (2014). https:\/\/doi.org\/10.1162\/LEON_a_00533","journal-title":"Leonardo"},{"issue":"5","key":"28_CR20","doi-asserted-by":"publisher","first-page":"476","DOI":"10.1162\/leon.2009.42.5.476","volume":"42","author":"G Monro","year":"2009","unstructured":"Monro, G.: Emergence and generative art. Leonardo 42(5), 476\u2013477 (2009). https:\/\/doi.org\/10.1162\/leon.2009.42.5.476","journal-title":"Leonardo"},{"key":"28_CR21","doi-asserted-by":"publisher","unstructured":"Oppenlaender, J.: The creativity of text-to-image generation. In: Proceedings of the 25th International Academic Mindtrek Conference, pp. 192-202. Academic Mindtrek \u201922, Association for Computing Machinery, New York (2022). https:\/\/doi.org\/10.1145\/3569219.3569352. https:\/\/dl.acm.org\/doi\/10.1145\/3569219.3569352","DOI":"10.1145\/3569219.3569352"},{"key":"28_CR22","doi-asserted-by":"publisher","unstructured":"Pavlichenko, N., Ustalov, D.: Best prompts for text-to-image models and how to find them. In: Proceedings of the 46th International ACM SIGIR Conference on Research and Development in Information Retrieval, pp. 2067-2071. SIGIR \u201923, Association for Computing Machinery, New York (2023). https:\/\/doi.org\/10.1145\/3539618.3592000. https:\/\/dl.acm.org\/doi\/10.1145\/3539618.3592000","DOI":"10.1145\/3539618.3592000"},{"key":"28_CR23","unstructured":"Radford, A., et al.: Learning transferable visual models from natural language supervision. In: Proceedings of the 38th International Conference on Machine Learning, pp. 8748-8763. PMLR (2021). https:\/\/proceedings.mlr.press\/v139\/radford21a.html"},{"key":"28_CR24","doi-asserted-by":"publisher","unstructured":"Ramesh, A., Dhariwal, P., Nichol, A., Chu, C., Chen, M.: Hierarchical text-conditional image generation with clip latents. (arXiv:2204.06125) (2022). https:\/\/doi.org\/10.48550\/arXiv.2204.06125. http:\/\/arxiv.org\/abs\/2204.06125. arXiv:2204.06125","DOI":"10.48550\/arXiv.2204.06125"},{"key":"28_CR25","doi-asserted-by":"publisher","unstructured":"Reynolds, L., McDonell, K.: Prompt programming for large language models: beyond the few-shot paradigm. In: Extended Abstracts of the 2021 CHI Conference on Human Factors in Computing Systems, pp. 1\u20137. CHI EA \u201921, Association for Computing Machinery, New York (2021). https:\/\/doi.org\/10.1145\/3411763.3451760. https:\/\/dl.acm.org\/doi\/10.1145\/3411763.3451760","DOI":"10.1145\/3411763.3451760"},{"key":"28_CR26","doi-asserted-by":"crossref","unstructured":"Rombach, R., Blattmann, A., Lorenz, D., Esser, P., Ommer, B.: High-resolution image synthesis with latent diffusion models, pp. 10684-10695 (2022). https:\/\/openaccess.thecvf.com\/content\/CVPR2022\/html\/Rombach_High-Resolution_Image_Synthesis_With_Latent_Diffusion_Models_CVPR_2022_paper.html","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"28_CR27","doi-asserted-by":"publisher","unstructured":"Rombach, R., Blattmann, A., Ommer, B.: Text-guided synthesis of artistic images with retrieval-augmented diffusion models (2022). (arXiv:2207.13038). https:\/\/doi.org\/10.48550\/arXiv.2207.13038. http:\/\/arxiv.org\/abs\/2207.13038. arXiv:2207.13038","DOI":"10.48550\/arXiv.2207.13038"},{"key":"28_CR28","doi-asserted-by":"publisher","unstructured":"Ruiz, N., Li, Y., Jampani, V., Pritch, Y., Rubinstein, M., Aberman, K.: Dreambooth: fine tuning text-to-image diffusion models for subject-driven generation (2023). (arXiv:2208.12242). https:\/\/doi.org\/10.48550\/arXiv.2208.12242. http:\/\/arxiv.org\/abs\/2208.12242. arXiv:2208.12242","DOI":"10.48550\/arXiv.2208.12242"},{"key":"28_CR29","unstructured":"Runwayml: Runwayml\/stable-diffusion-v1-5 \u00b7 hugging face. https:\/\/huggingface.co\/runwayml\/stable-diffusion-v1-5"},{"key":"28_CR30","unstructured":"Saharia, C., et al.: Photorealistic text-to-image diffusion models with deep language understanding. In: Koyejo, S., Mohamed, S., Agarwal, A., Belgrave, D., Cho, K., Oh, A. (eds.) Advances in Neural Information Processing Systems. vol. 35, pp. 36479-36494. Curran Associates, Inc. (2022). https:\/\/proceedings.neurips.cc\/paper_files\/paper\/2022\/file\/ec795aeadae0b7d230fa35cbaf04c041-Paper-Conference.pdf"},{"key":"28_CR31","unstructured":"Sohl-Dickstein, J., Weiss, E., Maheswaranathan, N., Ganguli, S.: Deep unsupervised learning using nonequilibrium thermodynamics. In: Proceedings of the 32nd International Conference on Machine Learning, pp. 2256-2265. PMLR (2015). https:\/\/proceedings.mlr.press\/v37\/sohl-dickstein15.html"},{"key":"28_CR32","doi-asserted-by":"publisher","unstructured":"Tang, Z., Rybin, D., Chang, T.H.: Zeroth-order optimization meets human feedback: provable learning via ranking oracles (2023). (arXiv:2303.03751). https:\/\/doi.org\/10.48550\/arXiv.2303.03751. http:\/\/arxiv.org\/abs\/2303.03751. arXiv:2303.03751","DOI":"10.48550\/arXiv.2303.03751"},{"key":"28_CR33","unstructured":"Toriato: Toriato\/stable-diffusion-webui-wd14-tagger: Labeling extension for automatic1111\u2019s web ui. https:\/\/github.com\/toriato\/stable-diffusion-webui-wd14-tagger"},{"key":"28_CR34","unstructured":"Tsukisuwa, N.: Difflora (2023). https:\/\/pixai.art\/model"},{"key":"28_CR35","unstructured":"Von R\u00fctte, D., Fedele, E., Thomm, J., Wolf, L.: Fabric: personalizing diffusion models with iterative feedback (2023). (arXiv:2307.10159). http:\/\/arxiv.org\/abs\/2307.10159. arXiv:2307.10159"},{"key":"28_CR36","doi-asserted-by":"publisher","unstructured":"Wei, Y., Zhang, Y., Ji, Z., Bai, J., Zhang, L., Zuo, W.: Elite: encoding visual concepts into textual embeddings for customized text-to-image generation (2023). (arXiv:2302.13848). https:\/\/doi.org\/10.48550\/arXiv.2302.13848. http:\/\/arxiv.org\/abs\/2302.13848. arXiv:2302.13848","DOI":"10.48550\/arXiv.2302.13848"},{"key":"28_CR37","doi-asserted-by":"publisher","unstructured":"Wu, X., Sun, K., Zhu, F., Zhao, R., Li, H.: Human preference score: better aligning text-to-image models with human preference (2023). (arXiv:2303.14420). https:\/\/doi.org\/10.48550\/arXiv.2303.14420. http:\/\/arxiv.org\/abs\/2303.14420. arXiv:2303.14420","DOI":"10.48550\/arXiv.2303.14420"},{"key":"28_CR38","unstructured":"Yu, J., et al.: Scaling autoregressive models for content-rich text-to-image generation. transactions on machine learning research (2022). https:\/\/openreview.net\/forum?id=AFDcYJKhND"},{"key":"28_CR39","unstructured":"Zhang, N., Li, L., Chen, X., Deng, S., Bi, Z., Tan, C., Huang, F., Chen, H.: Differentiable prompt makes pre-trained language models better few-shot learners (2022). (arXiv:2108.13161). http:\/\/arxiv.org\/abs\/2108.13161. DOI=10.48550\/arXiv.2108.13161. arXiv:2108.13161"},{"key":"28_CR40","doi-asserted-by":"publisher","unstructured":"Zhou, A.L., Wang, Y.A., Wu, W., Zhang, K.: Kandinsky as you preferred. In: ACM SIGGRAPH 2024 Posters. SIGGRAPH \u201924, Association for Computing Machinery, New York (2024). https:\/\/doi.org\/10.1145\/3641234.3671061. https:\/\/doi.org\/10.1145\/3641234.3671061","DOI":"10.1145\/3641234.3671061"}],"container-title":["Lecture Notes in Computer Science","Artificial Intelligence in Music, Sound, Art and Design"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-90167-6_28","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,4,22]],"date-time":"2025-04-22T02:14:23Z","timestamp":1745288063000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-90167-6_28"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025]]},"ISBN":["9783031901669","9783031901676"],"references-count":40,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-90167-6_28","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025]]},"assertion":[{"value":"20 April 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"EvoMUSART","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Computational Intelligence in Music, Sound, Art and Design (Part of EvoStar)","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Trieste","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"23 April 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"25 April 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"14","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"evomusart2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/www.evostar.org\/2025\/evomusart\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}