{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,10]],"date-time":"2026-02-10T05:01:01Z","timestamp":1770699661965,"version":"3.49.0"},"publisher-location":"Singapore","reference-count":32,"publisher":"Springer Nature Singapore","isbn-type":[{"value":"9789819569564","type":"print"},{"value":"9789819569571","type":"electronic"}],"license":[{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2026]]},"DOI":"10.1007\/978-981-95-6957-1_16","type":"book-chapter","created":{"date-parts":[[2026,2,9]],"date-time":"2026-02-09T10:44:04Z","timestamp":1770633844000},"page":"219-231","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Enhancing Image Generation of Diffusion Models with Structural Image Guidance"],"prefix":"10.1007","author":[{"given":"Wei","family":"Wang","sequence":"first","affiliation":[]},{"given":"JiaYi","family":"Hu","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2026,2,10]]},"reference":[{"key":"16_CR1","first-page":"8780","volume":"34","author":"P Dhariwal","year":"2021","unstructured":"Dhariwal, P., Nichol, A.: Diffusion models beat GANs on image synthesis. Adv. Neural Inf. Proces. Syst. 34, 8780\u20138794 (2021)","journal-title":"Adv. Neural Inf. Proces. Syst."},{"key":"16_CR2","unstructured":"Guo. Y., Yang. C., Rao. A., et al.: Animatediff: Animate your personalized text-to-image diffusion models without specific tuning. arXiv preprint arXiv:2307.04725, (2023)."},{"key":"16_CR3","first-page":"10684","volume-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","author":"R Rombach","year":"2022","unstructured":"Rombach, R., Blattmann, A., Lorenz, D., et al.: High-resolution image synthesis with latent diffusion models. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 10684\u201310695 (2022)"},{"key":"16_CR4","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00355","volume-title":"Adding Conditional Control to Text-to-Image Diffusion Models","author":"L Zhang","year":"2023","unstructured":"Zhang, L., Rao, A., Agrawala, M., Adding Conditional Control to Text-to-Image Diffusion Models, (2023)."},{"key":"16_CR5","first-page":"4296","volume-title":"Proceedings of the AAAI Conference on Artificial Intelligence","author":"C Mou","year":"2024","unstructured":"Mou, C., Wang, X., Xie, L., et al. T2i-adapter: learning adapters to dig out more controllable ability for text-to-image diffusion models. Proceedings of the AAAI Conference on Artificial Intelligence, 2024, 38(5): 4296\u20134304."},{"key":"16_CR6","first-page":"7132","volume-title":"Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition","author":"J Hu","year":"2018","unstructured":"Hu, J., Shen, L., Sun, G.: Squeeze-and-excitation networks. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 7132\u20137141 (2018)"},{"key":"16_CR7","first-page":"11534","volume-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","author":"Q Wang","year":"2020","unstructured":"Wang, Q., Wu, B., Zhu, P., et al.: ECA-net: efficient channel attention for deep convolutional neural networks. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 11534\u201311542 (2020)"},{"key":"16_CR8","first-page":"2256","volume-title":"International Conference on Machine Learning. PMLR","author":"J Sohl-dickstein","year":"2015","unstructured":"Sohl-dickstein, J., Weiss, E., Maheswaranathan, N., et al.: Deep unsupervised learning using nonequilibrium thermodynamics. In: International Conference on Machine Learning. PMLR, pp. 2256\u20132265 (2015)"},{"key":"16_CR9","first-page":"6840","volume":"33","author":"J Ho","year":"2020","unstructured":"Ho, J., Jain, A., Abbeel, P.: Denoising diffusion probabilistic models. Adv. Neural Inf. Proces. Syst. 33, 6840\u20136851 (2020)","journal-title":"Adv. Neural Inf. Proces. Syst."},{"key":"16_CR10","first-page":"8780","volume-title":"Proceeding of the 34th Advances in Neural Information Processing Systems","author":"P Dhariwal","year":"2021","unstructured":"Dhariwal, P., Nichol, A.: Diffusion models beat gans on image synthesis. In: Proceeding of the 34th Advances in Neural Information Processing Systems, vol. 34, pp. 8780\u20138794. MA: MIT, Cambridge (2021)"},{"key":"16_CR11","unstructured":"Ramesh, A., Dhariwal, P., Nichol, A., et al.: Hierarchical text-conditional image generation with clip latents. arXiv preprint arXiv:2204.06125, (2022)"},{"key":"16_CR12","unstructured":"Runwayml Stable Diffusion v1.5: Stable Diffusion v1\u20135 Model Card [EB\/OL], (2023-08-24) [2024-06-30] https:\/\/huggingface.co\/runwayml\/stable-diffusion-v1-5"},{"key":"16_CR13","first-page":"22511","volume-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","author":"Y Li","year":"2023","unstructured":"Li, Y., Liu, H., Wu, Q., et al.: Gligen: open-set grounded text-to-image generation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 22511\u201322521 (2023)"},{"key":"16_CR14","first-page":"343","volume-title":"European Conference on Computer Vision","author":"D Zavadski","year":"2024","unstructured":"Zavadski, D., Feiden, J.F., Rother, C.: Control net-XS: rethinking the control of text-to-image diffusion models as feedback-control systems. In: European Conference on Computer Vision, pp. 343\u2013362. Springer Nature Switzerland, Cham (2024)"},{"key":"16_CR15","unstructured":"Peng, B., Wang, J., Zhang, Y., et al.: Controlnext: Powerful and efficient control for image and video generation. arXiv preprint arXiv:2408.06070, (2024)"},{"key":"16_CR16","unstructured":"Hecong, W.: Control LoRA: A Lightweight Neural Network To Control Stable Diffusion Spatial Information [EB\/OL]. (2023)"},{"key":"16_CR17","unstructured":"Blattmann, A., Dockhorn, T., Kulal, S., et al.: Stable video diffusion: Scaling latent video diffusion models to large datasets. arXiv preprint arXiv:2311.15127, (2023)"},{"key":"16_CR18","unstructured":"Podell, D., English, Z., Lacey, K., et al.: SDXL: Improving latent diffusion models for high-resolution image synthesis. arXiv preprint arXiv:2307.01952, (2023)"},{"key":"16_CR19","first-page":"1321513224","volume-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","author":"YC Zhang","year":"2024","unstructured":"Zhang, Y.C., Qian, S.J., Peng, B.H., et al.: Prompt highlighter: interactive control for multimodal llms. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, p. 1321513224 (2024)"},{"key":"16_CR20","unstructured":"Hu, E.J., Shen, Y., Wallis, P., et al. Lora: Low-rank adaptation of large language models. arXiv preprint arXiv:2106.09685, (2021)"},{"key":"16_CR21","first-page":"181","volume-title":"European Conference on Computer Vision","author":"Y Frenkel","year":"2024","unstructured":"Frenkel, Y., Vinker, Y., Shamir, A., et al.: Implicit style-content separation using b-lora. In: European Conference on Computer Vision, pp. 181\u2013198. Springer Nature Switzerland, Cham (2024)"},{"key":"16_CR22","unstructured":"Wu, Y., Shi, Y., Wei, J., et al.: Difflora: Generating personalized low-rank adaptation weights with diffusion. arXiv preprint arXiv:2408.06740, (2024)"},{"key":"16_CR23","first-page":"5375","volume-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","author":"ZJK Hartly","year":"2024","unstructured":"Hartly, Z.J.K., Lind, R.J., Pound, M.P., et al.: Domain targeted synthetic plant style transfer using stable diffusion LoRA and control net. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 5375\u20135383 (2024)"},{"key":"16_CR24","first-page":"1","volume":"2024","author":"SF Bhat","year":"2024","unstructured":"Bhat, S.F., Mitra, N., Wonka, P.: Loosecontrol: lifting controlnet for generalized depth conditioning [C]\/\/ACM SIGGRAPH. Conf. Papers. 2024, 1\u201311 (2024)","journal-title":"Conf. Papers."},{"key":"16_CR25","unstructured":"Xu, Y., He, Z., Shan, S., et al.: Ctr LoRA: An Extensible and Efficient Framework for Controllable Image Generation. arXiv preprint arXiv:2410.09400, (2024)"},{"key":"16_CR26","unstructured":"Ba, J.K., Kiros, J.R.: G.E. Hinton Layer normalization. arXiv preprint arXiv: 1607.06450, (2016)."},{"key":"16_CR27","first-page":"448","volume-title":"International Conference on Machine Learning","author":"S Ioffe","year":"2015","unstructured":"Ioffe, S., Szegedy, C.: batch normalization: accelerating deep network training by reducing internal covariate shift. In: International Conference on Machine Learning, pp. 448\u2013456. PMLR (2015)"},{"key":"16_CR28","first-page":"3","volume-title":"Proceedings of the European Conference on Computer Vision (ECCV)","author":"YX Wuand","year":"2018","unstructured":"Wuand, Y.X., He, K.M.: Group normalization. In: Proceedings of the European Conference on Computer Vision (ECCV), pp. 3\u201319 (2018)"},{"issue":"2","key":"16_CR29","first-page":"3","volume":"1","author":"EJ Hu","year":"2022","unstructured":"Hu, E.J., Shen, Y., Wallis, P., et al.: Lora: low-rank adaptation of large language models. ICLR. 1(2), 3 (2022)","journal-title":"ICLR"},{"key":"16_CR30","first-page":"1","volume-title":"European Conference on Computer Vision","author":"X Liu","year":"2024","unstructured":"Liu, X., Wei, Y., Liu, M., et al.: Smartcontrol: enhancing controlnet for handling rough visual conditions. In: European Conference on Computer Vision, pp. 1\u201317. Springer Nature Switzerland, Cham (2024)"},{"key":"16_CR31","volume-title":"Proceedings of the AAAI Conference on Artificial Intelligence","author":"C Mou","year":"2024","unstructured":"Mou, Chong, et al.: T2i-adapter: learning adapters to dig out more controllable ability for text-to-image diffusion models. Proceedings of the AAAI Conference on Artificial Intelligence. 38. 5 (2024)"},{"key":"16_CR32","unstructured":"Qin, C, Zhang, S, Yu, N, et al.: Unicontrol: A unified diffusion model for controllable visual generation in the wild. arXiv preprint arXiv:2305.11147 (2023)"}],"container-title":["Lecture Notes in Computer Science","MultiMedia Modeling"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-95-6957-1_16","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,2,9]],"date-time":"2026-02-09T10:44:10Z","timestamp":1770633850000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-95-6957-1_16"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026]]},"ISBN":["9789819569564","9789819569571"],"references-count":32,"URL":"https:\/\/doi.org\/10.1007\/978-981-95-6957-1_16","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026]]},"assertion":[{"value":"10 February 2026","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"MMM","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Multimedia Modeling","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Prague","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Czech Republic","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2026","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 January 2026","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"31 January 2026","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"32","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"mmm2026","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/mmm2026.cz\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}