{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,10]],"date-time":"2026-02-10T05:01:02Z","timestamp":1770699662016,"version":"3.49.0"},"publisher-location":"Singapore","reference-count":36,"publisher":"Springer Nature Singapore","isbn-type":[{"value":"9789819569564","type":"print"},{"value":"9789819569571","type":"electronic"}],"license":[{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2026]]},"DOI":"10.1007\/978-981-95-6957-1_44","type":"book-chapter","created":{"date-parts":[[2026,2,9]],"date-time":"2026-02-09T10:43:57Z","timestamp":1770633837000},"page":"616-630","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["SPADE: Attention-Guided Split Diffusion for\u00a0Precise Spatial Control in\u00a0Interior Layout Image Generation"],"prefix":"10.1007","author":[{"given":"Wenzheng","family":"Liu","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ming","family":"Yuan","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yizhou","family":"Wang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Lianghao","family":"Shen","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xiaofeng","family":"Wang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Qianqian","family":"Xing","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ronghui","family":"Cao","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xiaoyong","family":"Tang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Tan","family":"Deng","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Cheng","family":"Fu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2026,2,10]]},"reference":[{"key":"44_CR1","unstructured":"Betker, J., et\u00a0al.: Improving image generation with better captions. Comput. Sci. 2(3), 8 (2023). https:\/\/cdn.openai.com\/papers\/dall-e-3.pdf"},{"key":"44_CR2","unstructured":"Cao, J., Liu, J., Kitani, K., Zhou, Y.: Multi-modal diffusion for hand-object grasp generation. arXiv preprint arXiv:2409.04560 (2024)"},{"key":"44_CR3","unstructured":"Chen, C., et al.: Diffusion models for multi-modal generative modeling. arXiv preprint arXiv:2407.17571 (2024)"},{"key":"44_CR4","doi-asserted-by":"crossref","unstructured":"Cheng, L., Li, S.: Diffuspoll: Conditional text diffusion model for poll generation. In: Findings of the Association for Computational Linguistics ACL 2024, pp. 925\u2013935 (2024)","DOI":"10.18653\/v1\/2024.findings-acl.54"},{"key":"44_CR5","unstructured":"Dubey, A., et\u00a0al.: The llama 3 herd of models. arXiv preprint arXiv:2407.21783 (2024)"},{"key":"44_CR6","unstructured":"Esser, P., et\u00a0al.: Scaling rectified flow transformers for high-resolution image synthesis. In: Forty-first International Conference on Machine Learning (2024)"},{"key":"44_CR7","unstructured":"Heusel, M., Ramsauer, H., Unterthiner, T., Nessler, B., Hochreiter, S.: GANs trained by a two time-scale update rule converge to a local NASH equilibrium. Adv. Neural Inf. Process. Syst. 30 (2017)"},{"issue":"2","key":"44_CR8","first-page":"3","volume":"1","author":"EJ Hu","year":"2022","unstructured":"Hu, E.J., et al.: Lora: low-rank adaptation of large language models. ICLR 1(2), 3 (2022)","journal-title":"ICLR"},{"key":"44_CR9","doi-asserted-by":"crossref","unstructured":"Hu, X., Li, S., Ying, Q., Peng, W., Zhang, X., Qian, Z.: Establishing robust generative image steganography via popular stable diffusion. IEEE Trans. Inf. Forensics Secur. (2024)","DOI":"10.1109\/TIFS.2024.3444311"},{"key":"44_CR10","doi-asserted-by":"crossref","unstructured":"Huang, K., Duan, C., Sun, K., Xie, E., Li, Z., Liu, X.: T2i-compbench++: an enhanced and comprehensive benchmark for compositional text-to-image generation. IEEE Trans. Pattern Anal. Mach. Intell. (2025)","DOI":"10.1109\/TPAMI.2025.3531907"},{"key":"44_CR11","first-page":"78723","volume":"36","author":"K Huang","year":"2023","unstructured":"Huang, K., Sun, K., Xie, E., Li, Z., Liu, X.: T2i-compbench: a comprehensive benchmark for open-world compositional text-to-image generation. Adv. Neural. Inf. Process. Syst. 36, 78723\u201378747 (2023)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"44_CR12","doi-asserted-by":"crossref","unstructured":"Huang, Z., Chan, K.C., Jiang, Y., Liu, Z.: Collaborative diffusion for multi-modal face generation and editing. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 6080\u20136090 (2023)","DOI":"10.1109\/CVPR52729.2023.00589"},{"key":"44_CR13","doi-asserted-by":"crossref","unstructured":"Jia, C., Luo, M., Dang, Z., Dai, G., Chang, X., Wang, M., Wang, J.: Ssmg: spatial-semantic map guided diffusion model for free-form layout-to-image generation. In: Proceedings of the AAAI Conference on Artificial Intelligence. vol.\u00a038, pp. 2480\u20132488 (2024)","DOI":"10.1609\/aaai.v38i3.28024"},{"key":"44_CR14","unstructured":"Kong, C., Kwak, N.: Analyzing multimodal objectives through the lens of generative diffusion guidance. arXiv preprint arXiv:2302.10305 (2023)"},{"issue":"28","key":"44_CR15","doi-asserted-by":"publisher","first-page":"17245","DOI":"10.1007\/s00521-023-09021-x","volume":"36","author":"R Li","year":"2024","unstructured":"Li, R., Li, W., Yang, Y., Wei, H., Jiang, J., Bai, Q.: Swinv2-imagen: hierarchical vision transformer diffusion models for text-to-image generation. Neural Comput. Appl. 36(28), 17245\u201317260 (2024)","journal-title":"Neural Comput. Appl."},{"key":"44_CR16","doi-asserted-by":"crossref","unstructured":"Li, W., Xu, X., Liu, J., Xiao, X.: Unimo-g: unified image generation through multimodal conditional diffusion. arXiv preprint arXiv:2401.13388 (2024)","DOI":"10.18653\/v1\/2024.acl-long.335"},{"key":"44_CR17","unstructured":"Li, W., et\u00a0al.: Upainting: unified text-to-image diffusion generation with cross-modal guidance. arXiv preprint arXiv:2210.16031 (2022)"},{"key":"44_CR18","unstructured":"Lipman, Y., Chen, R.T., Ben-Hamu, H., Nickel, M., Le, M.: Flow matching for generative modeling. arXiv preprint arXiv:2210.02747 (2022)"},{"key":"44_CR19","unstructured":"Liu, X., Gong, C., Liu, Q.: Flow straight and fast: learning to generate and transfer data with rectified flow. arXiv preprint arXiv:2209.03003 (2022)"},{"key":"44_CR20","unstructured":"Ma, Y., Yang, H., Wang, W., Fu, J., Liu, J.: Unified multi-modal latent diffusion for joint subject and text conditional image generation. arXiv preprint arXiv:2303.09319 (2023)"},{"key":"44_CR21","unstructured":"Minello, G., Bicciato, A., Rossi, L., Torsello, A., Cosmo, L.: Graph generation via spectral diffusion. arXiv preprint arXiv:2402.18974 (2024)"},{"key":"44_CR22","doi-asserted-by":"crossref","unstructured":"Nair, N.G., Valanarasu, J.M.J., Patel, V.M.: Maxfusion: Plug &play multi-modal generation in text-to-image diffusion models. In: European Conference on Computer Vision, pp. 93\u2013110. Springer (2024)","DOI":"10.1007\/978-3-031-72920-1_6"},{"key":"44_CR23","doi-asserted-by":"publisher","DOI":"10.1016\/j.engappai.2024.108221","volume":"133","author":"EE Ngasa","year":"2024","unstructured":"Ngasa, E.E., Jang, M.A., Tarimo, S.A., Woo, J., Shin, H.B.: Diffusion-based Wasserstein generative adversarial network for blood cell image augmentation. Eng. Appl. Artif. Intell. 133, 108221 (2024)","journal-title":"Eng. Appl. Artif. Intell."},{"key":"44_CR24","doi-asserted-by":"crossref","unstructured":"Rombach, R., Blattmann, A., Lorenz, D., Esser, P., Ommer, B.: High-resolution image synthesis with latent diffusion models. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 10684\u201310695 (2022)","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"44_CR25","doi-asserted-by":"crossref","unstructured":"Ruiz, N., Li, Y., Jampani, V., Pritch, Y., Rubinstein, M., Aberman, K.: Dreambooth: fine tuning text-to-image diffusion models for subject-driven generation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 22500\u201322510 (2023)","DOI":"10.1109\/CVPR52729.2023.02155"},{"key":"44_CR26","doi-asserted-by":"crossref","unstructured":"Sakabe, K., Ono, K., Adamidis, P., Masuda, N.: Generating interior images with latent user preferences through GANs. In: 2024 IEEE Congress on Evolutionary Computation (CEC), pp.\u00a01\u20137. IEEE (2024)","DOI":"10.1109\/CEC60901.2024.10611990"},{"key":"44_CR27","doi-asserted-by":"crossref","unstructured":"Sun, L., Tan, D.: Distributed 3d interior environment design system based on color image model. Informatica 49(10) (2025)","DOI":"10.31449\/inf.v49i10.5599"},{"key":"44_CR28","unstructured":"Vaswani, A., et al.: Attention is all you need. Adv. Neural Inf. Process. Syst. 30 (2017)"},{"key":"44_CR29","doi-asserted-by":"crossref","unstructured":"Wang, H.: Vision transformer-based framework for AI-generated image detection in interior design. Informatica 49(16) (2025)","DOI":"10.31449\/inf.v49i16.7979"},{"key":"44_CR30","doi-asserted-by":"crossref","unstructured":"Wang, W., et al.: A two-stage generative model with cycle GAN and joint diffusion for MRI-based brain tumor detection. IEEE J. Biomed. Health Inform. (2024)","DOI":"10.1109\/JBHI.2024.3373018"},{"key":"44_CR31","first-page":"24824","volume":"35","author":"J Wei","year":"2022","unstructured":"Wei, J., et al.: Chain-of-thought prompting elicits reasoning in large language models. Adv. Neural. Inf. Process. Syst. 35, 24824\u201324837 (2022)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"44_CR32","unstructured":"Xu, G., Jin, P., Hao, L., Song, Y., Sun, L., Yuan, L.: Llava-cot: let vision language models reason step-by-step, (2024). https:\/\/arxiv.org\/abs\/2411.10440"},{"key":"44_CR33","unstructured":"Yang, L., Yu, Z., Meng, C., Xu, M., Ermon, S., Bin, C.: Mastering text-to-image diffusion: recaptioning, planning, and generating with multimodal LLMs. In: Forty-first International Conference on Machine Learning (2024)"},{"key":"44_CR34","unstructured":"Yang, L., et al.: Cross-modal contextualized diffusion models for text-guided visual generation and editing. In: The Twelfth International Conference on Learning Representations (2024)"},{"key":"44_CR35","doi-asserted-by":"crossref","unstructured":"Zhao, Y.: Interior space design method considering image feature extraction algorithms. IEEE Access (2024)","DOI":"10.1109\/ACCESS.2024.3442157"},{"key":"44_CR36","unstructured":"Zhu, H., Xiao, T., Honavar, V.G.: 3m-diffusion: latent multi-modal diffusion for text-guided generation of molecular graphs. arXiv e-prints pp. arXiv\u20132403 (2024)"}],"container-title":["Lecture Notes in Computer Science","MultiMedia Modeling"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-95-6957-1_44","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,2,9]],"date-time":"2026-02-09T10:44:04Z","timestamp":1770633844000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-95-6957-1_44"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026]]},"ISBN":["9789819569564","9789819569571"],"references-count":36,"URL":"https:\/\/doi.org\/10.1007\/978-981-95-6957-1_44","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026]]},"assertion":[{"value":"10 February 2026","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"MMM","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Multimedia Modeling","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Prague","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Czech Republic","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2026","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 January 2026","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"31 January 2026","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"32","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"mmm2026","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/mmm2026.cz\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}