{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,16]],"date-time":"2026-02-16T20:49:31Z","timestamp":1771274971683,"version":"3.50.1"},"reference-count":52,"publisher":"Springer Science and Business Media LLC","issue":"6","license":[{"start":{"date-parts":[[2025,10,9]],"date-time":"2025-10-09T00:00:00Z","timestamp":1759968000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,10,9]],"date-time":"2025-10-09T00:00:00Z","timestamp":1759968000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Multimedia Systems"],"published-print":{"date-parts":[[2025,12]]},"DOI":"10.1007\/s00530-025-02008-9","type":"journal-article","created":{"date-parts":[[2025,10,9]],"date-time":"2025-10-09T09:45:53Z","timestamp":1760003153000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Multi-SBoRA: regional and non-overlapping weight updates for multi-concept customization of diffusion models"],"prefix":"10.1007","volume":"31","author":[{"given":"Haoxuan","family":"Wu","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Lai-Man","family":"Po","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yuyang","family":"Liu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Wing-Yin","family":"Yu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Tianqi","family":"Zhang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zeyu","family":"Jiang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Kun","family":"Li","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2025,10,9]]},"reference":[{"key":"2008_CR1","first-page":"6840","volume":"33","author":"J Ho","year":"2020","unstructured":"Ho, J., Jain, A., Abbeel, P.: Denoising diffusion probabilistic models. Adv. Neural. Inf. Process. Syst. 33, 6840\u20136851 (2020)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"2008_CR2","unstructured":"Song, J., Meng, C., Ermon, S.: Denoising diffusion implicit models. arXiv preprint arXiv:2010.02502 (2020)"},{"key":"2008_CR3","unstructured":"Pandey, K., Mukherjee, A., Rai, P., Kumar, A.: Diffusevae: Efficient, controllable and high-fidelity generation from low-dimensional latents. arXiv preprint arXiv:2201.00308 (2022)"},{"issue":"5","key":"2008_CR4","doi-asserted-by":"publisher","first-page":"302","DOI":"10.1007\/s00530-024-01515-5","volume":"30","author":"W Wang","year":"2024","unstructured":"Wang, W., Mu, M., Tian, Y., Hu, Y., Lu, X.: Ilsr-diff: joint face illumination normalization and super-resolution via diffusion models. Multimedia Syst. 30(5), 302 (2024)","journal-title":"Multimedia Syst."},{"key":"2008_CR5","doi-asserted-by":"crossref","unstructured":"Rombach, R., Blattmann, A., Lorenz, D., Esser, P., Ommer, B.: High-resolution image synthesis with latent diffusion models. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 10684\u201310695 (2022)","DOI":"10.1109\/CVPR52688.2022.01042"},{"issue":"3","key":"2008_CR6","doi-asserted-by":"publisher","first-page":"1391","DOI":"10.1007\/s00530-023-01067-0","volume":"29","author":"J Ding","year":"2023","unstructured":"Ding, J., Liu, B., Yu, J., Guo, H., Shen, M., Shen, K.: An efficient multi-path structure with staged connection and multi-scale mechanism for text-to-image synthesis. Multimed. Syst. 29(3), 1391\u20131403 (2023)","journal-title":"Multimedia Syst."},{"key":"2008_CR7","first-page":"25278","volume":"35","author":"C Schuhmann","year":"2022","unstructured":"Schuhmann, C., Beaumont, R., Vencu, R., Gordon, C., Wightman, R., Cherti, M., Coombes, T., Katta, A., Mullis, C., Wortsman, M., et al.: Laion-5b: An open large-scale dataset for training next generation image-text models. Adv. Neural. Inf. Process. Syst. 35, 25278\u201325294 (2022)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"2008_CR8","unstructured":"Gal, R., Alaluf, Y., Atzmon, Y., Patashnik, O., Bermano, A.H., Chechik, G., Cohen-Or, D.: An image is worth one word: Personalizing text-to-image generation using textual inversion. arXiv preprint arXiv:2208.01618 (2022)"},{"key":"2008_CR9","doi-asserted-by":"crossref","unstructured":"Ruiz, N., Li, Y., Jampani, V., Pritch, Y., Rubinstein, M., Aberman, K.: Dreambooth: Fine tuning text-to-image diffusion models for subject-driven generation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 22500\u201322510 (2023)","DOI":"10.1109\/CVPR52729.2023.02155"},{"key":"2008_CR10","unstructured":"Hu, E.J., Shen, Y., Wallis, P., Allen-Zhu, Z., Li, Y., Wang, S., Wang, L., Chen, W.: Lora: Low-rank adaptation of large language models. In: International Conference on Learning Representations (2022). https:\/\/openreview.net\/forum?id=nZeVKeeFYf9"},{"key":"2008_CR11","doi-asserted-by":"crossref","unstructured":"Kumari, N., Zhang, B., Zhang, R., Shechtman, E., Zhu, J.-Y.: Multi-concept customization of text-to-image diffusion. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 1931\u20131941 (2023)","DOI":"10.1109\/CVPR52729.2023.00192"},{"key":"2008_CR12","unstructured":"Voynov, A., Chu, Q., Cohen-Or, D., Aberman, K.: $$ p+ $$: Extended textual conditioning in text-to-image generation. arXiv preprint arXiv:2303.09522 (2023)"},{"key":"2008_CR13","doi-asserted-by":"crossref","unstructured":"Han, L., Li, Y., Zhang, H., Milanfar, P., Metaxas, D., Yang, F.: Svdiff: Compact parameter space for diffusion fine-tuning. arXiv preprint arXiv:2303.11305 (2023)","DOI":"10.1109\/ICCV51070.2023.00673"},{"key":"2008_CR14","unstructured":"Wang, Y., Lin, Y., Zeng, X., Zhang, G.: Multilora: Democratizing lora for better multi-task learning. arXiv preprint arXiv:2311.11501 (2023)"},{"key":"2008_CR15","unstructured":"Zhong, M., Shen, Y., Wang, S., Lu, Y., Jiao, Y., Ouyang, S., Yu, D., Han, J., Chen, W.: Multi-lora composition for image generation. arXiv preprint arXiv:2402.16843 (2024)"},{"key":"2008_CR16","doi-asserted-by":"crossref","unstructured":"Po, R., Yang, G., Aberman, K., Wetzstein, G.: Orthogonal adaptation for modular customization of diffusion models. arXiv preprint arXiv:2312.02432 (2023)","DOI":"10.1109\/CVPR52733.2024.00761"},{"key":"2008_CR17","unstructured":"Gu, Y., Wang, X., Wu, J.Z., Shi, Y., Yunpeng, C., Fan, Z., Xiao, W., Zhao, R., Chang, S., Wu, W., Ge, Y., Ying, S., Shou, M.Z.: Mix-of-show: Decentralized low-rank adaptation for multi-concept customization of diffusion models. arXiv preprint arXiv:2305.18292 (2023)"},{"key":"2008_CR18","doi-asserted-by":"crossref","unstructured":"Po, L.-M., Liu, Y., Wu, H., Zhang, T., Yu, W.-Y., Jiang, Z., Li, K.: Sbora: Low-rank adaptation with regional weight updates. arXiv preprint arXiv:2407.05413 (2024)","DOI":"10.1007\/978-981-96-7008-6_28"},{"key":"2008_CR19","first-page":"8780","volume":"34","author":"P Dhariwal","year":"2021","unstructured":"Dhariwal, P., Nichol, A.: Diffusion models beat gans on image synthesis. Adv. Neural. Inf. Process. Syst. 34, 8780\u20138794 (2021)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"2008_CR20","unstructured":"Ho, J., Salimans, T.: Classifier-free diffusion guidance. arXiv preprint arXiv:2207.12598 (2022)"},{"key":"2008_CR21","first-page":"36479","volume":"35","author":"C Saharia","year":"2022","unstructured":"Saharia, C., Chan, W., Saxena, S., Li, L., Whang, J., Denton, E.L., Ghasemipour, K., Gontijo Lopes, R., Karagol Ayan, B., Salimans, T., et al.: Photorealistic text-to-image diffusion models with deep language understanding. Adv. Neural. Inf. Process. Syst. 35, 36479\u201336494 (2022)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"2008_CR22","unstructured":"Ramesh, A., Dhariwal, P., Nichol, A., Chu, C., Chen, M.: Hierarchical text-conditional image generation with clip latents. arXiv preprint arXiv:2204.06125 1(2), 3 (2022)"},{"key":"2008_CR23","unstructured":"Nichol, A., Dhariwal, P., Ramesh, A., Shyam, P., Mishkin, P., McGrew, B., Sutskever, I., Chen, M.: Glide: Towards photorealistic image generation and editing with text-guided diffusion models. arXiv preprint arXiv:2112.10741 (2021)"},{"key":"2008_CR24","doi-asserted-by":"crossref","unstructured":"Gu, S., Chen, D., Bao, J., Wen, F., Zhang, B., Chen, D., Yuan, L., Guo, B.: Vector quantized diffusion model for text-to-image synthesis. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 10696\u201310706 (2022)","DOI":"10.1109\/CVPR52688.2022.01043"},{"key":"2008_CR25","unstructured":"Balaji, Y., Nah, S., Huang, X., Vahdat, A., Song, J., Zhang, Q., Kreis, K., Aittala, M., Aila, T., Laine, S., et al.: ediff-i: Text-to-image diffusion models with an ensemble of expert denoisers. arXiv preprint arXiv:2211.01324 (2022)"},{"issue":"7","key":"2008_CR26","doi-asserted-by":"publisher","first-page":"5400","DOI":"10.1109\/TCSVT.2023.3347971","volume":"34","author":"H Tan","year":"2024","unstructured":"Tan, H., Yin, B., Xu, K., Wang, H., Liu, X., Li, X.: Attention-bridged modal interaction for text-to-image generation. IEEE Trans. Circuits Syst. Video Technol. 34(7), 5400\u20135413 (2024). https:\/\/doi.org\/10.1109\/TCSVT.2023.3347971","journal-title":"IEEE Trans. Circuits Syst. Video Technol."},{"key":"2008_CR27","doi-asserted-by":"publisher","unstructured":"Xue, Y., Po, L.-M., Yu, W.-Y., Wu, H., Xu, X., Li, K., Liu, Y.: Self-calibration flow guided denoising diffusion model for human pose transfer. IEEE Trans. Circuits Syst. Video Technol. 1\u20131 (2024) https:\/\/doi.org\/10.1109\/TCSVT.2024.3382948","DOI":"10.1109\/TCSVT.2024.3382948"},{"issue":"8","key":"2008_CR28","doi-asserted-by":"publisher","first-page":"6860","DOI":"10.1109\/TCSVT.2024.3369757","volume":"34","author":"H Chen","year":"2024","unstructured":"Chen, H., Zhang, Y., Wang, X., Duan, X., Zhou, Y., Zhu, W.: Disendreamer: Subject-driven text-to-image generation with sample-aware disentangled tuning. IEEE Trans. Circuits Syst. Video Technol. 34(8), 6860\u20136873 (2024). https:\/\/doi.org\/10.1109\/TCSVT.2024.3369757","journal-title":"IEEE Trans. Circuits Syst. Video Technol."},{"key":"2008_CR29","unstructured":"Zhang, L., Zhang, L., Shi, S., Chu, X., Li, B.: Lora-fa: Memory-efficient low-rank adaptation for large language models fine-tuning. arXiv preprint arXiv:2308.03303 (2023)"},{"key":"2008_CR30","doi-asserted-by":"crossref","unstructured":"Shi, J., Xiong, W., Lin, Z., Jung, H.J.: Instantbooth: Personalized text-to-image generation without test-time finetuning. arXiv preprint arXiv:2304.03411 (2023)","DOI":"10.1109\/CVPR52733.2024.00816"},{"key":"2008_CR31","doi-asserted-by":"crossref","unstructured":"Wei, Y., Zhang, Y., Ji, Z., Bai, J., Zhang, L., Zuo, W.: Elite: Encoding visual concepts into textual embeddings for customized text-to-image generation. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 15943\u201315953 (2023)","DOI":"10.1109\/ICCV51070.2023.01461"},{"key":"2008_CR32","unstructured":"Jia, X., Zhao, Y., Chan, K.C., Li, Y., Zhang, H., Gong, B., Hou, T., Wang, H., Su, Y.-C.: Taming encoder for zero fine-tuning image customization with text-to-image diffusion models. arXiv preprint arXiv:2304.02642 (2023)"},{"key":"2008_CR33","unstructured":"Su, Y.-C., Chan, K.C., Li, Y., Zhao, Y., Zhang, H., Gong, B., Wang, H., Jia, X.: Identity encoder for personalized diffusion. arXiv preprint arXiv:2304.07429 (2023)"},{"key":"2008_CR34","doi-asserted-by":"crossref","unstructured":"Ruiz, N., Li, Y., Jampani, V., Wei, W., Hou, T., Pritch, Y., Wadhwa, N., Rubinstein, M., Aberman, K.: Hyperdreambooth: Hypernetworks for fast personalization of text-to-image models. arXiv preprint arXiv:2307.06949 (2023)","DOI":"10.1109\/CVPR52733.2024.00624"},{"key":"2008_CR35","unstructured":"Sohn, K., Ruiz, N., Lee, K., Chin, D.C., Blok, I., Chang, H., Barber, J., Jiang, L., Entis, G., Li, Y., et al.: Styledrop: Text-to-image generation in any style. arXiv preprint arXiv:2306.00983 (2023)"},{"key":"2008_CR36","unstructured":"Wang, Z., Wang, X., Xie, L., Qi, Z., Shan, Y., Wang, W., Luo, P.: Styleadapter: A single-pass lora-free model for stylized image generation. arXiv preprint arXiv:2309.01770 (2023)"},{"key":"2008_CR37","unstructured":"Ye, H., Zhang, J., Liu, S., Han, X., Yang, W.: Ip-adapter: Text compatible image prompt adapter for text-to-image diffusion models. arXiv preprint arXiv:2308.06721 (2023)"},{"key":"2008_CR38","unstructured":"Yu, J., Xu, Y., Koh, J.Y., Luong, T., Baid, G., Wang, Z., Vasudevan, V., Ku, A., Yang, Y., Ayan, B.K., et al.: Scaling autoregressive models for content-rich text-to-image generation. arXiv preprint arXiv:2206.10789 2(3), 5 (2022)"},{"key":"2008_CR39","unstructured":"Lee, K., Liu, H., Ryu, M., Watkins, O., Du, Y., Boutilier, C., Abbeel, P., Ghavamzadeh, M., Gu, S.S.: Aligning text-to-image models using human feedback. arXiv preprint arXiv:2302.12192 (2023)"},{"key":"2008_CR40","doi-asserted-by":"crossref","unstructured":"Wu, X., Sun, K., Zhu, F., Zhao, R., Li, H.: Better aligning text-to-image models with human preference. arXiv preprint arXiv:2303.14420 (2023)","DOI":"10.1109\/ICCV51070.2023.00200"},{"key":"2008_CR41","doi-asserted-by":"crossref","unstructured":"Ma, W.-D.K., Lewis, J., Lahiri, A., Leung, T., Kleijn, W.B.: Directed diffusion: Direct control of object placement through attention guidance. arXiv preprint arXiv:2302.13153 (2023)","DOI":"10.1609\/aaai.v38i5.28204"},{"key":"2008_CR42","unstructured":"McMahan, B., Moore, E., Ramage, D., Hampson, S., Arcas, B.A.: Communication-efficient learning of deep networks from decentralized data. In: Artificial Intelligence and Statistics, pp. 1273\u20131282 (2017). PMLR"},{"key":"2008_CR43","unstructured":"Wang, L., Xu, S., Wang, X., Zhu, Q.: Addressing class imbalance in federated learning. arXiv e-prints, 2008 (2020)"},{"key":"2008_CR44","unstructured":"Shi, Y., Liang, J., Zhang, W., Tan, V.Y., Bai, S.: Towards understanding and mitigating dimensional collapse in heterogeneous federated learning. arXiv preprint arXiv:2210.00226 (2022)"},{"key":"2008_CR45","unstructured":"Reddi, S., Charles, Z., Zaheer, M., Garrett, Z., Rush, K., Kone\u010dn\u1ef3, J., Kumar, S., McMahan, H.B.: Adaptive federated optimization. arXiv preprint arXiv:2003.00295 (2020)"},{"key":"2008_CR46","first-page":"5972","volume":"34","author":"M Luo","year":"2021","unstructured":"Luo, M., Chen, F., Hu, D., Zhang, Y., Liang, J., Feng, J.: No fear of heterogeneity: Classifier calibration for federated learning with non-iid data. Adv. Neural. Inf. Process. Syst. 34, 5972\u20135984 (2021)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"2008_CR47","doi-asserted-by":"crossref","unstructured":"Mokady, R., Hertz, A., Aberman, K., Pritch, Y., Cohen-Or, D.: Null-text inversion for editing real images using guided diffusion models. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 6038\u20136047 (2023)","DOI":"10.1109\/CVPR52729.2023.00585"},{"key":"2008_CR48","unstructured":"Kingma, D.P., Ba, J.: Adam: A method for stochastic optimization. arXiv preprint arXiv:1412.6980 (2014)"},{"key":"2008_CR49","doi-asserted-by":"crossref","unstructured":"Ronneberger, O., Fischer, P., Brox, T.: U-net: Convolutional networks for biomedical image segmentation. In: Medical Image Computing and Computer-assisted intervention\u2013MICCAI 2015: 18th International Conference, Munich, Germany, October 5-9, 2015, Proceedings, Part III 18, pp. 234\u2013241 (2015). Springer","DOI":"10.1007\/978-3-319-24574-4_28"},{"key":"2008_CR50","doi-asserted-by":"crossref","unstructured":"Hessel, J., Holtzman, A., Forbes, M., Bras, R.L., Choi, Y.: Clipscore: A reference-free evaluation metric for image captioning. arXiv preprint arXiv:2104.08718 (2021)","DOI":"10.18653\/v1\/2021.emnlp-main.595"},{"key":"2008_CR51","unstructured":"Radford, A., Kim, J.W., Hallacy, C., Ramesh, A., Goh, G., Agarwal, S., Sastry, G., Askell, A., Mishkin, P., Clark, J., et al.: Learning transferable visual models from natural language supervision. In: International Conference on Machine Learning, pp. 8748\u20138763 (2021). PMLR"},{"key":"2008_CR52","unstructured":"Heusel, M., Ramsauer, H., Unterthiner, T., Nessler, B., Hochreiter, S.: Gans trained by a two time-scale update rule converge to a local nash equilibrium. Adv. Neural Inf. Process. Syst. (2017)"}],"container-title":["Multimedia Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00530-025-02008-9.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00530-025-02008-9","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00530-025-02008-9.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,19]],"date-time":"2025-12-19T06:58:03Z","timestamp":1766127483000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00530-025-02008-9"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,9]]},"references-count":52,"journal-issue":{"issue":"6","published-print":{"date-parts":[[2025,12]]}},"alternative-id":["2008"],"URL":"https:\/\/doi.org\/10.1007\/s00530-025-02008-9","relation":{},"ISSN":["0942-4962","1432-1882"],"issn-type":[{"value":"0942-4962","type":"print"},{"value":"1432-1882","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,10,9]]},"assertion":[{"value":"21 February 2025","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"30 August 2025","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"9 October 2025","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}],"article-number":"415"}}