{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,3]],"date-time":"2026-03-03T16:10:13Z","timestamp":1772554213355,"version":"3.50.1"},"publisher-location":"Cham","reference-count":57,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031730351","type":"print"},{"value":"9783031730368","type":"electronic"}],"license":[{"start":{"date-parts":[[2024,11,21]],"date-time":"2024-11-21T00:00:00Z","timestamp":1732147200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,11,21]],"date-time":"2024-11-21T00:00:00Z","timestamp":1732147200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-73036-8_15","type":"book-chapter","created":{"date-parts":[[2024,11,20]],"date-time":"2024-11-20T09:05:34Z","timestamp":1732093534000},"page":"257-274","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":3,"title":["Diffusion Soup: Model Merging for\u00a0Text-to-Image Diffusion Models"],"prefix":"10.1007","author":[{"given":"Benjamin","family":"Biggs","sequence":"first","affiliation":[]},{"given":"Arjun","family":"Seshadri","sequence":"additional","affiliation":[]},{"given":"Yang","family":"Zou","sequence":"additional","affiliation":[]},{"given":"Achin","family":"Jain","sequence":"additional","affiliation":[]},{"given":"Aditya","family":"Golatkar","sequence":"additional","affiliation":[]},{"given":"Yusheng","family":"Xie","sequence":"additional","affiliation":[]},{"given":"Alessandro","family":"Achille","sequence":"additional","affiliation":[]},{"given":"Ashwin","family":"Swaminathan","sequence":"additional","affiliation":[]},{"given":"Stefano","family":"Soatto","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,11,21]]},"reference":[{"key":"15_CR1","unstructured":"Stable Diffusion ComfyUI (2023). https:\/\/github.com\/comfyanonymous\/ComfyUI"},{"key":"15_CR2","doi-asserted-by":"crossref","unstructured":"Achille, A., Golatkar, A., Ravichandran, A., Polito, M., Soatto, S.: LQF: linear quadratic fine-tuning. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 15729\u201315739 (2021)","DOI":"10.1109\/CVPR46437.2021.01547"},{"key":"15_CR3","series-title":"LNCS","doi-asserted-by":"publisher","first-page":"205","DOI":"10.1007\/978-3-031-20050-2_13","volume-title":"ECCV 2022","author":"O Avrahami","year":"2022","unstructured":"Avrahami, O., Lischinski, D., Fried, O.: Gan cocktail: mixing GANs without dataset access. In: Avidan, S., Brostow, G., Ciss\u00e9, M., Farinella, G.M., Hassner, T. (eds.) ECCV 2022. LNCS, vol. 13683, pp. 205\u2013221. Springer, Cham (2022). https:\/\/doi.org\/10.1007\/978-3-031-20050-2_13"},{"key":"15_CR4","unstructured":"Balaji, Y., et\u00a0al.: Ediffi: text-to-image diffusion models with an ensemble of expert denoisers. arXiv preprint arXiv:2211.01324 (2022)"},{"key":"15_CR5","unstructured":"Carlini, N., et al.: Extracting training data from diffusion models. In: 32nd USENIX Security Symposium (USENIX Security 23), pp. 5253\u20135270 (2023)"},{"key":"15_CR6","unstructured":"Chen, J., et\u00a0al.: Pixart-alpha: fast training of diffusion transformer for photorealistic text-to-image synthesis. In: ICLR 2024 (2023)"},{"key":"15_CR7","unstructured":"Chen, Z., Deng, Y., Wu, Y., Gu, Q., Li, Y.: Towards understanding mixture of experts in deep learning. arXiv preprint arXiv:2208.02813 (2022)"},{"key":"15_CR8","unstructured":"Cheng, X., Bartlett, P.: Convergence of langevin mcmc in kl-divergence. In: Algorithmic Learning Theory, pp. 186\u2013211. PMLR (2018)"},{"key":"15_CR9","unstructured":"Choshen, L., Venezian, E., Slonim, N., Katz, Y.: Fusing finetuned models for better pretraining. arXiv preprint arXiv:2204.03044 (2022)"},{"key":"15_CR10","series-title":"LNCS","doi-asserted-by":"publisher","first-page":"253","DOI":"10.1007\/978-3-031-20074-8_15","volume-title":"ECCV 2022","author":"PN Chowdhury","year":"2022","unstructured":"Chowdhury, P.N., Sain, A., Bhunia, A.K., Xiang, T., Gryaditskaya, Y., Song, Y.Z.: FS-COCO: towards understanding of freehand sketches of common objects in context. In: Avidan, S., Brostow, G., Ciss\u00e9, M., Farinella, G.M., Hassner, T. (eds.) ECCV 2022. LNCS, vol. 13668, pp. 253\u2013270. Springer, Cham (2022). https:\/\/doi.org\/10.1007\/978-3-031-20074-8_15"},{"key":"15_CR11","unstructured":"Dai, X., et al.: EMU: enhancing image generation models using photogenic needles in a haystack (2023)"},{"key":"15_CR12","first-page":"8780","volume":"34","author":"P Dhariwal","year":"2021","unstructured":"Dhariwal, P., Nichol, A.: Diffusion models beat GANs on image synthesis. Adv. Neural. Inf. Process. Syst. 34, 8780\u20138794 (2021)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"15_CR13","unstructured":"Dockhorn, T., Vahdat, A., Kreis, K.: Score-based generative modeling with critically-damped langevin diffusion. arXiv preprint arXiv:2112.07068 (2021)"},{"key":"15_CR14","unstructured":"Du, Y., et al.: Reduce, reuse, recycle: compositional generation with energy-based diffusion models and MCMC. In: International Conference on Machine Learning, pp. 8489\u20138510. PMLR (2023)"},{"key":"15_CR15","doi-asserted-by":"publisher","unstructured":"Gal, R., et al.: An image is worth one word: Personalizing text-to-image generation using textual inversion (2022). https:\/\/doi.org\/10.48550\/ARXIV.2208.01618, https:\/\/arxiv.org\/abs\/2208.01618","DOI":"10.48550\/ARXIV.2208.01618"},{"key":"15_CR16","unstructured":"Garipov, T., Izmailov, P., Podoprikhin, D., Vetrov, D.P., Wilson, A.G.: Loss surfaces, mode connectivity, and fast ensembling of DNNs. In: Advances in Neural Information Processing Systems, 31 (2018)"},{"key":"15_CR17","doi-asserted-by":"crossref","unstructured":"Golatkar, A., Achille, A., Ravichandran, A., Polito, M., Soatto, S.: Mixed-privacy forgetting in deep networks. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 792\u2013801 (2021)","DOI":"10.1109\/CVPR46437.2021.00085"},{"key":"15_CR18","unstructured":"Golatkar, A., Achille, A., Swaminathan, A., Soatto, S.: Training data protection with compositional diffusion models. arXiv preprint arXiv:2308.01937 (2023)"},{"key":"15_CR19","doi-asserted-by":"crossref","unstructured":"Golatkar, A., Achille, A., Wang, Y.X., Roth, A., Kearns, M., Soatto, S.: Mixed differential privacy in computer vision. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 8376\u20138386 (2022)","DOI":"10.1109\/CVPR52688.2022.00819"},{"key":"15_CR20","doi-asserted-by":"crossref","unstructured":"Golatkar, A., Achille, A., Zancato, L., Wang, Y.X., Swaminathan, A., Soatto, S.: CPR: retrieval augmented generation for copyright protection. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 12374\u201312384 (2024)","DOI":"10.1109\/CVPR52733.2024.01176"},{"key":"15_CR21","first-page":"6840","volume":"33","author":"J Ho","year":"2020","unstructured":"Ho, J., Jain, A., Abbeel, P.: Denoising diffusion probabilistic models. Adv. Neural. Inf. Process. Syst. 33, 6840\u20136851 (2020)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"15_CR22","unstructured":"Ho, J., Salimans, T.: Classifier-free diffusion guidance. arXiv preprint arXiv:2207.12598 (2022)"},{"key":"15_CR23","doi-asserted-by":"crossref","unstructured":"Hu, Y., Liu, B., Kasai, J., Wang, Y., Ostendorf, M., Krishna, R., Smith, N.A.: TIFA: accurate and interpretable text-to-image faithfulness evaluation with question answering. arXiv preprint arXiv:2303.11897 (2023)","DOI":"10.1109\/ICCV51070.2023.01866"},{"key":"15_CR24","unstructured":"Hyv\u00e4rinen, A., Dayan, P.: Estimation of non-normalized statistical models by score matching. J. Mach. Learn. Res. 6(4) (2005)"},{"key":"15_CR25","unstructured":"Izmailov, P., Podoprikhin, D., Garipov, T., Vetrov, D., Wilson, A.G.: Averaging weights leads to wider optima and better generalization. arXiv preprint arXiv:1803.05407 (2018)"},{"key":"15_CR26","first-page":"26565","volume":"35","author":"T Karras","year":"2022","unstructured":"Karras, T., Aittala, M., Aila, T., Laine, S.: Elucidating the design space of diffusion-based generative models. Adv. Neural. Inf. Process. Syst. 35, 26565\u201326577 (2022)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"15_CR27","unstructured":"Lee, J., et al.: Wide neural networks of any depth evolve as linear models under gradient descent. In: Advances in Neural Information Processing Systems, vol. 32 (2019)"},{"key":"15_CR28","unstructured":"Li, J., Li, D., Xiong, C., Hoi, S.: Blip: bootstrapping language-image pre-training for unified vision-language understanding and generation. In: ICML (2022)"},{"issue":"3","key":"15_CR29","doi-asserted-by":"publisher","first-page":"365","DOI":"10.1137\/0317028","volume":"17","author":"A Lindquist","year":"1979","unstructured":"Lindquist, A., Picci, G.: On the stochastic realization problem. SIAM J. Control. Optim. 17(3), 365\u2013389 (1979)","journal-title":"SIAM J. Control. Optim."},{"key":"15_CR30","series-title":"LNCS","first-page":"423","volume-title":"ECCV 2022","author":"N Liu","year":"2022","unstructured":"Liu, N., Li, S., Du, Y., Torralba, A., Tenenbaum, J.B.: Compositional visual generation with composable diffusion models. In: Avidan, S., Brostow, G., Ciss\u00e9, M., Farinella, G.M., Hassner, T. (eds.) ECCV 2022. LNCS, vol. 13677, pp. 423\u2013439. Springer, Cham (2022)"},{"key":"15_CR31","unstructured":"Liu, T.Y., Golatkar, A., Soatto, S.: Tangent transformers for composition, privacy and removal. arXiv preprint arXiv:2307.08122 (2023)"},{"key":"15_CR32","doi-asserted-by":"crossref","unstructured":"Liu, T.Y., Soatto, S.: Tangent model composition for ensembling and continual fine-tuning. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 18676\u201318686 (2023)","DOI":"10.1109\/ICCV51070.2023.01712"},{"key":"15_CR33","unstructured":"Malladi, S., Wettig, A., Yu, D., Chen, D., Arora, S.: A kernel-based view of language model fine-tuning. In: International Conference on Machine Learning, pp. 23610\u201323641. PMLR (2023)"},{"key":"15_CR34","unstructured":"Matena, M., Raffel, C.: Merging models with fisher-weighted averaging (2021). arXiv preprint arXiv:2111.09832"},{"key":"15_CR35","doi-asserted-by":"publisher","first-page":"125","DOI":"10.1023\/A:1008923215028","volume":"11","author":"RM Neal","year":"2001","unstructured":"Neal, R.M.: Annealed importance sampling. Stat. Comput. 11, 125\u2013139 (2001)","journal-title":"Stat. Comput."},{"key":"15_CR36","unstructured":"Pinkney, J.N.M.: Pokemon blip captions (2022). https:\/\/huggingface.co\/datasets\/lambdalabs\/pokemon-blip-captions\/"},{"key":"15_CR37","unstructured":"Pinkney, J.N., Adler, D.: Resolution dependent GAN interpolation for controllable image synthesis between domains. arXiv preprint arXiv:2010.05334 (2020)"},{"key":"15_CR38","unstructured":"Podell, D., et al.: SDXL: improving latent diffusion models for high-resolution image synthesis. In: The Twelfth International Conference on Learning Representations (2024). https:\/\/openreview.net\/forum?id=di52zR8xgf"},{"key":"15_CR39","unstructured":"Radford, A., et\u00a0al.: Learning transferable visual models from natural language supervision. In: International Conference on Machine Learning, pp. 8748\u20138763. PMLR (2021)"},{"key":"15_CR40","unstructured":"Ramesh, A., et al.: Zero-shot text-to-image generation. In: International Conference on Machine Learning, pp. 8821\u20138831. PMLR (2021)"},{"key":"15_CR41","doi-asserted-by":"crossref","unstructured":"Rombach, R., Blattmann, A., Lorenz, D., Esser, P., Ommer, B.: High-resolution image synthesis with latent diffusion models. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 10684\u201310695 (2022)","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"15_CR42","unstructured":"Salimans, T., Ho, J.: Progressive distillation for fast sampling of diffusion models. arXiv preprint arXiv:2202.00512 (2022)"},{"key":"15_CR43","unstructured":"Schuhmann, C.: LAION-Aesthetics (2022). https:\/\/github.com\/christophschuhmann\/improved-aesthetic-predictor"},{"key":"15_CR44","unstructured":"Somepalli, G., Singla, V., Goldblum, M., Geiping, J., Goldstein, T.: Understanding and mitigating copying in diffusion models. In: Advances in Neural Information Processing Systems, vol. 36 (2024)"},{"key":"15_CR45","unstructured":"Song, J., Meng, C., Ermon, S.: Denoising diffusion implicit models. arXiv preprint arXiv:2010.02502 (2020)"},{"key":"15_CR46","first-page":"1415","volume":"34","author":"Y Song","year":"2021","unstructured":"Song, Y., Durkan, C., Murray, I., Ermon, S.: Maximum likelihood training of score-based diffusion models. Adv. Neural. Inf. Process. Syst. 34, 1415\u20131428 (2021)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"15_CR47","unstructured":"Song, Y., Sohl-Dickstein, J., Kingma, D.P., Kumar, A., Ermon, S., Poole, B.: Score-based generative modeling through stochastic differential equations. arXiv preprint arXiv:2011.13456 (2020)"},{"key":"15_CR48","doi-asserted-by":"publisher","unstructured":"Tan, W.R., Chan, C.S., Aguirre, H., Tanaka, K.: Improved ArtGAN for conditional synthesis of natural image and artwork. IEEE Trans. Image Process. 28(1), 394\u2013409 (2019). https:\/\/doi.org\/10.1109\/TIP.2018.2866698","DOI":"10.1109\/TIP.2018.2866698"},{"key":"15_CR49","unstructured":"Vempala, S., Wibisono, A.: Rapid convergence of the unadjusted Langevin algorithm: isoperimetry suffices. In: Advances in Neural Information Processing Systems, vol. 32 (2019)"},{"key":"15_CR50","unstructured":"Vyas, N., Kakade, S., Barak, B.: Provable copyright protection for generative models. arXiv preprint arXiv:2302.10870 (2023)"},{"key":"15_CR51","unstructured":"Wei, T., Guo, Z., Chen, Y., He, J.: NTK-approximating MLP fusion for efficient language model fine-tuning (2023)"},{"key":"15_CR52","unstructured":"Wortsman, M., et\u00a0al.: Model soups: averaging weights of multiple fine-tuned models improves accuracy without increasing inference time. In: International Conference on Machine Learning, pp. 23965\u201323998. PMLR (2022)"},{"key":"15_CR53","unstructured":"Xu, J., et al.: Imagereward: learning and evaluating human preferences for text-to-image generation. In: Advances in Neural Information Processing Systems, vol. 36 (2024)"},{"key":"15_CR54","unstructured":"Xue, Z., et al.: Raphael: text-to-image generation via large mixture of diffusion paths. In: Advances in Neural Information Processing Systems, vol. 36 (2024)"},{"key":"15_CR55","unstructured":"Yang, K.Y., Wibisono, A.: Convergence in kl and r\u00e9nyi divergence of the unadjusted Langevin algorithm using estimated score. In: NeurIPS 2022 Workshop on Score-Based Methods (2022)"},{"key":"15_CR56","first-page":"6136","volume":"33","author":"L Zancato","year":"2020","unstructured":"Zancato, L., Achille, A., Ravichandran, A., Bhotika, R., Soatto, S.: Predicting training time without training. Adv. Neural. Inf. Process. Syst. 33, 6136\u20136146 (2020)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"15_CR57","first-page":"7103","volume":"35","author":"Y Zhou","year":"2022","unstructured":"Zhou, Y., et al.: Mixture-of-experts with expert choice routing. Adv. Neural. Inf. Process. Syst. 35, 7103\u20137114 (2022)","journal-title":"Adv. Neural. Inf. Process. Syst."}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2024"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-73036-8_15","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,11,20]],"date-time":"2024-11-20T09:54:35Z","timestamp":1732096475000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-73036-8_15"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,11,21]]},"ISBN":["9783031730351","9783031730368"],"references-count":57,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-73036-8_15","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,11,21]]},"assertion":[{"value":"21 November 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Milan","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 September 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4 October 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2024.ecva.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}