{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,6]],"date-time":"2026-05-06T15:37:29Z","timestamp":1778081849502,"version":"3.51.4"},"publisher-location":"Cham","reference-count":70,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031730061","type":"print"},{"value":"9783031730078","type":"electronic"}],"license":[{"start":{"date-parts":[[2024,10,1]],"date-time":"2024-10-01T00:00:00Z","timestamp":1727740800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,10,1]],"date-time":"2024-10-01T00:00:00Z","timestamp":1727740800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-73007-8_20","type":"book-chapter","created":{"date-parts":[[2024,9,30]],"date-time":"2024-09-30T19:02:40Z","timestamp":1727722960000},"page":"342-359","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":11,"title":["SlimFlow: Training Smaller One-Step Diffusion Models with\u00a0Rectified Flow"],"prefix":"10.1007","author":[{"given":"Yuanzhi","family":"Zhu","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xingchao","family":"Liu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Qiang","family":"Liu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2024,10,1]]},"reference":[{"key":"20_CR1","unstructured":"Berthelot, D., et al.: Tract: denoising diffusion models with transitive closure time-distillation. arXiv preprint arXiv:2303.04248 (2023)"},{"key":"20_CR2","unstructured":"Crowson, K., Baumann, S.A., Birch, A., Abraham, T.M., Kaplan, D.Z., Shippole, E.: Scalable high-resolution pixel-space image synthesis with hourglass diffusion transformers. arXiv preprint arXiv:2401.11605 (2024)"},{"key":"20_CR3","doi-asserted-by":"crossref","unstructured":"Deng, J., Dong, W., Socher, R., Li, L.J., Li, K., Fei-Fei, L.: ImageNet: a large-scale hierarchical image database. In: 2009 IEEE Conference on Computer Vision and Pattern Recognition, pp. 248\u2013255. IEEE (2009)","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"20_CR4","unstructured":"Dhariwal, P., Nichol, A.: Diffusion models beat GANs on image synthesis. In: Advances in Neural Information Processing Systems, vol. 34, pp. 8780\u20138794 (2021)"},{"key":"20_CR5","unstructured":"Dinh, L., Sohl-Dickstein, J., Bengio, S.: Density estimation using real NVP. arXiv preprint arXiv:1605.08803 (2016)"},{"key":"20_CR6","unstructured":"Dockhorn, T., Vahdat, A., Kreis, K.: Genie: higher-order denoising diffusion solvers. In: Advances in Neural Information Processing Systems, vol. 35, pp. 30150\u201330166 (2022)"},{"key":"20_CR7","unstructured":"Fang, G., Ma, X., Wang, X.: Structural pruning for diffusion models. In: Advances in Neural Information Processing Systems, vol. 36 (2024)"},{"key":"20_CR8","unstructured":"Geyer, M., Bar-Tal, O., Bagon, S., Dekel, T.: TokenFlow: consistent diffusion features for consistent video editing. arXiv preprint arXiv:2307.10373 (2023)"},{"issue":"11","key":"20_CR9","doi-asserted-by":"publisher","first-page":"139","DOI":"10.1145\/3422622","volume":"63","author":"I Goodfellow","year":"2020","unstructured":"Goodfellow, I., et al.: Generative adversarial networks. Commun. ACM 63(11), 139\u2013144 (2020)","journal-title":"Commun. ACM"},{"key":"20_CR10","unstructured":"Gu, J., Zhai, S., Zhang, Y., Liu, L., Susskind, J.: Boot: data-free distillation of denoising diffusion models with bootstrapping. arXiv preprint arXiv:2306.05544 (2023)"},{"key":"20_CR11","unstructured":"He, Y., Liu, J., Wu, W., Zhou, H., Zhuang, B.: EfficientDM: efficient quantization-aware fine-tuning of low-bit diffusion models. arXiv preprint arXiv:2310.03270 (2023)"},{"key":"20_CR12","unstructured":"Heusel, M., Ramsauer, H., Unterthiner, T., Nessler, B., Hochreiter, S.: GANs trained by a two time-scale update rule converge to a local Nash equilibrium. In: Advances in Neural Information Processing Systems, vol. 30 (2017)"},{"key":"20_CR13","unstructured":"Ho, J., et\u00a0al.: Imagen video: high definition video generation with diffusion models. arXiv preprint arXiv:2210.02303 (2022)"},{"key":"20_CR14","unstructured":"Ho, J., Jain, A., Abbeel, P.: Denoising diffusion probabilistic models. In: Advances in Neural Information Processing Systems, vol. 33, pp. 6840\u20136851 (2020)"},{"key":"20_CR15","doi-asserted-by":"crossref","unstructured":"Huang, Y., Gong, R., Liu, J., Chen, T., Liu, X.: TFMQ-DM: temporal feature maintenance quantization for diffusion models. arXiv preprint arXiv:2311.16503 (2023)","DOI":"10.1109\/CVPR52733.2024.00703"},{"key":"20_CR16","unstructured":"Hyv\u00e4rinen, A., Dayan, P.: Estimation of non-normalized statistical models by score matching. J. Mach. Learn. Res. 6(4) (2005)"},{"key":"20_CR17","doi-asserted-by":"crossref","unstructured":"Jeong, M., Kim, H., Cheon, S.J., Choi, B.J., Kim, N.S.: Diff-TTS: a denoising diffusion model for text-to-speech. arXiv preprint arXiv:2104.01409 (2021)","DOI":"10.21437\/Interspeech.2021-469"},{"key":"20_CR18","unstructured":"Karras, T., Aittala, M., Aila, T., Laine, S.: Elucidating the design space of diffusion-based generative models. arXiv preprint arXiv:2206.00364 (2022)"},{"key":"20_CR19","doi-asserted-by":"crossref","unstructured":"Karras, T., Laine, S., Aila, T.: A style-based generator architecture for generative adversarial networks. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 4401\u20134410 (2019)","DOI":"10.1109\/CVPR.2019.00453"},{"key":"20_CR20","doi-asserted-by":"crossref","unstructured":"Kim, B.K., Song, H.K., Castells, T., Choi, S.: BK-SDM: a lightweight, fast, and cheap version of stable diffusion. arXiv preprint arXiv:2305.15798 (2023)","DOI":"10.1007\/978-3-031-72949-2_22"},{"key":"20_CR21","unstructured":"Kim, D., et al.: Consistency trajectory models: learning probability flow ode trajectory of diffusion. arXiv preprint arXiv:2310.02279 (2023)"},{"key":"20_CR22","unstructured":"Kingma, D.P., Welling, M.: Auto-encoding variational bayes. arXiv preprint arXiv:1312.6114 (2013)"},{"key":"20_CR23","unstructured":"Kong, Z., Ping, W., Huang, J., Zhao, K., Catanzaro, B.: DiffWave: a versatile diffusion model for audio synthesis. arXiv preprint arXiv:2009.09761 (2020)"},{"key":"20_CR24","unstructured":"Krizhevsky, A., Hinton, G., et\u00a0al.: Learning multiple layers of features from tiny images (2009)"},{"key":"20_CR25","unstructured":"Li, W., et al.: Not all steps are equal: efficient generation with progressive diffusion models. arXiv preprint arXiv:2312.13307 (2023)"},{"key":"20_CR26","doi-asserted-by":"crossref","unstructured":"Li, X., et al.: Q-diffusion: quantizing diffusion models. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 17535\u201317545 (2023)","DOI":"10.1109\/ICCV51070.2023.01608"},{"key":"20_CR27","unstructured":"Li, Y., Xu, S., Cao, X., Sun, X., Zhang, B.: Q-DM: an efficient low-bit quantized diffusion model. In: Advances in Neural Information Processing Systems, vol. 36 (2024)"},{"key":"20_CR28","unstructured":"Lipman, Y., Chen, R.T., Ben-Hamu, H., Nickel, M., Le, M.: Flow matching for generative modeling. arXiv preprint arXiv:2210.02747 (2022)"},{"key":"20_CR29","unstructured":"Liu, Q.: Rectified flow: a marginal preserving approach to optimal transport. arXiv preprint arXiv:2209.14577 (2022)"},{"key":"20_CR30","doi-asserted-by":"crossref","unstructured":"Liu, R., Wu, R., Van\u00a0Hoorick, B., Tokmakov, P., Zakharov, S., Vondrick, C.: Zero-1-to-3: Zero-shot one image to 3D object. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 9298\u20139309 (2023)","DOI":"10.1109\/ICCV51070.2023.00853"},{"key":"20_CR31","unstructured":"Liu, X., Gong, C., Liu, Q.: Flow straight and fast: learning to generate and transfer data with rectified flow. arXiv preprint arXiv:2209.03003 (2022)"},{"key":"20_CR32","unstructured":"Liu, X., Wu, L., Ye, M., Liu, Q.: Let us build bridges: understanding and extending diffusion generative models. arXiv preprint arXiv:2208.14699 (2022)"},{"key":"20_CR33","unstructured":"Liu, X., Zhang, X., Ma, J., Peng, J., Liu, Q.: Instaflow: one step is enough for high-quality diffusion-based text-to-image generation. arXiv preprint arXiv:2309.06380 (2023)"},{"key":"20_CR34","unstructured":"Lu, C., Zhou, Y., Bao, F., Chen, J., Li, C., Zhu, J.: DPM-solver: a fast ode solver for diffusion probabilistic model sampling in around 10 steps. In: Advances in Neural Information Processing Systems, vol. 35, pp. 5775\u20135787 (2022)"},{"key":"20_CR35","unstructured":"Luhman, E., Luhman, T.: Knowledge distillation in iterative generative models for improved sampling speed. arXiv preprint arXiv:2101.02388 (2021)"},{"key":"20_CR36","unstructured":"Luo, S., Tan, Y., Huang, L., Li, J., Zhao, H.: Latent consistency models: synthesizing high-resolution images with few-step inference. arXiv preprint arXiv:2310.04378 (2023)"},{"key":"20_CR37","unstructured":"Luo, W., Hu, T., Zhang, S., Sun, J., Li, Z., Zhang, Z.: Diff-instruct: a universal approach for transferring knowledge from pre-trained diffusion models. arXiv preprint arXiv:2305.18455 (2023)"},{"key":"20_CR38","doi-asserted-by":"crossref","unstructured":"Ma, X., Fang, G., Wang, X.: DeepCache: accelerating diffusion models for free. arXiv preprint arXiv:2312.00858 (2023)","DOI":"10.1109\/CVPR52733.2024.01492"},{"key":"20_CR39","doi-asserted-by":"crossref","unstructured":"Meng, C., et al.: On distillation of guided diffusion models. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 14297\u201314306 (2023)","DOI":"10.1109\/CVPR52729.2023.01374"},{"key":"20_CR40","unstructured":"Molad, E., et al.: Dreamix: video diffusion models are general video editors. arXiv preprint arXiv:2302.01329 (2023)"},{"key":"20_CR41","unstructured":"Nichol, A.Q., Dhariwal, P.: Improved denoising diffusion probabilistic models. In: International Conference on Machine Learning, pp. 8162\u20138171. PMLR (2021)"},{"issue":"57","key":"20_CR42","first-page":"1","volume":"22","author":"G Papamakarios","year":"2021","unstructured":"Papamakarios, G., Nalisnick, E., Rezende, D.J., Mohamed, S., Lakshminarayanan, B.: Normalizing flows for probabilistic modeling and inference. J. Mach. Learn. Res. 22(57), 1\u201364 (2021)","journal-title":"J. Mach. Learn. Res."},{"key":"20_CR43","doi-asserted-by":"crossref","unstructured":"Parmar, G., Zhang, R., Zhu, J.Y.: On aliased resizing and surprising subtleties in GAN evaluation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 11410\u201311420 (2022)","DOI":"10.1109\/CVPR52688.2022.01112"},{"key":"20_CR44","unstructured":"Pernias, P., Rampas, D., Richter, M.L., Pal, C., Aubreville, M.: W\u00fcrstchen: an efficient architecture for large-scale text-to-image diffusion models. In: The Twelfth International Conference on Learning Representations (2023)"},{"key":"20_CR45","unstructured":"Poole, B., Jain, A., Barron, J.T., Mildenhall, B.: DreamFusion: text-to-3D using 2D diffusion. arXiv preprint arXiv:2209.14988 (2022)"},{"key":"20_CR46","unstructured":"Ramesh, A., Dhariwal, P., Nichol, A., Chu, C., Chen, M.: Hierarchical text-conditional image generation with clip latents. arXiv preprint arXiv:2204.06125 (2022)"},{"key":"20_CR47","unstructured":"Razavi, A., Van\u00a0den Oord, A., Vinyals, O.: Generating diverse high-fidelity images with VQ-VAE-2. In: Advances in neural information processing systems, vol. 32 (2019)"},{"key":"20_CR48","doi-asserted-by":"crossref","unstructured":"Rombach, R., Blattmann, A., Lorenz, D., Esser, P., Ommer, B.: High-resolution image synthesis with latent diffusion models. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 10684\u201310695 (2022)","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"20_CR49","unstructured":"Salimans, T., Ho, J.: Progressive distillation for fast sampling of diffusion models. arXiv preprint arXiv:2202.00512 (2022)"},{"key":"20_CR50","doi-asserted-by":"crossref","unstructured":"Shang, Y., Yuan, Z., Xie, B., Wu, B., Yan, Y.: Post-training quantization on diffusion models. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 1972\u20131981 (2023)","DOI":"10.1109\/CVPR52729.2023.00196"},{"key":"20_CR51","unstructured":"Sohl-Dickstein, J., Weiss, E., Maheswaranathan, N., Ganguli, S.: Deep unsupervised learning using nonequilibrium thermodynamics. In: International Conference on Machine Learning, pp. 2256\u20132265. PMLR (2015)"},{"key":"20_CR52","unstructured":"Song, J., Meng, C., Ermon, S.: Denoising diffusion implicit models. arXiv preprint arXiv:2010.02502 (2020)"},{"key":"20_CR53","unstructured":"Song, Y., Dhariwal, P., Chen, M., Sutskever, I.: Consistency models. arXiv preprint arXiv:2303.01469 (2023)"},{"key":"20_CR54","unstructured":"Song, Y., Ermon, S.: Generative modeling by estimating gradients of the data distribution. In: Advances in Neural Information Processing Systems, vol. 32 (2019)"},{"key":"20_CR55","unstructured":"Song, Y., Garg, S., Shi, J., Ermon, S.: Sliced score matching: a scalable approach to density and score estimation. In: Uncertainty in Artificial Intelligence, pp. 574\u2013584. PMLR (2020)"},{"key":"20_CR56","unstructured":"Song, Y., Sohl-Dickstein, J., Kingma, D.P., Kumar, A., Ermon, S., Poole, B.: Score-based generative modeling through stochastic differential equations. arXiv preprint arXiv:2011.13456 (2020)"},{"key":"20_CR57","doi-asserted-by":"crossref","unstructured":"Wang, C., Wang, Z., Xu, X., Tang, Y., Zhou, J., Lu, J.: Towards accurate data-free quantization for diffusion models. arXiv preprint arXiv:2305.18723 (2023)","DOI":"10.1109\/CVPR52733.2024.01517"},{"key":"20_CR58","doi-asserted-by":"crossref","unstructured":"Wang, H., Du, X., Li, J., Yeh, R.A., Shakhnarovich, G.: Score Jacobian chaining: lifting pretrained 2D diffusion models for 3D generation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 12619\u201312629 (2023)","DOI":"10.1109\/CVPR52729.2023.01214"},{"key":"20_CR59","unstructured":"Wang, Z., et al.: ProlificDreamer: high-fidelity and diverse text-to-3d generation with variational score distillation. arXiv preprint arXiv:2305.16213 (2023)"},{"key":"20_CR60","doi-asserted-by":"crossref","unstructured":"Wimbauer, F., et\u00a0al.: Cache me if you can: accelerating diffusion models through block caching. arXiv preprint arXiv:2312.03209 (2023)","DOI":"10.1109\/CVPR52733.2024.00594"},{"key":"20_CR61","unstructured":"Wu, L., Gong, C., Liu, X., Ye, M., Liu, Q.: Diffusion-based molecule generation with informative prior bridges. In: Advances in Neural Information Processing Systems, vol. 35, pp. 36533\u201336545 (2022)"},{"key":"20_CR62","doi-asserted-by":"crossref","unstructured":"Xu, Y., Zhao, Y., Xiao, Z., Hou, T.: UFOGen: you forward once large scale text-to-image generation via diffusion GANs. arXiv preprint arXiv:2311.09257 (2023)","DOI":"10.1109\/CVPR52733.2024.00783"},{"key":"20_CR63","doi-asserted-by":"crossref","unstructured":"Yang, X., Zhou, D., Feng, J., Wang, X.: Diffusion probabilistic model made slim. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 22552\u201322562 (2023)","DOI":"10.1109\/CVPR52729.2023.02160"},{"key":"20_CR64","doi-asserted-by":"crossref","unstructured":"Ye, S., Liu, F.: Score mismatching for generative modeling. arXiv preprint arXiv:2309.11043 (2023)","DOI":"10.1016\/j.neunet.2024.106311"},{"key":"20_CR65","doi-asserted-by":"crossref","unstructured":"Yin, T., et al.: One-step diffusion with distribution matching distillation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 6613\u20136623 (2024)","DOI":"10.1109\/CVPR52733.2024.00632"},{"key":"20_CR66","unstructured":"Zhang, Q., Chen, Y.: Fast sampling of diffusion models with exponential integrator. arXiv preprint arXiv:2204.13902 (2022)"},{"key":"20_CR67","doi-asserted-by":"crossref","unstructured":"Zhang, R., Isola, P., Efros, A.A., Shechtman, E., Wang, O.: The unreasonable effectiveness of deep features as a perceptual metric. In: CVPR (2018)","DOI":"10.1109\/CVPR.2018.00068"},{"key":"20_CR68","doi-asserted-by":"crossref","unstructured":"Zhao, Y., Xu, Y., Xiao, Z., Hou, T.: MobileDiffusion: subsecond text-to-image generation on mobile devices. arXiv preprint arXiv:2311.16567 (2023)","DOI":"10.1007\/978-3-031-73033-7_13"},{"key":"20_CR69","unstructured":"Zheng, H., Nie, W., Vahdat, A., Azizzadenesheli, K., Anandkumar, A.: Fast sampling of diffusion models via operator learning. In: International Conference on Machine Learning, pp. 42390\u201342402. PMLR (2023)"},{"key":"20_CR70","doi-asserted-by":"crossref","unstructured":"Zhou, Z., Chen, D., Wang, C., Chen, C.: Fast ode-based sampling for diffusion models in around 5 steps. arXiv preprint arXiv:2312.00094 (2023)","DOI":"10.1109\/CVPR52733.2024.00743"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2024"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-73007-8_20","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,11,28]],"date-time":"2024-11-28T22:38:38Z","timestamp":1732833518000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-73007-8_20"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10,1]]},"ISBN":["9783031730061","9783031730078"],"references-count":70,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-73007-8_20","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,10,1]]},"assertion":[{"value":"1 October 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Milan","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 September 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4 October 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2024.ecva.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}