{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,7,21]],"date-time":"2026-07-21T14:59:55Z","timestamp":1784645995236,"version":"3.55.0"},"publisher-location":"Cham","reference-count":69,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031730153","type":"print"},{"value":"9783031730160","type":"electronic"}],"license":[{"start":{"date-parts":[[2024,10,26]],"date-time":"2024-10-26T00:00:00Z","timestamp":1729900800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,10,26]],"date-time":"2024-10-26T00:00:00Z","timestamp":1729900800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-73016-0_6","type":"book-chapter","created":{"date-parts":[[2024,10,25]],"date-time":"2024-10-25T19:02:40Z","timestamp":1729882960000},"page":"87-103","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":242,"title":["Adversarial Diffusion Distillation"],"prefix":"10.1007","author":[{"given":"Axel","family":"Sauer","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Dominik","family":"Lorenz","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Andreas","family":"Blattmann","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Robin","family":"Rombach","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"297","published-online":{"date-parts":[[2024,10,26]]},"reference":[{"key":"6_CR1","unstructured":"Balaji, Y., et al.: ediff-i: Text-to-image diffusion models with an ensemble of expert denoisers. ArXiv abs\/2211.01324 (2022). https:\/\/api.semanticscholar.org\/CorpusID:253254800"},{"key":"6_CR2","doi-asserted-by":"crossref","unstructured":"Blattmann, A., et al.: Align your latents: high-resolution video synthesis with latent diffusion models. In: 2023 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 22563\u201322575 (2023). https:\/\/api.semanticscholar.org\/CorpusID:258187553","DOI":"10.1109\/CVPR52729.2023.02161"},{"key":"6_CR3","doi-asserted-by":"crossref","unstructured":"Caron, M., Touvron, H., Misra, I., J\u00e9gou, H., Mairal, J., Bojanowski, P., Joulin, A.: Emerging properties in self-supervised vision transformers. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 9650\u20139660 (2021)","DOI":"10.1109\/ICCV48922.2021.00951"},{"key":"6_CR4","volume-title":"Muse: Text-to-image generation via masked generative transformers","author":"H Chang","year":"2023","unstructured":"Chang, H., Zhang, H., Barber, J., Maschinot, A., Lezama, J., Jiang, L., Yang, M.H., Murphy, K., Freeman, W.T., Rubinstein, M., et al.: Muse: Text-to-image generation via masked generative transformers. Proc, ICML (2023)"},{"key":"6_CR5","unstructured":"Dai, X., et\u00a0al.: Emu: enhancing image generation models using photogenic needles in a haystack. arXiv preprint arXiv:2309.15807 (2023)"},{"key":"6_CR6","first-page":"30150","volume":"35","author":"T Dockhorn","year":"2022","unstructured":"Dockhorn, T., Vahdat, A., Kreis, K.: Genie: higher-order denoising diffusion solvers. Adv. Neural. Inf. Process. Syst. 35, 30150\u201330166 (2022)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"6_CR7","unstructured":"Dosovitskiy, A., et\u00a0al.: An image is worth 16x16 words: transformers for image recognition at scale. arXiv preprint arXiv:2010.11929 (2020)"},{"key":"6_CR8","doi-asserted-by":"crossref","unstructured":"Esser, P., Chiu, J., Atighehchian, P., Granskog, J., Germanidis, A.: Structure and content-guided video synthesis with diffusion models (2023)","DOI":"10.1109\/ICCV51070.2023.00675"},{"key":"6_CR9","doi-asserted-by":"crossref","unstructured":"Esser, P., Rombach, R., Ommer, B.: Taming transformers for high-resolution image synthesis. In: 2021 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 12868\u201312878 (2020). https:\/\/api.semanticscholar.org\/CorpusID:229297973","DOI":"10.1109\/CVPR46437.2021.01268"},{"key":"6_CR10","unstructured":"Franceschi, J.Y., et al.: Unifying gans and score-based diffusion as generative particle models. arXiv preprint arXiv:2305.16150 (2023)"},{"key":"6_CR11","doi-asserted-by":"crossref","unstructured":"Goodfellow, I.J., et al.: Generative adversarial networks. Commun. ACM 63, 139 \u2013 144 (2014). https:\/\/api.semanticscholar.org\/CorpusID:1033682","DOI":"10.1145\/3422622"},{"key":"6_CR12","unstructured":"Grigoryev, T., Voynov, A., Babenko, A.: When, why, and which pretrained gans are useful? ICLR (2022)"},{"key":"6_CR13","doi-asserted-by":"crossref","unstructured":"Hertz, A., Aberman, K., Cohen-Or, D.: Delta denoising score. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 2328\u20132337 (2023)","DOI":"10.1109\/ICCV51070.2023.00221"},{"key":"6_CR14","doi-asserted-by":"crossref","unstructured":"Hessel, J., Holtzman, A., Forbes, M., Bras, R.L., Choi, Y.: CLIPScore: a reference-free evaluation metric for image captioning. In: Proc. EMNLP (2021)","DOI":"10.18653\/v1\/2021.emnlp-main.595"},{"key":"6_CR15","unstructured":"Heusel, M., Ramsauer, H., Unterthiner, T., Nessler, B., Hochreiter, S.: GANs trained by a two time-scale update rule converge to a local Nash equilibrium. NeurIPS (2017)"},{"key":"6_CR16","unstructured":"Ho, J.: Classifier-free diffusion guidance. ArXiv abs\/2207.12598 (2022). https:\/\/api.semanticscholar.org\/CorpusID:249145348"},{"key":"6_CR17","unstructured":"Ho, J., et al.: Imagen video: High definition video generation with diffusion models. ArXiv abs\/2210.02303 (2022). https:\/\/api.semanticscholar.org\/CorpusID:252715883"},{"key":"6_CR18","unstructured":"Ho, J., Jain, A., Abbeel, P.: Denoising diffusion probabilistic models. ArXiv abs\/2006.11239 (2020). https:\/\/api.semanticscholar.org\/CorpusID:219955663"},{"key":"6_CR19","unstructured":"Hu, J.E., et al.: Lora: Low-rank adaptation of large language models. ArXiv abs\/2106.09685 (2021). https:\/\/api.semanticscholar.org\/CorpusID:235458009"},{"key":"6_CR20","unstructured":"Huang, Y., Wang, J., Shi, Y., Qi, X., Zha, Z.J., Zhang, L.: Dreamtime: n improved optimization strategy for text-to-3d content creation. arXiv preprint arXiv:2306.12422 (2023)"},{"key":"6_CR21","unstructured":"Jolicoeur-Martineau, A., Pich\u00e9-Taillefer, R., Combes, R.T.d., Mitliagkas, I.: Adversarial score matching and improved sampling for image generation. arXiv preprint arXiv:2009.05475 (2020)"},{"key":"6_CR22","doi-asserted-by":"crossref","unstructured":"Kang, M., et al.: Scaling up gans for text-to-image synthesis. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 10124\u201310134 (2023)","DOI":"10.1109\/CVPR52729.2023.00976"},{"key":"6_CR23","doi-asserted-by":"crossref","unstructured":"Karras, T., Laine, S., Aila, T.: A style-based generator architecture for generative adversarial networks. In: 2019 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 4396\u20134405 (2018). https:\/\/api.semanticscholar.org\/CorpusID:54482423","DOI":"10.1109\/CVPR.2019.00453"},{"key":"6_CR24","doi-asserted-by":"crossref","unstructured":"Karras, T., Laine, S., Aittala, M., Hellsten, J., Lehtinen, J., Aila, T.: Analyzing and improving the image quality of stylegan. In: 2020 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 8107\u20138116 (2019). https:\/\/api.semanticscholar.org\/CorpusID:209202273","DOI":"10.1109\/CVPR42600.2020.00813"},{"key":"6_CR25","unstructured":"Katzir, O., Patashnik, O., Cohen-Or, D., Lischinski, D.: Noise-free score distillation. arXiv preprint arXiv:2310.17590 (2023)"},{"key":"6_CR26","unstructured":"Kim, D., et al.: Consistency trajectory models: Learning probability flow ode trajectory of diffusion. arXiv preprint arXiv:2310.02279 (2023)"},{"key":"6_CR27","unstructured":"Li, Y., et al.: Snapfusion: text-to-image diffusion model on mobile devices within two seconds. arXiv preprint arXiv:2306.00980 (2023)"},{"key":"6_CR28","unstructured":"Lim, J.H., Ye, J.C.: Geometric gan. arXiv preprint arXiv:1705.02894 (2017)"},{"key":"6_CR29","doi-asserted-by":"crossref","unstructured":"Lin, S., Liu, B., Li, J., Yang, X.: Common diffusion noise schedules and sample steps are flawed (2023)","DOI":"10.1109\/WACV57701.2024.00532"},{"key":"6_CR30","doi-asserted-by":"crossref","unstructured":"Lin, T.Y., et al.: Microsoft coco: common objects in context (2015)","DOI":"10.1007\/978-3-319-10602-1_48"},{"key":"6_CR31","unstructured":"Liu, X., Gong, C., et\u00a0al.: Flow straight and fast: Learning to generate and transfer data with rectified flow. In: The Eleventh International Conference on Learning Representations (2022)"},{"key":"6_CR32","unstructured":"Liu, X., Zhang, X., Ma, J., Peng, J., Liu, Q.: Instaflow: one step is enough for high-quality diffusion-based text-to-image generation. arXiv preprint arXiv:2309.06380 (2023)"},{"key":"6_CR33","first-page":"5775","volume":"35","author":"C Lu","year":"2022","unstructured":"Lu, C., Zhou, Y., Bao, F., Chen, J., Li, C., Zhu, J.: Dpm-solver: fast ode solver for diffusion probabilistic model sampling in around 10 steps. Adv. Neural. Inf. Process. Syst. 35, 5775\u20135787 (2022)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"6_CR34","unstructured":"Luo, S., Tan, Y., Huang, L., Li, J., Zhao, H.: Latent consistency models: Synthesizing high-resolution images with few-step inference. ArXiv abs\/2310.04378 (2023). https:\/\/api.semanticscholar.org\/CorpusID:263831037"},{"key":"6_CR35","unstructured":"Luo, S., Tan, Y., Huang, L., Li, J., Zhao, H.: Latent consistency models: Synthesizing high-resolution images with few-step inference. arXiv preprint arXiv:2310.04378 (2023)"},{"key":"6_CR36","unstructured":"Luo, S., et al.: Lcm-lora: a universal stable-diffusion acceleration module. ArXiv abs\/2311.05556 (2023). https:\/\/api.semanticscholar.org\/CorpusID:265067414"},{"key":"6_CR37","unstructured":"Luo, S., et al.: Lcm-lora: a universal stable-diffusion acceleration module. arXiv preprint arXiv:2311.05556 (2023)"},{"key":"6_CR38","unstructured":"Luo, W., Hu, T., Zhang, S., Sun, J., Li, Z., Zhang, Z.: Diff-instruct: a universal approach for transferring knowledge from pre-trained diffusion models. arXiv preprint arXiv:2305.18455 (2023)"},{"key":"6_CR39","doi-asserted-by":"crossref","unstructured":"Meng, C., et al.: On distillation of guided diffusion models. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 14297\u201314306 (2023)","DOI":"10.1109\/CVPR52729.2023.01374"},{"key":"6_CR40","unstructured":"Mescheder, L., Geiger, A., Nowozin, S.: Which training methods for gans do actually converge? In: International Conference on Machine Learning, pp. 3481\u20133490. PMLR (2018)"},{"key":"6_CR41","doi-asserted-by":"crossref","unstructured":"Metzer, G., Richardson, E., Patashnik, O., Giryes, R., Cohen-Or, D.: Latent-nerf for shape-guided generation of 3d shapes and textures. In: 2023 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 12663\u201312673 (2022). https:\/\/api.semanticscholar.org\/CorpusID:253510536","DOI":"10.1109\/CVPR52729.2023.01218"},{"key":"6_CR42","unstructured":"Miyato, T., Koyama, M.: cgans with projection discriminator. arXiv preprint arXiv:1802.05637 (2018)"},{"key":"6_CR43","unstructured":"Oquab, M., et\u00a0al.: Dinov2: Learning robust visual features without supervision. arXiv preprint arXiv:2304.07193 (2023)"},{"key":"6_CR44","unstructured":"Patil, S., Berman, W., von Platen, P.: Amused: An open muse model (2023). https:\/\/github.com\/huggingface\/diffusers"},{"key":"6_CR45","doi-asserted-by":"crossref","unstructured":"Peebles, W., Xie, S.: Scalable diffusion models with transformers. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 4195\u20134205 (2023)","DOI":"10.1109\/ICCV51070.2023.00387"},{"key":"6_CR46","unstructured":"Podell, D., English, Z., Lacey, K., Blattmann, A., Dockhorn, T., M\u00fcller, J., Penna, J., Rombach, R.: Sdxl: Improving latent diffusion models for high-resolution image synthesis. arXiv preprint arXiv:2307.01952 (2023)"},{"key":"6_CR47","unstructured":"Poole, B., Jain, A., Barron, J.T., Mildenhall, B.: Dreamfusion: text-to-3d using 2d diffusion. arXiv preprint arXiv:2209.14988 (2022)"},{"key":"6_CR48","unstructured":"Radford, A., et\u00a0al.: Learning transferable visual models from natural language supervision. In: International Conference on Machine Learning, pp. 8748\u20138763. PMLR (2021)"},{"key":"6_CR49","unstructured":"Ramesh, A., Dhariwal, P., Nichol, A., Chu, C., Chen, M.: Hierarchical text-conditional image generation with clip latents. ArXiv abs\/2204.06125 (2022). https:\/\/api.semanticscholar.org\/CorpusID:248097655"},{"key":"6_CR50","doi-asserted-by":"crossref","unstructured":"Rombach, R., Blattmann, A., Lorenz, D., Esser, P., Ommer, B.: High-resolution image synthesis with latent diffusion models. In: 2022 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 10674\u201310685 (2021). https:\/\/api.semanticscholar.org\/CorpusID:245335280","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"6_CR51","first-page":"36479","volume":"35","author":"C Saharia","year":"2022","unstructured":"Saharia, C., et al.: Photorealistic text-to-image diffusion models with deep language understanding. Adv. Neural. Inf. Process. Syst. 35, 36479\u201336494 (2022)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"6_CR52","unstructured":"Salimans, T., Ho, J.: Progressive distillation for fast sampling of diffusion models. CoRR abs\/2202.00512 (2022). https:\/\/arxiv.org\/abs\/2202.00512"},{"key":"6_CR53","first-page":"17480","volume":"34","author":"A Sauer","year":"2021","unstructured":"Sauer, A., Chitta, K., M\u00fcller, J., Geiger, A.: Projected gans converge faster. Adv. Neural. Inf. Process. Syst. 34, 17480\u201317492 (2021)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"6_CR54","volume-title":"Stylegan-t: unlocking the power of gans for fast large-scale text-to-image synthesis","author":"A Sauer","year":"2023","unstructured":"Sauer, A., Karras, T., Laine, S., Geiger, A., Aila, T.: Stylegan-t: unlocking the power of gans for fast large-scale text-to-image synthesis. Proc, ICML (2023)"},{"key":"6_CR55","doi-asserted-by":"crossref","unstructured":"Sauer, A., Schwarz, K., Geiger, A.: Stylegan-xl: scaling stylegan to large diverse datasets. In: ACM SIGGRAPH 2022 Conference Proceedings (2022). https:\/\/api.semanticscholar.org\/CorpusID:246441861","DOI":"10.1145\/3528233.3530738"},{"key":"6_CR56","doi-asserted-by":"crossref","unstructured":"Schmidhuber, J.: Generative adversarial networks are special cases of artificial curiosity (1990) and also closely related to predictability minimization (1991) (2020)","DOI":"10.1016\/j.neunet.2020.04.008"},{"key":"6_CR57","unstructured":"Schuhmann, C., et\u00a0al.: LAION-5B: an open large-scale dataset for training next generation image-text models. In: NeurIPS (2022)"},{"key":"6_CR58","unstructured":"Singer, U., et\u00a0al.: Text-to-4d dynamic scene generation. arXiv preprint arXiv:2301.11280 (2023)"},{"key":"6_CR59","unstructured":"Sohl-Dickstein, J.N., Weiss, E.A., Maheswaranathan, N., Ganguli, S.: Deep unsupervised learning using nonequilibrium thermodynamics. ArXiv abs\/1503.03585 (2015). https:\/\/api.semanticscholar.org\/CorpusID:14888175"},{"key":"6_CR60","unstructured":"Song, J., Meng, C., Ermon, S.: Denoising diffusion implicit models. In: International Conference on Learning Representations (2021). https:\/\/openreview.net\/forum?id=St1giarCHLP"},{"key":"6_CR61","unstructured":"Song, Y., Dhariwal, P., Chen, M., Sutskever, I.: Consistency models. In: International Conference on Machine Learning (2023). https:\/\/api.semanticscholar.org\/CorpusID:257280191"},{"key":"6_CR62","unstructured":"Song, Y., Sohl-Dickstein, J.N., Kingma, D.P., Kumar, A., Ermon, S., Poole, B.: Score-based generative modeling through stochastic differential equations. ArXiv abs\/2011.13456 (2020). https:\/\/api.semanticscholar.org\/CorpusID:227209335"},{"key":"6_CR63","unstructured":"Stein, G., et al.: Exposing flaws of generative model evaluation metrics and their unfair treatment of diffusion models. arXiv preprint arXiv:2306.04675 (2023)"},{"key":"6_CR64","doi-asserted-by":"crossref","unstructured":"Wang, H., Du, X., Li, J., Yeh, R.A., Shakhnarovich, G.: Score jacobian chaining: lifting pretrained 2d diffusion models for 3d generation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 12619\u201312629 (2023)","DOI":"10.1109\/CVPR52729.2023.01214"},{"key":"6_CR65","unstructured":"Wang, Z., et al.: Prolificdreamer: high-fidelity and diverse text-to-3d generation with variational score distillation. ArXiv abs\/2305.16213 (2023). https:\/\/api.semanticscholar.org\/CorpusID:258887357"},{"key":"6_CR66","unstructured":"Xiao, Z., Kreis, K., Vahdat, A.: Tackling the generative learning trilemma with denoising diffusion gans. arXiv preprint arXiv:2112.07804 (2021)"},{"key":"6_CR67","unstructured":"Xu, Y., Zhao, Y., Xiao, Z., Hou, T.: Ufogen: you forward once large scale text-to-image generation via diffusion gans. arXiv preprint arXiv:2311.09257 (2023). https:\/\/api.semanticscholar.org\/CorpusID:265221033"},{"key":"6_CR68","unstructured":"Yao, C.H., et al.: Artic3d: learning robust articulated 3d shapes from noisy web image collections. arXiv preprint arXiv:2306.04619 (2023)"},{"key":"6_CR69","unstructured":"Zhang, Q., Chen, Y.: Fast sampling of diffusion models with exponential integrator. arXiv preprint arXiv:2204.13902 (2022)"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2024"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-73016-0_6","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,10,25]],"date-time":"2024-10-25T19:03:49Z","timestamp":1729883029000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-73016-0_6"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10,26]]},"ISBN":["9783031730153","9783031730160"],"references-count":69,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-73016-0_6","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,10,26]]},"assertion":[{"value":"26 October 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Milan","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 September 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4 October 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2024.ecva.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}