{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,7,12]],"date-time":"2025-07-12T22:57:25Z","timestamp":1752361045625,"version":"3.40.3"},"publisher-location":"Cham","reference-count":50,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783031726392"},{"type":"electronic","value":"9783031726408"}],"license":[{"start":{"date-parts":[[2024,10,29]],"date-time":"2024-10-29T00:00:00Z","timestamp":1730160000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,10,29]],"date-time":"2024-10-29T00:00:00Z","timestamp":1730160000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-72640-8_20","type":"book-chapter","created":{"date-parts":[[2024,10,28]],"date-time":"2024-10-28T09:34:20Z","timestamp":1730108060000},"page":"356-372","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["Memory-Efficient Fine-Tuning for\u00a0Quantized Diffusion Model"],"prefix":"10.1007","author":[{"given":"Hyogon","family":"Ryu","sequence":"first","affiliation":[]},{"given":"Seohyun","family":"Lim","sequence":"additional","affiliation":[]},{"given":"Hyunjung","family":"Shim","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,10,29]]},"reference":[{"key":"20_CR1","unstructured":"Balaji, Y., et\u00a0al.: eDiffi: text-to-image diffusion models with an ensemble of expert denoisers. arXiv preprint arXiv:2211.01324 (2022)"},{"key":"20_CR2","unstructured":"Banner, R., Nahshan, Y., Soudry, D.: Post training 4-bit quantization of convolutional networks for rapid-deployment. In: Advances in Neural Information Processing Systems, vol. 32 (2019)"},{"key":"20_CR3","unstructured":"Betker, J., et\u00a0al.: Improving image generation with better captions. Computer Science (2023). https:\/\/cdn.openai.com\/papers\/dall-e-3.pdf"},{"key":"20_CR4","doi-asserted-by":"crossref","unstructured":"Choi, J., Lee, J., Shin, C., Kim, S., Kim, H., Yoon, S.: Perception prioritized training of diffusion models. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 11472\u201311481 (2022)","DOI":"10.1109\/CVPR52688.2022.01118"},{"key":"20_CR5","unstructured":"Dettmers, T., Pagnoni, A., Holtzman, A., Zettlemoyer, L.: QLoRA: efficient finetuning of quantized LLMs. In: Advances in Neural Information Processing Systems, vol. 36 (2024)"},{"key":"20_CR6","first-page":"8780","volume":"34","author":"P Dhariwal","year":"2021","unstructured":"Dhariwal, P., Nichol, A.: Diffusion models beat GANs on image synthesis. Adv. Neural. Inf. Process. Syst. 34, 8780\u20138794 (2021)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"20_CR7","unstructured":"Ding, M., Zheng, W., Hong, W., Tang, J.: CogView2: faster and better text-to-image generation via hierarchical transformers. arXiv preprint arXiv:2204.14217 (2022)"},{"key":"20_CR8","unstructured":"Esser, S.K., McKinstry, J.L., Bablani, D., Appuswamy, R., Modha, D.S.: Learned step size quantization. arXiv preprint arXiv:1902.08153 (2019)"},{"key":"20_CR9","unstructured":"Gal, R., et al.: An image is worth one word: personalizing text-to-image generation using textual inversion. arXiv preprint arXiv:2208.01618 (2022)"},{"key":"20_CR10","doi-asserted-by":"crossref","unstructured":"Gong, Z., et al.: PreQuant: a task-agnostic quantization approach for pre-trained language models. arXiv preprint arXiv:2306.00014 (2023)","DOI":"10.18653\/v1\/2023.findings-acl.511"},{"key":"20_CR11","unstructured":"Han, S., Mao, H., Dally, W.J.: Deep compression: compressing deep neural networks with pruning, trained quantization and Huffman coding. arXiv preprint arXiv:1510.00149 (2015)"},{"key":"20_CR12","unstructured":"Hao, S., Han, K., Zhao, S., Wong, K.Y.K.: ViCo: plug-and-play visual condition for personalized text-to-image generation. arXiv preprint arXiv:2306.00971 (2023)"},{"key":"20_CR13","unstructured":"He, Y., Liu, J., Wu, W., Zhou, H., Zhuang, B.: EfficientDM: efficient quantization-aware fine-tuning of low-bit diffusion models. arXiv preprint arXiv:2310.03270 (2023)"},{"key":"20_CR14","unstructured":"He, Y., Liu, L., Liu, J., Wu, W., Zhou, H., Zhuang, B.: PTQD: accurate post-training quantization for diffusion models. In: Advances in Neural Information Processing Systems, vol. 36 (2024)"},{"key":"20_CR15","doi-asserted-by":"publisher","unstructured":"Hessel, J., Holtzman, A., Forbes, M., Le\u00a0Bras, R., Choi, Y.: CLIPScore: a reference-free evaluation metric for image captioning. In: Moens, M.F., Huang, X., Specia, L., Yih, S.W.T. (eds.) Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing, pp. 7514\u20137528. Association for Computational Linguistics, Online and Punta Cana, Dominican Republic, November 2021. https:\/\/doi.org\/10.18653\/v1\/2021.emnlp-main.595, https:\/\/aclanthology.org\/2021.emnlp-main.595","DOI":"10.18653\/v1\/2021.emnlp-main.595"},{"key":"20_CR16","first-page":"6840","volume":"33","author":"J Ho","year":"2020","unstructured":"Ho, J., Jain, A., Abbeel, P.: Denoising diffusion probabilistic models. Adv. Neural. Inf. Process. Syst. 33, 6840\u20136851 (2020)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"20_CR17","unstructured":"Houlsby, N., et al.: Parameter-efficient transfer learning for NLP. In: International Conference on Machine Learning, pp. 2790\u20132799. PMLR (2019)"},{"key":"20_CR18","unstructured":"Hu, E.J., et al.: LoRA: low-rank adaptation of large language models. arXiv preprint arXiv:2106.09685 (2021)"},{"key":"20_CR19","unstructured":"Hubara, I., Nahshan, Y., Hanani, Y., Banner, R., Soudry, D.: Accurate post training quantization with small calibration sets. In: International Conference on Machine Learning, pp. 4466\u20134475. PMLR (2021)"},{"key":"20_CR20","doi-asserted-by":"crossref","unstructured":"Jacob, B., et al.: Quantization and training of neural networks for efficient integer-arithmetic-only inference. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 2704\u20132713 (2018)","DOI":"10.1109\/CVPR.2018.00286"},{"key":"20_CR21","doi-asserted-by":"crossref","unstructured":"Jung, S., et al.: Learning to quantize deep networks by optimizing quantization intervals with task loss. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 4350\u20134359 (2019)","DOI":"10.1109\/CVPR.2019.00448"},{"key":"20_CR22","unstructured":"Kim, J., et al.: Memory-efficient fine-tuning of compressed large language models via sub-4-bit integer quantization. In: Advances in Neural Information Processing Systems, vol. 36 (2024)"},{"key":"20_CR23","doi-asserted-by":"crossref","unstructured":"Kumari, N., Zhang, B., Zhang, R., Shechtman, E., Zhu, J.Y.: Multi-concept customization of text-to-image diffusion. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 1931\u20131941 (2023)","DOI":"10.1109\/CVPR52729.2023.00192"},{"key":"20_CR24","doi-asserted-by":"crossref","unstructured":"Kwon, S.J., et al.: AlphaTuning: quantization-aware parameter-efficient adaptation of large-scale pre-trained language models. arXiv preprint arXiv:2210.03858 (2022)","DOI":"10.18653\/v1\/2022.findings-emnlp.240"},{"key":"20_CR25","unstructured":"Lee, C., Jin, J., Kim, T., Kim, H., Park, E.: OWQ: lessons learned from activation outliers for weight quantization in large language models. arXiv preprint arXiv:2306.02272 (2023)"},{"key":"20_CR26","unstructured":"Lee, Y., Kim, J.Y., Go, H., Jeong, M., Oh, S., Choi, S.: Multi-architecture multi-expert diffusion models. arXiv preprint arXiv:2306.04990 (2023)"},{"key":"20_CR27","unstructured":"Li, F., Liu, B., Wang, X., Zhang, B., Yan, J.: Ternary weight networks. arXiv preprint arXiv:1605.04711 (2016)"},{"key":"20_CR28","doi-asserted-by":"publisher","first-page":"47","DOI":"10.1016\/j.neucom.2022.01.029","volume":"479","author":"H Li","year":"2022","unstructured":"Li, H., et al.: SRDiff: single image super-resolution with diffusion probabilistic models. Neurocomputing 479, 47\u201359 (2022)","journal-title":"Neurocomputing"},{"key":"20_CR29","doi-asserted-by":"crossref","unstructured":"Li, X., et al.: Q-Diffusion: quantizing diffusion models. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 17535\u201317545 (2023)","DOI":"10.1109\/ICCV51070.2023.01608"},{"key":"20_CR30","doi-asserted-by":"crossref","unstructured":"Lin, Z., Madotto, A., Fung, P.: Exploring versatile generative language model via parameter-efficient transfer learning. arXiv preprint arXiv:2004.03829 (2020)","DOI":"10.18653\/v1\/2020.findings-emnlp.41"},{"key":"20_CR31","doi-asserted-by":"crossref","unstructured":"Lugmayr, A., Danelljan, M., Romero, A., Yu, F., Timofte, R., Van\u00a0Gool, L.: Repaint: inpainting using denoising diffusion probabilistic models. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 11461\u201311471 (2022)","DOI":"10.1109\/CVPR52688.2022.01117"},{"issue":"1","key":"20_CR32","doi-asserted-by":"publisher","first-page":"99","DOI":"10.1145\/3503250","volume":"65","author":"B Mildenhall","year":"2021","unstructured":"Mildenhall, B., Srinivasan, P.P., Tancik, M., Barron, J.T., Ramamoorthi, R., Ng, R.: NeRF: representing scenes as neural radiance fields for view synthesis. Commun. ACM 65(1), 99\u2013106 (2021)","journal-title":"Commun. ACM"},{"key":"20_CR33","doi-asserted-by":"crossref","unstructured":"Nagel, M., Baalen, M.V., Blankevoort, T., Welling, M.: Data-free quantization through weight equalization and bias correction. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 1325\u20131334 (2019)","DOI":"10.1109\/ICCV.2019.00141"},{"key":"20_CR34","unstructured":"Nichol, A.Q., Dhariwal, P.: Improved denoising diffusion probabilistic models. In: International Conference on Machine Learning, pp. 8162\u20138171. PMLR (2021)"},{"key":"20_CR35","unstructured":"Oquab, M., et al.: DinoV2: learning robust visual features without supervision (2023)"},{"key":"20_CR36","unstructured":"Podell, D., et al.: SDXL: improving latent diffusion models for high-resolution image synthesis. arXiv preprint arXiv:2307.01952 (2023)"},{"key":"20_CR37","unstructured":"Przewlocka-Rus, D., Sarwar, S.S., Sumbul, H.E., Li, Y., De\u00a0Salvo, B.: Power-of-two quantization for low bitwidth and hardware compliant neural networks. arXiv preprint arXiv:2203.05025 (2022)"},{"key":"20_CR38","unstructured":"Ramesh, A., et al.: Zero-shot text-to-image generation. In: International Conference on Machine Learning, pp. 8821\u20138831. PMLR (2021)"},{"key":"20_CR39","unstructured":"Rebuffi, S.A., Bilen, H., Vedaldi, A.: Learning multiple visual domains with residual adapters. In: Advances in Neural Information Processing Systems, vol. 30 (2017)"},{"key":"20_CR40","doi-asserted-by":"crossref","unstructured":"Rombach, R., Blattmann, A., Lorenz, D., Esser, P., Ommer, B.: High-resolution image synthesis with latent diffusion models. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 10684\u201310695 (2022)","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"20_CR41","doi-asserted-by":"crossref","unstructured":"R\u00fcckl\u00e9, A., et al.: AdapterDrop: on the efficiency of adapters in transformers. arXiv preprint arXiv:2010.11918 (2020)","DOI":"10.18653\/v1\/2021.emnlp-main.626"},{"key":"20_CR42","doi-asserted-by":"crossref","unstructured":"Ruiz, N., Li, Y., Jampani, V., Pritch, Y., Rubinstein, M., Aberman, K.: DreamBooth: fine tuning text-to-image diffusion models for subject-driven generation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 22500\u201322510 (2023)","DOI":"10.1109\/CVPR52729.2023.02155"},{"key":"20_CR43","unstructured":"Ryu, S.: Low-rank adaptation for fast text-to-image diffusion fine-tuning. https:\/\/github.com\/cloneofsimo\/lora"},{"key":"20_CR44","unstructured":"Saharia, C., et al.: Photorealistic text-to-image diffusion models with deep language understanding. arXiv preprint arXiv:2205.11487 (2022)"},{"issue":"4","key":"20_CR45","first-page":"4713","volume":"45","author":"C Saharia","year":"2022","unstructured":"Saharia, C., Ho, J., Chan, W., Salimans, T., Fleet, D.J., Norouzi, M.: Image super-resolution via iterative refinement. IEEE Trans. Pattern Anal. Mach. Intell. 45(4), 4713\u20134726 (2022)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"20_CR46","doi-asserted-by":"crossref","unstructured":"Shang, Y., Yuan, Z., Xie, B., Wu, B., Yan, Y.: Post-training quantization on diffusion models. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 1972\u20131981 (2023)","DOI":"10.1109\/CVPR52729.2023.00196"},{"key":"20_CR47","unstructured":"Wu, D., Tang, Q., Zhao, Y., Zhang, M., Fu, Y., Zhang, D.: EasyQuant: post-training quantization via scale optimization. arXiv preprint arXiv:2006.16669 (2020)"},{"key":"20_CR48","unstructured":"Xiao, G., Lin, J., Seznec, M., Wu, H., Demouth, J., Han, S.: SmoothQuant: accurate and efficient post-training quantization for large language models. In: International Conference on Machine Learning, pp. 38087\u201338099. PMLR (2023)"},{"key":"20_CR49","first-page":"27168","volume":"35","author":"Z Yao","year":"2022","unstructured":"Yao, Z., Yazdani Aminabadi, R., Zhang, M., Wu, X., Li, C., He, Y.: ZeroQuant: efficient and affordable post-training quantization for large-scale transformers. Adv. Neural. Inf. Process. Syst. 35, 27168\u201327183 (2022)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"20_CR50","doi-asserted-by":"crossref","unstructured":"Yeh, R.A., Chen, C., Yian\u00a0Lim, T., Schwing, A.G., Hasegawa-Johnson, M., Do, M.N.: Semantic image inpainting with deep generative models. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 5485\u20135493 (2017)","DOI":"10.1109\/CVPR.2017.728"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2024"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-72640-8_20","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,10,28]],"date-time":"2024-10-28T09:47:53Z","timestamp":1730108873000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-72640-8_20"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10,29]]},"ISBN":["9783031726392","9783031726408"],"references-count":50,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-72640-8_20","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2024,10,29]]},"assertion":[{"value":"29 October 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Milan","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 September 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4 October 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2024.ecva.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}