{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,26]],"date-time":"2025-03-26T10:05:05Z","timestamp":1742983505878,"version":"3.40.3"},"publisher-location":"Cham","reference-count":65,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783031732010"},{"type":"electronic","value":"9783031732027"}],"license":[{"start":{"date-parts":[[2024,11,21]],"date-time":"2024-11-21T00:00:00Z","timestamp":1732147200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,11,21]],"date-time":"2024-11-21T00:00:00Z","timestamp":1732147200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-73202-7_17","type":"book-chapter","created":{"date-parts":[[2024,11,20]],"date-time":"2024-11-20T14:19:26Z","timestamp":1732112366000},"page":"288-306","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["RECON: Training-Free Acceleration for\u00a0Text-to-Image Synthesis with\u00a0Retrieval of\u00a0Concept Prompt Trajectories"],"prefix":"10.1007","author":[{"given":"Chen-Yi","family":"Lu","sequence":"first","affiliation":[]},{"given":"Shubham","family":"Agarwal","sequence":"additional","affiliation":[]},{"given":"Md Mehrab","family":"Tanjim","sequence":"additional","affiliation":[]},{"given":"Kanak","family":"Mahadik","sequence":"additional","affiliation":[]},{"given":"Anup","family":"Rao","sequence":"additional","affiliation":[]},{"given":"Subrata","family":"Mitra","sequence":"additional","affiliation":[]},{"given":"Shiv Kumar","family":"Saini","sequence":"additional","affiliation":[]},{"given":"Saurabh","family":"Bagchi","sequence":"additional","affiliation":[]},{"given":"Somali","family":"Chaterji","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,11,21]]},"reference":[{"unstructured":"christophschuhmann\/improved-aesthetic-predictor: CLIP+MLP aesthetic score predictor. https:\/\/github.com\/christophschuhmann\/improved-aesthetic-predictor","key":"17_CR1"},{"unstructured":"Dall-e 2 (2023). https:\/\/openai.com\/dall-e-2","key":"17_CR2"},{"unstructured":"Agarwal, S., Mitra, S., Chakraborty, S., Karanam, S., Mukherjee, K., Saini, S.K.: Approximate caching for efficiently serving text-to-image diffusion models. In: 21st USENIX Symposium on Networked Systems Design and Implementation (NSDI 2024), pp. 1173\u20131189. USENIX Association, Santa Clara (2024). https:\/\/www.usenix.org\/conference\/nsdi24\/presentation\/agarwal-shubham","key":"17_CR3"},{"unstructured":"Bojanowski, P., Joulin, A., Lopez-Paz, D., Szlam, A.: Optimizing the latent space of generative networks. arXiv preprint arXiv:1707.05776 (2017)","key":"17_CR4"},{"key":"17_CR5","doi-asserted-by":"publisher","first-page":"904","DOI":"10.3758\/s13428-013-0403-5","volume":"46","author":"M Brysbaert","year":"2014","unstructured":"Brysbaert, M., Warriner, A.B., Kuperman, V.: Concreteness ratings for 40 thousand generally known English word lemmas. Behav. Res. Methods 46, 904\u2013911 (2014)","journal-title":"Behav. Res. Methods"},{"doi-asserted-by":"crossref","unstructured":"Cai, D., Wang, Y., Liu, L., Shi, S.: Recent advances in retrieval-augmented text generation. In: Proceedings of the 45th International ACM SIGIR Conference on Research and Development in Information Retrieval, pp. 3417\u20133419 (2022)","key":"17_CR6","DOI":"10.1145\/3477495.3532682"},{"unstructured":"Chen, X., et al.: Microsoft COCO captions: data collection and evaluation server. arXiv preprint arXiv:1504.00325 (2015)","key":"17_CR7"},{"unstructured":"Choi, J., Choi, Y., Kim, Y., Kim, J., Yoon, S.: Custom-edit: text-guided image editing with customized diffusion models. arXiv preprint arXiv:2305.15779 (2023)","key":"17_CR8"},{"unstructured":"Chung, H.W., et\u00a0al.: Scaling instruction-finetuned language models. arXiv preprint arXiv:2210.11416 (2022)","key":"17_CR9"},{"key":"17_CR10","doi-asserted-by":"publisher","first-page":"57674","DOI":"10.1109\/ACCESS.2023.3283772","volume":"11","author":"Y Deng","year":"2023","unstructured":"Deng, Y., Wu, N., Qiu, C., Luo, Y., Chen, Y.: MixGAN-TTS: efficient and stable speech synthesis based on diffusion model. IEEE Access 11, 57674\u201357682 (2023)","journal-title":"IEEE Access"},{"unstructured":"Dhariwal, P., Nichol, A.: Diffusion models beat GANs on image synthesis. In: Advances in Neural Information Processing Systems 34, pp. 8780\u20138794 (2021)","key":"17_CR11"},{"unstructured":"Du, Y., Li, S., Mordatch, I.: Compositional visual generation with energy based models. In: Advances in Neural Information Processing Systems 33, pp. 6637\u20136647 (2020)","key":"17_CR12"},{"unstructured":"Gallego, V.: Personalizing text-to-image generation via aesthetic gradients. arXiv preprint arXiv:2209.12330 (2022)","key":"17_CR13"},{"unstructured":"Gu, J., Zhai, S., Zhang, Y., Liu, L., Susskind, J.M.: BOOT: data-free distillation of denoising diffusion models with bootstrapping. In: ICML 2023 Workshop on Structured Probabilistic Inference and Generative Modeling (2023)","key":"17_CR14"},{"unstructured":"Hertz, A., Mokady, R., Tenenbaum, J., Aberman, K., Pritch, Y., Cohen-Or, D.: Prompt-to-prompt image editing with cross attention control (2022)","key":"17_CR15"},{"unstructured":"Heusel, M., Ramsauer, H., Unterthiner, T., Nessler, B., Hochreiter, S.: GANs trained by a two time-scale update rule converge to a local Nash equilibrium. In: Advances in Neural Information Processing Systems 30 (2017)","key":"17_CR16"},{"unstructured":"Ho, J., Jain, A., Abbeel, P.: Denoising diffusion probabilistic models. In: Advances in Neural Information Processing Systems 33, pp. 6840\u20136851 (2020)","key":"17_CR17"},{"unstructured":"Ho, J., Salimans, T.: Classifier-free diffusion guidance. arXiv preprint arXiv:2207.12598 (2022)","key":"17_CR18"},{"doi-asserted-by":"crossref","unstructured":"Jeong, J.W., Wang, X.J., Lee, D.H.: Towards measuring the visualness of a concept. In: Proceedings of the 21st ACM International Conference on Information and Knowledge Management, pp. 2415\u20132418 (2012)","key":"17_CR19","DOI":"10.1145\/2396761.2398655"},{"issue":"3","key":"17_CR20","doi-asserted-by":"publisher","first-page":"535","DOI":"10.1109\/TBDATA.2019.2921572","volume":"7","author":"J Johnson","year":"2019","unstructured":"Johnson, J., Douze, M., J\u00e9gou, H.: Billion-scale similarity search with GPUs. IEEE Trans. Big Data 7(3), 535\u2013547 (2019)","journal-title":"IEEE Trans. Big Data"},{"doi-asserted-by":"crossref","unstructured":"Karnewar, A., Vedaldi, A., Novotny, D., Mitra, N.J.: HoloDiffusion: training a 3D diffusion model using 2D images. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 18423\u201318433 (2023)","key":"17_CR21","DOI":"10.1109\/CVPR52729.2023.01767"},{"unstructured":"Karras, T., Aittala, M., Aila, T., Laine, S.: Elucidating the design space of diffusion-based generative models. In: Advances in Neural Information Processing Systems 35, pp. 26565\u201326577 (2022)","key":"17_CR22"},{"unstructured":"Kirstain, Y., Polyak, A., Singer, U., Matiana, S., Penna, J., Levy, O.: Pick-a-Pic: an open dataset of user preferences for text-to-image generation. arXiv preprint arXiv:2305.01569 (2023)","key":"17_CR23"},{"unstructured":"Kong, Z., Ping, W.: On fast sampling of diffusion probabilistic models. In: ICML Workshop on Invertible Neural Networks, Normalizing Flows, and Explicit Likelihood Models (2021)","key":"17_CR24"},{"doi-asserted-by":"crossref","unstructured":"Kumari, N., Zhang, B., Zhang, R., Shechtman, E., Zhu, J.Y.: Multi-concept customization of text-to-image diffusion. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 1931\u20131941 (2023)","key":"17_CR25","DOI":"10.1109\/CVPR52729.2023.00192"},{"issue":"1","key":"17_CR26","doi-asserted-by":"publisher","first-page":"50","DOI":"10.1109\/TKDE.2020.2981314","volume":"34","author":"J Li","year":"2020","unstructured":"Li, J., Sun, A., Han, J., Li, C.: A survey on deep learning for named entity recognition. IEEE Trans. Knowl. Data Eng. 34(1), 50\u201370 (2020)","journal-title":"IEEE Trans. Knowl. Data Eng."},{"doi-asserted-by":"crossref","unstructured":"Liu, J., Li, C., Ren, Y., Chen, F., Zhao, Z.: DiffSinger: singing voice synthesis via shallow diffusion mechanism. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol.\u00a036, pp. 11020\u201311028 (2022)","key":"17_CR27","DOI":"10.1609\/aaai.v36i10.21350"},{"unstructured":"Liu, N., Li, S., Du, Y., Tenenbaum, J., Torralba, A.: Learning to compose visual relations. In: Advances in Neural Information Processing Systems 34, pp. 23166\u201323178 (2021)","key":"17_CR28"},{"key":"17_CR29","doi-asserted-by":"publisher","first-page":"423","DOI":"10.1007\/978-3-031-19790-1_26","volume-title":"European Conference on Computer Vision 2022","author":"N Liu","year":"2022","unstructured":"Liu, N., Li, S., Du, Y., Torralba, A., Tenenbaum, J.B.: Compositional visual generation with composable diffusion models. In: Avidan, S., Brostow, G., Ciss\u00e9, M., Farinella, G.M., Hassner, T. (eds.) ECCV 2022. LNCS, vol. 13677, pp. 423\u2013439. Springer, Cham (2022). https:\/\/doi.org\/10.1007\/978-3-031-19790-1_26"},{"unstructured":"Liu, X., Zhang, X., Ma, J., Peng, J., et\u00a0al.: InstaFlow: one step is enough for high-quality diffusion-based text-to-image generation. In: The Twelfth International Conference on Learning Representations (2023)","key":"17_CR30"},{"unstructured":"Liu, Y., et al.: RoBERTa: a robustly optimized BERT pretraining approach. arXiv preprint arXiv:1907.11692 (2019)","key":"17_CR31"},{"key":"17_CR32","doi-asserted-by":"publisher","first-page":"341","DOI":"10.1162\/tacl_a_00232","volume":"1","author":"A Louis","year":"2013","unstructured":"Louis, A., Nenkova, A.: What makes writing great? First experiments on article quality prediction in the science journalism domain. Trans. Assoc. Comput. Linguist. 1, 341\u2013352 (2013)","journal-title":"Trans. Assoc. Comput. Linguist."},{"unstructured":"Lu, C., Zhou, Y., Bao, F., Chen, J., Li, C., Zhu, J.: DPM-Solver: a fast ode solver for diffusion probabilistic model sampling in around 10 steps. In: Advances in Neural Information Processing Systems 35, pp. 5775\u20135787 (2022)","key":"17_CR33"},{"unstructured":"Luhman, E., Luhman, T.: Knowledge distillation in iterative generative models for improved sampling speed. arXiv preprint arXiv:2101.02388 (2021)","key":"17_CR34"},{"doi-asserted-by":"crossref","unstructured":"Meng, C., et al.: On distillation of guided diffusion models. In: CVPR (2023)","key":"17_CR35","DOI":"10.1109\/CVPR52729.2023.01374"},{"unstructured":"Paszke, A., et\u00a0al.: PyTorch: an imperative style, high-performance deep learning library. In: Advances in Neural Information Processing Systems 32 (2019)","key":"17_CR36"},{"unstructured":"von Platen, P., et al.: Diffusers: state-of-the-art diffusion models (2022). https:\/\/github.com\/huggingface\/diffusers","key":"17_CR37"},{"unstructured":"Podell, D., et al.: SDXL: improving latent diffusion models for high-resolution image synthesis, pp. 1\u201313 (2024)","key":"17_CR38"},{"unstructured":"Radford, A., et\u00a0al.: Learning transferable visual models from natural language supervision. In: International Conference on Machine Learning, pp. 8748\u20138763. PMLR (2021)","key":"17_CR39"},{"unstructured":"Ramesh, A., Dhariwal, P., Nichol, A., Chu, C., Chen, M.: Hierarchical text-conditional image generation with CLIP latents. arXiv preprint arXiv:2204.06125 (2022)","key":"17_CR40"},{"doi-asserted-by":"crossref","unstructured":"Rombach, R., Blattmann, A., Lorenz, D., Esser, P., Ommer, B.: High-resolution image synthesis with latent diffusion models. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 10684\u201310695 (2022)","key":"17_CR41","DOI":"10.1109\/CVPR52688.2022.01042"},{"doi-asserted-by":"crossref","unstructured":"Ruiz, N., Li, Y., Jampani, V., Pritch, Y., Rubinstein, M., Aberman, K.: DreamBooth: fine tuning text-to-image diffusion models for subject-driven generation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 22500\u201322510 (2023)","key":"17_CR42","DOI":"10.1109\/CVPR52729.2023.02155"},{"unstructured":"Saharia, C., et al.: Photorealistic text-to-image diffusion models with deep language understanding. In: Advances in Neural Information Processing Systems 35, pp. 36479\u201336494 (2022)","key":"17_CR43"},{"unstructured":"Salimans, T., Ho, J.: Progressive distillation for fast sampling of diffusion models (2022)","key":"17_CR44"},{"unstructured":"Sheynin, S., et al.: KNN-Diffusion: image generation via large-scale retrieval. arXiv preprint arXiv:2204.02849 (2022)","key":"17_CR45"},{"unstructured":"Sohl-Dickstein, J., Weiss, E., Maheswaranathan, N., Ganguli, S.: Deep unsupervised learning using nonequilibrium thermodynamics. In: International Conference on Machine Learning, pp. 2256\u20132265. PMLR (2015)","key":"17_CR46"},{"key":"17_CR47","series-title":"Advances in Intelligent Systems and Computing","doi-asserted-by":"publisher","first-page":"275","DOI":"10.1007\/978-3-030-71637-0_33","volume-title":"Advances in Cognitive Research, Artificial Intelligence and Neuroinformatics","author":"V Solovyev","year":"2021","unstructured":"Solovyev, V.: Concreteness\/abstractness concept: state of the art. In: Velichkovsky, B.M., Balaban, P.M., Ushakov, V.L. (eds.) Intercognsci 2020. AISC, vol. 1358, pp. 275\u2013283. Springer, Cham (2021). https:\/\/doi.org\/10.1007\/978-3-030-71637-0_33"},{"unstructured":"Song, J., Meng, C., Ermon, S.: Denoising diffusion implicit models. In: International Conference on Learning Representations (2020)","key":"17_CR48"},{"unstructured":"Song, Y., Dhariwal, P.: Improved techniques for training consistency models. In: The Twelfth International Conference on Learning Representations (2023)","key":"17_CR49"},{"unstructured":"Song, Y., Dhariwal, P., Chen, M., Sutskever, I.: Consistency models. In: Proceedings of Machine Learning Research, vol.\u00a0202. PMLR (2023). https:\/\/proceedings.mlr.press\/v202\/song23a.html","key":"17_CR50"},{"unstructured":"Song, Y., Sohl-Dickstein, J., Kingma, D.P., Kumar, A., Ermon, S., Poole, B.: Score-based generative modeling through stochastic differential equations. In: International Conference on Learning Representations (2020)","key":"17_CR51"},{"doi-asserted-by":"crossref","unstructured":"Verma, G., Rossi, R.A., Tensmeyer, C., Gu, J., Nenkova, A.: Learning the visualness of text using large vision-language models. In: The 2023 Conference on Empirical Methods in Natural Language Processing (2023)","key":"17_CR52","DOI":"10.18653\/v1\/2023.emnlp-main.147"},{"doi-asserted-by":"crossref","unstructured":"Verma, G., Rossi, R.A., Tensmeyer, C., Gu, J., Nenkova, A.: Learning the visualness of text using large vision-language models. arXiv preprint arXiv:2305.10434 (2023)","key":"17_CR53","DOI":"10.18653\/v1\/2023.emnlp-main.147"},{"doi-asserted-by":"crossref","unstructured":"Wang, T., et\u00a0al.: RODIN: a generative model for sculpting 3D digital avatars using diffusion. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 4563\u20134573 (2023)","key":"17_CR54","DOI":"10.1109\/CVPR52729.2023.00443"},{"doi-asserted-by":"crossref","unstructured":"Wang, Z.J., Montoya, E., Munechika, D., Yang, H., Hoover, B., Chau, D.H.: DiffusionDB: a large-scale prompt gallery dataset for text-to-image generative models. arXiv preprint arXiv:2210.14896 (2022)","key":"17_CR55","DOI":"10.18653\/v1\/2023.acl-long.51"},{"doi-asserted-by":"crossref","unstructured":"Xu, X., Wang, Z., Zhang, G., Wang, K., Shi, H.: Versatile diffusion: text, images and variations all in one diffusion model. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 7754\u20137765 (2023)","key":"17_CR56","DOI":"10.1109\/ICCV51070.2023.00713"},{"doi-asserted-by":"crossref","unstructured":"Yasuda, Y., Toda, T.: Text-to-speech synthesis based on latent variable conversion using diffusion probabilistic model and variational autoencoder. In: ICASSP 2023-2023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp.\u00a01\u20135. IEEE (2023)","key":"17_CR57","DOI":"10.1109\/ICASSP49357.2023.10094298"},{"unstructured":"Zhang, K., Yang, X., Wang, W.Y., Li, L.: ReDi: efficient learning-free diffusion inference via trajectory retrieval. In: Proceedings of the 40th International Conference on Machine Learning. Proceedings of Machine Learning Research, vol.\u00a0202, pp. 41770\u201341785. PMLR (2023). https:\/\/proceedings.mlr.press\/v202\/zhang23as.html","key":"17_CR58"},{"unstructured":"Zhang, Q., Chen, Y.: Fast sampling of diffusion models with exponential integrator. In: NeurIPS 2022 Workshop on Score-Based Methods (2022)","key":"17_CR59"},{"unstructured":"Zhang, Q., Chen, Y.: Fast sampling of diffusion models with exponential integrator. In: The Eleventh International Conference on Learning Representations (2023). https:\/\/openreview.net\/forum?id=Loek7hfb46P","key":"17_CR60"},{"doi-asserted-by":"crossref","unstructured":"Zhang, Y., et al.: Inversion-based style transfer with diffusion models. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 10146\u201310156 (2023)","key":"17_CR61","DOI":"10.1109\/CVPR52729.2023.00978"},{"unstructured":"Zhao, W., Bai, L., Rao, Y., Zhou, J., Lu, J.: UniPC: a unified predictor-corrector framework for fast sampling of diffusion models. arXiv preprint arXiv:2302.04867 (2023)","key":"17_CR62"},{"unstructured":"Zheng, H., Nie, W., Vahdat, A., Azizzadenesheli, K., Anandkumar, A.: Fast sampling of diffusion models via operator learning. In: International Conference on Machine Learning, pp. 42390\u201342402. PMLR (2023)","key":"17_CR63"},{"doi-asserted-by":"crossref","unstructured":"Zhou, L., Du, Y., Wu, J.: 3D shape generation and completion through point-voxel diffusion. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 5826\u20135835 (2021)","key":"17_CR64","DOI":"10.1109\/ICCV48922.2021.00577"},{"doi-asserted-by":"crossref","unstructured":"Zhou, Y., Liu, B., Zhu, Y., Yang, X., Chen, C., Xu, J.: Shifted diffusion for text-to-image generation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 10157\u201310166 (2023)","key":"17_CR65","DOI":"10.1109\/CVPR52729.2023.00979"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2024"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-73202-7_17","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,11,20]],"date-time":"2024-11-20T15:10:06Z","timestamp":1732115406000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-73202-7_17"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,11,21]]},"ISBN":["9783031732010","9783031732027"],"references-count":65,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-73202-7_17","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2024,11,21]]},"assertion":[{"value":"21 November 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Milan","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 September 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4 October 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2024.ecva.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}