{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,7]],"date-time":"2026-03-07T19:51:31Z","timestamp":1772913091432,"version":"3.50.1"},"publisher-location":"Cham","reference-count":62,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031733468","type":"print"},{"value":"9783031733475","type":"electronic"}],"license":[{"start":{"date-parts":[[2024,10,29]],"date-time":"2024-10-29T00:00:00Z","timestamp":1730160000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,10,29]],"date-time":"2024-10-29T00:00:00Z","timestamp":1730160000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-73347-5_23","type":"book-chapter","created":{"date-parts":[[2024,10,28]],"date-time":"2024-10-28T09:15:43Z","timestamp":1730106943000},"page":"404-421","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":3,"title":["Source Prompt Disentangled Inversion for\u00a0Boosting Image Editability with\u00a0Diffusion Models"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-0001-9281","authenticated-orcid":false,"given":"Ruibin","family":"Li","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0009-0003-5761-1777","authenticated-orcid":false,"given":"Ruihuang","family":"Li","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9831-2202","authenticated-orcid":false,"given":"Song","family":"Guo","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2078-4215","authenticated-orcid":false,"given":"Lei","family":"Zhang","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,10,29]]},"reference":[{"key":"23_CR1","doi-asserted-by":"crossref","unstructured":"Avrahami, O., Lischinski, D., Fried, O.: Blended diffusion for text-driven editing of natural images. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 18208\u201318218 (2022)","DOI":"10.1109\/CVPR52688.2022.01767"},{"key":"23_CR2","doi-asserted-by":"crossref","unstructured":"Blattmann, A., et al.: Align your latents: high-resolution video synthesis with latent diffusion models. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 22563\u201322575 (2023)","DOI":"10.1109\/CVPR52729.2023.02161"},{"key":"23_CR3","doi-asserted-by":"crossref","unstructured":"Brooks, T., Holynski, A., Efros, A.A.: Instructpix2pix: learning to follow image editing instructions. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 18392\u201318402 (2023)","DOI":"10.1109\/CVPR52729.2023.01764"},{"key":"23_CR4","doi-asserted-by":"crossref","unstructured":"Cao, M., Wang, X., Qi, Z., Shan, Y., Qie, X., Zheng, Y.: Masactrl: tuning-free mutual self-attention control for consistent image synthesis and editing. arXiv preprint arXiv:2304.08465 (2023)","DOI":"10.1109\/ICCV51070.2023.02062"},{"key":"23_CR5","doi-asserted-by":"crossref","unstructured":"Caron, M., et al.: Emerging properties in self-supervised vision transformers. 
In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 9650\u20139660 (2021)","DOI":"10.1109\/ICCV48922.2021.00951"},{"key":"23_CR6","doi-asserted-by":"crossref","unstructured":"Chai, W., Guo, X., Wang, G., Lu, Y.: Stablevideo: text-driven consistency-aware diffusion video editing. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 23040\u201323050 (2023)","DOI":"10.1109\/ICCV51070.2023.02106"},{"key":"23_CR7","unstructured":"Cho, H., Lee, J., Kim, S.B., Oh, T.H., Jeong, Y.: Noise map guidance: inversion with spatial context for real image editing. arXiv preprint arXiv:2402.04625 (2024)"},{"key":"23_CR8","unstructured":"Choi, J., Choi, Y., Kim, Y., Kim, J., Yoon, S.: Custom-edit: text-guided image editing with customized diffusion models. arXiv preprint arXiv:2305.15779 (2023)"},{"key":"23_CR9","unstructured":"Elarabawy, A., Kamath, H., Denton, S.: Direct inversion: optimization-free text-driven real image editing with diffusion models. arXiv preprint arXiv:2211.07825 (2022)"},{"key":"23_CR10","unstructured":"Epstein, D., Jabri, A., Poole, B., Efros, A., Holynski, A.: Diffusion self-guidance for controllable image generation. Adv. Neural Inf. Process. Syst. 36 (2024)"},{"key":"23_CR11","unstructured":"Gal, R., et al.: An image is worth one word: personalizing text-to-image generation using textual inversion. arXiv preprint arXiv:2208.01618 (2022)"},{"key":"23_CR12","unstructured":"Guo, Y., et al.: Animatediff: animate your personalized text-to-image diffusion models without specific tuning. arXiv preprint arXiv:2307.04725 (2023)"},{"key":"23_CR13","unstructured":"Han, L., et\u00a0al.: Improving negative-prompt inversion via proximal guidance. arXiv preprint arXiv:2306.05414 (2023)"},{"key":"23_CR14","unstructured":"Hertz, A., Mokady, R., Tenenbaum, J., Aberman, K., Pritch, Y., Cohen-Or, D.: Prompt-to-prompt image editing with cross attention control. arXiv preprint arXiv:2208.01626 (2022)"},{"key":"23_CR15","first-page":"6840","volume":"33","author":"J Ho","year":"2020","unstructured":"Ho, J., Jain, A., Abbeel, P.: Denoising diffusion probabilistic models. Adv. Neural. Inf. Process. Syst. 33, 6840\u20136851 (2020)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"23_CR16","unstructured":"Ho, J., Salimans, T.: Classifier-free diffusion guidance. arXiv preprint arXiv:2207.12598 (2022)"},{"key":"23_CR17","unstructured":"Hu, E.J., et al.: Lora: low-rank adaptation of large language models. arXiv preprint arXiv:2106.09685 (2021)"},{"key":"23_CR18","unstructured":"Hu, L., Gao, X., Zhang, P., Sun, K., Zhang, B., Bo, L.: Animate anyone: consistent and controllable image-to-video synthesis for character animation. arXiv preprint arXiv:2311.17117 (2023)"},{"key":"23_CR19","unstructured":"Huang, Z., Wu, T., Jiang, Y., Chan, K.C., Liu, Z.: Reversion: diffusion-based relation inversion from images. arXiv preprint arXiv:2303.13495 (2023)"},{"key":"23_CR20","doi-asserted-by":"crossref","unstructured":"Huberman-Spiegelglas, I., Kulikov, V., Michaeli, T.: An edit friendly ddpm noise space: inversion and manipulations. arXiv preprint arXiv:2304.06140 (2023)","DOI":"10.1109\/CVPR52733.2024.01185"},{"key":"23_CR21","unstructured":"Ju, X., Zeng, A., Bian, Y., Liu, S., Xu, Q.: Direct inversion: boosting diffusion-based editing with 3 lines of code. arXiv preprint arXiv:2310.01506 (2023)"},{"key":"23_CR22","doi-asserted-by":"crossref","unstructured":"Kawar, B., et al.: Imagic: text-based real image editing with diffusion models. 
In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 6007\u20136017 (2023)","DOI":"10.1109\/CVPR52729.2023.00582"},{"key":"23_CR23","doi-asserted-by":"crossref","unstructured":"Kumari, N., Zhang, B., Zhang, R., Shechtman, E., Zhu, J.Y.: Multi-concept customization of text-to-image diffusion. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 1931\u20131941 (2023)","DOI":"10.1109\/CVPR52729.2023.00192"},{"key":"23_CR24","unstructured":"Li, S., et al.: Stylediffusion: prompt-embedding inversion for text-based editing. arXiv preprint arXiv:2303.15649 (2023)"},{"key":"23_CR25","doi-asserted-by":"crossref","unstructured":"Li, Z., Cao, M., Wang, X., Qi, Z., Cheng, M.M., Shan, Y.: Photomaker: customizing realistic human photos via stacked id embedding. In: IEEE Conference on Computer Vision and Pattern Recognition (CVPR) (2024)","DOI":"10.1109\/CVPR52733.2024.00825"},{"key":"23_CR26","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"740","DOI":"10.1007\/978-3-319-10602-1_48","volume-title":"Computer Vision \u2013 ECCV 2014","author":"T-Y Lin","year":"2014","unstructured":"Lin, T.-Y., et al.: Microsoft COCO: common objects in context. In: Fleet, D., Pajdla, T., Schiele, B., Tuytelaars, T. (eds.) ECCV 2014. LNCS, vol. 8693, pp. 740\u2013755. Springer, Cham (2014). https:\/\/doi.org\/10.1007\/978-3-319-10602-1_48"},{"key":"23_CR27","doi-asserted-by":"crossref","unstructured":"Liu, R., Wu, R., Van\u00a0Hoorick, B., Tokmakov, P., Zakharov, S., Vondrick, C.: Zero-1-to-3: zero-shot one image to 3d object. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 9298\u20139309 (2023)","DOI":"10.1109\/ICCV51070.2023.00853"},{"key":"23_CR28","unstructured":"Liu, Y., et al.: Syncdreamer: generating multiview-consistent images from a single-view image. arXiv preprint arXiv:2309.03453 (2023)"},{"key":"23_CR29","unstructured":"Meiri, B., Samuel, D., Darshan, N., Chechik, G., Avidan, S., Ben-Ari, R.: Fixed-point inversion for text-to-image diffusion models. arXiv preprint arXiv:2312.12540 (2023)"},{"key":"23_CR30","unstructured":"Meng, C., et al.: Sdedit: guided image synthesis and editing with stochastic differential equations. arXiv preprint arXiv:2108.01073 (2021)"},{"key":"23_CR31","unstructured":"Miyake, D., Iohara, A., Saito, Y., Tanaka, T.: Negative-prompt inversion: fast image inversion for editing with text-guided diffusion models. arXiv preprint arXiv:2305.16807 (2023)"},{"key":"23_CR32","doi-asserted-by":"crossref","unstructured":"Mokady, R., Hertz, A., Aberman, K., Pritch, Y., Cohen-Or, D.: Null-text inversion for editing real images using guided diffusion models. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 6038\u20136047 (2023)","DOI":"10.1109\/CVPR52729.2023.00585"},{"key":"23_CR33","doi-asserted-by":"crossref","unstructured":"Mou, C., et al.: T2i-adapter: learning adapters to dig out more controllable ability for text-to-image diffusion models. arXiv preprint arXiv:2302.08453 (2023)","DOI":"10.1609\/aaai.v38i5.28226"},{"key":"23_CR34","unstructured":"Nichol, A., et al.: Glide: towards photorealistic image generation and editing with text-guided diffusion models. arXiv preprint arXiv:2112.10741 (2021)"},{"key":"23_CR35","doi-asserted-by":"crossref","unstructured":"Pan, Z., Gherardi, R., Xie, X., Huang, S.: Effective real image editing with accelerated iterative diffusion inversion. 
In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 15912\u201315921 (2023)","DOI":"10.1109\/ICCV51070.2023.01458"},{"key":"23_CR36","doi-asserted-by":"crossref","unstructured":"Parmar, G., Kumar\u00a0Singh, K., Zhang, R., Li, Y., Lu, J., Zhu, J.Y.: Zero-shot image-to-image translation. In: ACM SIGGRAPH 2023 Conference Proceedings, pp. 1\u201311 (2023)","DOI":"10.1145\/3588432.3591513"},{"key":"23_CR37","doi-asserted-by":"crossref","unstructured":"Peebles, W., Xie, S.: Scalable diffusion models with transformers. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 4195\u20134205 (2023)","DOI":"10.1109\/ICCV51070.2023.00387"},{"key":"23_CR38","unstructured":"Podell, D., et al.: Sdxl: improving latent diffusion models for high-resolution image synthesis. arXiv preprint arXiv:2307.01952 (2023)"},{"key":"23_CR39","unstructured":"Poole, B., Jain, A., Barron, J.T., Mildenhall, B.: Dreamfusion: text-to-3d using 2d diffusion. arXiv preprint arXiv:2209.14988 (2022)"},{"key":"23_CR40","unstructured":"Radford, A., et\u00a0al.: Learning transferable visual models from natural language supervision. In: International Conference on Machine Learning, pp. 8748\u20138763. PMLR (2021)"},{"key":"23_CR41","unstructured":"Ramesh, A., Dhariwal, P., Nichol, A., Chu, C., Chen, M.: Hierarchical text-conditional image generation with clip latents, 1(2), 3 (2022). arXiv preprint arXiv:2204.06125"},{"key":"23_CR42","doi-asserted-by":"crossref","unstructured":"Rombach, R., Blattmann, A., Lorenz, D., Esser, P., Ommer, B.: High-resolution image synthesis with latent diffusion models. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 10684\u201310695 (2022)","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"23_CR43","doi-asserted-by":"crossref","unstructured":"Ruiz, N., Li, Y., Jampani, V., Pritch, Y., Rubinstein, M., Aberman, K.: Dreambooth: fine tuning text-to-image diffusion models for subject-driven generation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 22500\u201322510 (2023)","DOI":"10.1109\/CVPR52729.2023.02155"},{"key":"23_CR44","first-page":"36479","volume":"35","author":"C Saharia","year":"2022","unstructured":"Saharia, C., et al.: Photorealistic text-to-image diffusion models with deep language understanding. Adv. Neural. Inf. Process. Syst. 35, 36479\u201336494 (2022)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"23_CR45","doi-asserted-by":"crossref","unstructured":"Shi, Y., Xue, C., Pan, J., Zhang, W., Tan, V.Y., Bai, S.: Dragdiffusion: harnessing diffusion models for interactive point-based image editing. arXiv preprint arXiv:2306.14435 (2023)","DOI":"10.1109\/CVPR52733.2024.00844"},{"key":"23_CR46","unstructured":"Song, J., Meng, C., Ermon, S.: Denoising diffusion implicit models. In: International Conference on Learning Representations (2020)"},{"key":"23_CR47","unstructured":"Song, Y., Sohl-Dickstein, J., Kingma, D.P., Kumar, A., Ermon, S., Poole, B.: Score-based generative modeling through stochastic differential equations. arXiv preprint arXiv:2011.13456 (2020)"},{"key":"23_CR48","doi-asserted-by":"crossref","unstructured":"Tewel, Y., Gal, R., Chechik, G., Atzmon, Y.: Key-locked rank one editing for text-to-image personalization. In: ACM SIGGRAPH 2023 Conference Proceedings, pp. 
1\u201311 (2023)","DOI":"10.1145\/3588432.3591506"},{"key":"23_CR49","unstructured":"Tsaban, L., Passos, A.: Ledits: real image editing with ddpm inversion and semantic guidance. arXiv preprint arXiv:2307.00522 (2023)"},{"key":"23_CR50","doi-asserted-by":"crossref","unstructured":"Tumanyan, N., Geyer, M., Bagon, S., Dekel, T.: Plug-and-play diffusion features for text-driven image-to-image translation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 1921\u20131930 (2023)","DOI":"10.1109\/CVPR52729.2023.00191"},{"key":"23_CR51","unstructured":"Voronov, A., Khoroshikh, M., Babenko, A., Ryabinin, M.: Is this loss informative? faster text-to-image customization by tracking objective dynamics. Adv. Neural Inf. Process. Syst. 36 (2024)"},{"key":"23_CR52","doi-asserted-by":"crossref","unstructured":"Wallace, B., Gokul, A., Naik, N.: Edict: exact diffusion inversion via coupled transformations. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 22532\u201322541 (2023)","DOI":"10.1109\/CVPR52729.2023.02158"},{"key":"23_CR53","unstructured":"Wang, Q., Bai, X., Wang, H., Qin, Z., Chen, A.: Instantid: zero-shot identity-preserving generation in seconds. arXiv preprint arXiv:2401.07519 (2024)"},{"key":"23_CR54","doi-asserted-by":"crossref","unstructured":"Wang, Z., Zhao, L., Xing, W.: Stylediffusion: controllable disentangled style transfer via diffusion models. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 7677\u20137689 (2023)","DOI":"10.1109\/ICCV51070.2023.00706"},{"key":"23_CR55","doi-asserted-by":"crossref","unstructured":"Wei, Y., Zhang, Y., Ji, Z., Bai, J., Zhang, L., Zuo, W.: Elite: encoding visual concepts into textual embeddings for customized text-to-image generation. arXiv preprint arXiv:2302.13848 (2023)","DOI":"10.1109\/ICCV51070.2023.01461"},{"key":"23_CR56","unstructured":"Xu, S., Huang, Y., Pan, J., Ma, Z., Chai, J.: Inversion-free image editing with natural language. arXiv preprint arXiv:2312.04965 (2023)"},{"key":"23_CR57","doi-asserted-by":"crossref","unstructured":"Yang, B., et al.: Paint by example: exemplar-based image editing with diffusion models. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 18381\u201318391 (2023)","DOI":"10.1109\/CVPR52729.2023.01763"},{"key":"23_CR58","unstructured":"Ye, H., Zhang, J., Liu, S., Han, X., Yang, W.: Ip-adapter: text compatible image prompt adapter for text-to-image diffusion models. arXiv preprint arXiv:2308.06721 (2023)"},{"key":"23_CR59","doi-asserted-by":"crossref","unstructured":"Zhang, G., Lewis, J.P., Kleijn, W.B.: Exact diffusion inversion via bi-directional integration approximation. arXiv preprint arXiv:2307.10829 (2023)","DOI":"10.36227\/techrxiv.23713515.v1"},{"key":"23_CR60","doi-asserted-by":"crossref","unstructured":"Zhang, L., Rao, A., Agrawala, M.: Adding conditional control to text-to-image diffusion models. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 3836\u20133847 (2023)","DOI":"10.1109\/ICCV51070.2023.00355"},{"key":"23_CR61","unstructured":"Zhang, Y., Xing, J., Lo, E., Jia, J.: Real-world image variation by aligning diffusion inversion chain. Adv. Neural Inf. Process. Syst. 36 (2024)"},{"key":"23_CR62","doi-asserted-by":"crossref","unstructured":"Zhang, Y., et al.: Inversion-based style transfer with diffusion models. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 
10146\u201310156 (2023)","DOI":"10.1109\/CVPR52729.2023.00978"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2024"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-73347-5_23","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,10,28]],"date-time":"2024-10-28T09:51:00Z","timestamp":1730109060000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-73347-5_23"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10,29]]},"ISBN":["9783031733468","9783031733475"],"references-count":62,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-73347-5_23","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,10,29]]},"assertion":[{"value":"29 October 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Milan","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 September 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4 October 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2024.ecva.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}
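
The record above appears to be a work item returned by the Crossref REST API ("message-type":"work", "source":"Crossref"). As a minimal sketch, assuming the record is fetched from the public endpoint https://api.crossref.org/works/{DOI} (the field names used below are taken directly from the record shown), the key bibliographic fields can be read like this:

import json
import urllib.request

DOI = "10.1007/978-3-031-73347-5_23"
URL = f"https://api.crossref.org/works/{DOI}"

# Fetch the work record; the payload is wrapped in an envelope:
# {"status": "ok", "message-type": "work", "message": {...}}
with urllib.request.urlopen(URL) as resp:
    record = json.load(resp)

work = record["message"]

# "title" and "container-title" are lists of strings; "author" is a list of
# {"given", "family", ...} dicts; "references-count" is an integer.
title = work["title"][0]
authors = [f'{a.get("given", "")} {a["family"]}' for a in work["author"]]
container = work.get("container-title", [])
n_refs = work.get("references-count")

print(title)
print(", ".join(authors))
print(" / ".join(container), "-", n_refs, "references -", work["DOI"])

This only reads top-level fields; the "reference" array in the record holds the 62 per-reference entries (keys such as "key", "DOI", "unstructured") and can be iterated the same way.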