{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,8]],"date-time":"2026-06-08T15:04:14Z","timestamp":1780931054565,"version":"3.54.1"},"reference-count":54,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2026,12,1]],"date-time":"2026-12-01T00:00:00Z","timestamp":1796083200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2026,12,1]],"date-time":"2026-12-01T00:00:00Z","timestamp":1796083200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/legal\/tdmrep-license"},{"start":{"date-parts":[[2026,5,26]],"date-time":"2026-05-26T00:00:00Z","timestamp":1779753600000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"DOI":"10.13039\/501100011104","name":"UAB","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100011104","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100004377","name":"The Hong Kong PolyU","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100004377","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100004032","name":"Jilin University","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100004032","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Pattern Recognition"],"published-print":{"date-parts":[[2026,12]]},"DOI":"10.1016\/j.patcog.2026.114063","type":"journal-article","created":{"date-parts":[[2026,5,25]],"date-time":"2026-05-25T23:43:25Z","timestamp":1779752605000},"page":"114063","update-policy":"https:\/\/doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":0,"special_numbering":"PA","title":["Training-free image inversion for one-step diffusion models"],"prefix":"10.1016","volume":"180","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-5999-9064","authenticated-orcid":false,"given":"Tao","family":"Wu","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Senmao","family":"Li","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Yaxing","family":"Wang","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Shiqi","family":"Yang","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Kai","family":"Wang","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Joost","family":"van de Weijer","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"78","reference":[{"key":"10.1016\/j.patcog.2026.114063_b1","doi-asserted-by":"crossref","unstructured":"R. Rombach, A. Blattmann, D. Lorenz, P. Esser, B. Ommer, High-Resolution Image Synthesis With Latent Diffusion Models, in: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, CVPR, 2022, pp. 10684\u201310695.","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"10.1016\/j.patcog.2026.114063_b2","series-title":"Latent consistency models: Synthesizing high-resolution images with few-step inference","author":"Luo","year":"2023"},{"key":"10.1016\/j.patcog.2026.114063_b3","unstructured":"J. Song, C. Meng, S. Ermon, Denoising Diffusion Implicit Models, in: International Conference on Learning Representations, 2021."},{"key":"10.1016\/j.patcog.2026.114063_b4","article-title":"Adversarial diffusion distillation","author":"Sauer","year":"2024","journal-title":"ECCV"},{"key":"10.1016\/j.patcog.2026.114063_b5","doi-asserted-by":"crossref","unstructured":"G. Deutch, R. Gal, D. Garibi, O. Patashnik, D. Cohen-Or, Turboedit: Text-based image editing using few-step diffusion models, in: SIGGRAPH Asia 2024 Conference Papers, 2024, pp. 1\u201312.","DOI":"10.1145\/3680528.3687612"},{"key":"10.1016\/j.patcog.2026.114063_b6","series-title":"European Conference on Computer Vision","first-page":"365","article-title":"Turboedit: Instant text-based image editing","author":"Wu","year":"2025"},{"key":"10.1016\/j.patcog.2026.114063_b7","series-title":"European Conference on Computer Vision","first-page":"395","article-title":"Renoise: Real image inversion through iterative noising","author":"Garibi","year":"2024"},{"key":"10.1016\/j.patcog.2026.114063_b8","doi-asserted-by":"crossref","unstructured":"I. Huberman-Spiegelglas, V. Kulikov, T. Michaeli, An edit friendly ddpm noise space: Inversion and manipulations, in: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 2024, pp. 12469\u201312478.","DOI":"10.1109\/CVPR52733.2024.01185"},{"key":"10.1016\/j.patcog.2026.114063_b9","doi-asserted-by":"crossref","DOI":"10.1016\/j.patcog.2025.112041","article-title":"Point2pix-Zero: Point-driven refined diffusion for multi-object image editing","volume":"170","author":"Wang","year":"2026","journal-title":"Pattern Recognit."},{"key":"10.1016\/j.patcog.2026.114063_b10","article-title":"Prompt-to-prompt image editing with cross attention control","author":"Hertz","year":"2023","journal-title":"ICLR"},{"key":"10.1016\/j.patcog.2026.114063_b11","doi-asserted-by":"crossref","unstructured":"N. Tumanyan, M. Geyer, S. Bagon, T. Dekel, Plug-and-play diffusion features for text-driven image-to-image translation, in: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 2023, pp. 1921\u20131930.","DOI":"10.1109\/CVPR52729.2023.00191"},{"key":"10.1016\/j.patcog.2026.114063_b12","series-title":"Negative-prompt inversion: Fast image inversion for editing with text-guided diffusion models","author":"Miyake","year":"2023"},{"key":"10.1016\/j.patcog.2026.114063_b13","article-title":"Dynamic prompt learning: Addressing cross-attention leakage for text-based image editing","author":"Wang","year":"2023","journal-title":"NeurIPS"},{"key":"10.1016\/j.patcog.2026.114063_b14","series-title":"SwiftEdit: Lightning fast text-guided image editing via one-step diffusion","author":"Nguyen","year":"2024"},{"key":"10.1016\/j.patcog.2026.114063_b15","article-title":"Null-text inversion for editing real images using guided diffusion models","author":"Mokady","year":"2023","journal-title":"CVPR"},{"key":"10.1016\/j.patcog.2026.114063_b16","unstructured":"S. Li, J. van de Weijer, T. Hu, F.S. Khan, Q. Hou, Y. Wang, J. Yang, StyleDiffusion: Prompt-Embedding Inversion for Text-Based Editing, in: Computational Visual Media Conference, 2024."},{"key":"10.1016\/j.patcog.2026.114063_b17","article-title":"Real-world image variation by aligning diffusion inversion chain","volume":"36","author":"Zhang","year":"2024","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.patcog.2026.114063_b18","doi-asserted-by":"crossref","unstructured":"M.N. Everaert, A. Fitsios, M. Bocchio, S. Arpa, S. S\u00fcsstrunk, R. Achanta, Exploiting the signal-leak bias in diffusion models, in: Proceedings of the IEEE\/CVF Winter Conference on Applications of Computer Vision, 2024, pp. 4025\u20134034.","DOI":"10.1109\/WACV57701.2024.00398"},{"key":"10.1016\/j.patcog.2026.114063_b19","article-title":"Visual instruction tuning","volume":"36","author":"Liu","year":"2024","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.patcog.2026.114063_b20","doi-asserted-by":"crossref","unstructured":"H. Liu, C. Li, Y. Li, Y.J. Lee, Improved baselines with visual instruction tuning, in: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 2024, pp. 26296\u201326306.","DOI":"10.1109\/CVPR52733.2024.02484"},{"key":"10.1016\/j.patcog.2026.114063_b21","series-title":"Direct inversion: Boosting diffusion-based editing with 3 lines of code","author":"Ju","year":"2023"},{"key":"10.1016\/j.patcog.2026.114063_b22","article-title":"Photorealistic text-to-image diffusion models with deep language understanding","author":"Saharia","year":"2022","journal-title":"NeurIPS"},{"key":"10.1016\/j.patcog.2026.114063_b23","doi-asserted-by":"crossref","unstructured":"T. Yin, M. Gharbi, R. Zhang, E. Shechtman, F. Durand, W.T. Freeman, T. Park, One-step diffusion with distribution matching distillation, in: CVPR, 2024, pp. 6613\u20136623.","DOI":"10.1109\/CVPR52733.2024.00632"},{"key":"10.1016\/j.patcog.2026.114063_b24","article-title":"Diff-instruct: A universal approach for transferring knowledge from pre-trained diffusion models","volume":"36","author":"Luo","year":"2023","journal-title":"NeurIPS"},{"key":"10.1016\/j.patcog.2026.114063_b25","article-title":"SwiftBrush: One-step text-to-image diffusion model with variational score distillation","author":"Nguyen","year":"2024","journal-title":"CVPR"},{"key":"10.1016\/j.patcog.2026.114063_b26","unstructured":"T. Karras, M. Aittala, S. Laine, E. H\u00e4rk\u00f6nen, J. Hellsten, J. Lehtinen, T. Aila, Alias-free generative adversarial networks, in: NeurIPS, Vol. 34, 2021, pp. 852\u2013863."},{"key":"10.1016\/j.patcog.2026.114063_b27","doi-asserted-by":"crossref","DOI":"10.1016\/j.patcog.2024.111022","article-title":"FICE: Text-conditioned fashion-image editing with guided GAN inversion","volume":"158","author":"Pernu\u0161","year":"2025","journal-title":"Pattern Recognit."},{"key":"10.1016\/j.patcog.2026.114063_b28","series-title":"Auto-encoding variational bayes","author":"Kingma","year":"2013"},{"key":"10.1016\/j.patcog.2026.114063_b29","doi-asserted-by":"crossref","unstructured":"P. Esser, R. Rombach, B. Ommer, Taming transformers for high-resolution image synthesis, in: CVPR, 2021, pp. 12873\u201312883.","DOI":"10.1109\/CVPR46437.2021.01268"},{"key":"10.1016\/j.patcog.2026.114063_b30","unstructured":"A. Razavi, A. Van den Oord, O. Vinyals, Generating diverse high-fidelity images with vq-vae-2, in: NeurIPS, 2019."},{"key":"10.1016\/j.patcog.2026.114063_b31","unstructured":"A. Van Den Oord, N. Kalchbrenner, K. Kavukcuoglu, Pixel recurrent neural networks, in: ICML, 2016."},{"key":"10.1016\/j.patcog.2026.114063_b32","article-title":"Density estimation using real nvp","author":"Dinh","year":"2017","journal-title":"ICLR"},{"key":"10.1016\/j.patcog.2026.114063_b33","series-title":"Consistency Models","first-page":"32211","author":"Song","year":"2023"},{"key":"10.1016\/j.patcog.2026.114063_b34","series-title":"Latent consistency models: Synthesizing high-resolution images with few-step inference","author":"Luo","year":"2023"},{"key":"10.1016\/j.patcog.2026.114063_b35","doi-asserted-by":"crossref","unstructured":"C. Tang, K. Wang, J. van de Weijer, IterInv: Iterative Inversion for Pixel-Level T2I Models, in: Neurips 2023 workshop on Diffusion Models, 2023.","DOI":"10.1109\/ICME57554.2024.10687547"},{"key":"10.1016\/j.patcog.2026.114063_b36","series-title":"LocInv: Localization-aware inversion for text-guided image editing","author":"Tang","year":"2024"},{"key":"10.1016\/j.patcog.2026.114063_b37","doi-asserted-by":"crossref","unstructured":"Y. Lin, Y.-W. Chen, Y.-H. Tsai, L. Jiang, M.-H. Yang, Text-driven image editing via learnable regions, in: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 2024, pp. 7059\u20137068.","DOI":"10.1109\/CVPR52733.2024.00674"},{"key":"10.1016\/j.patcog.2026.114063_b38","series-title":"KV inversion: KV embeddings learning for text-conditioned real image action editing","author":"Huang","year":"2023"},{"key":"10.1016\/j.patcog.2026.114063_b39","unstructured":"H. Chang, H. Zhang, J. Barber, A. Maschinot, J. Lezama, L. Jiang, M.-H. Yang, K. Murphy, W.T. Freeman, M. Rubinstein, et al., Muse: Text-To-Image Generation via Masked Generative Transformers, in: International Conference on Machine Learning, 2023."},{"key":"10.1016\/j.patcog.2026.114063_b40","series-title":"Hierarchical text-conditional image generation with clip latents","author":"Ramesh","year":"2022"},{"key":"10.1016\/j.patcog.2026.114063_b41","article-title":"Imagic: Text-based real image editing with diffusion models","author":"Kawar","year":"2023","journal-title":"CVPR"},{"key":"10.1016\/j.patcog.2026.114063_b42","series-title":"Direct inversion: Boosting diffusion-based editing with 3 lines of code","author":"Ju","year":"2023"},{"key":"10.1016\/j.patcog.2026.114063_b43","first-page":"8780","article-title":"Diffusion models beat gans on image synthesis","volume":"34","author":"Dhariwal","year":"2021","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.patcog.2026.114063_b44","series-title":"International Conference on Machine Learning","first-page":"8748","article-title":"Learning transferable visual models from natural language supervision","author":"Radford","year":"2021"},{"key":"10.1016\/j.patcog.2026.114063_b45","first-page":"5775","article-title":"Dpm-solver: A fast ode solver for diffusion probabilistic model sampling in around 10 steps","volume":"35","author":"Lu","year":"2022","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.patcog.2026.114063_b46","article-title":"Denoising diffusion implicit models","author":"Song","year":"2021","journal-title":"ICLR"},{"key":"10.1016\/j.patcog.2026.114063_b47","article-title":"Invertible consistency distillation for text-guided image editing in around 7 steps","author":"Starodubcev","year":"2024","journal-title":"NeurIPS"},{"key":"10.1016\/j.patcog.2026.114063_b48","doi-asserted-by":"crossref","unstructured":"N. Tumanyan, O. Bar-Tal, S. Bagon, T. Dekel, Splicing ViT Features for Semantic Appearance Transfer, in: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 2022, pp. 10748\u201310757.","DOI":"10.1109\/CVPR52688.2022.01048"},{"key":"10.1016\/j.patcog.2026.114063_b49","doi-asserted-by":"crossref","unstructured":"R. Zhang, P. Isola, A.A. Efros, E. Shechtman, O. Wang, The unreasonable effectiveness of deep features as a perceptual metric, in: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, 2018, pp. 586\u2013595.","DOI":"10.1109\/CVPR.2018.00068"},{"key":"10.1016\/j.patcog.2026.114063_b50","series-title":"The Thrity-Seventh Asilomar Conference on Signals, Systems & Computers, 2003","first-page":"1398","article-title":"Multiscale structural similarity for image quality assessment","volume":"Vol. 2","author":"Wang","year":"2003"},{"key":"10.1016\/j.patcog.2026.114063_b51","article-title":"Flowalign: Trajectory-regularized, inversion-free flow-based image editing","author":"Kim","year":"2026","journal-title":"ICLR"},{"key":"10.1016\/j.patcog.2026.114063_b52","series-title":"Decoupled weight decay regularization","author":"Loshchilov","year":"2017"},{"key":"10.1016\/j.patcog.2026.114063_b53","article-title":"Plug-and-play diffusion features for text-driven image-to-image translation","author":"Tumanyan","year":"2023","journal-title":"CVPR"},{"key":"10.1016\/j.patcog.2026.114063_b54","series-title":"Llama: Open and efficient foundation language models","author":"Touvron","year":"2023"}],"container-title":["Pattern Recognition"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0031320326010289?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0031320326010289?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2026,6,8]],"date-time":"2026-06-08T14:45:51Z","timestamp":1780929951000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S0031320326010289"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,12]]},"references-count":54,"alternative-id":["S0031320326010289"],"URL":"https:\/\/doi.org\/10.1016\/j.patcog.2026.114063","relation":{},"ISSN":["0031-3203"],"issn-type":[{"value":"0031-3203","type":"print"}],"subject":[],"published":{"date-parts":[[2026,12]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"Training-free image inversion for one-step diffusion models","name":"articletitle","label":"Article Title"},{"value":"Pattern Recognition","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.patcog.2026.114063","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2026 The Authors. Published by Elsevier Ltd.","name":"copyright","label":"Copyright"}],"article-number":"114063"}}