{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,8]],"date-time":"2026-02-08T06:35:06Z","timestamp":1770532506355,"version":"3.49.0"},"publisher-location":"Cham","reference-count":43,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031731150","type":"print"},{"value":"9783031731167","type":"electronic"}],"license":[{"start":{"date-parts":[[2024,10,31]],"date-time":"2024-10-31T00:00:00Z","timestamp":1730332800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,10,31]],"date-time":"2024-10-31T00:00:00Z","timestamp":1730332800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-73116-7_26","type":"book-chapter","created":{"date-parts":[[2024,10,30]],"date-time":"2024-10-30T15:15:38Z","timestamp":1730301338000},"page":"450-466","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":8,"title":["Video Editing via\u00a0Factorized Diffusion Distillation"],"prefix":"10.1007","author":[{"given":"Uriel","family":"Singer","sequence":"first","affiliation":[]},{"given":"Amit","family":"Zohar","sequence":"additional","affiliation":[]},{"given":"Yuval","family":"Kirstain","sequence":"additional","affiliation":[]},{"given":"Shelly","family":"Sheynin","sequence":"additional","affiliation":[]},{"given":"Adam","family":"Polyak","sequence":"additional","affiliation":[]},{"given":"Devi","family":"Parikh","sequence":"additional","affiliation":[]},{"given":"Yaniv","family":"Taigman","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,10,31]]},"reference":[{"key":"26_CR1","unstructured":"Bar-Tal, O., et al.: Lumiere: a space-time diffusion model for video generation. ArXiv abs\/2401.12945 (2024). https:\/\/api.semanticscholar.org\/CorpusID:267095113"},{"key":"26_CR2","doi-asserted-by":"crossref","unstructured":"Brooks, T., Holynski, A., Efros, A.A.: InstructPix2Pix: learning to follow image editing instructions. In: CVPR (2023)","DOI":"10.1109\/CVPR52729.2023.01764"},{"key":"26_CR3","unstructured":"Brooks, T., et al.: Video generation models as world simulators (2024). https:\/\/openai.com\/research\/video-generation-models-as-world-simulators"},{"key":"26_CR4","doi-asserted-by":"crossref","unstructured":"Ceylan, D., Huang, C.H.P., Mitra, N.J.: Pix2Video: video editing using image diffusion. In: 2023 IEEE\/CVF International Conference on Computer Vision (ICCV), pp. 23149\u201323160 (2023). https:\/\/api.semanticscholar.org\/CorpusID:257663916","DOI":"10.1109\/ICCV51070.2023.02121"},{"key":"26_CR5","unstructured":"Cheng, J., Xiao, T., He, T.: Consistent video-to-video transfer using synthetic dataset. ArXiv abs\/2311.00213 (2023). https:\/\/api.semanticscholar.org\/CorpusID:264833165"},{"key":"26_CR6","unstructured":"Cheng, J., Xiao, T., He, T.: Consistent video-to-video transfer using synthetic dataset. In: The Twelfth International Conference on Learning Representations (2024). https:\/\/openreview.net\/forum?id=IoKRezZMxF"},{"key":"26_CR7","unstructured":"Dai, X., et al.: Emu: enhancing image generation models using photogenic needles in a haystack (2023)"},{"key":"26_CR8","doi-asserted-by":"crossref","unstructured":"Esser, P., Chiu, J., Atighehchian, P., Granskog, J., Germanidis, A.: Structure and content-guided video synthesis with diffusion models. In: 2023 IEEE\/CVF International Conference on Computer Vision (ICCV), pp. 7312\u20137322 (2023). https:\/\/api.semanticscholar.org\/CorpusID:256615582","DOI":"10.1109\/ICCV51070.2023.00675"},{"key":"26_CR9","doi-asserted-by":"crossref","unstructured":"Gal, R., Patashnik, O., Maron, H., Chechik, G., Cohen-Or, D.: StyleGAN-NADA: CLIP-guided domain adaptation of image generators. arXiv preprint arXiv:2108.00946 (2021)","DOI":"10.1145\/3528223.3530164"},{"key":"26_CR10","unstructured":"Geyer, M., Bar-Tal, O., Bagon, S., Dekel, T.: TokenFlow: consistent diffusion features for consistent video editing. ArXiv abs\/2307.10373 (2023). https:\/\/api.semanticscholar.org\/CorpusID:259991741"},{"key":"26_CR11","doi-asserted-by":"crossref","unstructured":"Girdhar, R., et al.: Emu video: factorizing text-to-video generation by explicit image conditioning (2023)","DOI":"10.1007\/978-3-031-73033-7_12"},{"key":"26_CR12","doi-asserted-by":"crossref","unstructured":"Goodfellow, I.J., et al.: Generative adversarial networks. In: 2023 14th International Conference on Computing Communication and Networking Technologies (ICCCNT), pp.\u00a01\u20137 (2022). https:\/\/api.semanticscholar.org\/CorpusID:1033682","DOI":"10.1109\/ICCCNT56998.2023.10306417"},{"key":"26_CR13","unstructured":"Hertz, A., Mokady, R., Tenenbaum, J., Aberman, K., Pritch, Y., Cohen-Or, D.: Prompt-to-prompt image editing with cross attention control. arXiv preprint arXiv:2208.01626 (2022)"},{"key":"26_CR14","unstructured":"Hu, E.J., et al.: LoRA: low-rank adaptation of large language models. In: International Conference on Learning Representations (2022). https:\/\/openreview.net\/forum?id=nZeVKeeFYf9"},{"key":"26_CR15","unstructured":"Hu, L., Gao, X., Zhang, P., Sun, K., Zhang, B., Bo, L.: Animate anyone: consistent and controllable image-to-video synthesis for character animation. arXiv preprint arXiv:2311.17117 (2023)"},{"key":"26_CR16","doi-asserted-by":"crossref","unstructured":"Kara, O., Kurtkaya, B., Yesiltepe, H., Rehg, J.M., Yanardag, P.: RAVE: randomized noise shuffling for fast and consistent video editing with diffusion models. arXiv preprint arXiv:2312.04524 (2023)","DOI":"10.1109\/CVPR52733.2024.00622"},{"key":"26_CR17","doi-asserted-by":"crossref","unstructured":"Khachatryan, L., et al.: Text2Video-zero: text-to-image diffusion models are zero-shot video generators. arXiv preprint arXiv:2303.13439 (2023)","DOI":"10.1109\/ICCV51070.2023.01462"},{"key":"26_CR18","unstructured":"Kirstain, Y., Polyak, A., Singer, U., Matiana, S., Penna, J., Levy, O.: Pick-a-Pic: an open dataset of user preferences for text-to-image generation. In: Advances in Neural Information Processing Systems, vol. 36 (2024)"},{"key":"26_CR19","doi-asserted-by":"crossref","unstructured":"Li, X., Ma, C., Yang, X., Yang, M.H.: VidToMe: video token merging for zero-shot video editing. arXiv preprint arXiv:2312.10656 (2023)","DOI":"10.1109\/CVPR52733.2024.00715"},{"key":"26_CR20","unstructured":"Liang, F., et al.: FlowVid: taming imperfect optical flows for consistent video-to-video synthesis. ArXiv abs\/2312.17681 (2023). https:\/\/api.semanticscholar.org\/CorpusID:266690780"},{"key":"26_CR21","unstructured":"Lim, J.H., Ye, J.C.: Geometric GAN. arXiv preprint arXiv:1705.02894 (2017)"},{"key":"26_CR22","doi-asserted-by":"crossref","unstructured":"Lin, S., Liu, B., Li, J., Yang, X.: Common diffusion noise schedules and sample steps are flawed. In: Proceedings of the IEEE\/CVF Winter Conference on Applications of Computer Vision, pp. 5404\u20135411 (2024)","DOI":"10.1109\/WACV57701.2024.00532"},{"key":"26_CR23","doi-asserted-by":"crossref","unstructured":"Ma, H., et al.: MaskINT: video editing via interpolative non-autoregressive masked transformers. arxiv preprint (2023)","DOI":"10.1109\/CVPR52733.2024.00707"},{"key":"26_CR24","unstructured":"Meng, C., et al.: SDEdit: guided image synthesis and editing with stochastic differential equations. arXiv preprint arXiv:2108.01073 (2021)"},{"key":"26_CR25","doi-asserted-by":"crossref","unstructured":"Meng, C., et al.: On distillation of guided diffusion models. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 14297\u201314306 (2023)","DOI":"10.1109\/CVPR52729.2023.01374"},{"key":"26_CR26","unstructured":"Oquab, M., et\u00a0al.: DINOv2: learning robust visual features without supervision. arXiv preprint arXiv:2304.07193 (2023)"},{"key":"26_CR27","unstructured":"Poole, B., Jain, A., Barron, J.T., Mildenhall, B.: DreamFusion: text-to-3D using 2D diffusion. In: The Eleventh International Conference on Learning Representations (2023). https:\/\/openreview.net\/forum?id=FjNys5c7VyY"},{"key":"26_CR28","unstructured":"Radford, A., et\u00a0al.: Learning transferable visual models from natural language supervision. In: International Conference on Machine Learning, pp. 8748\u20138763. PMLR (2021)"},{"key":"26_CR29","doi-asserted-by":"crossref","unstructured":"Ruiz, N., Li, Y., Jampani, V., Pritch, Y., Rubinstein, M., Aberman, K.: DreamBooth: fine tuning text-to-image diffusion models for subject-driven generation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 22500\u201322510 (2023)","DOI":"10.1109\/CVPR52729.2023.02155"},{"key":"26_CR30","unstructured":"Sauer, A., Lorenz, D., Blattmann, A., Rombach, R.: Adversarial diffusion distillation. ArXiv abs\/2311.17042 (2023). https:\/\/api.semanticscholar.org\/CorpusID:265466173"},{"key":"26_CR31","doi-asserted-by":"crossref","unstructured":"Sauer, A., Lorenz, D., Blattmann, A., Rombach, R.: Adversarial diffusion distillation (2023)","DOI":"10.1007\/978-3-031-73016-0_6"},{"key":"26_CR32","doi-asserted-by":"crossref","unstructured":"Sheynin, S., et al.: Emu edit: precise image editing via recognition and generation tasks (2023)","DOI":"10.1109\/CVPR52733.2024.00847"},{"key":"26_CR33","unstructured":"Touvron, H., et\u00a0al.: Llama 2: open foundation and fine-tuned chat models. arXiv preprint arXiv:2307.09288 (2023)"},{"key":"26_CR34","unstructured":"Wang, W., et al.: Zero-shot video editing using off-the-shelf image diffusion models. arXiv preprint arXiv:2303.17599 (2023)"},{"key":"26_CR35","unstructured":"Wang, Y., et\u00a0al.: InternVid: a large-scale video-text dataset for multimodal understanding and generation. arXiv preprint arXiv:2307.06942 (2023)"},{"key":"26_CR36","unstructured":"Wu, B., et al.: Fairy: fast parallelized instruction-guided video-to-video synthesis. ArXiv abs\/2312.13834 (2023). https:\/\/api.semanticscholar.org\/CorpusID:266435967"},{"key":"26_CR37","doi-asserted-by":"crossref","unstructured":"Wu, J.Z., et al.: Tune-a-video: one-shot tuning of image diffusion models for text-to-video generation. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 7623\u20137633 (2023)","DOI":"10.1109\/ICCV51070.2023.00701"},{"key":"26_CR38","unstructured":"Wu, J.Z., et\u00a0al.: CVPR 2023 text guided video editing competition. arXiv preprint arXiv:2310.16003 (2023)"},{"key":"26_CR39","unstructured":"Yan, W., Brown, A., Abbeel, P., Girdhar, R., Azadi, S.: Motion-conditioned image animation for video editing. ArXiv abs\/2311.18827 (2023). https:\/\/api.semanticscholar.org\/CorpusID:265506378"},{"key":"26_CR40","doi-asserted-by":"crossref","unstructured":"Yang, S., Zhou, Y., Liu, Z., Loy, C.C.: Rerender a video: zero-shot text-guided video-to-video translation. In: ACM SIGGRAPH Asia 2023 Conference Proceedings (2023)","DOI":"10.1145\/3610548.3618160"},{"key":"26_CR41","unstructured":"Yang, S., Mou, C., Yu, J., Wang, Y., Meng, X., Zhang, J.: Neural video fields editing. arXiv preprint arXiv:2312.08882 (2023)"},{"key":"26_CR42","doi-asserted-by":"crossref","unstructured":"Yatim, D., Fridman, R., Tal, O.B., Kasten, Y., Dekel, T.: Space-time diffusion features for zero-shot text-driven motion transfer. arXiv preprint arXiv:2311.17009 (2023)","DOI":"10.1109\/CVPR52733.2024.00809"},{"key":"26_CR43","doi-asserted-by":"crossref","unstructured":"Zhang, L., Rao, A., Agrawala, M.: Adding conditional control to text-to-image diffusion models. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV), pp. 3836\u20133847, October 2023","DOI":"10.1109\/ICCV51070.2023.00355"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2024"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-73116-7_26","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,11,30]],"date-time":"2024-11-30T14:14:18Z","timestamp":1732976058000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-73116-7_26"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10,31]]},"ISBN":["9783031731150","9783031731167"],"references-count":43,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-73116-7_26","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,10,31]]},"assertion":[{"value":"31 October 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Milan","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 September 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4 October 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2024.ecva.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}