{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,27]],"date-time":"2025-03-27T12:57:02Z","timestamp":1743080222381,"version":"3.40.3"},"publisher-location":"Cham","reference-count":96,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783031727634"},{"type":"electronic","value":"9783031727641"}],"license":[{"start":{"date-parts":[[2024,10,25]],"date-time":"2024-10-25T00:00:00Z","timestamp":1729814400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,10,25]],"date-time":"2024-10-25T00:00:00Z","timestamp":1729814400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-72764-1_18","type":"book-chapter","created":{"date-parts":[[2024,10,24]],"date-time":"2024-10-24T14:03:10Z","timestamp":1729778590000},"page":"307-326","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Noise Calibration: Plug-and-Play Content-Preserving Video Enhancement Using Pre-trained Video Diffusion Models"],"prefix":"10.1007","author":[{"given":"Qinyu","family":"Yang","sequence":"first","affiliation":[]},{"given":"Haoxin","family":"Chen","sequence":"additional","affiliation":[]},{"given":"Yong","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"Menghan","family":"Xia","sequence":"additional","affiliation":[]},{"given":"Xiaodong","family":"Cun","sequence":"additional","affiliation":[]},{"given":"Zhixun","family":"Su","sequence":"additional","affiliation":[]},{"given":"Ying","family":"Shan","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,10,25]]},"reference":[{"key":"18_CR1","doi-asserted-by":"crossref","unstructured":"Ahn, N., Kwon, P., Back, J., Hong, K., Kim, S.: Interactive cartoonization with controllable perceptual factors. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 16827\u201316835 (2023)","DOI":"10.1109\/CVPR52729.2023.01614"},{"key":"18_CR2","unstructured":"An, J., et al.: Latent-shift: Latent diffusion with temporal shift for efficient text-to-video generation. arXiv preprint arXiv:2304.08477 (2023)"},{"issue":"4","key":"18_CR3","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3592450","volume":"42","author":"O Avrahami","year":"2023","unstructured":"Avrahami, O., Fried, O., Lischinski, D.: Blended latent diffusion. ACM Trans. Graph. (TOG) 42(4), 1\u201311 (2023)","journal-title":"ACM Trans. Graph. (TOG)"},{"key":"18_CR4","doi-asserted-by":"crossref","unstructured":"Avrahami, O., Lischinski, D., Fried, O.: Blended diffusion for text-driven editing of natural images. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 18208\u201318218 (2022)","DOI":"10.1109\/CVPR52688.2022.01767"},{"key":"18_CR5","doi-asserted-by":"crossref","unstructured":"Balaji, Y., Min, M.R., Bai, B., Chellappa, R., Graf, H.P.: Conditional gan with discriminative filter generation for text-to-video synthesis. In: IJCAI, vol.\u00a01, p.\u00a02 (2019)","DOI":"10.24963\/ijcai.2019\/276"},{"key":"18_CR6","unstructured":"Bao, F., Li, C., Zhu, J., Zhang, B.: Analytic-dpm: an analytic estimate of the optimal reverse variance in diffusion probabilistic models. arXiv preprint arXiv:2201.06503 (2022)"},{"key":"18_CR7","doi-asserted-by":"crossref","unstructured":"Brack, M., et al.: Ledits++: Limitless image editing using text-to-image models. arXiv preprint arXiv:2311.16711 (2023)","DOI":"10.1109\/CVPR52733.2024.00846"},{"key":"18_CR8","doi-asserted-by":"crossref","unstructured":"Brooks, T., Holynski, A., Efros, A.A.: Instructpix2pix: learning to follow image editing instructions. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 18392\u201318402 (2023)","DOI":"10.1109\/CVPR52729.2023.01764"},{"key":"18_CR9","doi-asserted-by":"crossref","unstructured":"Chan, K.C.K., Zhou, S., Xu, X., Loy, C.C.: Basicvsr++: Improving video super-resolution with enhanced propagation and alignment (2021)","DOI":"10.1109\/CVPR52688.2022.00588"},{"key":"18_CR10","unstructured":"Chen, C., et al.: Iterative token evaluation and refinement for real-world super-resolution. arXiv preprint arXiv:2312.05616 (2023)"},{"key":"18_CR11","unstructured":"Chen, H., et\u00a0al.: Videocrafter1: Open diffusion models for high-quality video generation. arXiv preprint arXiv:2310.19512 (2023)"},{"key":"18_CR12","doi-asserted-by":"crossref","unstructured":"Choi, J., Kim, S., Jeong, Y., Gwon, Y., Yoon, S.: Ilvr: Conditioning method for denoising diffusion probabilistic models. arXiv preprint arXiv:2108.02938 (2021)","DOI":"10.1109\/ICCV48922.2021.01410"},{"key":"18_CR13","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"424","DOI":"10.1007\/978-3-319-46723-8_49","volume-title":"Medical Image Computing and Computer-Assisted Intervention \u2013 MICCAI 2016","author":"\u00d6 \u00c7i\u00e7ek","year":"2016","unstructured":"\u00c7i\u00e7ek, \u00d6., Abdulkadir, A., Lienkamp, S.S., Brox, T., Ronneberger, O.: 3D U-Net: learning dense volumetric segmentation from sparse annotation. In: Ourselin, S., Joskowicz, L., Sabuncu, M.R., Unal, G., Wells, W. (eds.) MICCAI 2016. LNCS, vol. 9901, pp. 424\u2013432. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-46723-8_49"},{"issue":"1","key":"18_CR14","doi-asserted-by":"publisher","first-page":"53","DOI":"10.1109\/MSP.2017.2765202","volume":"35","author":"A Creswell","year":"2018","unstructured":"Creswell, A., White, T., Dumoulin, V., Arulkumaran, K., Sengupta, B., Bharath, A.A.: Generative adversarial networks: an overview. IEEE Signal Process. Mag. 35(1), 53\u201365 (2018)","journal-title":"IEEE Signal Process. Mag."},{"key":"18_CR15","first-page":"8780","volume":"34","author":"P Dhariwal","year":"2021","unstructured":"Dhariwal, P., Nichol, A.: Diffusion models beat gans on image synthesis. Adv. Neural. Inf. Process. Syst. 34, 8780\u20138794 (2021)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"18_CR16","unstructured":"Dockhorn, T., Vahdat, A., Kreis, K.: Score-based generative modeling with critically-damped langevin diffusion. arXiv preprint arXiv:2112.07068 (2021)"},{"issue":"11","key":"18_CR17","doi-asserted-by":"publisher","first-page":"139","DOI":"10.1145\/3422622","volume":"63","author":"I Goodfellow","year":"2020","unstructured":"Goodfellow, I., et al.: Generative adversarial networks. Commun. ACM 63(11), 139\u2013144 (2020)","journal-title":"Commun. ACM"},{"key":"18_CR18","unstructured":"Hachnochi, R., et al.: Cross-domain compositing with pretrained diffusion models. arXiv preprint arXiv:2302.10167 (2023)"},{"key":"18_CR19","unstructured":"He, Y., Yang, T., Zhang, Y., Shan, Y., Chen, Q.: Latent video diffusion models for high-fidelity video generation with arbitrary lengths. arXiv preprint arXiv:2211.13221 (2022)"},{"key":"18_CR20","unstructured":"Hertz, A., Mokady, R., Tenenbaum, J., Aberman, K., Pritch, Y., Cohen-Or, D.: Prompt-to-prompt image editing with cross attention control (2022). https:\/\/arxiv.org\/abs\/2208.01626 (2022)"},{"key":"18_CR21","unstructured":"Ho, J., et\u00a0al.: Imagen video: High definition video generation with diffusion models. arXiv preprint arXiv:2210.02303 (2022)"},{"key":"18_CR22","first-page":"6840","volume":"33","author":"J Ho","year":"2020","unstructured":"Ho, J., Jain, A., Abbeel, P.: Denoising diffusion probabilistic models. Adv. Neural. Inf. Process. Syst. 33, 6840\u20136851 (2020)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"18_CR23","unstructured":"Ho, J., Salimans, T., Gritsenko, A., Chan, W., Norouzi, M., Fleet, D.J.: Video diffusion models (2022)"},{"key":"18_CR24","doi-asserted-by":"crossref","unstructured":"Hu, Y., Luo, C., Chen, Z.: Make it move: controllable image-to-video generation with text descriptions. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 18219\u201318228 (2022)","DOI":"10.1109\/CVPR52688.2022.01768"},{"issue":"4","key":"18_CR25","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3528223.3530104","volume":"41","author":"Y Jiang","year":"2022","unstructured":"Jiang, Y., Yang, S., Qiu, H., Wu, W., Loy, C.C., Liu, Z.: Text2human: text-driven controllable human image generation. ACM Trans. Graph. (TOG) 41(4), 1\u201311 (2022)","journal-title":"ACM Trans. Graph. (TOG)"},{"key":"18_CR26","unstructured":"Jim\u00e9nez, \u00c1.B.: Mixture of diffusers for scene composition and high resolution image generation. arXiv preprint arXiv:2302.02412 (2023)"},{"key":"18_CR27","doi-asserted-by":"crossref","unstructured":"Kawar, B., et al.: Imagic: text-based real image editing with diffusion models. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 6007\u20136017 (2023)","DOI":"10.1109\/CVPR52729.2023.00582"},{"key":"18_CR28","doi-asserted-by":"crossref","unstructured":"Kim, H., Lee, G., Choi, Y., Kim, J.H., Zhu, J.Y.: 3d-aware blending with generative nerfs. arXiv preprint arXiv:2302.06608 (2023)","DOI":"10.1109\/ICCV51070.2023.02094"},{"key":"18_CR29","unstructured":"Kingma, D.P., Welling, M.: Auto-encoding variational bayes. arXiv preprint arXiv:1312.6114 (2013)"},{"key":"18_CR30","doi-asserted-by":"crossref","unstructured":"Kingma, D.P., Welling, M., et\u00a0al.: An introduction to variational autoencoders. Foundat. Trends\u00ae Mach. Learn. 12(4), 307\u2013392 (2019)","DOI":"10.1561\/2200000056"},{"key":"18_CR31","doi-asserted-by":"crossref","unstructured":"LeCun, Y., Chopra, S., Hadsell, R., Ranzato, M., Huang, F.: A tutorial on energy-based learning. Predicting Structured Data 1(0) (2006)","DOI":"10.7551\/mitpress\/7443.003.0014"},{"key":"18_CR32","doi-asserted-by":"crossref","unstructured":"Li, B., Xue, K., Liu, B., Lai, Y.K.: Vqbb: Image-to-image translation with vector quantized brownian bridge. arXiv preprint arXiv:2205.07680 (2022)","DOI":"10.1109\/CVPR52729.2023.00194"},{"key":"18_CR33","doi-asserted-by":"publisher","unstructured":"Liu, N., Li, S., Du, Y., Torralba, A., Tenenbaum, J.B.: Compositional visual generation with composable diffusion models. In: European Conference on Computer Vision, pp. 423\u2013439. Springer (2022). https:\/\/doi.org\/10.1007\/978-3-031-19790-1_26","DOI":"10.1007\/978-3-031-19790-1_26"},{"key":"18_CR34","doi-asserted-by":"crossref","unstructured":"Liu, Y., et al.: Evalcrafter: Benchmarking and evaluating large video generation models. arXiv preprint arXiv:2310.11440 (2023)","DOI":"10.1109\/CVPR52733.2024.02090"},{"key":"18_CR35","doi-asserted-by":"crossref","unstructured":"Lu, S., Liu, Y., Kong, A.W.K.: Tf-icon: diffusion-based training-free cross-domain image composition. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 2294\u20132305 (2023)","DOI":"10.1109\/ICCV51070.2023.00218"},{"key":"18_CR36","unstructured":"Luo, F., Xiang, J., Zhang, J., Han, X., Yang, W.: Image super-resolution via latent diffusion: A sampling-space mixture of experts and frequency-augmented decoder approach. arXiv preprint arXiv:2310.12004 (2023)"},{"key":"18_CR37","doi-asserted-by":"crossref","unstructured":"Ma, Y., et al.: Follow your pose: Pose-guided text-to-video generation using pose-free videos. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol.\u00a038, pp. 4117\u20134125 (2024)","DOI":"10.1609\/aaai.v38i5.28206"},{"key":"18_CR38","unstructured":"Ma, Y., et\u00a0al.: Follow-your-click: Open-domain regional image animation via short prompts. arXiv preprint arXiv:2403.08268 (2024)"},{"key":"18_CR39","unstructured":"Ma, Y., et\u00a0al.: Follow-your-emoji: Fine-controllable and expressive freestyle portrait animation. arXiv preprint arXiv:2406.01900 (2024)"},{"key":"18_CR40","doi-asserted-by":"crossref","unstructured":"Mei, K., Patel, V.: Vidm: video implicit diffusion models. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol.\u00a037, pp. 9117\u20139125 (2023)","DOI":"10.1609\/aaai.v37i8.26094"},{"key":"18_CR41","unstructured":"Meng, C., He, Y., Song, Y., Song, J., Wu, J., Zhu, J.Y., Ermon, S.: Sdedit: Guided image synthesis and editing with stochastic differential equations. arXiv preprint arXiv:2108.01073 (2021)"},{"key":"18_CR42","doi-asserted-by":"crossref","unstructured":"Miech, A., Zhukov, D., Alayrac, J.B., Tapaswi, M., Laptev, I., Sivic, J.: Howto100m: learning a text-video embedding by watching hundred million narrated video clips. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 2630\u20132640 (2019)","DOI":"10.1109\/ICCV.2019.00272"},{"key":"18_CR43","unstructured":"Mishra, S., Saenko, K., Saligrama, V.: Syncdr: Training cross domain retrieval models with synthetic data. arXiv preprint arXiv:2401.00420 (2023)"},{"key":"18_CR44","doi-asserted-by":"crossref","unstructured":"Mokady, R., Hertz, A., Aberman, K., Pritch, Y., Cohen-Or, D.: Null-text inversion for editing real images using guided diffusion models. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 6038\u20136047 (2023)","DOI":"10.1109\/CVPR52729.2023.00585"},{"key":"18_CR45","unstructured":"Ngiam, J., Chen, Z., Koh, P.W., Ng, A.Y.: Learning deep energy models. In: Proceedings of the 28th International Conference on Machine Learning (ICML 2011), pp. 1105\u20131112 (2011)"},{"key":"18_CR46","unstructured":"Nichol, A., et al.: Glide: Towards photorealistic image generation and editing with text-guided diffusion models. arXiv preprint arXiv:2112.10741 (2021)"},{"key":"18_CR47","unstructured":"Nichol, A.Q., Dhariwal, P.: Improved denoising diffusion probabilistic models. In: International Conference on Machine Learning, pp. 8162\u20138171. PMLR (2021)"},{"key":"18_CR48","doi-asserted-by":"crossref","unstructured":"Oussidi, A., Elhassouny, A.: Deep generative models: Survey. In: 2018 International Conference on Intelligent Systems and Computer Vision (ISCV), pp.\u00a01\u20138. IEEE (2018)","DOI":"10.1109\/ISACV.2018.8354080"},{"key":"18_CR49","unstructured":"Pandey, K., Mukherjee, A., Rai, P., Kumar, A.: Vaes meet diffusion models: efficient and high-fidelity generation. In: NeurIPS 2021 Workshop on Deep Generative Models and Downstream Applications (2021)"},{"key":"18_CR50","doi-asserted-by":"crossref","unstructured":"Parmar, G., Kumar\u00a0Singh, K., Zhang, R., Li, Y., Lu, J., Zhu, J.Y.: Zero-shot image-to-image translation. In: ACM SIGGRAPH 2023 Conference Proceedings, pp. 1\u201311 (2023)","DOI":"10.1145\/3588432.3591513"},{"key":"18_CR51","doi-asserted-by":"crossref","unstructured":"Peng, D., Hu, P., Ke, Q., Liu, J.: Diffusion-based image translation with label guidance for domain adaptive semantic segmentation. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 808\u2013820 (2023)","DOI":"10.1109\/ICCV51070.2023.00081"},{"key":"18_CR52","unstructured":"Podell, D.et al.: Sdxl: Improving latent diffusion models for high-resolution image synthesis. arXiv preprint arXiv:2307.01952 (2023)"},{"key":"18_CR53","unstructured":"Rezende, D., Mohamed, S.: Variational inference with normalizing flows. In: International Conference on Machine Learning, pp. 1530\u20131538. PMLR (2015)"},{"key":"18_CR54","doi-asserted-by":"crossref","unstructured":"Rombach, R., Blattmann, A., Lorenz, D., Esser, P., Ommer, B.: High-resolution image synthesis with latent diffusion models. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 10684\u201310695 (2022)","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"18_CR55","doi-asserted-by":"crossref","unstructured":"Saharia, C., et al.: Palette: image-to-image diffusion models. In: ACM SIGGRAPH 2022 Conference Proceedings, pp. 1\u201310 (2022)","DOI":"10.1145\/3528233.3530757"},{"key":"18_CR56","first-page":"36479","volume":"35","author":"C Saharia","year":"2022","unstructured":"Saharia, C., et al.: Photorealistic text-to-image diffusion models with deep language understanding. Adv. Neural. Inf. Process. Syst. 35, 36479\u201336494 (2022)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"issue":"4","key":"18_CR57","first-page":"4713","volume":"45","author":"C Saharia","year":"2022","unstructured":"Saharia, C., Ho, J., Chan, W., Salimans, T., Fleet, D.J., Norouzi, M.: Image super-resolution via iterative refinement. IEEE Trans. Pattern Anal. Mach. Intell. 45(4), 4713\u20134726 (2022)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"18_CR58","doi-asserted-by":"crossref","unstructured":"Si, C., Huang, Z., Jiang, Y., Liu, Z.: Freeu: Free lunch in diffusion u-net. arXiv preprint arXiv:2309.11497 (2023)","DOI":"10.1109\/CVPR52733.2024.00453"},{"key":"18_CR59","doi-asserted-by":"crossref","unstructured":"Singh, J., Gould, S., Zheng, L.: High-fidelity guided image synthesis with latent diffusion models. In: 2023 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 5997\u20136006. IEEE (2023)","DOI":"10.1109\/CVPR52729.2023.00581"},{"key":"18_CR60","first-page":"12533","volume":"34","author":"A Sinha","year":"2021","unstructured":"Sinha, A., Song, J., Meng, C., Ermon, S.: D2c: diffusion-decoding models for few-shot conditional generation. Adv. Neural. Inf. Process. Syst. 34, 12533\u201312548 (2021)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"18_CR61","doi-asserted-by":"crossref","unstructured":"Skorokhodov, I., Tulyakov, S., Elhoseiny, M.: Stylegan-v: A continuous video generator with the price, image quality and perks of stylegan2. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 3626\u20133636 (2022)","DOI":"10.1109\/CVPR52688.2022.00361"},{"key":"18_CR62","unstructured":"Sohl-Dickstein, J., Weiss, E., Maheswaranathan, N., Ganguli, S.: Deep unsupervised learning using nonequilibrium thermodynamics. In: International Conference on Machine Learning, pp. 2256\u20132265. PMLR (2015)"},{"key":"18_CR63","unstructured":"Song, J., Meng, C., Ermon, S.: Denoising diffusion implicit models. arXiv preprint arXiv:2010.02502 (2020)"},{"key":"18_CR64","unstructured":"Song, J., Meng, C., Ermon, S.: Denoising diffusion implicit models. In: International Conference on Learning Representations (2021)"},{"key":"18_CR65","first-page":"1415","volume":"34","author":"Y Song","year":"2021","unstructured":"Song, Y., Durkan, C., Murray, I., Ermon, S.: Maximum likelihood training of score-based diffusion models. Adv. Neural. Inf. Process. Syst. 34, 1415\u20131428 (2021)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"18_CR66","unstructured":"Song, Y., Ermon, S.: Generative modeling by estimating gradients of the data distribution. Adv. Neural Inform. Process. Syst. 32 (2019)"},{"key":"18_CR67","first-page":"12438","volume":"33","author":"Y Song","year":"2020","unstructured":"Song, Y., Ermon, S.: Improved techniques for training score-based generative models. Adv. Neural. Inf. Process. Syst. 33, 12438\u201312448 (2020)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"18_CR68","unstructured":"Song, Y., Sohl-Dickstein, J., Kingma, D.P., Kumar, A., Ermon, S., Poole, B.: Score-based generative modeling through stochastic differential equations. arXiv preprint arXiv:2011.13456 (2020)"},{"key":"18_CR69","doi-asserted-by":"crossref","unstructured":"Tulyakov, S., Liu, M.Y., Yang, X., Kautz, J.: Mocogan: Decomposing motion and content for video generation. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 1526\u20131535 (2018)","DOI":"10.1109\/CVPR.2018.00165"},{"key":"18_CR70","doi-asserted-by":"crossref","unstructured":"Tumanyan, N., Geyer, M., Bagon, S., Dekel, T.: Plug-and-play diffusion features for text-driven image-to-image translation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 1921\u20131930 (2023)","DOI":"10.1109\/CVPR52729.2023.00191"},{"key":"18_CR71","first-page":"11287","volume":"34","author":"A Vahdat","year":"2021","unstructured":"Vahdat, A., Kreis, K., Kautz, J.: Score-based generative modeling in latent space. Adv. Neural. Inf. Process. Syst. 34, 11287\u201311302 (2021)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"18_CR72","doi-asserted-by":"crossref","unstructured":"Wang, J., Chan, K.C., Loy, C.C.: Exploring clip for assessing the look and feel of images. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol.\u00a037, pp. 2555\u20132563 (2023)","DOI":"10.1609\/aaai.v37i2.25353"},{"key":"18_CR73","doi-asserted-by":"crossref","unstructured":"Wang, J., Yue, Z., Zhou, S., Chan, K.C., Loy, C.C.: Exploiting diffusion prior for real-world image super-resolution. arXiv preprint arXiv:2305.07015 (2023)","DOI":"10.1007\/s11263-024-02168-7"},{"key":"18_CR74","unstructured":"Wang, J., Yuan, H., Chen, D., Zhang, Y., Wang, X., Zhang, S.: Modelscope text-to-video technical report. arXiv preprint arXiv:2308.06571 (2023)"},{"key":"18_CR75","unstructured":"Wang, T., et al.: Pretraining is all you need for image-to-image translation. arXiv preprint arXiv:2205.12952 (2022)"},{"key":"18_CR76","unstructured":"Wang, W., et al.: Videofactory: Swap attention in spatiotemporal diffusions for text-to-video generation. arXiv preprint arXiv:2305.10874 (2023)"},{"key":"18_CR77","doi-asserted-by":"crossref","unstructured":"Wang, Y., Bilinski, P., Bremond, F., Dantcheva, A.: G3an: disentangling appearance and motion for video generation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 5264\u20135273 (2020)","DOI":"10.1109\/CVPR42600.2020.00531"},{"key":"18_CR78","doi-asserted-by":"crossref","unstructured":"Wang, Y., Bilinski, P., Bremond, F., Dantcheva, A.: Imaginator: Conditional spatio-temporal gan for video generation. In: Proceedings of the IEEE\/CVF Winter Conference on Applications of Computer Vision, pp. 1160\u20131169 (2020)","DOI":"10.1109\/WACV45572.2020.9093492"},{"key":"18_CR79","unstructured":"Wang, Y., et\u00a0al.: Lavie: High-quality video generation with cascaded latent diffusion models. arXiv preprint arXiv:2309.15103 (2023)"},{"key":"18_CR80","doi-asserted-by":"crossref","unstructured":"Wang, Y., Jiang, L., Loy, C.C.: Styleinv: a temporal style modulated inversion network for unconditional video generation. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 22851\u201322861 (2023)","DOI":"10.1109\/ICCV51070.2023.02089"},{"issue":"4","key":"18_CR81","doi-asserted-by":"publisher","first-page":"600","DOI":"10.1109\/TIP.2003.819861","volume":"13","author":"Z Wang","year":"2004","unstructured":"Wang, Z., Bovik, A.C., Sheikh, H.R., Simoncelli, E.P.: Image quality assessment: from error visibility to structural similarity. IEEE Trans. Image Process. 13(4), 600\u2013612 (2004)","journal-title":"IEEE Trans. Image Process."},{"key":"18_CR82","unstructured":"Watson, D., Chan, W., Ho, J., Norouzi, M.: Learning fast samplers for diffusion models by differentiating through sample quality. arXiv preprint arXiv:2202.05830 (2022)"},{"key":"18_CR83","unstructured":"Wolleb, J., Sandk\u00fchler, R., Bieder, F., Cattin, P.C.: The swiss army knife for image-to-image translation: Multi-task diffusion models. arXiv preprint arXiv:2204.02641 (2022)"},{"key":"18_CR84","doi-asserted-by":"crossref","unstructured":"Wu, C.H., De\u00a0la Torre, F.: A latent space of stochastic diffusion models for zero-shot image editing and guidance. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 7378\u20137387 (2023)","DOI":"10.1109\/ICCV51070.2023.00678"},{"key":"18_CR85","doi-asserted-by":"crossref","unstructured":"Wu, H., et al.: Exploring video quality assessment on user generated contents from aesthetic and technical perspectives. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 20144\u201320154 (2023)","DOI":"10.1109\/ICCV51070.2023.01843"},{"key":"18_CR86","doi-asserted-by":"crossref","unstructured":"Xia, B., et al.: Diffir: Efficient diffusion model for image restoration. arXiv preprint arXiv:2303.09472 (2023)","DOI":"10.1109\/ICCV51070.2023.01204"},{"key":"18_CR87","doi-asserted-by":"crossref","unstructured":"Yang, B., et al.: Paint by example: Exemplar-based image editing with diffusion models. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 18381\u201318391 (2023)","DOI":"10.1109\/CVPR52729.2023.01763"},{"key":"18_CR88","doi-asserted-by":"crossref","unstructured":"Yang, Z., Chu, T., Lin, X., Gao, E., Liu, D., Yang, J., Wang, C.: Eliminating contextual prior bias for semantic image editing via dual-cycle diffusion. IEEE Trans. Circ. Syst. Video Technol. (2023)","DOI":"10.1109\/TCSVT.2023.3286841"},{"key":"18_CR89","doi-asserted-by":"crossref","unstructured":"Ye, Y., et al.: Affordance diffusion: synthesizing hand-object interactions. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 22479\u201322489 (2023)","DOI":"10.1109\/CVPR52729.2023.02153"},{"key":"18_CR90","doi-asserted-by":"crossref","unstructured":"Yu, S., Sohn, K., Kim, S., Shin, J.: Video probabilistic diffusion models in projected latent space. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 18456\u201318466 (2023)","DOI":"10.1109\/CVPR52729.2023.01770"},{"key":"18_CR91","doi-asserted-by":"crossref","unstructured":"Yue, Z., Wang, J., Loy, C.C.: Resshift: Efficient diffusion model for image super-resolution by residual shifting. arXiv preprint arXiv:2307.12348 (2023)","DOI":"10.1109\/TPAMI.2024.3461721"},{"key":"18_CR92","doi-asserted-by":"crossref","unstructured":"Zhang, D.J., et al.: Show-1: Marrying pixel and latent diffusion models for text-to-video generation. arXiv preprint arXiv:2309.15818 (2023)","DOI":"10.1007\/s11263-024-02271-9"},{"key":"18_CR93","unstructured":"Zhang, S., et al.: I2vgen-xl: High-quality image-to-video synthesis via cascaded diffusion models. arXiv preprint arXiv:2311.04145 (2023)"},{"key":"18_CR94","unstructured":"Zhang, S., Xiao, S., Huang, W.: Forgedit: Text guided image editing via learning and forgetting. arXiv preprint arXiv:2309.10556 (2023)"},{"key":"18_CR95","first-page":"3609","volume":"35","author":"M Zhao","year":"2022","unstructured":"Zhao, M., Bao, F., Li, C., Zhu, J.: Egsde: unpaired image-to-image translation via energy-guided stochastic differential equations. Adv. Neural. Inf. Process. Syst. 35, 3609\u20133623 (2022)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"18_CR96","unstructured":"Zhou, D., Wang, W., Yan, H., Lv, W., Zhu, Y., Feng, J.: Magicvideo: Efficient video generation with latent diffusion models. arXiv preprint arXiv:2211.11018 (2022)"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2024"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-72764-1_18","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,11,30]],"date-time":"2024-11-30T06:28:50Z","timestamp":1732948130000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-72764-1_18"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10,25]]},"ISBN":["9783031727634","9783031727641"],"references-count":96,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-72764-1_18","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2024,10,25]]},"assertion":[{"value":"25 October 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"Like SDEdit, the enhancement effectiveness of our method is also limited by the performance of the base model.","order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Limitation"}},{"value":"As our method is for improving video quality, it does not introduce additional ethical concerns.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Societal Impact"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Milan","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 September 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4 October 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2024.ecva.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}