{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,11]],"date-time":"2026-03-11T20:28:11Z","timestamp":1773260891604,"version":"3.50.1"},"reference-count":83,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2024,12,29]],"date-time":"2024-12-29T00:00:00Z","timestamp":1735430400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,12,29]],"date-time":"2024-12-29T00:00:00Z","timestamp":1735430400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Multimedia Systems"],"published-print":{"date-parts":[[2025,2]]},"DOI":"10.1007\/s00530-024-01625-0","type":"journal-article","created":{"date-parts":[[2024,12,29]],"date-time":"2024-12-29T13:13:02Z","timestamp":1735477982000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["Aggregating multi-scale flow-enhanced information in transformer for video inpainting"],"prefix":"10.1007","volume":"31","author":[{"given":"Guanxiao","family":"Li","sequence":"first","affiliation":[]},{"given":"Ke","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"Yu","family":"Su","sequence":"additional","affiliation":[]},{"given":"Jingyu","family":"Wang","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,12,29]]},"reference":[{"issue":"1","key":"1625_CR1","doi-asserted-by":"publisher","first-page":"21","DOI":"10.1007\/s00530-023-01211-w","volume":"30","author":"W Miao","year":"2024","unstructured":"Miao, W., Wang, L., Lu, H., Huang, K., Shi, X., Liu, B.: ITrans: generative image inpainting with transformers. Multimedia Syst. 30(1), 21 (2024)","journal-title":"Multimedia Syst."},{"issue":"2","key":"1625_CR2","doi-asserted-by":"publisher","first-page":"101","DOI":"10.1007\/s00530-024-01290-3","volume":"30","author":"C Dong","year":"2024","unstructured":"Dong, C., Liu, H., Wang, X., Bi, X.: Image inpainting method based on AU-GAN. Multimedia Syst. 30(2), 101 (2024)","journal-title":"Multimedia Syst."},{"key":"1625_CR3","doi-asserted-by":"publisher","DOI":"10.1016\/j.engappai.2023.106323","volume":"123","author":"G Li","year":"2023","unstructured":"Li, G., Zhang, K., Su, Y., Wang, J.: Feature pre-inpainting enhanced transformer for video inpainting. Eng. Appl. Artif. Intell. 123, 106323 (2023). https:\/\/doi.org\/10.1016\/j.engappai.2023.106323","journal-title":"Eng. Appl. Artif. Intell."},{"issue":"6","key":"1625_CR4","doi-asserted-by":"publisher","first-page":"3193","DOI":"10.1007\/s00530-023-01189-5","volume":"29","author":"Y Huang","year":"2023","unstructured":"Huang, Y., Lu, J., Chen, N., Ding, H., Shang, Y.: A deep learning image inpainting method based on stationary wavelet transform. Multimedia Systems 29(6), 3193\u20133207 (2023)","journal-title":"Multimedia Systems"},{"issue":"6","key":"1625_CR5","doi-asserted-by":"publisher","first-page":"3819","DOI":"10.1007\/s00530-023-01184-w","volume":"29","author":"F Xiao","year":"2023","unstructured":"Xiao, F., Zhang, Z., Yao, Y.: CTNet: hybrid architecture based on CNN and transformer for image inpainting detection. Multimedia Syst. 29(6), 3819\u20133832 (2023)","journal-title":"Multimedia Syst."},{"issue":"8","key":"1625_CR6","doi-asserted-by":"publisher","first-page":"2953","DOI":"10.1109\/TCSVT.2020.3034422","volume":"31","author":"C Wang","year":"2020","unstructured":"Wang, C., Chen, X., Min, S., Wang, J., Zha, Z.-J.: Structure-guided deep video inpainting. IEEE Trans. Circuits Syst. Video Technol. 31(8), 2953\u20132965 (2020)","journal-title":"IEEE Trans. Circuits Syst. Video Technol."},{"key":"1625_CR7","doi-asserted-by":"publisher","DOI":"10.1016\/j.engappai.2023.107789","volume":"131","author":"C-H Yeh","year":"2024","unstructured":"Yeh, C.-H., Yang, H.-F., Lin, Y.-Y., Huang, W.-J., Tsai, F.-H., Kang, L.-W.: Fine-grained video super-resolution via spatial-temporal learning and image detail enhancement. Eng. Appl. Artif. Intell. 131, 107789 (2024). https:\/\/doi.org\/10.1016\/j.engappai.2023.107789","journal-title":"Eng. Appl. Artif. Intell."},{"key":"1625_CR8","doi-asserted-by":"crossref","unstructured":"Szeto, R., Corso, J.J.: The devil is in the details: A diagnostic evaluation benchmark for video inpainting. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 21054\u201321063 (2022)","DOI":"10.1109\/CVPR52688.2022.02038"},{"issue":"5","key":"1625_CR9","doi-asserted-by":"publisher","first-page":"3957","DOI":"10.1109\/TCYB.2020.3018120","volume":"52","author":"J Wang","year":"2022","unstructured":"Wang, J., Zhang, G., Zhang, K., Zhao, Y., Wang, Q., Li, X.: Detection of small aerial object using random projection feature with region clustering. IEEE Trans. Cybern. 52(5), 3957\u20133970 (2022). https:\/\/doi.org\/10.1109\/TCYB.2020.3018120","journal-title":"IEEE Trans. Cybern."},{"key":"1625_CR10","doi-asserted-by":"publisher","DOI":"10.1016\/j.engappai.2022.105108","volume":"114","author":"H Fu","year":"2022","unstructured":"Fu, H., Zhang, K., Li, H., Wang, J.: MRRNet: learning multiple region representation for video person re-identification. Eng. Appl. Artif. Intell. 114, 105108 (2022). https:\/\/doi.org\/10.1016\/j.engappai.2022.105108","journal-title":"Eng. Appl. Artif. Intell."},{"key":"1625_CR11","doi-asserted-by":"publisher","first-page":"616","DOI":"10.1109\/LSP.2024.3361805","volume":"31","author":"K Zhang","year":"2024","unstructured":"Zhang, K., Li, G., Su, Y., Wang, J.: WTVI: a wavelet-based transformer network for video inpainting. IEEE Signal Process. Lett. 31, 616\u2013620 (2024)","journal-title":"IEEE Signal Process. Lett."},{"key":"1625_CR12","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2021.3117964","author":"R Liu","year":"2021","unstructured":"Liu, R., Li, B., Zhu, Y.: Temporal group fusion network for deep video inpainting. IEEE Trans. Circuits Syst. Video Technol. (2021). https:\/\/doi.org\/10.1109\/TCSVT.2021.3117964","journal-title":"IEEE Trans. Circuits Syst. Video Technol."},{"key":"1625_CR13","doi-asserted-by":"crossref","unstructured":"Kim, D., Woo, S., Lee, J.-Y., Kweon, I.S.: Deep blind video decaptioning by temporal aggregation and recurrence. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 4263\u20134272 (2019)","DOI":"10.1109\/CVPR.2019.00439"},{"key":"1625_CR14","doi-asserted-by":"crossref","unstructured":"Xu, R., Li, X., Zhou, B., Loy, C.C.: Deep flow-guided video inpainting. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 3723\u20133732 (2019)","DOI":"10.1109\/CVPR.2019.00384"},{"key":"1625_CR15","doi-asserted-by":"crossref","unstructured":"Gao, C., Saraf, A., Huang, J.-B., Kopf, J.: Flow-edge guided video completion. In: European Conference on Computer Vision, pp. 713\u2013729. Springer (2020)","DOI":"10.1007\/978-3-030-58610-2_42"},{"key":"1625_CR16","doi-asserted-by":"publisher","first-page":"8429","DOI":"10.1109\/TIP.2020.3013168","volume":"29","author":"L Tian","year":"2020","unstructured":"Tian, L., Tu, Z., Zhang, D., Liu, J., Li, B., Yuan, J.: Unsupervised learning of optical flow with CNN-based non-local filtering. IEEE Trans. Image Process. 29, 8429\u20138442 (2020)","journal-title":"IEEE Trans. Image Process."},{"key":"1625_CR17","doi-asserted-by":"publisher","first-page":"9","DOI":"10.1016\/j.image.2018.12.002","volume":"72","author":"Z Tu","year":"2019","unstructured":"Tu, Z., Xie, W., Zhang, D., Poppe, R., Veltkamp, R.C., Li, B., Yuan, J.: A survey of variational and CNN-based optical flow techniques. Signal Process. Image Commun. 72, 9\u201324 (2019)","journal-title":"Signal Process. Image Commun."},{"key":"1625_CR18","doi-asserted-by":"crossref","unstructured":"Zeng, Y., Fu, J., Chao, H.: Learning joint spatial-temporal transformations for video inpainting. In: European Conference on Computer Vision, pp. 528\u2013543. Springer (2020)","DOI":"10.1007\/978-3-030-58517-4_31"},{"key":"1625_CR19","unstructured":"Liu, R., Deng, H., Huang, Y., Shi, X., Lu, L., Sun, W., Wang, X., Hongsheng, L.: Decoupled spatial-temporal transformer for video inpainting. arXiv preprint arXiv:2104.06637 (2021)"},{"key":"1625_CR20","doi-asserted-by":"crossref","unstructured":"Li, Z., Lu, C.-Z., Qin, J., Guo, C.-L., Cheng, M.-M.: Towards an end-to-end framework for flow-guided video inpainting. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 17562\u201317571 (2022)","DOI":"10.1109\/CVPR52688.2022.01704"},{"key":"1625_CR21","doi-asserted-by":"crossref","unstructured":"Fan, H., Xiong, B., Mangalam, K., Li, Y., Yan, Z., Malik, J., Feichtenhofer, C.: Multiscale vision transformers. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 6824\u20136835 (2021)","DOI":"10.1109\/ICCV48922.2021.00675"},{"key":"1625_CR22","first-page":"4479","volume":"33","author":"L Chi","year":"2020","unstructured":"Chi, L., Jiang, B., Mu, Y.: Fast fourier convolution. Adv. Neural Inf. Process. Syst. 33, 4479\u20134488 (2020)","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"1625_CR23","doi-asserted-by":"crossref","unstructured":"Liu, Z., Lin, Y., Cao, Y., Hu, H., Wei, Y., Zhang, Z., Lin, S., Guo, B.: Swin transformer: hierarchical vision transformer using shifted windows. arXiv preprint arXiv:2103.14030 (2021)","DOI":"10.1109\/ICCV48922.2021.00986"},{"key":"1625_CR24","doi-asserted-by":"crossref","unstructured":"Efros, A.A., Freeman, W.T.: Image quilting for texture synthesis and transfer. In: Proceedings of the 28th Annual Conference on Computer Graphics and Interactive Techniques, pp. 341\u2013346 (2001)","DOI":"10.1145\/383259.383296"},{"issue":"8","key":"1625_CR25","doi-asserted-by":"publisher","first-page":"882","DOI":"10.1109\/TIP.2003.815261","volume":"12","author":"M Bertalmio","year":"2003","unstructured":"Bertalmio, M., Vese, L., Sapiro, G., Osher, S.: Simultaneous structure and texture image inpainting. IEEE Trans. Image Process. 12(8), 882\u2013889 (2003)","journal-title":"IEEE Trans. Image Process."},{"issue":"6","key":"1625_CR26","doi-asserted-by":"publisher","first-page":"915","DOI":"10.1109\/TCSVT.2014.2302380","volume":"24","author":"J Zhang","year":"2014","unstructured":"Zhang, J., Zhao, D., Xiong, R., Ma, S., Gao, W.: Image restoration using joint statistical modeling in a space-transform domain. IEEE Trans. Circuits Syst. Video Technol. 24(6), 915\u2013928 (2014)","journal-title":"IEEE Trans. Circuits Syst. Video Technol."},{"key":"1625_CR27","doi-asserted-by":"crossref","unstructured":"Bertalmio, M., Sapiro, G., Caselles, V., Ballester, C.: Image inpainting. In: Proceedings of the 27th Annual Conference on Computer Graphics and Interactive Techniques, pp. 417\u2013424 (2000)","DOI":"10.1145\/344779.344972"},{"issue":"8","key":"1625_CR28","doi-asserted-by":"publisher","first-page":"1200","DOI":"10.1109\/83.935036","volume":"10","author":"C Ballester","year":"2001","unstructured":"Ballester, C., Bertalmio, M., Caselles, V., Sapiro, G., Verdera, J.: Filling-in by joint interpolation of vector fields and gray levels. IEEE Trans. Image Process. 10(8), 1200\u20131211 (2001)","journal-title":"IEEE Trans. Image Process."},{"issue":"10","key":"1625_CR29","doi-asserted-by":"publisher","first-page":"10393","DOI":"10.1109\/TCYB.2021.3069836","volume":"52","author":"J Wang","year":"2022","unstructured":"Wang, J., Ma, Z., Nie, F., Li, X.: Progressive self-supervised clustering with novel category discovery. IEEE Trans. Cybern. 52(10), 10393\u201310406 (2022). https:\/\/doi.org\/10.1109\/TCYB.2021.3069836","journal-title":"IEEE Trans. Cybern."},{"issue":"8","key":"1625_CR30","doi-asserted-by":"publisher","first-page":"3802","DOI":"10.1007\/s00034-019-01029-w","volume":"38","author":"G Sridevi","year":"2019","unstructured":"Sridevi, G., Srinivas Kumar, S.: Image inpainting based on fractional-order nonlinear diffusion for image reconstruction. Circuits Syst. Signal Process. 38(8), 3802\u20133817 (2019)","journal-title":"Circuits Syst. Signal Process."},{"key":"1625_CR31","doi-asserted-by":"crossref","unstructured":"Pathak, D., Krahenbuhl, P., Donahue, J., Darrell, T., Efros, A.A.: Context encoders: feature learning by inpainting. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 2536\u20132544 (2016)","DOI":"10.1109\/CVPR.2016.278"},{"key":"1625_CR32","unstructured":"Goodfellow, I., Pouget-Abadie, J., Mirza, M., Xu, B., Warde-Farley, D., Ozair, S., Courville, A., Bengio, Y.: Generative adversarial nets. In: Advances in Neural Information Processing Systems, vol .27 (2014)"},{"key":"1625_CR33","doi-asserted-by":"crossref","unstructured":"Liu, G., Reda, F.A., Shih, K.J., Wang, T.-C., Tao, A., Catanzaro, B.: Image inpainting for irregular holes using partial convolutions. In: Proceedings of the European Conference on Computer Vision (ECCV), pp. 85\u2013100 (2018)","DOI":"10.1007\/978-3-030-01252-6_6"},{"key":"1625_CR34","doi-asserted-by":"crossref","unstructured":"Zeng, Y., Fu, J., Chao, H., Guo, B.: Learning pyramid-context encoder network for high-quality image inpainting. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 1486\u20131494 (2019)","DOI":"10.1109\/CVPR.2019.00158"},{"key":"1625_CR35","doi-asserted-by":"crossref","unstructured":"Ronneberger, O., Fischer, P., Brox, T.: U-net: Convolutional networks for biomedical image segmentation. In: International Conference on Medical Image Computing and Computer-assisted Intervention, pp. 234\u2013241 (2015). Springer","DOI":"10.1007\/978-3-319-24574-4_28"},{"issue":"4","key":"1625_CR36","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3072959.3073659","volume":"36","author":"S Iizuka","year":"2017","unstructured":"Iizuka, S., Simo-Serra, E., Ishikawa, H.: Globally and locally consistent image completion. ACM Trans. Graph. (ToG) 36(4), 1\u201314 (2017)","journal-title":"ACM Trans. Graph. (ToG)"},{"key":"1625_CR37","doi-asserted-by":"crossref","unstructured":"Yu, J., Lin, Z., Yang, J., Shen, X., Lu, X., Huang, T.S.: Generative image inpainting with contextual attention. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 5505\u20135514 (2018)","DOI":"10.1109\/CVPR.2018.00577"},{"key":"1625_CR38","doi-asserted-by":"crossref","unstructured":"Suvorov, R., Logacheva, E., Mashikhin, A., Remizova, A., Ashukha, A., Silvestrov, A., Kong, N., Goka, H., Park, K., Lempitsky, V.: Resolution-robust large mask inpainting with fourier convolutions. In: Proceedings of the IEEE\/CVF Winter Conference on Applications of Computer Vision, pp. 2149\u20132159 (2022)","DOI":"10.1109\/WACV51458.2022.00323"},{"key":"1625_CR39","doi-asserted-by":"publisher","first-page":"2382","DOI":"10.1109\/TMM.2022.3146774","volume":"25","author":"J Wang","year":"2022","unstructured":"Wang, J., Chen, S., Wu, Z., Jiang, Y.-G.: FT-TDR: frequency-guided transformer and top-down refinement network for blind face inpainting. IEEE Trans. Multimedia 25, 2382\u20132392 (2022)","journal-title":"IEEE Trans. Multimedia"},{"key":"1625_CR40","doi-asserted-by":"crossref","unstructured":"Yu, J., Lin, Z., Yang, J., Shen, X., Lu, X., Huang, T.S.: Free-form image inpainting with gated convolution. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 4471\u20134480 (2019)","DOI":"10.1109\/ICCV.2019.00457"},{"key":"1625_CR41","unstructured":"Simonyan, K., Zisserman, A.: Two-stream convolutional networks for action recognition in videos. In: Advances in Neural Information Processing Systems, vol. 27 (2014)"},{"key":"1625_CR42","doi-asserted-by":"crossref","unstructured":"Zhu, X., Wang, Y., Dai, J., Yuan, L., Wei, Y.: Flow-guided feature aggregation for video object detection. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 408\u2013417 (2017)","DOI":"10.1109\/ICCV.2017.52"},{"key":"1625_CR43","doi-asserted-by":"crossref","unstructured":"Chan, K.C., Wang, X., Yu, K., Dong, C., Loy, C.C.: BasicVSR: the search for essential components in video super-resolution and beyond. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 4947\u20134956 (2021)","DOI":"10.1109\/CVPR46437.2021.00491"},{"key":"1625_CR44","unstructured":"Liang, J., Cao, J., Fan, Y., Zhang, K., Ranjan, R., Li, Y., Timofte, R., Van\u00a0Gool, L.: VRT: a video restoration transformer. arXiv preprint arXiv:2201.12288 (2022)"},{"key":"1625_CR45","doi-asserted-by":"crossref","unstructured":"Cheng, J., Tsai, Y.-H., Wang, S., Yang, M.-H.: SegFlow: joint learning for video object segmentation and optical flow. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 686\u2013695 (2017)","DOI":"10.1109\/ICCV.2017.81"},{"key":"1625_CR46","doi-asserted-by":"crossref","unstructured":"Tsai, Y.-H., Yang, M.-H., Black, M.J.: Video segmentation via object flow. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 3899\u20133908 (2016)","DOI":"10.1109\/CVPR.2016.423"},{"issue":"5","key":"1625_CR47","doi-asserted-by":"publisher","first-page":"1038","DOI":"10.1109\/TPAMI.2019.2958083","volume":"42","author":"D Kim","year":"2019","unstructured":"Kim, D., Woo, S., Lee, J.-Y., Kweon, I.S.: Recurrent temporal aggregation framework for deep video inpainting. IEEE Trans. Pattern Anal. Mach. Intell. 42(5), 1038\u20131052 (2019)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"1625_CR48","doi-asserted-by":"crossref","unstructured":"Zhang, K., Fu, J., Liu, D.: Inertia-guided flow completion and style fusion for video inpainting. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 5982\u20135991 (2022)","DOI":"10.1109\/CVPR52688.2022.00589"},{"key":"1625_CR49","doi-asserted-by":"crossref","unstructured":"Zou, X., Yang, L., Liu, D., Lee, Y.J.: Progressive temporal feature alignment network for video inpainting. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 16448\u201316457 (2021)","DOI":"10.1109\/CVPR46437.2021.01618"},{"key":"1625_CR50","doi-asserted-by":"crossref","unstructured":"Dai, J., Qi, H., Xiong, Y., Li, Y., Zhang, G., Hu, H., Wei, Y.: Deformable convolutional networks. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 764\u2013773 (2017)","DOI":"10.1109\/ICCV.2017.89"},{"key":"1625_CR51","doi-asserted-by":"crossref","unstructured":"Tian, Y., Zhang, Y., Fu, Y., Xu, C.: TDAN: temporally-deformable alignment network for video super-resolution. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 3360\u20133369 (2020)","DOI":"10.1109\/CVPR42600.2020.00342"},{"key":"1625_CR52","doi-asserted-by":"crossref","unstructured":"Wang, X., Chan, K.C., Yu, K., Dong, C., Change\u00a0Loy, C.: EDVR: video restoration with enhanced deformable convolutional networks. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition Workshops (2019)","DOI":"10.1109\/CVPRW.2019.00247"},{"key":"1625_CR53","doi-asserted-by":"crossref","unstructured":"Chan, K.C., Wang, X., Yu, K., Dong, C., Loy, C.C.: Understanding deformable alignment in video super-resolution. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol. 35, pp. 973\u2013981 (2021)","DOI":"10.1609\/aaai.v35i2.16181"},{"key":"1625_CR54","doi-asserted-by":"crossref","unstructured":"Chan, K.C., Zhou, S., Xu, X., Loy, C.C.: BasicVSR++: improving video super-resolution with enhanced propagation and alignment. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 5972\u20135981 (2022)","DOI":"10.1109\/CVPR52688.2022.00588"},{"key":"1625_CR55","doi-asserted-by":"publisher","first-page":"10831","DOI":"10.1109\/TCSVT.2024.3411061","volume":"34","author":"X Zhou","year":"2024","unstructured":"Zhou, X., Fu, C., Huang, H., He, R.: Dynamic graph memory bank for video inpainting. IEEE Trans. Circuits Syst. Video Technol. 34, 10831\u201310844 (2024)","journal-title":"IEEE Trans. Circuits Syst. Video Technol."},{"key":"1625_CR56","doi-asserted-by":"crossref","unstructured":"Wang, X., Girshick, R., Gupta, A., He, K.: Non-local neural networks. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 7794\u20137803 (2018)","DOI":"10.1109\/CVPR.2018.00813"},{"key":"1625_CR57","unstructured":"Vaswani, A., Shazeer, N., Parmar, N., Uszkoreit, J., Jones, L., Gomez, A.N., Kaiser, \u0141., Polosukhin, I.: Attention is all you need. In: Advances in Neural Information Processing Systems, pp. 5998\u20136008 (2017)"},{"issue":"3","key":"1625_CR58","doi-asserted-by":"publisher","first-page":"1034","DOI":"10.1109\/TCSVT.2021.3072412","volume":"32","author":"K Zhang","year":"2022","unstructured":"Zhang, K., Li, Y., Wang, J., Cambria, E., Li, X.: Real-time video emotion recognition based on reinforcement learning and domain knowledge. IEEE Trans. Circuits Syst. Video Technol. 32(3), 1034\u20131047 (2022). https:\/\/doi.org\/10.1109\/TCSVT.2021.3072412","journal-title":"IEEE Trans. Circuits Syst. Video Technol."},{"key":"1625_CR59","doi-asserted-by":"publisher","first-page":"4104","DOI":"10.1109\/TIP.2022.3180585","volume":"31","author":"Y Liu","year":"2022","unstructured":"Liu, Y., Yuan, J., Tu, Z.: Motion-driven visual tempo learning for video-based action recognition. IEEE Trans. Image Process. 31, 4104\u20134116 (2022)","journal-title":"IEEE Trans. Image Process."},{"key":"1625_CR60","doi-asserted-by":"crossref","unstructured":"Lee, S., Oh, S.W., Won, D., Kim, S.J.: Copy-and-paste networks for deep video inpainting. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 4413\u20134421 (2019)","DOI":"10.1109\/ICCV.2019.00451"},{"key":"1625_CR61","doi-asserted-by":"crossref","unstructured":"Oh, S.W., Lee, S., Lee, J.-Y., Kim, S.J.: Onion-peel networks for deep video completion. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 4403\u20134412 (2019)","DOI":"10.1109\/ICCV.2019.00451"},{"key":"1625_CR62","unstructured":"Dosovitskiy, A., Beyer, L., Kolesnikov, A., Weissenborn, D., Zhai, X., Unterthiner, T., Dehghani, M., Minderer, M., Heigold, G., Gelly, S., et al.: An image is worth 16x16 words: transformers for image recognition at scale. arXiv preprint arXiv:2010.11929 (2020)"},{"key":"1625_CR63","unstructured":"Liu, Z., Ning, J., Cao, Y., Wei, Y., Zhang, Z., Lin, S., Hu, H.: Video Swin transformer, pp. 2106\u201313230. arXiv e-prints arXiv:2106.13230 [cs.CV] (2021)"},{"key":"1625_CR64","first-page":"12493","volume":"34","author":"M Patrick","year":"2021","unstructured":"Patrick, M., Campbell, D., Asano, Y., Misra, I., Metze, F., Feichtenhofer, C., Vedaldi, A., Henriques, J.F.: Keeping your eye on the ball: trajectory attention in video transformers. Adv. Neural Inf. Process. Syst. 34, 12493\u201312506 (2021)","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"1625_CR65","doi-asserted-by":"crossref","unstructured":"Liu, R., Deng, H., Huang, Y., Shi, X., Lu, L., Sun, W., Wang, X., Dai, J., Li, H.: FuseFormer: fusing fine-grained information in transformers for video inpainting. In: International Conference on Computer Vision (ICCV) (2021)","DOI":"10.1109\/ICCV48922.2021.01378"},{"key":"1625_CR66","doi-asserted-by":"crossref","unstructured":"Cai, J., Li, C., Tao, X., Yuan, C., Tai, Y.-W.: DEVIT: deformed vision transformers in video inpainting. In: Proceedings of the 30th ACM International Conference on Multimedia, pp. 779\u2013789 (2022)","DOI":"10.1145\/3503161.3548395"},{"key":"1625_CR67","doi-asserted-by":"crossref","unstructured":"Zhang, K., Fu, J., Liu, D.: Flow-guided transformer for video inpainting. In: Computer Vision\u2013ECCV 2022: 17th European Conference, Tel Aviv, Israel, October 23\u201327, 2022, Proceedings, Part XVIII, pp. 74\u201390. Springer (2022)","DOI":"10.1007\/978-3-031-19797-0_5"},{"key":"1625_CR68","unstructured":"Ji, Z., Su, Y., Zhang, Y., Hou, J., Pang, Y., Han, J.: RAFormer: redundancy-aware transformer for video wire inpainting. arXiv preprint arXiv:2404.15802 (2024)"},{"key":"1625_CR69","doi-asserted-by":"crossref","unstructured":"Zhou, S., Li, C., Chan, K.C., Loy, C.C.: ProPainter: improving propagation and transformer for video inpainting. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 10477\u201310486 (2023)","DOI":"10.1109\/ICCV51070.2023.00961"},{"key":"1625_CR70","doi-asserted-by":"publisher","first-page":"5860","DOI":"10.1109\/TMM.2023.3340089","volume":"26","author":"S Li","year":"2023","unstructured":"Li, S., Zhu, S., Ge, Y., Zeng, B., Imran, M.A., Abbasi, Q.H., Cooper, J.: Depth-guided deep video inpainting. IEEE Trans. Multimedia 26, 5860\u20135871 (2023)","journal-title":"IEEE Trans. Multimedia"},{"key":"1625_CR71","unstructured":"Chu, X., Tian, Z., Wang, Y., Zhang, B., Ren, H., Wei, X., Xia, H., Shen, C.: Twins: revisiting the design of spatial attention in vision transformers. In: Advances in Neural Information Processing Systems, vol. 34 (2021)"},{"key":"1625_CR72","doi-asserted-by":"crossref","unstructured":"Wang, W., Xie, E., Li, X., Fan, D.-P., Song, K., Liang, D., Lu, T., Luo, P., Shao, L.: Pyramid vision transformer: a versatile backbone for dense prediction without convolutions. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 568\u2013578 (2021)","DOI":"10.1109\/ICCV48922.2021.00061"},{"issue":"12","key":"1625_CR73","doi-asserted-by":"publisher","first-page":"63","DOI":"10.1109\/MSPEC.1967.5217220","volume":"4","author":"EO Brigham","year":"1967","unstructured":"Brigham, E.O., Morrow, R.: The fast Fourier transform. IEEE Spectr. 4(12), 63\u201370 (1967)","journal-title":"IEEE Spectr."},{"key":"1625_CR74","doi-asserted-by":"crossref","unstructured":"Ranjan, A., Black, M.J.: Optical flow estimation using a spatial pyramid network. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 4161\u20134170 (2017)","DOI":"10.1109\/CVPR.2017.291"},{"key":"1625_CR75","unstructured":"Soltani, R., Jiang, H.: Higher order recurrent neural networks. arXiv preprint arXiv:1605.00064 (2016)"},{"key":"1625_CR76","doi-asserted-by":"crossref","unstructured":"Sun, D., Yang, X., Liu, M.-Y., Kautz, J.: PWC-Net: CNNs for optical flow using pyramid, warping, and cost volume. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 8934\u20138943 (2018)","DOI":"10.1109\/CVPR.2018.00931"},{"key":"1625_CR77","doi-asserted-by":"publisher","DOI":"10.1016\/j.engappai.2023.106323","volume":"123","author":"G Li","year":"2023","unstructured":"Li, G., Zhang, K., Su, Y., Wang, J.: Feature pre-inpainting enhanced transformer for video inpainting. Eng. Appl. Artif. Intell. 123, 106323 (2023)","journal-title":"Eng. Appl. Artif. Intell."},{"key":"1625_CR78","doi-asserted-by":"crossref","unstructured":"Chang, Y.-L., Liu, Z.Y., Lee, K.-Y., Hsu, W.: Free-form video inpainting with 3D gated convolution and temporal PatchGAN. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 9066\u20139075 (2019)","DOI":"10.1109\/ICCV.2019.00916"},{"key":"1625_CR79","doi-asserted-by":"crossref","unstructured":"Xu, N., Yang, L., Fan, Y., Yue, D., Liang, Y., Yang, J., Huang, T.: YouTube-VOS: a large-scale video object segmentation benchmark. arXiv preprint arXiv:1809.03327 (2018)","DOI":"10.1007\/978-3-030-01228-1_36"},{"key":"1625_CR80","doi-asserted-by":"crossref","unstructured":"Perazzi, F., Pont-Tuset, J., McWilliams, B., Van\u00a0Gool, L., Gross, M., Sorkine-Hornung, A.: A benchmark dataset and evaluation methodology for video object segmentation. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 724\u2013732 (2016)","DOI":"10.1109\/CVPR.2016.85"},{"key":"1625_CR81","unstructured":"Chang, Y.-L., Liu, Z.Y., Lee, K.-Y., Hsu, W.: Learnable gated temporal shift module for deep video inpainting. arXiv preprint arXiv:1907.01131 (2019)"},{"key":"1625_CR82","unstructured":"Wang, T.-C., Liu, M.-Y., Zhu, J.-Y., Liu, G., Tao, A., Kautz, J., Catanzaro, B.: Video-to-video synthesis. arXiv preprint arXiv:1808.06601 (2018)"},{"key":"1625_CR83","doi-asserted-by":"crossref","unstructured":"Lai, W.-S., Huang, J.-B., Wang, O., Shechtman, E., Yumer, E., Yang, M.-H.: Learning blind video temporal consistency. In: Proceedings of the European Conference on Computer Vision (ECCV), pp. 170\u2013185 (2018)","DOI":"10.1007\/978-3-030-01267-0_11"}],"container-title":["Multimedia Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00530-024-01625-0.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00530-024-01625-0\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00530-024-01625-0.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,2,28]],"date-time":"2025-02-28T11:03:26Z","timestamp":1740740606000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00530-024-01625-0"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,12,29]]},"references-count":83,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2025,2]]}},"alternative-id":["1625"],"URL":"https:\/\/doi.org\/10.1007\/s00530-024-01625-0","relation":{},"ISSN":["0942-4962","1432-1882"],"issn-type":[{"value":"0942-4962","type":"print"},{"value":"1432-1882","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,12,29]]},"assertion":[{"value":"18 April 2024","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"15 December 2024","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"29 December 2024","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare that they have no known competing financial interests or personal relationships that could have appeared to influence the work reported in this paper.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}],"article-number":"32"}}