{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,4,18]],"date-time":"2025-04-18T04:11:36Z","timestamp":1744949496754,"version":"3.40.4"},"reference-count":57,"publisher":"Springer Science and Business Media LLC","issue":"5","license":[{"start":{"date-parts":[[2024,11,22]],"date-time":"2024-11-22T00:00:00Z","timestamp":1732233600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,11,22]],"date-time":"2024-11-22T00:00:00Z","timestamp":1732233600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int J Comput Vis"],"published-print":{"date-parts":[[2025,5]]},"DOI":"10.1007\/s11263-024-02264-8","type":"journal-article","created":{"date-parts":[[2024,11,22]],"date-time":"2024-11-22T20:56:25Z","timestamp":1732308985000},"page":"2345-2370","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["NAFT and SynthStab: A RAFT-Based Network and a Synthetic Dataset for Digital Video Stabilization"],"prefix":"10.1007","volume":"133","author":[{"given":"Marcos Roberto","family":"e Souza","sequence":"first","affiliation":[]},{"given":"Helena de Almeida","family":"Maia","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0125-630X","authenticated-orcid":false,"given":"Helio","family":"Pedrini","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,11,22]]},"reference":[{"key":"2264_CR1","unstructured":"Ali, MK., Yu, S., & Kim, TH. (2021) Deep Motion Blind Video Stabilization. In: 32nd British Machine Vision Conference, British Machine Vision Association"},{"issue":"1","key":"2264_CR2","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3363550","volume":"39","author":"J Choi","year":"2020","unstructured":"Choi, J., & Kweon, I. S. (2020). Deep iterative frame interpolation for full-frame video stabilization. ACM Transactions on Graphics, 39(1), 1\u20139.","journal-title":"ACM Transactions on Graphics"},{"key":"2264_CR3","doi-asserted-by":"crossref","unstructured":"Dosovitskiy, A., Fischer, P., Ilg, E., et\u00a0al. (2015) FlowNet: learning optical flow with convolutional networks. In: IEEE International Conference on Computer Vision, pp 2758\u20132766","DOI":"10.1109\/ICCV.2015.316"},{"key":"2264_CR4","doi-asserted-by":"crossref","unstructured":"Eldesokey, A., & Felsberg, M. (2021) Normalized Convolution Upsampling for Refined Optical Flow Estimation. arXiv preprint arXiv:2102.06979","DOI":"10.5220\/0010343707420752"},{"issue":"9","key":"2264_CR5","doi-asserted-by":"publisher","first-page":"2061","DOI":"10.1109\/TCSVT.2017.2707479","volume":"28","author":"D Ghadiyaram","year":"2017","unstructured":"Ghadiyaram, D., Pan, J., Bovik, A. C., et al. (2017). In-capture mobile video distortions: A study of subjective behavior and objective algorithms. IEEE Transactions on Circuits and Systems for Video Technology, 28(9), 2061\u20132077.","journal-title":"IEEE Transactions on Circuits and Systems for Video Technology"},{"key":"2264_CR6","unstructured":"Gielisse, AS. (2023) Optical flow upsamplers ignore details: neighborhood attention transformers for convex upsampling. PhD thesis, Delft University of Technology"},{"issue":"1","key":"2264_CR7","doi-asserted-by":"publisher","first-page":"2","DOI":"10.1145\/1404880.1404882","volume":"5","author":"ML Gleicher","year":"2008","unstructured":"Gleicher, M. L., & Liu, F. (2008). Re-cinematography: Improving the camerawork of casual video. ACM Transactions on Multimedia Computing, Communications, and Applications, 5(1), 2.","journal-title":"ACM Transactions on Multimedia Computing, Communications, and Applications"},{"key":"2264_CR8","doi-asserted-by":"crossref","unstructured":"Grundmann, M., Kwatra, V., & Essa, I. (2011) Auto-directed video stabilization with robust L1 optimal camera paths. in: conference on computer vision and pattern recognition. IEEE, pp 225\u2013232","DOI":"10.1109\/CVPR.2011.5995525"},{"issue":"2","key":"2264_CR9","doi-asserted-by":"publisher","first-page":"328","DOI":"10.1109\/TPAMI.2007.1166","volume":"30","author":"H Hirschmuller","year":"2007","unstructured":"Hirschmuller, H. (2007). Stereo processing by semiglobal matching and mutual information. IEEE Transactions on Pattern Analysis and Machine Intelligence, 30(2), 328\u2013341.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"2264_CR10","unstructured":"Huang, CH., Yin, H., Tai, YW., et\u00a0al. (2019) StableNet: semi-online, multi-scale deep video stabilization. arXiv preprint arXiv:1907.10283 1:1\u20138"},{"key":"2264_CR11","doi-asserted-by":"crossref","unstructured":"Huang, Z., Shi, X., Zhang, C., et\u00a0al. (2022) FlowFormer: a transformer architecture for optical flow. In: European Conference on Computer Vision, Springer, pp 668\u2013685","DOI":"10.1007\/978-3-031-19790-1_40"},{"key":"2264_CR12","doi-asserted-by":"crossref","unstructured":"Ilg, E., Mayer, N., Saikia, T., et\u00a0al. (2017) Flownet 2.0: evolution of optical flow estimation with deep networks. In: IEEE Conference on Computer Vision and Pattern Recognition, pp 2462\u20132470","DOI":"10.1109\/CVPR.2017.179"},{"key":"2264_CR13","doi-asserted-by":"crossref","unstructured":"Ito, MS., & Izquierdo, E. (2019) A dataset and evaluation framework for deep learning based video stabilization systems. In: Visual Communications and Image Processing, IEEE, pp 1\u20134","DOI":"10.1109\/VCIP47243.2019.8966057"},{"key":"2264_CR14","unstructured":"James, JG., Jain, D., & Rajwade, A. (2023) GlobalFlowNet: video stabilization using deep distilled global motion estimates. In: IEEE\/CVF Winter Conference on Applications of Computer Vision, pp 5078\u20135087"},{"key":"2264_CR15","doi-asserted-by":"crossref","unstructured":"Jiang, S., Campbell, D., Lu, Y., et\u00a0al. (2021) Learning to estimate hidden motions with global motion aggregation. In: IEEE International Conference on Computer Vision, pp 9772\u20139781","DOI":"10.1109\/ICCV48922.2021.00963"},{"key":"2264_CR16","doi-asserted-by":"crossref","unstructured":"Jung, H., Hui, Z., Luo, L., et\u00a0al. (2023) AnyFlow: arbitrary scale optical flow with implicit neural representation. In: IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp 5455\u20135465","DOI":"10.1109\/CVPR52729.2023.00528"},{"key":"2264_CR17","doi-asserted-by":"crossref","unstructured":"Lee, YC., Tseng, KW., Chen, YT., et\u00a0al. (2021) 3D Video stabilization with depth estimation by CNN-based optimization. In: IEEE Conference on Computer Vision and Pattern Recognition, pp 10621\u201310630","DOI":"10.1109\/CVPR46437.2021.01048"},{"key":"2264_CR18","doi-asserted-by":"crossref","unstructured":"Li, Z., Lu, CZ., Qin, J., et\u00a0al. (2022) Towards an End-to-End Framework for Flow-Guided Video Inpainting. In: IEEE Conference on Computer Vision and Pattern Recognition, pp 17562\u201317571","DOI":"10.1109\/CVPR52688.2022.01704"},{"key":"2264_CR19","doi-asserted-by":"crossref","unstructured":"Lipson, L., Teed, Z., Deng, J. (2021) RAFT-stereo: Multilevel recurrent field transforms for stereo matching. In: International Conference on 3D Vision, IEEE, pp 218\u2013227","DOI":"10.1109\/3DV53792.2021.00032"},{"issue":"4","key":"2264_CR20","first-page":"1","volume":"32","author":"S Liu","year":"2013","unstructured":"Liu, S., Yuan, L., Tan, P., et al. (2013). Bundled camera paths for video stabilization. ACM Transactions on Graphics, 32(4), 1\u201310.","journal-title":"ACM Transactions on Graphics"},{"key":"2264_CR21","doi-asserted-by":"crossref","unstructured":"Liu, S., Yuan, L., Tan, P., et\u00a0al. (2014) Steadyflow: spatially smooth optical flow for video stabilization. In: IEEE conference on computer vision and pattern recognition, pp 4209\u20134216","DOI":"10.1109\/CVPR.2014.536"},{"key":"2264_CR22","doi-asserted-by":"crossref","unstructured":"Liu, YL., Lai, WS., Yang, MH., et\u00a0al. (2021) Hybrid neural fusion for full-frame video stabilization. In: IEEE international conference on computer vision, pp 2299\u20132308","DOI":"10.1109\/ICCV48922.2021.00230"},{"issue":"7","key":"2264_CR23","doi-asserted-by":"publisher","first-page":"1150","DOI":"10.1109\/TPAMI.2006.141","volume":"28","author":"Y Matsushita","year":"2006","unstructured":"Matsushita, Y., Ofek, E., Ge, W., et al. (2006). Full-frame video stabilization with motion inpainting. IEEE Transactions on Pattern Analysis and Machine Intelligence, 28(7), 1150\u20131163.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"2264_CR24","doi-asserted-by":"crossref","unstructured":"Perazzi, F., Pont-Tuset, J., McWilliams, B., et\u00a0al. (2016) a benchmark dataset and evaluation methodology for video object segmentation. In: IEEE conference on computer vision and pattern recognition, pp 724\u2013732","DOI":"10.1109\/CVPR.2016.85"},{"key":"2264_CR25","doi-asserted-by":"crossref","unstructured":"Qu, H., Song, L., & Xue, G. (2013) Shaking video synthesis for video stabilization performance assessment. in: visual communications and image processing, IEEE, pp 1\u20136","DOI":"10.1109\/VCIP.2013.6706422"},{"key":"2264_CR26","doi-asserted-by":"crossref","unstructured":"Ranjan, A., Black, MJ. (2017) Optical flow estimation using a spatial pyramid network. In: IEEE conference on computer vision and pattern recognition, pp 4161\u20134170","DOI":"10.1109\/CVPR.2017.291"},{"key":"2264_CR27","doi-asserted-by":"crossref","unstructured":"Rao, Q., Yu, X., Navasardyan, S., et\u00a0al. (2023) Sim2RealVS: a new benchmark for video stabilization with a strong baseline. In: IEEE\/CVF winter conference on applications of computer vision, pp 5406\u20135415","DOI":"10.1109\/WACV56688.2023.00537"},{"key":"2264_CR28","doi-asserted-by":"crossref","unstructured":"Shah, S., Dey, D., Lovett, C., et\u00a0al. (2018) AirSim: High-Fidelity Visual and Physical Simulation for Autonomous Vehicles. In: Field and Service Robotics. Springer International Publishing","DOI":"10.1007\/978-3-319-67361-5_40"},{"key":"2264_CR29","doi-asserted-by":"crossref","unstructured":"Smith, LN., & Topin, N. (2019) Super-convergence: very fast training of neural networks using large learning rates. in: artificial intelligence and machine learning for multi-domain operations applications, SPIE, pp 369\u2013386","DOI":"10.1117\/12.2520589"},{"key":"2264_CR30","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1186\/s13640-020-00508-4","volume":"2020","author":"MR Souza","year":"2020","unstructured":"Souza, M. R., & Pedrini, H. (2020). Visual Rhythms for Qualitative Evaluation of Video Stabilization. EURASIP Journal on Image and Video Processing, 2020, 1\u201319.","journal-title":"EURASIP Journal on Image and Video Processing"},{"issue":"12","key":"2264_CR31","doi-asserted-by":"publisher","first-page":"2204","DOI":"10.1049\/iet-ipr.2018.5445","volume":"12","author":"MR Souza","year":"2018","unstructured":"Souza, M. R., da Fonseca, L. F. R., & Pedrini, H. (2018). Improvement of global motion estimation in two-dimensional digital video stabilisation methods. Image Processing, 12(12), 2204\u20132211.","journal-title":"Image Processing"},{"key":"2264_CR32","doi-asserted-by":"publisher","first-page":"409","DOI":"10.1016\/j.neucom.2020.04.035","volume":"402","author":"MR Souza","year":"2020","unstructured":"Souza, M. R., de Almeida, M. H., Vieira, M. B., et al. (2020). Survey on visual rhythms: A spatio-temporal representation for video sequences. Neurocomputing, 402, 409\u2013422.","journal-title":"Neurocomputing"},{"issue":"3","key":"2264_CR33","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3494525","volume":"55","author":"MR Souza","year":"2022","unstructured":"Souza, M. R., Maia, H. A., & Pedrini, H. (2022). Survey on digital video stabilization: Concepts, methods, and challenges. ACM Computing Surveys, 55(3), 1\u201337.","journal-title":"ACM Computing Surveys"},{"key":"2264_CR34","doi-asserted-by":"crossref","unstructured":"Sui, X., Li, S., Geng, X., et\u00a0al. (2022) CRAFT: Cross-attentional flow transformer for robust optical flow. In: IEEE conference on computer vision and pattern recognition, pp 17602\u201317611","DOI":"10.1109\/CVPR52688.2022.01708"},{"key":"2264_CR35","doi-asserted-by":"crossref","unstructured":"Sun, D., Yang, X., Liu, MY., et\u00a0al. (2018) PWC-Net: CNNs for optical flow using pyramid, warping, and cost volume. In: IEEE conference on computer vision and pattern recognition, pp 8934\u20138943","DOI":"10.1109\/CVPR.2018.00931"},{"key":"2264_CR36","doi-asserted-by":"crossref","unstructured":"Teed, Z., & Deng, J. (2020) RAFT: recurrent all-pairs field transforms for optical flow. In: European conference on computer vision, Springer, pp 402\u2013419","DOI":"10.1007\/978-3-030-58536-5_24"},{"key":"2264_CR37","doi-asserted-by":"crossref","unstructured":"Teed, Z., & Deng, J. (2021) RAFT-3D: Scene flow using rigid-motion embeddings. In: IEEE\/CVF conference on computer vision and pattern recognition, pp 8375\u20138384","DOI":"10.1109\/CVPR46437.2021.00827"},{"issue":"5","key":"2264_CR38","doi-asserted-by":"publisher","first-page":"2283","DOI":"10.1109\/TIP.2018.2884280","volume":"28","author":"M Wang","year":"2019","unstructured":"Wang, M., Yang, G. Y., Lin, J. K., et al. (2019). Deep online video stabilization with multi-grid warping transformation learning. IEEE Transactions on Image Processing, 28(5), 2283\u20132292.","journal-title":"IEEE Transactions on Image Processing"},{"key":"2264_CR39","first-page":"1","volume":"31","author":"TC Wang","year":"2018","unstructured":"Wang, T. C., Liu, M. Y., Zhu, J. Y., et al. (2018). Video-to-video synthesis. Advances in Neural Information Processing Systems, 31, 1\u20138.","journal-title":"Advances in Neural Information Processing Systems"},{"issue":"4","key":"2264_CR40","doi-asserted-by":"publisher","first-page":"600","DOI":"10.1109\/TIP.2003.819861","volume":"13","author":"Z Wang","year":"2004","unstructured":"Wang, Z., Bovik, A. C., Sheikh, H. R., et al. (2004). Image quality assessment: from error visibility to structural similarity. IEEE Transactions on Image Processing, 13(4), 600\u2013612.","journal-title":"IEEE Transactions on Image Processing"},{"key":"2264_CR41","unstructured":"Wu, H., Liao, L., Chen, C., et\u00a0al. (2022) Disentangling aesthetic and technical effects for video quality assessment of user generated content. arXiv preprint arXiv:2211.04894 1:1\u20138"},{"key":"2264_CR42","doi-asserted-by":"crossref","unstructured":"Xu, H., Zhang, J., Cai, J., et\u00a0al. (2022a) GMFlow: learning optical flow via global matching. In: IEEE conference on computer vision and pattern recognition, pp 8121\u20138130","DOI":"10.1109\/CVPR52688.2022.00795"},{"key":"2264_CR43","volume-title":"Unifying Flow","author":"H Xu","year":"2023","unstructured":"Xu, H., Zhang, J., Cai, J., et al. (2023). Unifying Flow. Transactions on Pattern Analysis and Machine Intelligence: Stereo and Depth Estimation."},{"key":"2264_CR44","doi-asserted-by":"crossref","unstructured":"Xu, N., Yang, L., Fan, Y., et\u00a0al. (2018a) YouTube-VOS: sequence-to-sequence video object segmentation. In: European Conference on Computer Vision, pp 585\u2013601","DOI":"10.1007\/978-3-030-01228-1_36"},{"key":"2264_CR45","doi-asserted-by":"crossref","unstructured":"Xu, SZ., Hu, J., Wang, M., et\u00a0al. (2018b) Deep video stabilization using adversarial networks. In: Computer Graphics Forum, Wiley Online Library, pp 267\u2013276","DOI":"10.1111\/cgf.13566"},{"key":"2264_CR46","doi-asserted-by":"crossref","unstructured":"Xu, Y., Zhang, J., & Tao, D. (2021) Out-of-boundary view synthesis towards full-frame video stabilization. In: IEEE\/CVF International Conference on Computer Vision, pp 4842\u20134851","DOI":"10.1109\/ICCV48922.2021.00480"},{"key":"2264_CR47","doi-asserted-by":"publisher","first-page":"4306","DOI":"10.1109\/TIP.2022.3182887","volume":"31","author":"Y Xu","year":"2022","unstructured":"Xu, Y., Zhang, J., Maybank, S. J., et al. (2022). DUT: Learning video stabilization by simply watching unstable videos. IEEE Transactions on Image Processing, 31, 4306\u20134320.","journal-title":"IEEE Transactions on Image Processing"},{"key":"2264_CR48","doi-asserted-by":"crossref","unstructured":"Yu, J., & Ramamoorthi, R. (2020) Learning video stabilization using optical flow. In: IEEE conference on computer vision and pattern recognition, pp 8159\u20138167","DOI":"10.1109\/CVPR42600.2020.00818"},{"issue":"4","key":"2264_CR49","doi-asserted-by":"publisher","first-page":"1681","DOI":"10.1109\/TVCG.2018.2817209","volume":"25","author":"L Zhang","year":"2018","unstructured":"Zhang, L., Zheng, Q. Z., & Huang, H. (2018). Intrinsic motion stability assessment for video stabilization. IEEE Transactions on Visualization and Computer Graphics, 25(4), 1681\u20131692.","journal-title":"IEEE Transactions on Visualization and Computer Graphics"},{"issue":"12","key":"2264_CR50","doi-asserted-by":"publisher","first-page":"6051","DOI":"10.1109\/TIP.2018.2864873","volume":"27","author":"L Zhang","year":"2018","unstructured":"Zhang, L., Zheng, Q. Z., Liu, H. K., et al. (2018). Full-reference stability assessment of digital video stabilization based on riemannian metric. IEEE Transactions on Image Processing, 27(12), 6051\u20136063.","journal-title":"IEEE Transactions on Image Processing"},{"key":"2264_CR51","doi-asserted-by":"crossref","unstructured":"Zhang, Z., Liu, Z., Tan, P., et\u00a0al. (2023) Minimum latency deep online video stabilization. In: IEEE international conference on computer vision, pp 23030\u201323039","DOI":"10.1109\/ICCV51070.2023.02105"},{"key":"2264_CR52","doi-asserted-by":"publisher","first-page":"3582","DOI":"10.1109\/TIP.2019.2963380","volume":"29","author":"M Zhao","year":"2020","unstructured":"Zhao, M., & Ling, Q. (2020). PWStableNet: Learning pixel-wise warping maps for video stabilization. IEEE Transactions on Image Processing, 29, 3582\u20133595.","journal-title":"IEEE Transactions on Image Processing"},{"key":"2264_CR53","doi-asserted-by":"crossref","unstructured":"Zhao, W., Li, X., Peng, Z., et\u00a0al. (2023) Fast full-frame video stabilization with iterative optimization. In: IEEE international conference on computer vision, pp 23534\u201323544","DOI":"10.1109\/ICCV51070.2023.02151"},{"key":"2264_CR54","doi-asserted-by":"crossref","unstructured":"Zhong, Y., Ji, P., Wang, J., et\u00a0al. (2019) Unsupervised deep epipolar flow for stationary or dynamic scenes. In: IEEE conference on computer vision and pattern recognition, pp 12095\u201312104","DOI":"10.1109\/CVPR.2019.01237"},{"key":"2264_CR55","doi-asserted-by":"crossref","unstructured":"Wu, C. Y., Li, Y., Mangalam, K., Fan, H., Xiong, B., Malik, J., Feichtenhofer, C. (2022) MemViT: memory-augmented multiscale vision transformer for efficient long-term video recognition. In: IEEE conference on computer vision and pattern recognition, pp 13587\u201313597","DOI":"10.1109\/CVPR52688.2022.01322"},{"key":"2264_CR56","unstructured":"Wu, C-Y, Li, Y, Mangalam, & Karttikeya, et\u00a0al. (2022) MeMOT: Multi-object tracking with memory. In: IEEE conference on computer vision and pattern recognition, pp 8090\u20138100"},{"key":"2264_CR57","unstructured":"Wu, C-Y, Li, Y, Mangalam, & Karttikeya, et\u00a0al. (2021) High-Resolution Optical Flow from 1D Attention and Correlation. In: IEEE\/CVF International Conference on Computer Vision, pp 10498\u201310507"}],"container-title":["International Journal of Computer Vision"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11263-024-02264-8.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11263-024-02264-8\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11263-024-02264-8.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,4,17]],"date-time":"2025-04-17T05:59:41Z","timestamp":1744869581000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11263-024-02264-8"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,11,22]]},"references-count":57,"journal-issue":{"issue":"5","published-print":{"date-parts":[[2025,5]]}},"alternative-id":["2264"],"URL":"https:\/\/doi.org\/10.1007\/s11263-024-02264-8","relation":{},"ISSN":["0920-5691","1573-1405"],"issn-type":[{"type":"print","value":"0920-5691"},{"type":"electronic","value":"1573-1405"}],"subject":[],"published":{"date-parts":[[2024,11,22]]},"assertion":[{"value":"8 June 2023","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"30 September 2024","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"22 November 2024","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}