{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,28]],"date-time":"2026-03-28T18:15:35Z","timestamp":1774721735558,"version":"3.50.1"},"reference-count":43,"publisher":"Springer Science and Business Media LLC","issue":"26","license":[{"start":{"date-parts":[[2025,8,2]],"date-time":"2025-08-02T00:00:00Z","timestamp":1754092800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,8,2]],"date-time":"2025-08-02T00:00:00Z","timestamp":1754092800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62172267"],"award-info":[{"award-number":["62172267"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Natural Science Foundation of Shanghai, China","award":["20ZR1420400"],"award-info":[{"award-number":["20ZR1420400"]}]},{"name":"State Key Program of National Natural Science Foundation of China","award":["61936001"],"award-info":[{"award-number":["61936001"]}]},{"DOI":"10.13039\/501100013105","name":"Shanghai Rising-Star Program","doi-asserted-by":"publisher","award":["21QB1401900"],"award-info":[{"award-number":["21QB1401900"]}],"id":[{"id":"10.13039\/501100013105","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Key Research Project of Zhejiang Laboratory","award":["2021PE0AC02"],"award-info":[{"award-number":["2021PE0AC02"]}]},{"DOI":"10.13039\/501100003399","name":"Shanghai Municipal Science and Technology Commission","doi-asserted-by":"crossref","award":["19080503200"],"award-info":[{"award-number":["19080503200"]}],"id":[{"id":"10.13039\/501100003399","id-type":"DOI","asserted-by":"crossref"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Neural Comput &amp; Applic"],"published-print":{"date-parts":[[2025,9]]},"DOI":"10.1007\/s00521-025-11494-x","type":"journal-article","created":{"date-parts":[[2025,8,2]],"date-time":"2025-08-02T18:07:57Z","timestamp":1754158077000},"page":"21909-21929","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["OVST: online video stabilization with two-stage training transformer"],"prefix":"10.1007","volume":"37","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-5331-022X","authenticated-orcid":false,"given":"Xing","family":"Wu","sequence":"first","affiliation":[]},{"given":"Yimin","family":"Zhu","sequence":"additional","affiliation":[]},{"given":"Han","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"Jun","family":"Song","sequence":"additional","affiliation":[]},{"given":"Junfeng","family":"Yao","sequence":"additional","affiliation":[]},{"given":"Dong","family":"Zhu","sequence":"additional","affiliation":[]},{"given":"Quan","family":"Qian","sequence":"additional","affiliation":[]},{"given":"Yike","family":"Guo","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,8,2]]},"reference":[{"key":"11494_CR1","doi-asserted-by":"publisher","first-page":"3582","DOI":"10.1109\/TIP.2019.2963380","volume":"29","author":"MD Zhao","year":"2020","unstructured":"Zhao MD, Ling Q (2020) Pwstablenet: learning pixel-wise warping maps for video stabilization. IEEE Trans Image Process 29:3582\u20133595","journal-title":"IEEE Trans Image Process"},{"key":"11494_CR2","doi-asserted-by":"crossref","unstructured":"Liu YL, Lai WS, Yang MH et al (2021) Hybrid neural fusion for full-frame video stabilization. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp 2299\u20132308","DOI":"10.1109\/ICCV48922.2021.00230"},{"key":"11494_CR3","doi-asserted-by":"publisher","first-page":"267","DOI":"10.1111\/cgf.13566","volume":"37","author":"SZ Xu","year":"2018","unstructured":"Xu SZ, Hu J, Wang M et al (2018) Deep video stabilization using adversarial networks. Proc Comput Graph Forum 37:267\u2013276","journal-title":"Proc Comput Graph Forum"},{"issue":"10","key":"11494_CR4","doi-asserted-by":"publisher","first-page":"4941","DOI":"10.1109\/TIP.2019.2917283","volume":"28","author":"BH Xu","year":"2019","unstructured":"Xu BH, Ye H, Zheng YB et al (2019) Dense dilated network for video action recognition. IEEE Trans Image Process 28(10):4941\u20134953","journal-title":"IEEE Trans Image Process"},{"key":"11494_CR5","doi-asserted-by":"publisher","first-page":"398","DOI":"10.1016\/j.compeleceng.2019.05.009","volume":"77","author":"SY Lu","year":"2019","unstructured":"Lu SY, Wang BZ, Wang HJ et al (2019) A real-time object detection algorithm for video. Comput Electr Eng 77:398\u2013408","journal-title":"Comput Electr Eng"},{"key":"11494_CR6","doi-asserted-by":"publisher","first-page":"2817","DOI":"10.1109\/JSTARS.2021.3059054","volume":"14","author":"MM Valero","year":"2021","unstructured":"Valero MM, Verstockt S, Butler B et al (2021) Thermal infrared video stabilization for aerial monitoring of active wildfires. IEEE J Sel Top Appl Earth Observ Remote Sens 14:2817\u20132832","journal-title":"IEEE J Sel Top Appl Earth Observ Remote Sens"},{"key":"11494_CR7","first-page":"631","volume":"2","author":"F Liu","year":"2023","unstructured":"Liu F, Gleicher M, Jin HL et al (2023) Content-preserving warps for 3D video stabilization. Semin Graph Pap Push Bound 2:631\u2013639","journal-title":"Semin Graph Pap Push Bound"},{"issue":"10","key":"11494_CR8","doi-asserted-by":"publisher","first-page":"2582","DOI":"10.1007\/s11263-023-01827-5","volume":"131","author":"WY Zhao","year":"2023","unstructured":"Zhao WY, Lu H, Cao ZG et al (2023) A2B: anchor to barycentric coordinate for robust correspondence. Int J Comput Vis 131(10):2582\u20132606","journal-title":"Int J Comput Vis"},{"issue":"10","key":"11494_CR9","doi-asserted-by":"publisher","first-page":"1390","DOI":"10.1109\/TCSVT.2011.2162689","volume":"21","author":"G Puglisi","year":"2011","unstructured":"Puglisi G, Battiato S (2011) A robust image alignment algorithm for video stabilization purposes. IEEE Trans Circuits Syst Video Technol 21(10):1390\u20131400","journal-title":"IEEE Trans Circuits Syst Video Technol"},{"issue":"8","key":"11494_CR10","doi-asserted-by":"publisher","first-page":"1354","DOI":"10.1109\/TVCG.2013.11","volume":"19","author":"YS Wang","year":"2013","unstructured":"Wang YS, Liu F, Hsu PS et al (2013) Spatially and temporally optimized video stabilization. IEEE Trans Vis Comput Graph 19(8):1354\u20131361","journal-title":"IEEE Trans Vis Comput Graph"},{"issue":"1","key":"11494_CR11","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/1899404.1899408","volume":"30","author":"F Liu","year":"2011","unstructured":"Liu F, Gleicher M, Wang J et al (2011) Subspace video stabilization. ACM Trans Graph 30(1):1\u201310","journal-title":"ACM Trans Graph"},{"key":"11494_CR12","doi-asserted-by":"crossref","unstructured":"Liu SC, Wang YT, Yuan L et al (2012) Video stabilization with a depth camera. In: 2012 IEEE conference on computer vision and pattern recognition, pp 89\u201395","DOI":"10.1109\/CVPR.2012.6247662"},{"key":"11494_CR13","doi-asserted-by":"publisher","first-page":"116015","DOI":"10.1016\/j.image.2020.116015","volume":"90","author":"W Guilluy","year":"2021","unstructured":"Guilluy W, Oudre L, Beghdadi A (2021) Video stabilization: overview, challenges and perspectives. Signal Process Image Commun 90:116015","journal-title":"Signal Process Image Commun"},{"issue":"6","key":"11494_CR14","doi-asserted-by":"publisher","first-page":"3923","DOI":"10.1109\/TCSVT.2021.3107135","volume":"32","author":"DC Jin","year":"2021","unstructured":"Jin DC, Lei JJ, Peng B et al (2021) Deep affine motion compensation network for inter prediction in VVC. IEEE Trans Circuits Syst Video Technol 32(6):3923\u20133933","journal-title":"IEEE Trans Circuits Syst Video Technol"},{"key":"11494_CR15","doi-asserted-by":"crossref","unstructured":"Ye NJ, Wang C, Fan HQ et al (2021) Motion basis learning for unsupervised deep homography estimation with subspace projection. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp 13117\u201313125","DOI":"10.1109\/ICCV48922.2021.01287"},{"issue":"6","key":"11494_CR16","doi-asserted-by":"publisher","first-page":"7885","DOI":"10.1109\/TPAMI.2022.3223789","volume":"45","author":"SC Liu","year":"2022","unstructured":"Liu SC, Lu YH, Jiang H et al (2022) Unsupervised global and local homography estimation with motion basis learning. IEEE Trans Pattern Anal Mach Intell 45(6):7885\u20137899","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"issue":"3","key":"11494_CR17","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3494525","volume":"55","author":"M Roberto e Souza","year":"2022","unstructured":"Roberto e Souza M, Maia HA, Pedrini H (2022) Survey on digital video stabilization: concepts, methods, and challenges. ACM Comput Surv 55(3):1\u201337","journal-title":"ACM Comput Surv"},{"key":"11494_CR18","doi-asserted-by":"crossref","unstructured":"Zhao WY, Li X, Peng Z et al (2023) Fast full-frame video stabilization with iterative optimization. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp 23534\u201323544","DOI":"10.1109\/ICCV51070.2023.02151"},{"key":"11494_CR19","doi-asserted-by":"crossref","unstructured":"Yu JY, Ramamoorthi R (2019) Robust video stabilization by optimization in CNN weight space. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 3800\u20133808","DOI":"10.1109\/CVPR.2019.00392"},{"key":"11494_CR20","doi-asserted-by":"crossref","unstructured":"Xu YF, Zhang J, Tao DC (2021) Out-of-boundary view synthesis towards full-frame video stabilization. In: Proceedings of 2021 IEEE\/CVF international conference on computer vision, pp. 4842\u20134851","DOI":"10.1109\/ICCV48922.2021.00480"},{"key":"11494_CR21","doi-asserted-by":"crossref","unstructured":"Yu JY, Ramamoorthi R (2020) Learning video stabilization using optical flow. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 8159\u20138167","DOI":"10.1109\/CVPR42600.2020.00818"},{"key":"11494_CR22","doi-asserted-by":"crossref","unstructured":"Liu SC, Tan P, Yuan L et al (2016) Meshflow: minimum latency online video stabilization. In: Proceedings of computer vision\u2014ECCV 2016, pp 800\u2013815","DOI":"10.1007\/978-3-319-46466-4_48"},{"key":"11494_CR23","doi-asserted-by":"crossref","unstructured":"Zhang ZF, Liu Z, Tan P et al (2023) Minimum latency deep online video stabilization. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp 23030\u201323039","DOI":"10.1109\/ICCV51070.2023.02105"},{"issue":"5","key":"11494_CR24","doi-asserted-by":"publisher","first-page":"2283","DOI":"10.1109\/TIP.2018.2884280","volume":"28","author":"M Wang","year":"2018","unstructured":"Wang M, Yang GY, Lin JK et al (2018) Deep online video stabilization with multi-grid warping transformation learning. IEEE Trans Image Process 28(5):2283\u20132292","journal-title":"IEEE Trans Image Process"},{"issue":"1","key":"11494_CR25","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3363550","volume":"39","author":"J Choi","year":"2020","unstructured":"Choi J, Kweon IS (2020) Deep iterative frame interpolation for full-frame video stabilization. ACM Trans Graph 39(1):1\u20139","journal-title":"ACM Trans Graph"},{"key":"11494_CR26","doi-asserted-by":"crossref","unstructured":"Shi ZM, Shi FH, Lai WS et al (2022) Deep online fused video stabilization. In: Proceedings of the IEEE\/CVF Winter conference on applications of computer vision, pp 1250\u20131258","DOI":"10.1109\/WACV51458.2022.00094"},{"key":"11494_CR27","doi-asserted-by":"crossref","unstructured":"Chen YT, Tseng KW, Lee YC et al (2021) Pixstabnet: fast multi-scale deep online video stabilization with pixel-based warping. In: 2021 IEEE international conference on image processing, pp 1929\u20131933","DOI":"10.1109\/ICIP42928.2021.9506801"},{"key":"11494_CR28","doi-asserted-by":"publisher","first-page":"4306","DOI":"10.1109\/TIP.2022.3182887","volume":"31","author":"YF Xu","year":"2022","unstructured":"Xu YF, Zhang J, Maybank SJ et al (2022) Dut: learning video stabilization by simply watching unstable videos. IEEE Trans Image Process 31:4306\u20134320","journal-title":"IEEE Trans Image Process"},{"issue":"16","key":"11494_CR29","doi-asserted-by":"publisher","first-page":"13371","DOI":"10.1007\/s00521-022-07366-3","volume":"34","author":"D Soydaner","year":"2022","unstructured":"Soydaner D (2022) Attention mechanism in neural networks: where it comes and where it goes. Neural Comput Appl 34(16):13371\u201313385","journal-title":"Neural Comput Appl"},{"key":"11494_CR30","doi-asserted-by":"publisher","first-page":"44","DOI":"10.1007\/s10489-018-1206-2","volume":"49","author":"X Wu","year":"2019","unstructured":"Wu X, Du ZK, Guo YK et al (2019) Hierarchical attention based long short-term memory for Chinese lyric generation. Appl Intell 49:44\u201352","journal-title":"Appl Intell"},{"key":"11494_CR31","doi-asserted-by":"publisher","first-page":"25355","DOI":"10.1007\/s11042-018-5788-9","volume":"77","author":"X Wu","year":"2018","unstructured":"Wu X, Du ZK, Guo YK (2018) A visual attention-based keyword extraction for document classification. Multimed Tools Appl 77:25355\u201325367","journal-title":"Multimed Tools Appl"},{"issue":"16","key":"11494_CR32","doi-asserted-by":"publisher","first-page":"14029","DOI":"10.1007\/s00521-022-07259-5","volume":"34","author":"AV Konstantinov","year":"2022","unstructured":"Konstantinov AV, Utkin LV (2022) Multi-attention multiple instance learning. Neural Comput Appl 34(16):14029\u201314051","journal-title":"Neural Comput Appl"},{"key":"11494_CR33","doi-asserted-by":"publisher","first-page":"14549","DOI":"10.1007\/s00521-020-04893-9","volume":"32","author":"FH Zou","year":"2020","unstructured":"Zou FH, Xiao W, Ji WT et al (2020) Arbitrary-oriented object detection via dense feature fusion and attention model for remote sensing super-resolution image. Neural Comput Appl 32:14549\u201314562","journal-title":"Neural Comput Appl"},{"key":"11494_CR34","doi-asserted-by":"crossref","unstructured":"Wu X, Ge YX, Zhang QF, Zhang DL (2021) PCB defect detection using deep learning methods. In: Proceedings of 2021 IEEE 24th international conference on computer supported cooperative work in design, pp 873\u2013876","DOI":"10.1109\/CSCWD49262.2021.9437846"},{"key":"11494_CR35","doi-asserted-by":"publisher","first-page":"11107","DOI":"10.1007\/s00521-020-05650-8","volume":"33","author":"KZ Xie","year":"2021","unstructured":"Xie KZ, Wei ZQ, Huang L et al (2021) Graph convolutional networks with attention for multi-label weather recognition. Neural Comput Appl 33:11107\u201311123","journal-title":"Neural Comput Appl"},{"issue":"3","key":"11494_CR36","doi-asserted-by":"publisher","first-page":"3444","DOI":"10.1007\/s10489-022-03728-5","volume":"53","author":"X Wu","year":"2023","unstructured":"Wu X, Tang B, Zhao M et al (2023) STR transformer: a cross-domain transformer for scene text recognition. Appl Intell 53(3):3444\u20133458","journal-title":"Appl Intell"},{"key":"11494_CR37","doi-asserted-by":"publisher","first-page":"107725","DOI":"10.1016\/j.engappai.2023.107725","volume":"130","author":"NY Wang","year":"2024","unstructured":"Wang NY, Zhou CD, Zhu RF et al (2024) Soft: self-supervised sparse optical flow transformer for video stabilization via quaternion. Eng Appl Artif Intell 130:107725","journal-title":"Eng Appl Artif Intell"},{"key":"11494_CR38","doi-asserted-by":"crossref","unstructured":"Qin ZG, Zhang PY, Wu F et al (2021) Fcanet: frequency channel attention networks. In: Proceedings of 2021 IEEE\/CVF international conference on computer vision, pp 783\u2013792","DOI":"10.1109\/ICCV48922.2021.00082"},{"key":"11494_CR39","doi-asserted-by":"crossref","unstructured":"Yang ZX, Zhu LC, Wu Y et al (2020) Gated channel transformation for visual recognition. In: Proceedings of 2020 IEEE\/CVF conference on computer vision and pattern recognition, pp 11794\u201311803","DOI":"10.1109\/CVPR42600.2020.01181"},{"key":"11494_CR40","unstructured":"Yang LX, Zhang RY, Li LD et al (2021) Simam: a simple, parameter-free attention module for convolutional neural networks. In: Proceedings of international conference on machine learning, pp 11863\u201311874"},{"key":"11494_CR41","doi-asserted-by":"crossref","unstructured":"Liu ZY, Wang LM, Wu W et al (2021) Tam: temporal adaptive module for video recognition. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp 13708\u201313718","DOI":"10.1109\/ICCV48922.2021.01345"},{"key":"11494_CR42","doi-asserted-by":"crossref","unstructured":"Wang XL, Girshick R, Gupta A et al (2018) Non-local neural networks. In: Proceedings of 2018 IEEE\/CVF conference on computer vision and pattern recognition, pp 7794\u20137803","DOI":"10.1109\/CVPR.2018.00813"},{"key":"11494_CR43","unstructured":"Vaswani A, Shazeer N, Parmar N et al (2017) Attention is all you need. Adv Neural Inf Process Syst 30"}],"container-title":["Neural Computing and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00521-025-11494-x.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00521-025-11494-x\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00521-025-11494-x.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,8]],"date-time":"2025-09-08T12:38:42Z","timestamp":1757335122000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00521-025-11494-x"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,8,2]]},"references-count":43,"journal-issue":{"issue":"26","published-print":{"date-parts":[[2025,9]]}},"alternative-id":["11494"],"URL":"https:\/\/doi.org\/10.1007\/s00521-025-11494-x","relation":{},"ISSN":["0941-0643","1433-3058"],"issn-type":[{"value":"0941-0643","type":"print"},{"value":"1433-3058","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,8,2]]},"assertion":[{"value":"8 April 2023","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"3 July 2025","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"2 August 2025","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare that they have no conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}]}}