{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,9]],"date-time":"2025-12-09T08:29:33Z","timestamp":1765268973001,"version":"3.44.0"},"reference-count":107,"publisher":"Springer Science and Business Media LLC","issue":"24","license":[{"start":{"date-parts":[[2024,9,25]],"date-time":"2024-09-25T00:00:00Z","timestamp":1727222400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,9,25]],"date-time":"2024-09-25T00:00:00Z","timestamp":1727222400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Multimed Tools Appl"],"DOI":"10.1007\/s11042-024-20246-3","type":"journal-article","created":{"date-parts":[[2024,9,25]],"date-time":"2024-09-25T16:50:33Z","timestamp":1727283033000},"page":"28115-28156","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":3,"title":["Evolution of transformer-based optical flow estimation techniques: a survey"],"prefix":"10.1007","volume":"84","author":[{"ORCID":"https:\/\/orcid.org\/0009-0008-9878-1680","authenticated-orcid":false,"given":"Nihal","family":"Kumar","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0860-9166","authenticated-orcid":false,"given":"Om Prakash","family":"Verma","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5339-8671","authenticated-orcid":false,"given":"Anil Singh","family":"Parihar","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2024,9,25]]},"reference":[{"doi-asserted-by":"crossref","unstructured":"Wang M, Xu J, Ke F, Liao L (2023) A encoder-decoder deblurring network combined with high-frequency a priori. Multimed Tools Appl. https:\/\/api.semanticscholar.org\/CorpusID:266104025","key":"20246_CR1","DOI":"10.1007\/s11042-023-17771-y"},{"doi-asserted-by":"crossref","unstructured":"Ding X, Huang Y, Li Y, He J (2020) Forgery detection of motion compensation interpolated frames based on discontinuity of optical flow. Multimed Tools Appl 79:28729\u201328754. https:\/\/api.semanticscholar.org\/CorpusID:221018490","key":"20246_CR2","DOI":"10.1007\/s11042-020-09340-4"},{"doi-asserted-by":"crossref","unstructured":"Liang Z, Li Y, Yu R, Zhang K (2023) Aircraft type recognition in 3d-view optical image with contour segmentation. Multimed Tools Appl. https:\/\/api.semanticscholar.org\/CorpusID:266068440","key":"20246_CR3","DOI":"10.1007\/s11042-023-17542-9"},{"doi-asserted-by":"crossref","unstructured":"Obeso AM, Benois-Pineau J, Garc\u00eda-V\u00e1zquez MS, Ram\u00edrez-Acosta AA (2021) Visual vs internal attention mechanisms in deep neural networks for image classification and object detection. Pattern Recognit 123:108411. https:\/\/api.semanticscholar.org\/CorpusID:243472146","key":"20246_CR4","DOI":"10.1016\/j.patcog.2021.108411"},{"doi-asserted-by":"crossref","unstructured":"Singh LK, Pooja HG, Khanna M (2022) Performance evaluation of various deep learning based models for effective glaucoma evaluation using optical coherence tomography images. Multimed Tools Appl 81: 27737\u201327781. https:\/\/api.semanticscholar.org\/CorpusID:247817622","key":"20246_CR5","DOI":"10.1007\/s11042-022-12826-y"},{"doi-asserted-by":"crossref","unstructured":"Chen L, Yang X, Jeon G, Anisetti M, Liu K (2020) A trusted medical image super-resolution method based on feedback adaptive weighted dense network. Artif Intell Med 106:101857. https:\/\/api.semanticscholar.org\/CorpusID:219438199","key":"20246_CR6","DOI":"10.1016\/j.artmed.2020.101857"},{"doi-asserted-by":"crossref","unstructured":"Furht B, Furht B, Greenblatt J (1996) Motion estimation algorithms for video compression. In: The Springer international series in engineering and computer science. https:\/\/api.semanticscholar.org\/CorpusID:60683479","key":"20246_CR7","DOI":"10.1007\/978-1-4615-6241-2"},{"doi-asserted-by":"crossref","unstructured":"Azimjonov J, \u00d6zmen A, Varan M (2023) A vision-based real-time traffic flow monitoring system for road intersections. Multimedia Tools Appl 1 \u2013 20. https:\/\/api.semanticscholar.org\/CorpusID:256763598","key":"20246_CR8","DOI":"10.1007\/s11042-023-14418-w"},{"doi-asserted-by":"crossref","unstructured":"Ali AA, El-Hafeez TA, Mohany YK (2019) An accurate system for face detection and recognition. J Adv Math Comput. https:\/\/api.semanticscholar.org\/CorpusID:201133526","key":"20246_CR9","DOI":"10.9734\/jamcs\/2019\/v33i330178"},{"doi-asserted-by":"crossref","unstructured":"Saabia AAB, El-Hafeez T, Zaki AM (2018) Face recognition based on grey wolf optimization for feature selection. In: International conference on advanced intelligent system and informatics. https:\/\/api.semanticscholar.org\/CorpusID:52134200","key":"20246_CR10","DOI":"10.1007\/978-3-319-99010-1_25"},{"doi-asserted-by":"crossref","unstructured":"Ali AA, El-Hafeez TA, Mohany YK (2019) A robust and efficient system to detect human faces based on facial features. Asian J Res Comput Sci. https:\/\/api.semanticscholar.org\/CorpusID:187398560","key":"20246_CR11","DOI":"10.9734\/ajrcos\/2018\/v2i430080"},{"doi-asserted-by":"crossref","unstructured":"Eman M, Mahmoud TM, Ibrahim MM, El-Hafeez TA (2023) Innovative hybrid approach for masked face recognition using pretrained mask detection and segmentation, robust pca, and knn classifier. Sensors (Basel, Switzerland), 23. https:\/\/api.semanticscholar.org\/CorpusID:260299938","key":"20246_CR12","DOI":"10.3390\/s23156727"},{"doi-asserted-by":"crossref","unstructured":"Taha M, Mostafa T, El-Rahman TA (2023) A novel hybrid approach to masked face recognition using robust pca and goa optimizer. Sci J Damietta Fac Sci. https:\/\/api.semanticscholar.org\/CorpusID:266201423","key":"20246_CR13","DOI":"10.21608\/sjdfs.2023.222524.1117"},{"doi-asserted-by":"crossref","unstructured":"Xiao S, Wang Y, Wang Y (2024) Automatic video colorization based on contrastive learning and optical flow. Multimed Tools Appl 1\u201317. https:\/\/api.semanticscholar.org\/CorpusID:266727535","key":"20246_CR14","DOI":"10.1007\/s11042-023-17883-5"},{"issue":"2","key":"20246_CR15","first-page":"2838","volume":"9","author":"T Abd El-Hafeez","year":"2010","unstructured":"Abd El-Hafeez T (2010) A new system for extracting and detecting skin color regions from pdf documents. International Journal on Computer Science and Engineering (IJCSE) 9(2):2838\u20132846","journal-title":"International Journal on Computer Science and Engineering (IJCSE)"},{"issue":"3","key":"20246_CR16","first-page":"697","volume":"1","author":"MR Girgis","year":"2007","unstructured":"Girgis MR, Mahmoud TM, Abd-El-Hafeez T (2007) An approach to image extraction and accurate skin detection from web pages. Int J Comput Eng 1(3):697\u2013705","journal-title":"Int J Comput Eng"},{"unstructured":"Girgis MR, Mahmoud TM, Abd-El-Hafeez T (2010) A new effective system for filtering pornography images from web pages and pdf files. Int J Web Appl 2:1\u201313. https:\/\/api.semanticscholar.org\/CorpusID:4975813","key":"20246_CR17"},{"doi-asserted-by":"crossref","unstructured":"El Koshiry AM, Eliwa EHI, Abd El-Hafeez T, Khairy M (2024) Detecting cyberbullying using deep learning techniques: a pre-trained glove and focal loss technique. Peer J Comput Sci 10. https:\/\/api.semanticscholar.org\/CorpusID:268764971","key":"20246_CR18","DOI":"10.7717\/peerj-cs.1961"},{"doi-asserted-by":"crossref","unstructured":"Park SK, Chung J-H, Kang T-K, Lim MT (2021) Binary dense sift flow based two stream cnn for human action recognition. Multimed Tools Appl 80:35697 \u2013 35720. https:\/\/api.semanticscholar.org\/CorpusID:236283523","key":"20246_CR19","DOI":"10.1007\/s11042-021-10795-2"},{"doi-asserted-by":"publisher","unstructured":"Pandey AK, Parihar AS (2023) A comparative analysis of deep learning based human action recognition algorithms. In 2023 14th International Conference on Computing Communication and Networking Technologies (ICCCNT), pp 1\u20137. https:\/\/doi.org\/10.1109\/ICCCNT56998.2023.10308200","key":"20246_CR20","DOI":"10.1109\/ICCCNT56998.2023.10308200"},{"doi-asserted-by":"crossref","unstructured":"Devanne M, Wannous H, Berretti S, Pala P, Daoudi M, Bimbo A (2015) 3-d human action recognition by shape analysis of motion trajectories on riemannian manifold. IEEE Trans Cybern 45:1340\u20131352. https:\/\/api.semanticscholar.org\/CorpusID:6933411","key":"20246_CR21","DOI":"10.1109\/TCYB.2014.2350774"},{"doi-asserted-by":"publisher","unstructured":"Horn BKP, Schunck BG (1981) Determining optical flow. Artif Intell 17(1):185\u2013203. ISSN 0004-3702. https:\/\/doi.org\/10.1016\/0004-3702(81)90024-2","key":"20246_CR22","DOI":"10.1016\/0004-3702(81)90024-2"},{"unstructured":"Lucas BD, Kanade T (1981) An iterative image registration technique with an application to stereo vision. In: International joint conference on artificial intelligence. https:\/\/api.semanticscholar.org\/CorpusID:2121536","key":"20246_CR23"},{"doi-asserted-by":"publisher","unstructured":"Chen Z, Zhang C, Li M (2013) Coarse-to-fine optical flow estimation with image structure tensor. In: 2013 6th International Congress on Image and Signal Processing (CISP), vol 2, pp 741\u2013746. https:\/\/doi.org\/10.1109\/CISP.2013.6745263","key":"20246_CR24","DOI":"10.1109\/CISP.2013.6745263"},{"doi-asserted-by":"crossref","unstructured":"Brox T, Bruhn A, Papenberg N, Weickert J (2004) High accuracy optical flow estimation based on a theory for warping. In: European conference on computer vision. https:\/\/api.semanticscholar.org\/CorpusID:76390","key":"20246_CR25","DOI":"10.1007\/978-3-540-24673-2_3"},{"doi-asserted-by":"crossref","unstructured":"Farneb\u00e4ck G (2003) Two-frame motion estimation based on polynomial expansion. In: Scandinavian conference on image analysis. https:\/\/api.semanticscholar.org\/CorpusID:15601477","key":"20246_CR26","DOI":"10.1007\/3-540-45103-X_50"},{"doi-asserted-by":"publisher","unstructured":"Hiraiwa A, Fuse K, Komatsu N, Komiya K, Ikeda H (1999) Accurate estimation of optical flow for fully automated tracking of moving-objects within video streams. In: 1999 IEEE International Symposium on Circuits and Systems (ISCAS), volume\u00a04, pp 515\u2013519 vol 4. https:\/\/doi.org\/10.1109\/ISCAS.1999.780055","key":"20246_CR27","DOI":"10.1109\/ISCAS.1999.780055"},{"doi-asserted-by":"publisher","unstructured":"Bruhn A, Weickert J, Schn\u00f6rr C (2005) Lucas\/Kanade Meets Horn\/Schunck: combining local and global optic flow methods. Int J Comput Vis 61(3):211\u2013231. ISSN 1573-1405. https:\/\/doi.org\/10.1023\/B:VISI.0000045324.43199.43","key":"20246_CR28","DOI":"10.1023\/B:VISI.0000045324.43199.43"},{"doi-asserted-by":"publisher","unstructured":"Ilg E, Mayer N, Saikia T, Keuper M, Dosovitskiy A, Brox T (2017) FlowNet 2.0: Evolution of optical flow estimation with deep networks. In 2017 IEEE Conference on Computer Vision and Pattern Recognition (CVPR), pp 1647\u20131655. https:\/\/doi.org\/10.1109\/CVPR.2017.179","key":"20246_CR29","DOI":"10.1109\/CVPR.2017.179"},{"doi-asserted-by":"publisher","unstructured":"Sun D, Yang X, Liu M-Y, Kautz J (2018) PWC-Net: CNNs for optical flow using pyramid, warping, and cost volume. In: 2018 IEEE\/CVF Conference on computer vision and pattern recognition, pp 8934\u20138943. https:\/\/doi.org\/10.1109\/CVPR.2018.00931","key":"20246_CR30","DOI":"10.1109\/CVPR.2018.00931"},{"doi-asserted-by":"publisher","unstructured":"Ranjan A, Black MJ (2017) Optical flow estimation using a spatial pyramid network. In: 2017 IEEE Conference on Computer Vision and Pattern Recognition (CVPR), pp 2720\u20132729. https:\/\/doi.org\/10.1109\/CVPR.2017.291","key":"20246_CR31","DOI":"10.1109\/CVPR.2017.291"},{"doi-asserted-by":"crossref","unstructured":"Hui T-W, Tang X, Loy CC (2018) LiteFlowNet: a lightweight convolutional neural network for optical flow estimation. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp 8981\u20138989","key":"20246_CR32","DOI":"10.1109\/CVPR.2018.00936"},{"doi-asserted-by":"publisher","unstructured":"Hur J, Roth S (2019) Iterative residual refinement for joint optical flow and occlusion estimation. In: 2019 IEEE\/CVF Conference on computer vision and pattern recognition (CVPR), pp 5747\u20135756. https:\/\/doi.org\/10.1109\/CVPR.2019.00590","key":"20246_CR33","DOI":"10.1109\/CVPR.2019.00590"},{"doi-asserted-by":"crossref","unstructured":"Teed Z, Deng J (2020) Raft: Recurrent all-pairs field transforms for optical flow","key":"20246_CR34","DOI":"10.1007\/978-3-030-58536-5_24"},{"doi-asserted-by":"crossref","unstructured":"Beauchemin SS, Barron JL (1995) The computation of optical flow. ACM Comput Surv 27:433\u2013467. https:\/\/api.semanticscholar.org\/CorpusID:1334552","key":"20246_CR35","DOI":"10.1145\/212094.212141"},{"doi-asserted-by":"crossref","unstructured":"Fortun D, Bouthemy P, Kervrann C (2015) Optical flow modeling and computation: a survey. Comput Vis Image Underst 134:1\u201321. https:\/\/api.semanticscholar.org\/CorpusID:5945559","key":"20246_CR36","DOI":"10.1016\/j.cviu.2015.02.008"},{"doi-asserted-by":"crossref","unstructured":"Tu Z, Xie W, Zhang D, Poppe R, Veltkamp RC, Li B, Yuan J (2019) A survey of variational and cnn-based optical flow techniques. Signal Process Image Commun 72:9\u201324. https:\/\/api.semanticscholar.org\/CorpusID:57980549","key":"20246_CR37","DOI":"10.1016\/j.image.2018.12.002"},{"doi-asserted-by":"crossref","unstructured":"Savian S, Elahi M, Tillo T (2020) Optical flow estimation with deep learning, a survey on recent advances. In: Deep Biometrics. https:\/\/api.semanticscholar.org\/CorpusID:214149658","key":"20246_CR38","DOI":"10.1007\/978-3-030-32583-1_12"},{"doi-asserted-by":"crossref","unstructured":"Hur J, Roth S (2020) Optical flow estimation in the deep learning age. arXiv:2004.02853. https:\/\/api.semanticscholar.org\/CorpusID:214802969","key":"20246_CR39","DOI":"10.1007\/978-3-030-46732-6_7"},{"doi-asserted-by":"crossref","unstructured":"Zhai M, Xiang X, Lv N, Kong X (2021) Optical flow and scene flow estimation: a survey. Pattern Recognit 114:107861. https:\/\/api.semanticscholar.org\/CorpusID:232328500","key":"20246_CR40","DOI":"10.1016\/j.patcog.2021.107861"},{"doi-asserted-by":"crossref","unstructured":"Dobri\u010dki T, Zhuang X, Won KJ, Hong B-W (2022) Survey on unsupervised learning methods for optical flow estimation. 2022 13th International Conference on Information and Communication Technology Convergence (ICTC), pp 591\u2013594. https:\/\/api.semanticscholar.org\/CorpusID:253881748","key":"20246_CR41","DOI":"10.1109\/ICTC55196.2022.9952910"},{"unstructured":"Vaswani A, Shazeer NM, Parmar N, Uszkoreit J, Jones L, Gomez AN, Kaiser L, Polosukhin I (2017) Attention is all you need. In: Neural information processing systems. https:\/\/api.semanticscholar.org\/CorpusID:13756489","key":"20246_CR42"},{"issue":"1","key":"20246_CR43","doi-asserted-by":"publisher","first-page":"13","DOI":"10.1109\/34.23110","volume":"11","author":"H-H Nagel","year":"1989","unstructured":"Nagel H-H (1989) On a constraint equation for the estimation of displacement rates in image sequences. IEEE Trans Pattern Anal Mach Intell 11(1):13\u201330. https:\/\/doi.org\/10.1109\/34.23110","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"issue":"3","key":"20246_CR44","doi-asserted-by":"publisher","first-page":"283","DOI":"10.1007\/BF00158167","volume":"2","author":"P Anandan","year":"1989","unstructured":"Anandan P (1989) A computational framework and an algorithm for the measurement of visual motion. Int J Comput Vis 2(3):283\u2013310","journal-title":"Int J Comput Vis"},{"key":"20246_CR45","doi-asserted-by":"publisher","first-page":"79","DOI":"10.1007\/BF00202895","volume":"60","author":"S Uras","year":"1988","unstructured":"Uras S, Girosi F, Verri A, Torre V (1988) A computational approach to motion perception. Biol Cybern 60:79\u201387","journal-title":"Biol Cybern"},{"unstructured":"Hildreth EC, Ullman S (1982) The measurement of visual motion. ACM Computing Surveys (CSUR)","key":"20246_CR46"},{"issue":"4","key":"20246_CR47","doi-asserted-by":"publisher","first-page":"384","DOI":"10.1109\/TPAMI.1985.4767678","volume":"7","author":"G Adiv","year":"1985","unstructured":"Adiv G (1985) Determining three-dimensional motion and structure from optical flow generated by several moving objects. IEEE Trans Pattern Anal Mach Intell 7(4):384\u2013401","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"issue":"1","key":"20246_CR48","doi-asserted-by":"publisher","first-page":"75","DOI":"10.1006\/cviu.1996.0006","volume":"63","author":"MJ Black","year":"1996","unstructured":"Black MJ, Anandan P (1996) The robust estimation of multiple motions: parametric and piecewise-smooth flow fields. Comp Vision Image Underst 63(1):75\u2013104","journal-title":"Comp Vision Image Underst"},{"key":"20246_CR49","doi-asserted-by":"publisher","first-page":"163","DOI":"10.1023\/A:1008026031844","volume":"30","author":"WB Thompson","year":"1998","unstructured":"Thompson WB (1998) Exploiting discontinuities in optical flow. Int J Comput Vis 30:163\u2013173","journal-title":"Int J Comput Vis"},{"doi-asserted-by":"crossref","unstructured":"Weiss Y, Fleet DJ (2002) Velocity likelihoods in biological and machine vision. Probabilistic models of the brain: perception and neural function, pp 81\u2013100","key":"20246_CR50","DOI":"10.7551\/mitpress\/5583.003.0008"},{"key":"20246_CR51","doi-asserted-by":"publisher","first-page":"33","DOI":"10.1007\/s11263-006-0016-x","volume":"74","author":"S Roth","year":"2007","unstructured":"Roth S, Black MJ (2007) On the spatial statistics of optical flow. Int J Comput Vis 74:33\u201350","journal-title":"Int J Comput Vis"},{"doi-asserted-by":"crossref","unstructured":"Sun D, Roth S, Lewis JP, Black MJ (2008) Learning optical flow. In: European conference on computer vision. https:\/\/api.semanticscholar.org\/CorpusID:969406","key":"20246_CR52","DOI":"10.1007\/978-3-540-88690-7_7"},{"issue":"9","key":"20246_CR53","doi-asserted-by":"crossref","first-page":"1744","DOI":"10.1109\/TPAMI.2011.236","volume":"34","author":"X Li","year":"2011","unstructured":"Li X, Jia J, Matsushita Y (2011) Motion detail preserving optical flow estimation. IEEE Trans Pattern Anal Mach Intell 34(9):1744\u20131757","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"doi-asserted-by":"publisher","unstructured":"Weinzaepfel P, Revaud J, Harchaoui Z, Schmid C (2013) Deepflow: Large displacement optical flow with deep matching. In: 2013 IEEE International conference on computer vision, pp 1385\u20131392. https:\/\/doi.org\/10.1109\/ICCV.2013.175","key":"20246_CR54","DOI":"10.1109\/ICCV.2013.175"},{"doi-asserted-by":"crossref","unstructured":"Bailer C, Taetz B, Stricker D (2015) Flow fields: dense correspondence fields for highly accurate large displacement optical flow estimation. In: Proceedings of the IEEE international conference on computer vision, pp 4015\u20134023","key":"20246_CR55","DOI":"10.1109\/ICCV.2015.457"},{"doi-asserted-by":"publisher","unstructured":"Revaud J, Weinzaepfel P, Harchaoui Z, Schmid C (2015) EpicFlow: edge-preserving interpolation of correspondences for optical flow. In: 2015 IEEE Conference on Computer Vision and Pattern Recognition (CVPR), pp 1164\u20131172. https:\/\/doi.org\/10.1109\/CVPR.2015.7298720","key":"20246_CR56","DOI":"10.1109\/CVPR.2015.7298720"},{"doi-asserted-by":"crossref","unstructured":"Butler DJ, Wulff J, Stanley GB, Black MJ (2012) A naturalistic open source movie for optical flow evaluation. In: European conference on computer vision. https:\/\/api.semanticscholar.org\/CorpusID:4637111","key":"20246_CR57","DOI":"10.1007\/978-3-642-33783-3_44"},{"doi-asserted-by":"publisher","unstructured":"Geiger A, Lenz P, Urtasun R (2012) Are we ready for autonomous driving? The KITTI vision benchmark suite. 2012 IEEE Conference on computer vision and pattern recognition, pp 3354\u20133361. https:\/\/doi.org\/10.1109\/CVPR.2012.6248074","key":"20246_CR58","DOI":"10.1109\/CVPR.2012.6248074"},{"doi-asserted-by":"publisher","unstructured":"Dosovitskiy A, Fischer P, Ilg E, H\u00e4usser P, Hazirbas C, Golkov V, van\u00a0der Smagt P, Cremers D, Brox T (2015) FlowNet: learning optical flow with convolutional networks. In: 2015 IEEE International Conference on Computer Vision (ICCV), pp 2758\u20132766. https:\/\/doi.org\/10.1109\/ICCV.2015.316","key":"20246_CR59","DOI":"10.1109\/ICCV.2015.316"},{"unstructured":"Hui T-W, Tang X, Loy CC (2019) A lightweight optical flow cnn - revisiting data fidelity and regularization. In: arXiv:1903.07414","key":"20246_CR60"},{"doi-asserted-by":"publisher","unstructured":"Hui T-W, Tang X, Loy CC (2021) A lightweight optical flow CNN \u2013revisiting data fidelity and regularization. IEEE Trans Pattern Anal Mach Intell 43(8):2555\u20132569 . ISSN 1939-3539. https:\/\/doi.org\/10.1109\/TPAMI.2020.2976928","key":"20246_CR61","DOI":"10.1109\/TPAMI.2020.2976928"},{"unstructured":"Yang G, Ramanan D (2019) Volumetric correspondence networks for optical flow. In: Neural information processing systems. https:\/\/api.semanticscholar.org\/CorpusID:202763868","key":"20246_CR62"},{"unstructured":"Wang J, Zhong Y, Dai Y, Zhang K, Ji P, Li H (2020) Displacement-invariant matching cost learning for accurate optical flow estimation. In: arXiv:2010.14851","key":"20246_CR63"},{"doi-asserted-by":"publisher","unstructured":"Bar-Haim A, Wolf L (2020) ScopeFlow: Dynamic scene scoping for optical flow. In: 2020 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp 7995\u20138004. https:\/\/doi.org\/10.1109\/CVPR42600.2020.00802","key":"20246_CR64","DOI":"10.1109\/CVPR42600.2020.00802"},{"doi-asserted-by":"publisher","unstructured":"Zhao S, Sheng Y, Dong Y, Chang EI-C, Xu Y (2020) MaskFlownet: asymmetric feature matching with learnable occlusion mask. In: 2020 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp 6277\u20136286. https:\/\/doi.org\/10.1109\/CVPR42600.2020.00631","key":"20246_CR65","DOI":"10.1109\/CVPR42600.2020.00631"},{"doi-asserted-by":"crossref","unstructured":"Luo A, Yang F, Li X, Liu S (2022) Learning optical flow with kernel patch attention. 2022 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp 8896\u20138905. https:\/\/api.semanticscholar.org\/CorpusID:250164362","key":"20246_CR66","DOI":"10.1109\/CVPR52688.2022.00870"},{"doi-asserted-by":"crossref","unstructured":"Luo A, Yang F, Luo K, Li X, Fan H, Liu S (2022) Learning optical flow with adaptive graph reasoning. In: Proceedings of the AAAI conference on artificial intelligence 36:1890\u20131898","key":"20246_CR67","DOI":"10.1609\/aaai.v36i2.20083"},{"unstructured":"Dosovitskiy A, Beyer L, Kolesnikov A, Weissenborn D, Zhai X, Unterthiner T, Dehghani M, Minderer M, Heigold G, Gelly S, Uszkoreit J, Houlsby N (2021) An Image is Worth 16x16 Words: transformers for image recognition at scale","key":"20246_CR68"},{"doi-asserted-by":"crossref","unstructured":"Carion N, Massa F, Synnaeve G, Usunier N, Kirillov A, Zagoruyko S (2020) End-to-end object detection with transformers. arXiv:2005.12872. https:\/\/api.semanticscholar.org\/CorpusID:218889832","key":"20246_CR69","DOI":"10.1007\/978-3-030-58452-8_13"},{"unstructured":"Zhang H, Li F, Liu S, Zhang L, Su H, Zhu J-J, Ni LMS, Shum HY (2022) Dino: Detr with improved denoising anchor boxes for end-to-end object detection. arXiv:2203.03605. https:\/\/api.semanticscholar.org\/CorpusID:247292561","key":"20246_CR70"},{"doi-asserted-by":"crossref","unstructured":"Liu Z, Lin Y, Cao Y, Hu H, Wei Y, Zhang Z, Lin S, Guo B (2021) Swin transformer: hierarchical vision transformer using shifted windows. arXiv:2103.14030. https:\/\/arxiv.org\/abs\/2103.14030","key":"20246_CR71","DOI":"10.1109\/ICCV48922.2021.00986"},{"key":"20246_CR72","first-page":"15908","volume":"34","author":"K Han","year":"2021","unstructured":"Han K, Xiao A, Enhua W, Guo J, Chunjing X, Wang Yunhe (2021) Transformer in transformer. Adv Neural Inf Process Syst 34:15908\u201315919","journal-title":"Adv Neural Inf Process Syst"},{"doi-asserted-by":"publisher","unstructured":"Menze M, Geiger A (2015) Object scene flow for autonomous vehicles. In: 2015 IEEE Conference on Computer Vision and Pattern Recognition (CVPR), pp 3061\u20133070. https:\/\/doi.org\/10.1109\/CVPR.2015.7298925","key":"20246_CR73","DOI":"10.1109\/CVPR.2015.7298925"},{"unstructured":"Jaegle A, Borgeaud S, Alayrac J-B, Doersch C, Ionescu C, Ding D, Koppula S, Zoran D, Brock A, Shelhamer E et al (2021) Perceiver io: a general architecture for structured inputs & outputs. arXiv:2107.14795","key":"20246_CR74"},{"doi-asserted-by":"crossref","unstructured":"Jiang W, Trulls E, Hosang J, Tagliasacchi A, Yi KM (2021) Cotr: correspondence transformer for matching across images. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp 6207\u20136217","key":"20246_CR75","DOI":"10.1109\/ICCV48922.2021.00615"},{"doi-asserted-by":"crossref","unstructured":"Liu H, Lu T, Xu Y, Liu J, Li W, Chen L (2022) Camliflow: bidirectional camera-lidar fusion for joint optical flow and scene flow estimation. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 5791\u20135801","key":"20246_CR76","DOI":"10.1109\/CVPR52688.2022.00570"},{"doi-asserted-by":"crossref","unstructured":"Luo A, Yang F, Li X, Nie L, Lin C, Fan H, Liu S (2023) Gaflow: incorporating gaussian attention into optical flow. In: Proceedings of the IEEE\/CVF International conference on computer vision, pp 9642\u20139651","key":"20246_CR77","DOI":"10.1109\/ICCV51070.2023.00884"},{"doi-asserted-by":"publisher","unstructured":"Jiang S, Campbell D, Lu Y, Li H, Hartley R (2021) Learning to estimate hidden motions with global motion aggregation. In: 2021 IEEE\/CVF international conference on computer vision (ICCV), pp 9752\u20139761. https:\/\/doi.org\/10.1109\/ICCV48922.2021.00963","key":"20246_CR78","DOI":"10.1109\/ICCV48922.2021.00963"},{"doi-asserted-by":"crossref","unstructured":"Chen Y, Zhu D, Shi W, Zhang G, Zhang T, Zhang X, Li J(2023) Mfcflow: a motion feature compensated multi-frame recurrent network for optical flow estimation. In: Proceedings of the IEEE\/CVF Winter conference on applications of computer vision, pp 5068\u20135077","key":"20246_CR79","DOI":"10.1109\/WACV56688.2023.00504"},{"doi-asserted-by":"crossref","unstructured":"Fang G, Chen J, Liang D, Asim M, Reeth FV, Claesen L, Yang Z, Liu W (2023) Feature correlation transformer for estimating ambiguous optical flow. Neural Process Lett 1\u201317","key":"20246_CR80","DOI":"10.21203\/rs.3.rs-2253481\/v1"},{"doi-asserted-by":"crossref","unstructured":"Dong Q, Cao C, Fu Y (2023) Rethinking optical flow from geometric matching consistent perspective. In: Proceedings of the IEEE\/CVF Conference on computer vision and pattern recognition, pp 1337\u20131347","key":"20246_CR81","DOI":"10.1109\/CVPR52729.2023.00135"},{"doi-asserted-by":"crossref","unstructured":"Weinzaepfel P, Lucas T, Leroy V, Cabon Y, Arora V, Br\u00e9gier R, Csurka G, Antsfeld L, Chidlovskii B, Revaud J (2023) Croco v2: improved cross-view completion pre-training for stereo matching and optical flow. In: Proceedings of the IEEE\/CVF International conference on computer vision, pp 17969\u201317980","key":"20246_CR82","DOI":"10.1109\/ICCV51070.2023.01647"},{"doi-asserted-by":"crossref","unstructured":"Shi X, Huang Z, Bian W, Li D, Zhang M, Cheung KC, See S, Qin H, Dai J, Li H (2023) Videoflow: exploiting temporal cues for multi-frame optical flow estimation. arXiv:2303.08340","key":"20246_CR83","DOI":"10.1109\/ICCV51070.2023.01146"},{"doi-asserted-by":"publisher","unstructured":"Xu H, Yang J, Cai J, Zhang J, Tong X (2021) High-resolution optical flow from 1D attention and correlation. In: 2021 IEEE\/CVF International Conference on Computer Vision (ICCV), pp 10478\u201310487. https:\/\/doi.org\/10.1109\/ICCV48922.2021.01033","key":"20246_CR84","DOI":"10.1109\/ICCV48922.2021.01033"},{"doi-asserted-by":"publisher","unstructured":"Sui X, Li S, Geng X, Wu Y, Xu X, Liu Y, Goh R, Zhu H (2022) CRAFT: cross-attentional flow transformer for robust optical flow. In: 2022 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp 17581\u201317590. https:\/\/doi.org\/10.1109\/CVPR52688.2022.01708","key":"20246_CR85","DOI":"10.1109\/CVPR52688.2022.01708"},{"doi-asserted-by":"publisher","unstructured":"Zhao S, Zhao L, Zhang Z, Zhou E, Metaxas D (2022) Global matching with overlapping attention for optical flow estimation. In 2022 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp 17571\u201317580. https:\/\/doi.org\/10.1109\/CVPR52688.2022.01707","key":"20246_CR86","DOI":"10.1109\/CVPR52688.2022.01707"},{"doi-asserted-by":"publisher","unstructured":"Xu H, Zhang J, Cai J, Rezatofighi H, Tao D (2022) GMFlow: learning optical flow via global matching. In 2022 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp 8111\u20138120. https:\/\/doi.org\/10.1109\/CVPR52688.2022.00795","key":"20246_CR87","DOI":"10.1109\/CVPR52688.2022.00795"},{"doi-asserted-by":"crossref","unstructured":"Huang Z, Shi X, Zhang C, Wang Q, Cheung KC, Qin H, Dai J, Li H (2022) Flowformer: a transformer architecture for optical flow. arXiv:2203.16194. https:\/\/api.semanticscholar.org\/CorpusID:247792986","key":"20246_CR88","DOI":"10.1007\/978-3-031-19790-1_40"},{"doi-asserted-by":"crossref","unstructured":"Shi X, Huang Z, Li D, Zhang M, Cheung KC, See S, Qin H, Dai J, Li H (2023) Flowformer++: Masked cost volume autoencoding for pretraining optical flow estimation. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 1599\u20131610","key":"20246_CR89","DOI":"10.1109\/CVPR52729.2023.00160"},{"doi-asserted-by":"crossref","unstructured":"Lu Y, Wang Q, Ma S, Geng T, Chen YV, Chen H, Liu D (2023) Transflow: transformer as flow learner. In: Proceedings of the IEEE\/CVF Conference on computer vision and pattern recognition, pp 18063\u201318073","key":"20246_CR90","DOI":"10.1109\/CVPR52729.2023.01732"},{"doi-asserted-by":"crossref","unstructured":"Zhang Z, Jiang H, Singh H (2024) Neuflow: real-time, high-accuracy optical flow estimation on robots using edge devices. arXiv:2403.10425. https:\/\/api.semanticscholar.org\/CorpusID:268510480","key":"20246_CR91","DOI":"10.1109\/IROS58592.2024.10802353"},{"doi-asserted-by":"crossref","unstructured":"Dong Q, Fu Y (2024) Memflow: optical flow estimation and prediction with memory. arXiv:2404.04808. https:\/\/api.semanticscholar.org\/CorpusID:269005178","key":"20246_CR92","DOI":"10.1109\/CVPR52733.2024.01804"},{"doi-asserted-by":"crossref","unstructured":"Shaw P, Uszkoreit J, Vaswani A (2018) Self-attention with relative position representations. arXiv:1803.02155","key":"20246_CR93","DOI":"10.18653\/v1\/N18-2074"},{"doi-asserted-by":"crossref","unstructured":"He K, Chen X, Xie S, Li Y, Doll\u2019ar P, Girshick RB (2021) Masked autoencoders are scalable vision learners. 2022 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp 15979\u201315988. https:\/\/api.semanticscholar.org\/CorpusID:243985980","key":"20246_CR94","DOI":"10.1109\/CVPR52688.2022.01553"},{"unstructured":"Dao T, Fu DY, Ermon S, Rudra A, R\u2019e C (2022) Flashattention: fast and memory-efficient exact attention with io-awareness. arXiv:2205.14135. https:\/\/api.semanticscholar.org\/CorpusID:249151871","key":"20246_CR95"},{"doi-asserted-by":"publisher","unstructured":"Baker S, Scharstein D, Lewis JP, Roth S, Black MJ, Szeliski R (2011) A database and evaluation methodology for optical flow. Int J Comput Vis 92(1):1\u201331. ISSN 0920-5691, 1573-1405. https:\/\/doi.org\/10.1007\/s11263-010-0390-2","key":"20246_CR96","DOI":"10.1007\/s11263-010-0390-2"},{"doi-asserted-by":"crossref","unstructured":"Ilg E, Saikia T, Keuper M, Brox T (2018) Occlusions, motion and depth boundaries with a generic network for disparity. Optical flow or scene flow estimation","key":"20246_CR97","DOI":"10.1007\/978-3-030-01258-8_38"},{"doi-asserted-by":"publisher","unstructured":"Mayer N, Ilg E, H\u00e4usser P, Fischer P, Cremers D, Dosovitskiy A, Brox T (2016) A large dataset to train convolutional networks for disparity. Optical flow, and scene flow estimation. In: 2016 IEEE Conference on Computer Vision and Pattern Recognition (CVPR), pp 4040\u20134048. https:\/\/doi.org\/10.1109\/CVPR.2016.438","key":"20246_CR98","DOI":"10.1109\/CVPR.2016.438"},{"doi-asserted-by":"publisher","unstructured":"Kondermann D, Nair R, Honauer K, Krispin K, Andrulis J, Brock A, G\u00fcssefeld B, Rahimimoghaddam M, Hofmann S, Brenner C, J\u00e4hne B (2016) The HCI benchmark suite: stereo and flow ground truth with uncertainties for urban autonomous driving. In: 2016 IEEE Conference on Computer Vision and Pattern Recognition Workshops (CVPRW), pp 19\u201328. https:\/\/doi.org\/10.1109\/CVPRW.2016.10","key":"20246_CR99","DOI":"10.1109\/CVPRW.2016.10"},{"doi-asserted-by":"publisher","unstructured":"Richter SR, Hayder Z, Koltun V (2017) Playing for Benchmarks. 2017 IEEE International Conference on Computer Vision (ICCV), pp 2232\u20132241. https:\/\/doi.org\/10.1109\/ICCV.2017.243","key":"20246_CR100","DOI":"10.1109\/ICCV.2017.243"},{"doi-asserted-by":"crossref","unstructured":"Richter SR, Hayder Z, Koltun V (2017) Playing for benchmarks. In: Proceedings of the IEEE International conference on computer vision, pp 2213\u20132222","key":"20246_CR101","DOI":"10.1109\/ICCV.2017.243"},{"doi-asserted-by":"publisher","unstructured":"Sun D, Vlasic D, Herrmann C, Jampani V, Krainin M, Chang H, Zabih R, Freeman WT, Liu C (2021) AutoFlow: Learning a better training set for optical flow. 2021 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp 10088\u201310097. https:\/\/doi.org\/10.1109\/CVPR46437.2021.00996","key":"20246_CR102","DOI":"10.1109\/CVPR46437.2021.00996"},{"doi-asserted-by":"crossref","unstructured":"Greff K, Belletti F, Beyer L, Doersch C, Du Y, Duckworth D, Fleet DJ, Gnanapragasam D, Golemo F, Herrmann C et al (2022) Kubric: A scalable dataset generator. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 3749\u20133761","key":"20246_CR103","DOI":"10.1109\/CVPR52688.2022.00373"},{"doi-asserted-by":"publisher","unstructured":"Yin Z, Darrell T, Yu F (2019) Hierarchical discrete distribution decomposition for match density estimation. In: 2019 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp 6037\u20136046. https:\/\/doi.org\/10.1109\/CVPR.2019.00620","key":"20246_CR104","DOI":"10.1109\/CVPR.2019.00620"},{"doi-asserted-by":"publisher","unstructured":"Jiang S, Lu Y, Li H, Hartley R (2021) Learning optical flow from a few matches. In: 2021 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp 16587\u201316595. https:\/\/doi.org\/10.1109\/CVPR46437.2021.01632","key":"20246_CR105","DOI":"10.1109\/CVPR46437.2021.01632"},{"doi-asserted-by":"crossref","unstructured":"Xu N, Yang L, Fan Y, Yue D, Liang Y, Yang J, Huang T (2018) Youtube-vos: a large-scale video object segmentation benchmark. arXiv:1809.03327","key":"20246_CR106","DOI":"10.1007\/978-3-030-01228-1_36"},{"doi-asserted-by":"publisher","unstructured":"Black MJ, Jepson AD (1996) Estimating optical flow in segmented images using variable order parametric models with local deformations. IEEE Transactions on Pattern Analysis and Machine Intelligence, 18(10):972\u2013986. https:\/\/doi.org\/10.1109\/34.541407","key":"20246_CR107","DOI":"10.1109\/34.541407"}],"container-title":["Multimedia Tools and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11042-024-20246-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11042-024-20246-3\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11042-024-20246-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,5]],"date-time":"2025-09-05T22:52:03Z","timestamp":1757112723000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11042-024-20246-3"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,9,25]]},"references-count":107,"journal-issue":{"issue":"24","published-online":{"date-parts":[[2025,7]]}},"alternative-id":["20246"],"URL":"https:\/\/doi.org\/10.1007\/s11042-024-20246-3","relation":{},"ISSN":["1573-7721"],"issn-type":[{"type":"electronic","value":"1573-7721"}],"subject":[],"published":{"date-parts":[[2024,9,25]]},"assertion":[{"value":"6 April 2024","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"9 August 2024","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"7 September 2024","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"25 September 2024","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors have no competing interests to declare that are relevant to the content of this article.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Competing Interest"}}]}}