{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,6]],"date-time":"2026-02-06T00:32:38Z","timestamp":1770337958201,"version":"3.49.0"},"reference-count":95,"publisher":"Springer Science and Business Media LLC","issue":"8","license":[{"start":{"date-parts":[[2023,8,9]],"date-time":"2023-08-09T00:00:00Z","timestamp":1691539200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,8,9]],"date-time":"2023-08-09T00:00:00Z","timestamp":1691539200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Multimed Tools Appl"],"DOI":"10.1007\/s11042-023-16417-3","type":"journal-article","created":{"date-parts":[[2023,8,9]],"date-time":"2023-08-09T08:02:35Z","timestamp":1691568155000},"page":"24715-24748","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["A systematic review of deep learning frameworks for moving object segmentation"],"prefix":"10.1007","volume":"83","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-4796-1071","authenticated-orcid":false,"given":"Dipika","family":"Gupta","sequence":"first","affiliation":[]},{"given":"Manish","family":"Kumar","sequence":"additional","affiliation":[]},{"given":"Sachin","family":"Chaudhary","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2023,8,9]]},"reference":[{"issue":"29\u201330","key":"16417_CR1","doi-asserted-by":"publisher","first-page":"20483","DOI":"10.1007\/s11042-019-07988-1","volume":"79","author":"I Bakkouri","year":"2020","unstructured":"Bakkouri I, Afdel K (2020) Computer-aided diagnosis (cad) system based on multi-layer feature fusion network for skin lesion recognition in dermoscopy images. Multimedia Tools and Applications 79(29\u201330):20483\u201320518","journal-title":"Multimedia Tools and Applications"},{"key":"16417_CR2","first-page":"1","volume-title":"Mlca2f: Multi-level context attentional feature fusion for covid-19 lesion segmentation from ct scans","author":"I Bakkouri","year":"2022","unstructured":"Bakkouri I, Afdel K (2022) Mlca2f: Multi-level context attentional feature fusion for covid-19 lesion segmentation from ct scans. Signal, Image and Video Processing, pp 1\u20138"},{"key":"16417_CR3","unstructured":"Ballas N, Yao L, Pal C, Courville A (2015) Delving deeper into convolutional networks for learning video representations. arXiv preprint arXiv:1511.06432"},{"key":"16417_CR4","doi-asserted-by":"crossref","unstructured":"Bertinetto L, Valmadre J, Henriques JF, Vedaldi A, Torr PH (2016) Fully-convolutional siamese networks for object tracking. In: European Conference on Computer Vision, Springer, pp 850\u2013865","DOI":"10.1007\/978-3-319-48881-3_56"},{"key":"16417_CR5","doi-asserted-by":"crossref","unstructured":"Bhat G, Lawin FJ, Danelljan M, Robinson A, Felsberg M, Gool LV, Timofte R (2020) Learning what to learn for video object segmentation. In: European Conference on Computer Vision, Springer, pp 777\u2013794","DOI":"10.1007\/978-3-030-58536-5_46"},{"key":"16417_CR6","doi-asserted-by":"crossref","unstructured":"Botach A, Zheltonozhskii E, Baskin C (2022) End-to-end referring video object segmentation with multimodal transformers. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp 4985\u20134995","DOI":"10.1109\/CVPR52688.2022.00493"},{"key":"16417_CR7","doi-asserted-by":"crossref","unstructured":"Brox T, Malik J (2010) Object segmentation by long term analysis of point trajectories. In: European Conference on Computer Vision, Springer, pp 282\u2013295","DOI":"10.1007\/978-3-642-15555-0_21"},{"key":"16417_CR8","doi-asserted-by":"crossref","unstructured":"Caelles S, Maninis K-K, Pont-Tuset J, Leal-Taix\u00e9 L, Cremers D, Van Gool L (2017) One-shot video object segmentation. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp 221\u2013230","DOI":"10.1109\/CVPR.2017.565"},{"key":"16417_CR9","unstructured":"Caelles S, Pont-Tuset J, Perazzi F, Montes A, Maninis K-K, Van Gool L (2019) The 2019 davis challenge on vos: Unsupervised multi-object segmentation. arXiv preprint arXiv:1905.00737"},{"key":"16417_CR10","doi-asserted-by":"crossref","unstructured":"Chen Y-W, Jin X, Shen X, Yang M-H (2022) Video salient object detection via contrastive features and attention modules. In: Proceedings of the IEEE\/CVF Winter Conference on Applications of Computer Vision, pp 1320\u20131329","DOI":"10.1109\/WACV51458.2022.00061"},{"key":"16417_CR11","doi-asserted-by":"crossref","unstructured":"Cheng HK, Tai Y-W, Tang C-K (2021) Modular interactive video object segmentation: Interaction-to-mask, propagation and difference-aware fusion. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp 5559\u20135568","DOI":"10.1109\/CVPR46437.2021.00551"},{"key":"16417_CR12","unstructured":"Cheng HK, Tai Y-W, Tang C-K (2021) Rethinking space-time networks with improved memory coverage for efficient video object segmentation. Advances in Neural Information Processing Systems 34"},{"key":"16417_CR13","doi-asserted-by":"crossref","unstructured":"Cheng J, Tsai Y-H, Wang S, Yang M-H (2017) Segflow: Joint learning for video object segmentation and optical flow. In: Proceedings of the IEEE International Conference on Computer Vision, pp 686\u2013695","DOI":"10.1109\/ICCV.2017.81"},{"key":"16417_CR14","doi-asserted-by":"crossref","unstructured":"Chen X, Li Z, Yuan Y, Yu G, Shen J, Qi D (2020) State-aware tracker for real-time video object segmentation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp 9384\u20139393","DOI":"10.1109\/CVPR42600.2020.00940"},{"key":"16417_CR15","doi-asserted-by":"crossref","unstructured":"Cho S, Lee H, Kim M, Jang S, Lee S (2022) Pixel-level bijective matching for video object segmentation. In: Proceedings of the IEEE\/CVF Winter Conference on Applications of Computer Vision, pp 129\u2013138","DOI":"10.1109\/WACV51458.2022.00152"},{"issue":"10","key":"16417_CR16","doi-asserted-by":"publisher","first-page":"1337","DOI":"10.1109\/TPAMI.2003.1233909","volume":"25","author":"R Cucchiara","year":"2003","unstructured":"Cucchiara R, Grana C, Piccardi M, Prati A (2003) Detecting moving objects, ghosts, and shadows in video streams. IEEE Transactions on Pattern Analysis and Machine Intelligence 25(10):1337\u20131342","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"issue":"6","key":"16417_CR17","doi-asserted-by":"publisher","first-page":"1614","DOI":"10.1109\/TNN.2007.896861","volume":"18","author":"D Culibrk","year":"2007","unstructured":"Culibrk D, Marques O, Socek D, Kalva H, Furht B (2007) Neural network approach to background modeling for video object segmentation. IEEE Transactions on Neural Networks 18(6):1614\u20131627","journal-title":"IEEE Transactions on Neural Networks"},{"issue":"3","key":"16417_CR18","doi-asserted-by":"publisher","first-page":"297","DOI":"10.2307\/1932409","volume":"26","author":"LR Dice","year":"1945","unstructured":"Dice LR (1945) Measures of the amount of ecologic association between species. Ecology 26(3):297\u2013302","journal-title":"Ecology"},{"key":"16417_CR19","doi-asserted-by":"crossref","unstructured":"Duarte K, Rawat YS, Shah M (2019) Capsulevos: Semi-supervised video object segmentation using capsule routing. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp 8480\u20138489","DOI":"10.1109\/ICCV.2019.00857"},{"key":"16417_CR20","doi-asserted-by":"crossref","unstructured":"Duke B, Ahmed A, Wolf C, Aarabi P, Taylor GW (2021) Sstvos: Sparse spatiotemporal transformers for video object segmentation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp 5912\u20135921","DOI":"10.1109\/CVPR46437.2021.00585"},{"key":"16417_CR21","doi-asserted-by":"crossref","unstructured":"Dutt Jain S, Xiong B, Grauman K (2017) Fusionseg: Learning to combine motion and appearance for fully automatic segmentation of generic objects in videos. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp 3664\u20133673","DOI":"10.1109\/CVPR.2017.228"},{"key":"16417_CR22","doi-asserted-by":"crossref","unstructured":"Fiaz M, Zaheer MZ, Mahmood A, Lee S-I, Jung SK (2021) 4g-vos: Video object segmentation using guided context embedding. Knowl-Based Syst 231:107401","DOI":"10.1016\/j.knosys.2021.107401"},{"key":"16417_CR23","doi-asserted-by":"crossref","unstructured":"Gao M, Zheng F, Yu JJ, Shan C, Ding G, Han J (2022) Deep learning for video object segmentation: a review. Artif Intell Rev 1\u201375","DOI":"10.1007\/s10462-022-10176-7"},{"key":"16417_CR24","doi-asserted-by":"crossref","unstructured":"Ge W, Lu X, Shen J (2021) Video object segmentation using global and instance embedding learning. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp 16836\u201316845","DOI":"10.1109\/CVPR46437.2021.01656"},{"key":"16417_CR25","doi-asserted-by":"crossref","unstructured":"Han J, Yang L, Zhang D, Chang X, Liang X (2018) Reinforcement cutting-agent learning for video object segmentation. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp 9080\u20139089","DOI":"10.1109\/CVPR.2018.00946"},{"key":"16417_CR26","doi-asserted-by":"crossref","unstructured":"Heo Y, Koh YJ, Kim C-S (2021) Guided interactive video object segmentation using reliability-based attention maps. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp 7322\u20137330","DOI":"10.1109\/CVPR46437.2021.00724"},{"key":"16417_CR27","doi-asserted-by":"crossref","unstructured":"Hu Y-T, Chen H-S, Hui K, Huang J-B, Schwing AG (2019) Sail-vos: Semantic amodal instance level video object segmentation-a synthetic dataset and baselines. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp 3105\u20133115","DOI":"10.1109\/CVPR.2019.00322"},{"key":"16417_CR28","unstructured":"Hu Y-T, Huang J-B, Schwing A (2017) Maskrnn: Instance level video object segmentation. Advances in neural information processing systems 30"},{"key":"16417_CR29","doi-asserted-by":"crossref","unstructured":"Hu L, Zhang P, Zhang B, Pan P, Xu Y, Jin R (2021) Learning position and target consistency for memory-based video object segmentation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp 4144\u20134154","DOI":"10.1109\/CVPR46437.2021.00413"},{"key":"16417_CR30","doi-asserted-by":"crossref","unstructured":"Ji G-P, Fu K, Wu Z, Fan D-P, Shen J, Shao L (2021) Full-duplex strategy for video object segmentation. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp 4922\u20134933","DOI":"10.1109\/ICCV48922.2021.00488"},{"key":"16417_CR31","doi-asserted-by":"crossref","unstructured":"Johnander J, Danelljan M, Brissman E, Khan FS, Felsberg M (2019) A generative appearance model for end-to-end video object segmentation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp 8953\u20138962","DOI":"10.1109\/CVPR.2019.00916"},{"key":"16417_CR32","doi-asserted-by":"crossref","unstructured":"Lamdouar H, Yang C, Xie W, Zisserman A (2020) Betrayed by motion: Camouflaged object discovery via motion segmentation. In: Proceedings of the Asian Conference on Computer Vision","DOI":"10.1007\/978-3-030-69532-3_30"},{"key":"16417_CR33","doi-asserted-by":"publisher","first-page":"1228","DOI":"10.1609\/aaai.v36i2.20009","volume":"36","author":"M Lan","year":"2022","unstructured":"Lan M, Zhang J, He F, Zhang L (2022) Siamese network with interactive transformer for video object segmentation. Proceedings of the AAAI Conference on Artificial Intelligence 36:1228\u20131236","journal-title":"Proceedings of the AAAI Conference on Artificial Intelligence"},{"key":"16417_CR34","doi-asserted-by":"crossref","unstructured":"Lee Y, Seong H, Kim E (2021) Iteratively selecting an easy reference frame makes unsupervised video object segmentation easier. arXiv preprint arXiv:2112.12402","DOI":"10.1609\/aaai.v36i2.20011"},{"key":"16417_CR35","first-page":"3430","volume":"33","author":"Y Liang","year":"2020","unstructured":"Liang Y, Li X, Jafari N, Chen J (2020) Video object segmentation with adaptive feature bank and uncertain-region refinement. Advances in Neural Information Processing Systems 33:3430\u20133441","journal-title":"Advances in Neural Information Processing Systems"},{"key":"16417_CR36","doi-asserted-by":"crossref","unstructured":"Li M, Hu L, Xiong Z, Zhang B, Pan P, Liu D (2022) Recurrent dynamic embedding for video object segmentation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp 1332\u20131341","DOI":"10.1109\/CVPR52688.2022.00139"},{"key":"16417_CR37","doi-asserted-by":"publisher","unstructured":"Li F, Kim T, Humayun A, Tsai D, Rehg JM (2013) Video segmentation by tracking many figure-ground segments. In: 2013 IEEE International Conference on Computer Vision, pp 2192\u20132199. https:\/\/doi.org\/10.1109\/ICCV.2013.273","DOI":"10.1109\/ICCV.2013.273"},{"key":"16417_CR38","doi-asserted-by":"crossref","unstructured":"Li X, Loy CC (2018) Video object segmentation with joint re-identification and attention-aware mask propagation. In: Proceedings of the European Conference on Computer Vision (ECCV), pp 90\u2013105","DOI":"10.1007\/978-3-030-01219-9_6"},{"key":"16417_CR39","doi-asserted-by":"crossref","unstructured":"Lin H, Qi X, Jia J (2019) Agss-vos: Attention guided single-shot video object segmentation. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp 3949\u20133957","DOI":"10.1109\/ICCV.2019.00405"},{"key":"16417_CR40","doi-asserted-by":"crossref","unstructured":"Lin Z, Yang T, Li M, Wang Z, Yuan C, Jiang W, Liu W (2022) Swem: Towards real-time video object segmentation with sequential weighted expectation-maximization. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp 1362\u20131372","DOI":"10.1109\/CVPR52688.2022.00142"},{"key":"16417_CR41","doi-asserted-by":"crossref","unstructured":"Li S, Seybold B, Vorobyov A, Fathi A, Huang Q, Kuo C-CJ (2018) Instance embedding transfer to unsupervised video object segmentation. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp 6526\u20136535","DOI":"10.1109\/CVPR.2018.00683"},{"key":"16417_CR42","doi-asserted-by":"crossref","unstructured":"Li S, Seybold B, Vorobyov A, Lei X, Kuo C-CJ (2018) Unsupervised video object segmentation with motion-based bilateral networks. In: Proceedings of the European Conference on Computer Vision (ECCV), pp 207\u2013223","DOI":"10.1007\/978-3-030-01219-9_13"},{"key":"16417_CR43","first-page":"1","volume":"71","author":"Z Liu","year":"2021","unstructured":"Liu Z, Liu J, Chen W, Wu X, Li Z (2021) Faminet: Learning real-time semisupervised video object segmentation with steepest optimized optical flow. IEEE Trans Instrum Meas 71:1\u201316","journal-title":"IEEE Trans Instrum Meas"},{"key":"16417_CR44","doi-asserted-by":"crossref","unstructured":"Liu Y, Yu R, Yin F, Zhao X, Zhao W, Xia W, Yang Y (2022) Learning quality-aware dynamic memory for video object segmentation. arXiv preprint arXiv:2207.07922","DOI":"10.1007\/978-3-031-19818-2_27"},{"key":"16417_CR45","doi-asserted-by":"crossref","unstructured":"Long J, Shelhamer E, Darrell T (2015) Fully convolutional networks for semantic segmentation. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp 3431\u20133440","DOI":"10.1109\/CVPR.2015.7298965"},{"key":"16417_CR46","doi-asserted-by":"crossref","unstructured":"Luiten J, Zulfikar IE, Leibe B (2020) Unovost: Unsupervised offline video object segmentation and tracking. In: Proceedings of the IEEE\/CVF Winter Conference on Applications of Computer Vision, pp 2000\u20132009","DOI":"10.1109\/WACV45572.2020.9093285"},{"key":"16417_CR47","doi-asserted-by":"crossref","unstructured":"Lu X, Wang W, Danelljan M, Zhou T, Shen J, Gool LV (2020) Video object segmentation with episodic graph memory networks. In: European Conference on Computer Vision, Springer, pp 661\u2013679","DOI":"10.1007\/978-3-030-58580-8_39"},{"issue":"6","key":"16417_CR48","doi-asserted-by":"publisher","first-page":"1515","DOI":"10.1109\/TPAMI.2018.2838670","volume":"41","author":"K-K Maninis","year":"2018","unstructured":"Maninis K-K, Caelles S, Chen Y, Pont-Tuset J, Leal-Taix\u00e9 L, Cremers D, Van Gool L (2018) Video object segmentation without temporal information. IEEE Transactions on Pattern Analysis and Machine Intelligence 41(6):1515\u20131530","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"16417_CR49","doi-asserted-by":"crossref","unstructured":"Mao Y, Wang N, Zhou W, Li H (2021) Joint inductive and transductive learning for video object segmentation. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp 9670\u20139679","DOI":"10.1109\/ICCV48922.2021.00953"},{"key":"16417_CR50","doi-asserted-by":"crossref","unstructured":"Miao J, Wei Y, Yang Y (2020) Memory aggregation networks for efficient interactive video object segmentation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp 10366\u201310375","DOI":"10.1109\/CVPR42600.2020.01038"},{"issue":"6","key":"16417_CR51","doi-asserted-by":"publisher","first-page":"1187","DOI":"10.1109\/TPAMI.2013.242","volume":"36","author":"P Ochs","year":"2013","unstructured":"Ochs P, Malik J, Brox T (2013) Segmentation of moving objects by long term video analysis. IEEE Transactions on Pattern Analysis and Machine Intelligence 36(6):1187\u20131200","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"16417_CR52","doi-asserted-by":"crossref","unstructured":"Oh SW, Lee J-Y, Sunkavalli K, Kim SJ (2018) Fast video object segmentation by reference-guided mask propagation. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp 7376\u20137385","DOI":"10.1109\/CVPR.2018.00770"},{"key":"16417_CR53","doi-asserted-by":"crossref","unstructured":"Oh SW, Lee J-Y, Xu N, Kim SJ (2019) Video object segmentation using space-time memory networks. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp 9226\u20139235","DOI":"10.1109\/ICCV.2019.00932"},{"key":"16417_CR54","doi-asserted-by":"crossref","unstructured":"Park K, Woo S, Oh SW, Kweon IS, Lee J-Y (2022) Per-clip video object segmentation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp 1352\u20131361","DOI":"10.1109\/CVPR52688.2022.00141"},{"key":"16417_CR55","doi-asserted-by":"crossref","unstructured":"Patil PW, Biradar KM, Dudhane A, Murala S (2020) An end-to-end edge aggregation network for moving object segmentation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp 8149\u20138158","DOI":"10.1109\/CVPR42600.2020.00817"},{"key":"16417_CR56","doi-asserted-by":"crossref","unstructured":"Perazzi F, Khoreva A, Benenson R, Schiele B, Sorkine-Hornung A (2017) Learning video object segmentation from static images. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp 2663\u20132672","DOI":"10.1109\/CVPR.2017.372"},{"key":"16417_CR57","doi-asserted-by":"publisher","unstructured":"Perazzi F, Pont-Tuset J, McWilliams B, Van Gool L, Gross M, Sorkine-Hornung A (2016) A benchmark dataset and evaluation methodology for video object segmentation. In: 2016 IEEE Conference on Computer Vision and Pattern Recognition (CVPR), pp 724\u2013732. https:\/\/doi.org\/10.1109\/CVPR.2016.85","DOI":"10.1109\/CVPR.2016.85"},{"key":"16417_CR58","doi-asserted-by":"crossref","unstructured":"Perazzi F, Pont-Tuset J, McWilliams B, Van Gool L, Gross M, Sorkine-Hornung A (2016) A benchmark dataset and evaluation methodology for video object segmentation. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp 724\u2013732","DOI":"10.1109\/CVPR.2016.85"},{"key":"16417_CR59","unstructured":"Pont-Tuset J, Perazzi F, Caelles S, Arbel\u00e1ez P, Sorkine-Hornung A, Van Gool L (2017) The 2017 davis challenge on video object segmentation. arXiv preprint arXiv:1704.00675"},{"key":"16417_CR60","unstructured":"Ren S, He K, Girshick R, Sun J (2015) Faster r-cnn: Towards real-time object detection with region proposal networks. Advances in neural information processing systems 28"},{"key":"16417_CR61","doi-asserted-by":"crossref","unstructured":"Ren S, Liu W, Liu Y, Chen H, Han G, He S (2021) Reciprocal transformations for unsupervised video object segmentation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp 15455\u201315464","DOI":"10.1109\/CVPR46437.2021.01520"},{"key":"16417_CR62","doi-asserted-by":"crossref","unstructured":"Rother C, Kolmogorov V, Blake A (2004) \u201dgrabcut\u201d interactive foreground extraction using iterated graph cuts. ACM Trans Graph 23(3):309\u2013314","DOI":"10.1145\/1015706.1015720"},{"key":"16417_CR63","doi-asserted-by":"crossref","unstructured":"Schmidt C, Athar A, Mahadevan S, Leibe B (2022) D2conv3d: Dynamic dilated convolutions for object segmentation in videos. In: Proceedings of the IEEE\/CVF Winter Conference on Applications of Computer Vision, pp 1200\u20131209","DOI":"10.1109\/WACV51458.2022.00199"},{"key":"16417_CR64","doi-asserted-by":"crossref","unstructured":"Seo S, Lee J-Y, Han B (2020) Urvos: Unified referring video object segmentation network with a large-scale benchmark. In: European Conference on Computer Vision, Springer, pp 208\u2013223","DOI":"10.1007\/978-3-030-58555-6_13"},{"key":"16417_CR65","doi-asserted-by":"crossref","unstructured":"Seong H, Hyun J, Kim E (2020) Kernelized memory network for video object segmentation. In: European Conference on Computer Vision, Springer, pp 629\u2013645","DOI":"10.1007\/978-3-030-58542-6_38"},{"key":"16417_CR66","doi-asserted-by":"crossref","unstructured":"Seong H, Oh SW, Lee J-Y, Lee S, Lee S, Kim E (2021) Hierarchical memory matching network for video object segmentation. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp 12889\u201312898","DOI":"10.1109\/ICCV48922.2021.01265"},{"key":"16417_CR67","unstructured":"Shi X, Chen Z, Wang H, Yeung D-Y, Wong W-K, Woo W-c (2015) Convolutional lstm network: A machine learning approach for precipitation nowcasting. Advances in neural information processing systems 28"},{"key":"16417_CR68","doi-asserted-by":"crossref","unstructured":"Tokmakov P, Alahari K, Schmid C (2017) Learning video object segmentation with visual memory. In: Proceedings of the IEEE International Conference on Computer Vision, pp 4481\u20134490","DOI":"10.1109\/ICCV.2017.480"},{"issue":"2","key":"16417_CR69","doi-asserted-by":"publisher","first-page":"190","DOI":"10.1007\/s11263-011-0512-5","volume":"100","author":"D Tsai","year":"2012","unstructured":"Tsai D, Flagg M, Nakazawa A, Rehg JM (2012) Motion coherent tracking using multi-label mrf optimization. Int J Comput Vis 100(2):190\u2013202","journal-title":"Int J Comput Vis"},{"key":"16417_CR70","doi-asserted-by":"crossref","unstructured":"Ventura C, Bellver M, Girbau A, Salvador A, Marques F, Giro-i-Nieto X (2019) Rvos: End-to-end recurrent network for video object segmentation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp 5277\u20135286","DOI":"10.1109\/CVPR.2019.00542"},{"key":"16417_CR71","doi-asserted-by":"crossref","unstructured":"Voigtlaender P, Chai Y, Schroff F, Adam H, Leibe B, Chen L-C (2019) Feelvos: Fast end-to-end embedding learning for video object segmentation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp 9481\u20139490","DOI":"10.1109\/CVPR.2019.00971"},{"key":"16417_CR72","doi-asserted-by":"crossref","unstructured":"Voigtlaender P, Leibe B (2017) Online adaptation of convolutional neural networks for video object segmentation. arXiv preprint arXiv:1706.09364","DOI":"10.5244\/C.31.116"},{"key":"16417_CR73","doi-asserted-by":"crossref","unstructured":"Voigtlaender P, Luo L, Yuan C, Jiang Y, Leibe B (2021) Reducing the annotation effort for video object segmentation datasets. In: Proceedings of the IEEE\/CVF Winter Conference on Applications of Computer Vision, pp 3060\u20133069","DOI":"10.1109\/WACV48630.2021.00310"},{"issue":"4","key":"16417_CR74","doi-asserted-by":"publisher","first-page":"985","DOI":"10.1109\/TPAMI.2018.2819173","volume":"41","author":"W Wang","year":"2018","unstructured":"Wang W, Shen J, Porikli F, Yang R (2018) Semi-supervised video object segmentation with super-trajectories. IEEE Transactions on Pattern Analysis and Machine Intelligence 41(4):985\u2013998","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"16417_CR75","doi-asserted-by":"crossref","unstructured":"Wang H, Jiang X, Ren H, Hu Y, Bai S (2021) Swiftnet: Real-time video object segmentation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp 1296\u20131305","DOI":"10.1109\/CVPR46437.2021.00135"},{"key":"16417_CR76","doi-asserted-by":"crossref","unstructured":"Wang W, Lu X, Shen J, Crandall DJ, Shao L (2019) Zero-shot video object segmentation via attentive graph neural networks. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp 9236\u20139245","DOI":"10.1109\/ICCV.2019.00933"},{"key":"16417_CR77","doi-asserted-by":"crossref","unstructured":"Wang W, Song H, Zhao S, Shen J, Zhao S, Hoi SC, Ling H (2019) Learning unsupervised video object segmentation through visual attention. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp 3064\u20133074","DOI":"10.1109\/CVPR.2019.00318"},{"issue":"3","key":"16417_CR78","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3506716","volume":"13","author":"L Wei","year":"2022","unstructured":"Wei L, Lang C, Liang L, Feng S, Wang T, Chen S (2022) Weakly supervised video object segmentation via dual-attention cross-branch fusion. ACM Transactions on Intelligent Systems and Technology (TIST) 13(3):1\u201320","journal-title":"ACM Transactions on Intelligent Systems and Technology (TIST)"},{"key":"16417_CR79","doi-asserted-by":"crossref","unstructured":"Wu D, Dong X, Shao L, Shen J (2022) Multi-level representation learning with semantic alignment for referring video object segmentation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp 4996\u20135005","DOI":"10.1109\/CVPR52688.2022.00494"},{"key":"16417_CR80","doi-asserted-by":"crossref","unstructured":"Wu J, Jiang Y, Sun P, Yuan Z, Luo P (2022) Language as queries for referring video object segmentation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp 4974\u20134984","DOI":"10.1109\/CVPR52688.2022.00492"},{"key":"16417_CR81","doi-asserted-by":"crossref","unstructured":"Xie H, Yao H, Zhou S, Zhang S, Sun W (2021) Efficient regional memory network for video object segmentation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp 1286\u20131295","DOI":"10.1109\/CVPR46437.2021.00134"},{"key":"16417_CR82","doi-asserted-by":"publisher","first-page":"12549","DOI":"10.1609\/aaai.v34i07.6944","volume":"34","author":"Y Xu","year":"2020","unstructured":"Xu Y, Wang Z, Li Z, Yuan Y, Yu G (2020) Siamfc++: Towards robust and accurate visual tracking with target estimation guidelines. Proceedings of the AAAI Conference on Artificial Intelligence 34:12549\u201312556","journal-title":"Proceedings of the AAAI Conference on Artificial Intelligence"},{"key":"16417_CR83","unstructured":"Xu X, Wang J, Li X, Lu Y (2021) Reliable propagation-correction modulation for video object segmentation. arXiv preprint arXiv:2112.02853"},{"key":"16417_CR84","doi-asserted-by":"crossref","unstructured":"Xu N, Yang L, Fan Y, Yue D, Liang Y, Yang J, Huang T (2018) Youtube-vos: A large-scale video object segmentation benchmark. arXiv preprint arXiv:1809.03327","DOI":"10.1007\/978-3-030-01228-1_36"},{"key":"16417_CR85","doi-asserted-by":"crossref","unstructured":"Xu K, Yao A (2021) Efficient video object segmentation with compressed video. arXiv preprint arXiv:2107.12192","DOI":"10.1109\/CVPR52688.2022.00140"},{"key":"16417_CR86","doi-asserted-by":"crossref","unstructured":"Xu K, Yao A (2022) Accelerating video object segmentation with compressed video. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp 1342\u20131351","DOI":"10.1109\/CVPR52688.2022.00140"},{"key":"16417_CR87","first-page":"2491","volume":"34","author":"Z Yang","year":"2021","unstructured":"Yang Z, Wei Y, Yang Y (2021) Associating objects with transformers for video object segmentation. Advances in Neural Information Processing Systems 34:2491\u20132502","journal-title":"Advances in Neural Information Processing Systems"},{"key":"16417_CR88","doi-asserted-by":"crossref","unstructured":"Yang L, Fan Y, Xu N (2019) Video instance segmentation. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp 5188\u20135197","DOI":"10.1109\/ICCV.2019.00529"},{"key":"16417_CR89","doi-asserted-by":"crossref","unstructured":"Yang L, Wang Y, Xiong X, Yang J, Katsaggelos AK (2018) Efficient video object segmentation via network modulation. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp 6499\u20136507","DOI":"10.1109\/CVPR.2018.00680"},{"key":"16417_CR90","doi-asserted-by":"crossref","unstructured":"Yin Y, Xu D, Wang X, Zhang L (2021) Agunet: Annotation-guided u-net for fast one-shot video object segmentation. Pattern Recogn 110:107580","DOI":"10.1016\/j.patcog.2020.107580"},{"key":"16417_CR91","doi-asserted-by":"crossref","unstructured":"Yin Z, Zheng J, Luo W, Qian S, Zhang H, Gao S (2021) Learning to recommend frame for interactive video object segmentation in the wild. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp 15445\u201315454","DOI":"10.1109\/CVPR46437.2021.01519"},{"key":"16417_CR92","doi-asserted-by":"crossref","unstructured":"Yu F, Chen H, Wang X, Xian W, Chen Y, Liu F, Madhavan V, Darrell T (2020) Bdd100k: A diverse driving dataset for heterogeneous multitask learning. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp 2636\u20132645","DOI":"10.1109\/CVPR42600.2020.00271"},{"key":"16417_CR93","doi-asserted-by":"crossref","unstructured":"Zhang D, Javed O, Shah M (2013) Video object segmentation through spatially accurate and temporally dense extraction of primary object regions. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp 628\u2013635","DOI":"10.1109\/CVPR.2013.87"},{"key":"16417_CR94","doi-asserted-by":"crossref","unstructured":"Zhang L, Lin Z, Zhang J, Lu H, He Y (2019) Fast video object segmentation via dynamic targeting network. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp 5582\u20135591","DOI":"10.1109\/ICCV.2019.00568"},{"key":"16417_CR95","doi-asserted-by":"crossref","unstructured":"Zhou T, Li J, Li X, Shao L (2021) Target-aware object discovery and association for unsupervised video multi-object segmentation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp 6985\u20136994","DOI":"10.1109\/CVPR46437.2021.00691"}],"container-title":["Multimedia Tools and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11042-023-16417-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11042-023-16417-3\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11042-023-16417-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,10,25]],"date-time":"2024-10-25T22:32:10Z","timestamp":1729895530000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11042-023-16417-3"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,8,9]]},"references-count":95,"journal-issue":{"issue":"8","published-online":{"date-parts":[[2024,3]]}},"alternative-id":["16417"],"URL":"https:\/\/doi.org\/10.1007\/s11042-023-16417-3","relation":{},"ISSN":["1573-7721"],"issn-type":[{"value":"1573-7721","type":"electronic"}],"subject":[],"published":{"date-parts":[[2023,8,9]]},"assertion":[{"value":"16 February 2023","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"19 May 2023","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"22 July 2023","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"9 August 2023","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Statements and Declarations"}},{"value":"The authors declare that they have no known competing financial interests or personal relationships that could have appeared to influence the work reported in this paper.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Funding and\/or Conflicts of interests\/Competing interests"}}]}}