{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,6]],"date-time":"2026-02-06T00:58:57Z","timestamp":1770339537811,"version":"3.49.0"},"reference-count":114,"publisher":"Tsinghua University Press","issue":"1","license":[{"start":{"date-parts":[[2023,3,1]],"date-time":"2023-03-01T00:00:00Z","timestamp":1677628800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0"},{"start":{"date-parts":[[2022,10,18]],"date-time":"2022-10-18T00:00:00Z","timestamp":1666051200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Comp. Visual. Med."],"published-print":{"date-parts":[[2023,3]]},"DOI":"10.1007\/s41095-021-0262-4","type":"journal-article","created":{"date-parts":[[2022,10,18]],"date-time":"2022-10-18T03:02:42Z","timestamp":1666062162000},"page":"155-175","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":11,"title":["Full-duplex strategy for video object segmentation"],"prefix":"10.26599","volume":"9","author":[{"given":"Ge-Peng","family":"Ji","sequence":"first","affiliation":[{"name":"School of Computer Science, Wuhan University, Wuhan, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Deng-Ping","family":"Fan","sequence":"additional","affiliation":[{"name":"Computer Vision Lab, ETH Zurich, ETF C113.2, Sternwartstrasse 7, 8092 Zurich, Switzerland"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Keren","family":"Fu","sequence":"additional","affiliation":[{"name":"College of Computer Science, Sichuan University, Chengdu, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zhe","family":"Wu","sequence":"additional","affiliation":[{"name":"Peng Cheng Laboratory, Shenzhen, 
China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jianbing","family":"Shen","sequence":"additional","affiliation":[{"name":"School of Computer Science, Beijing Institute of Technology, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ling","family":"Shao","sequence":"additional","affiliation":[{"name":"Inception Institute of Artificial Intelligence, Abu Dhabi, United Arab Emirates"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"11138","reference":[{"key":"262_CR1","doi-asserted-by":"crossref","unstructured":"Wang, Y. Q.; Xu, Z. L.; Wang, X. L.; Shen, C. H.; Cheng, B. S.; Shen, H.; Xia, H. End-to-end video instance segmentation with transformers. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 8737\u20138746, 2021.","DOI":"10.1109\/CVPR46437.2021.00863"},{"key":"262_CR2","doi-asserted-by":"crossref","unstructured":"Chen, X.; Li, Z. X.; Yuan, Y.; Yu, G.; Shen, J. X.; Qi, D. L. State-aware tracker for real-time video object segmentation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 9381\u20139390, 2020.","DOI":"10.1109\/CVPR42600.2020.00940"},{"key":"262_CR3","doi-asserted-by":"crossref","unstructured":"Abramov, A.; Pauwels, K.; Papon, J.; W\u00f6rg\u00f6tter, F.; Dellen, B. Depth-supported real-time video segmentation with the Kinect. In: Proceedings of the IEEE Workshop on the Applications of Computer Vision, 457\u2013464, 2012.","DOI":"10.1109\/WACV.2012.6163000"},{"issue":"1","key":"262_CR4","doi-asserted-by":"publisher","first-page":"3","DOI":"10.1177\/0278364916679498","volume":"36","author":"W Maddern","year":"2017","unstructured":"Maddern, W.; Pascoe, G.; Linegar, C.; Newman, P. 1 year, 1000 km: The Oxford RobotCar dataset. The International Journal of Robotics Research Vol. 36, No. 
1, 3\u201315, 2017.","journal-title":"The International Journal of Robotics Research"},{"issue":"1","key":"262_CR5","doi-asserted-by":"crossref","first-page":"89","DOI":"10.1609\/hcomp.v4i1.13288","volume":"4","author":"S Jain","year":"2016","unstructured":"Jain, S.; Grauman, K. Click carving: Segmenting objects in video with point clicks. Proceedings of the AAAI Conference on Human Computation and Crowdsourcing Vol. 4, No. 1, 89\u201398, 2016.","journal-title":"Proceedings of the AAAI Conference on Human Computation and Crowdsourcing"},{"issue":"7","key":"262_CR6","doi-asserted-by":"publisher","first-page":"12152","DOI":"10.1609\/aaai.v34i07.6895","volume":"34","author":"H Wang","year":"2020","unstructured":"Wang, H.; Deng, C.; Ma, F.; Yang, Y. Context modulated dynamic networks for actor and action video segmentation with language queries. Proceedings of the AAAI Conference on Artificial Intelligence Vol. 34, No. 7, 12152\u201312159, 2020.","journal-title":"Proceedings of the AAAI Conference on Artificial Intelligence"},{"issue":"7","key":"262_CR7","doi-asserted-by":"publisher","first-page":"10713","DOI":"10.1609\/aaai.v34i07.6699","volume":"34","author":"M Y Ding","year":"2020","unstructured":"Ding, M. Y.; Wang, Z.; Zhou, B. L.; Shi, J. P.; Lu, Z. W.; Luo, P. Every frame counts: Joint learning of video segmentation and optical flow. Proceedings of the AAAI Conference on Artificial Intelligence Vol. 34, No. 7, 10713\u201310720, 2020.","journal-title":"Proceedings of the AAAI Conference on Artificial Intelligence"},{"key":"262_CR8","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"142","DOI":"10.1007\/978-3-030-87193-2_14","volume-title":"Medical Image Computing and Computer Assisted Intervention \u2014 MICCAI 2021","author":"G P Ji","year":"2021","unstructured":"Ji, G. P.; Chou, Y. C.; Fan, D. P.; Chen, G.; Fu, H.; Jha, D.; Shao, L. Progressively normalized self-attention network for video polyp segmentation. 
In: Medical Image Computing and Computer Assisted Intervention \u2014 MICCAI 2021. Lecture Notes in Computer Science, Vol. 12901. Springer Cham, 142\u2013152, 2021."},{"key":"262_CR9","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"293","DOI":"10.1007\/978-3-030-58601-0_18","volume-title":"Computer Vision \u2014 ECCV 2020","author":"B Chen","year":"2020","unstructured":"Chen, B.; Ling, H.; Zeng, X.; Gao, J.; Xu, Z.; Fidler, S. ScribbleBox: Interactive annotation framework for video object segmentation. In: Computer Vision \u2014 ECCV 2020. Lecture Notes in Computer Science, Vol. 12358. Vedaldi, A.; Bischof, H.; Brox, T.; Frahm, J. M. Eds. Springer Cham, 293\u2013310, 2020."},{"key":"262_CR10","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"208","DOI":"10.1007\/978-3-030-58555-6_13","volume-title":"Computer Vision \u2014 ECCV 2020","author":"S Seo","year":"2020","unstructured":"Seo, S.; Lee, J. Y.; Han, B. URVOS: Unified referring video object segmentation network with a large-scale benchmark. In: Computer Vision \u2014 ECCV 2020. Lecture Notes in Computer Science, Vol. 12360. Vedaldi, A.; Bischof, H.; Brox, T.; Frahm, J. M. Eds. Springer Cham, 208\u2013223, 2020."},{"key":"262_CR11","doi-asserted-by":"crossref","unstructured":"Pan, Y. W.; Yao, T.; Li, H. Q.; Mei, T. Video captioning with transferred semantic attributes. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, 984\u2013992, 2017.","DOI":"10.1109\/CVPR.2017.111"},{"key":"262_CR12","doi-asserted-by":"crossref","unstructured":"Lee, S. H.; Jang, W. D.; Kim, C. S. Contour-constrained superpixels for image and video processing. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, 5863\u20135871, 2017.","DOI":"10.1109\/CVPR.2017.621"},{"key":"262_CR13","doi-asserted-by":"crossref","unstructured":"Reso, M.; Jachalsky, J.; Rosenhahn, B.; Ostermann, J. 
Temporally consistent superpixels. In: Proceedings of the IEEE International Conference on Computer Vision, 385\u2013392, 2013.","DOI":"10.1109\/ICCV.2013.55"},{"key":"262_CR14","doi-asserted-by":"crossref","unstructured":"Ilg, E.; Mayer, N.; Saikia, T.; Keuper, M.; Dosovitskiy, A.; Brox, T. FlowNet 2.0: Evolution of optical flow estimation with deep networks. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, 1647\u20131655, 2017.","DOI":"10.1109\/CVPR.2017.179"},{"key":"262_CR15","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"402","DOI":"10.1007\/978-3-030-58536-5_24","volume-title":"Computer Vision \u2014 ECCV 2020","author":"Z Teed","year":"2020","unstructured":"Teed, Z.; Deng, J. RAFT: Recurrent all-pairs field transforms for optical flow. In: Computer Vision \u2014 ECCV 2020. Lecture Notes in Computer Science, Vol. 12347. Vedaldi, A.; Bischof, H.; Brox, T.; Frahm, J. M. Eds. Springer Cham, 402\u2013419, 2020."},{"issue":"8","key":"262_CR16","doi-asserted-by":"publisher","first-page":"1957","DOI":"10.1109\/TPAMI.2019.2906175","volume":"42","author":"P Hu","year":"2020","unstructured":"Hu, P.; Wang, G.; Kong, X.; Kuen, J.; Tan, Y. Motion-guided cascaded refinement network for video object segmentation. IEEE Transactions on Pattern Analysis and Machine Intelligence Vol. 42, No. 8, 1957\u20131967, 2020.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"262_CR17","doi-asserted-by":"crossref","unstructured":"Tokmakov, P.; Alahari, K.; Schmid, C. Learning video object segmentation with visual memory. In: Proceedings of the IEEE International Conference on Computer Vision, 4491\u20134500, 2017.","DOI":"10.1109\/ICCV.2017.480"},{"key":"262_CR18","doi-asserted-by":"crossref","unstructured":"Fan, D. P.; Wang, W. G.; Cheng, M. M.; Shen, J. B. Shifting more attention to video salient object detection. 
In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 8546\u20138556, 2019.","DOI":"10.1109\/CVPR.2019.00875"},{"issue":"6","key":"262_CR19","doi-asserted-by":"publisher","first-page":"1613","DOI":"10.1109\/TCSVT.2019.2908779","volume":"30","author":"Z X Chen","year":"2020","unstructured":"Chen, Z. X.; Guo, C. C.; Lai, J. H.; Xie, X. H. Motion-appearance interactive encoding for object segmentation in unconstrained videos. IEEE Transactions on Circuits and Systems for Video Technology Vol. 30, No. 6, 1613\u20131624, 2020.","journal-title":"IEEE Transactions on Circuits and Systems for Video Technology"},{"key":"262_CR20","doi-asserted-by":"crossref","unstructured":"Yang, Z.; Wang, Q.; Bertinetto, L.; Bai, S.; Hu, W.; Torr, P. Anchor diffusion for unsupervised video object segmentation. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, 931\u2013940, 2019.","DOI":"10.1109\/ICCV.2019.00102"},{"key":"262_CR21","doi-asserted-by":"crossref","unstructured":"Jain, S. D.; Xiong, B.; Grauman, K. FusionSeg: Learning to combine motion and appearance for fully automatic segmentation of generic objects in videos. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, 2117\u20132126, 2017.","DOI":"10.1109\/CVPR.2017.228"},{"key":"262_CR22","unstructured":"Khoreva, A.; Benenson, R.; Ilg, E.; Brox, T.; Schiele, B. Lucid data dreaming for object tracking. In: Proceedings of the 2017 DAVIS Challenge on Video Object Segmentation \u2014 CVPR 2017 Workshops, 2017."},{"key":"262_CR23","doi-asserted-by":"crossref","unstructured":"Cheng, J.; Tsai, Y.-H.; Wang, S.; Yang, M.-H. SegFlow: Joint learning for video object segmentation and optical flow. In: Proceedings of the IEEE International Conference on Computer Vision, 686\u2013695, 2017.","DOI":"10.1109\/ICCV.2017.81"},{"issue":"5","key":"262_CR24","first-page":"1205","volume":"42","author":"H X Xiao","year":"2020","unstructured":"Xiao, H. 
X.; Kang, B. Y.; Liu, Y.; Zhang, M. J.; Feng, J. S. Online meta adaptation for fast video object segmentation. IEEE Transactions on Pattern Analysis and Machine Intelligence Vol. 42, No. 5, 1205\u20131217, 2020.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"issue":"7","key":"262_CR25","doi-asserted-by":"publisher","first-page":"13066","DOI":"10.1609\/aaai.v34i07.7008","volume":"34","author":"T F Zhou","year":"2020","unstructured":"Zhou, T. F.; Wang, S. Z.; Zhou, Y.; Yao, Y. Z.; Li, J. W.; Shao, L. Motion-attentive transition for zero-shot video object segmentation. Proceedings of the AAAI Conference on Artificial Intelligence Vol. 34, No. 7, 13066\u201313073, 2020.","journal-title":"Proceedings of the AAAI Conference on Artificial Intelligence"},{"key":"262_CR26","doi-asserted-by":"crossref","unstructured":"Tsai, Y.-H.; Yang, M.-H.; Black, M. J. Video segmentation via object flow. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, 3899\u20133908, 2016.","DOI":"10.1109\/CVPR.2016.423"},{"key":"262_CR27","doi-asserted-by":"publisher","first-page":"103864","DOI":"10.1016\/j.imavis.2019.103864","volume":"94","author":"F Q Lin","year":"2020","unstructured":"Lin, F. Q.; Chou, Y.; Martinez, T. Flow adaptive video object segmentation. Image and Vision Computing Vol. 94, 103864, 2020.","journal-title":"Image and Vision Computing"},{"key":"262_CR28","doi-asserted-by":"crossref","unstructured":"Nilsson, D.; Sminchisescu, C. Semantic video segmentation by gated recurrent flow propagation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 6819\u20136828, 2018.","DOI":"10.1109\/CVPR.2018.00713"},{"key":"262_CR29","doi-asserted-by":"crossref","unstructured":"Li, H.; Chen, G.; Li, G.; Yu, Y. Motion guided attention for video salient object detection. 
In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, 7273\u20137282, 2019.","DOI":"10.1109\/ICCV.2019.00737"},{"issue":"12","key":"262_CR30","doi-asserted-by":"publisher","first-page":"3083","DOI":"10.1109\/TMM.2019.2918730","volume":"21","author":"Q M Peng","year":"2019","unstructured":"Peng, Q. M.; Cheung, Y. M. Automatic video object segmentation based on visual and motion saliency. IEEE Transactions on Multimedia Vol. 21, No. 12, 3083\u20133094, 2019.","journal-title":"IEEE Transactions on Multimedia"},{"issue":"4","key":"262_CR31","first-page":"219","volume":"4","author":"C Koch","year":"1985","unstructured":"Koch, C.; Ullman, S. Shifts in selective visual attention: Towards the underlying neural circuitry. Human Neurobiology Vol. 4, No. 4, 219\u2013227, 1985.","journal-title":"Human Neurobiology"},{"issue":"3","key":"262_CR32","first-page":"419","volume":"15","author":"J M Wolfe","year":"1989","unstructured":"Wolfe, J. M.; Cave, K. R.; Franzel, S. L. Guided search: An alternative to the feature integration model for visual search. Journal of Experimental Psychology: Human Perception and Performance Vol. 15, No. 3, 419\u2013433, 1989.","journal-title":"Journal of Experimental Psychology: Human Perception and Performance"},{"issue":"7","key":"262_CR33","doi-asserted-by":"publisher","first-page":"2413","DOI":"10.1109\/TPAMI.2020.2966453","volume":"43","author":"W G Wang","year":"2021","unstructured":"Wang, W. G.; Shen, J. B.; Lu, X. K.; Hoi, S. C. H.; Ling, H. B. Paying attention to video object pattern understanding. IEEE Transactions on Pattern Analysis and Machine Intelligence Vol. 43, No. 7, 2413\u20132428, 2021.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"issue":"4","key":"262_CR34","doi-asserted-by":"publisher","first-page":"375","DOI":"10.1145\/2534169.2486033","volume":"43","author":"D Bharadia","year":"2013","unstructured":"Bharadia, D.; McMilin, E.; Katti, S. Full duplex radios. 
ACM SIGCOMM Computer Communication Review Vol. 43, No. 4, 375\u2013386, 2013.","journal-title":"ACM SIGCOMM Computer Communication Review"},{"key":"262_CR35","doi-asserted-by":"crossref","unstructured":"Perazzi, F.; Pont-Tuset, J.; McWilliams, B.; van Gool, L.; Gross, M.; Sorkine-Hornung, A. A benchmark dataset and evaluation methodology for video object segmentation. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, 724\u2013732, 2016.","DOI":"10.1109\/CVPR.2016.85"},{"key":"262_CR36","doi-asserted-by":"crossref","unstructured":"Ji, G. P.; Fu, K. R.; Wu, Z.; Fan, D. P.; Shen, J. B.; Shao, L. Full-duplex strategy for video object segmentation. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, 4902\u20134913, 2021.","DOI":"10.1109\/ICCV48922.2021.00488"},{"key":"262_CR37","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"629","DOI":"10.1007\/978-3-030-58542-6_38","volume-title":"Computer Vision \u2014 ECCV 2020","author":"H Seong","year":"2020","unstructured":"Seong, H.; Hyun, J.; Kim, E. Kernelized memory network for video object segmentation. In: Computer Vision \u2014 ECCV 2020. Lecture Notes in Computer Science, Vol. 12367. Vedaldi, A.; Bischof, H.; Brox, T.; Frahm, J. M. Eds. Springer Cham, 629\u2013645, 2020."},{"key":"262_CR38","doi-asserted-by":"crossref","unstructured":"Bhat, G.; Lawin, F. J.; Danelljan, M.; Robinson, A.; Felsberg, M.; van Gool, L.; Timofte, R. Learning what to learn for video object segmentation. In: Proceedings of the Computer Vision \u2014 ECCV 2020: 16th European Conference, 777\u2013794, 2020.","DOI":"10.1007\/978-3-030-58536-5_46"},{"key":"262_CR39","doi-asserted-by":"crossref","unstructured":"Hu, L.; Zhang, P.; Zhang, B.; Pan, P.; Xu, Y. H.; Jin, R. Learning position and target consistency for memory-based video object segmentation. 
In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 4142\u20134152, 2021.","DOI":"10.1109\/CVPR46437.2021.00413"},{"key":"262_CR40","doi-asserted-by":"crossref","unstructured":"Duke, B.; Ahmed, A.; Wolf, C.; Aarabi, P.; Taylor, G. W. SSTVOS: Sparse spatiotemporal transformers for video object segmentation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 5908\u20135917, 2021.","DOI":"10.1109\/CVPR46437.2021.00585"},{"key":"262_CR41","doi-asserted-by":"publisher","first-page":"8326","DOI":"10.1109\/TIP.2020.3013162","volume":"29","author":"T Zhou","year":"2020","unstructured":"Zhou, T.; Li, J.; Wang, S.; Tao, R.; Shen, J. MATNet: Motion-attentive transition network for zero-shot video object segmentation. IEEE Transactions on Image Processing Vol. 29, 8326\u20138338, 2020.","journal-title":"IEEE Transactions on Image Processing"},{"key":"262_CR42","doi-asserted-by":"crossref","unstructured":"Ochs, P.; Brox, T. Higher order motion models and spectral clustering. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, 614\u2013621, 2012.","DOI":"10.1109\/CVPR.2012.6247728"},{"key":"262_CR43","doi-asserted-by":"crossref","unstructured":"Fragkiadaki, K.; Zhang, G.; Shi, J. B. Video segmentation by tracing discontinuities in a trajectory embedding. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, 1846\u20131853, 2012.","DOI":"10.1109\/CVPR.2012.6247883"},{"key":"262_CR44","doi-asserted-by":"crossref","unstructured":"Li, F.; Kim, T.; Humayun, A.; Tsai, D.; Rehg, J. M. Video segmentation by tracking many figure-ground segments. In: Proceedings of the IEEE International Conference on Computer Vision, 2192\u20132199, 2013.","DOI":"10.1109\/ICCV.2013.273"},{"key":"262_CR45","doi-asserted-by":"crossref","unstructured":"Perazzi, F.; Wang, O.; Gross, M.; Sorkine-Hornung, A. Fully connected object proposals for video segmentation. 
In: Proceedings of the IEEE International Conference on Computer Vision, 3227\u20133234, 2015.","DOI":"10.1109\/ICCV.2015.369"},{"key":"262_CR46","unstructured":"Wang, W. G.; Shen, J. B.; Porikli, F. Saliency-aware geodesic video object segmentation. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, 3395\u20133402, 2015."},{"issue":"10","key":"262_CR47","doi-asserted-by":"publisher","first-page":"3137","DOI":"10.1109\/TIP.2015.2438550","volume":"24","author":"W G Wang","year":"2015","unstructured":"Wang, W. G.; Shen, J. B.; Li, X. L.; Porikli, F. Robust video object cosegmentation. IEEE Transactions on Image Processing Vol. 24, No. 10, 3137\u20133148, 2015.","journal-title":"IEEE Transactions on Image Processing"},{"key":"262_CR48","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"760","DOI":"10.1007\/978-3-642-37331-2_57","volume-title":"Computer Vision \u2014 ACCV 2012","author":"F Galasso","year":"2013","unstructured":"Galasso, F.; Cipolla, R.; Schiele, B. Video segmentation with superpixels. In: Computer Vision \u2014 ACCV 2012. Lecture Notes in Computer Science, Vol. 7724. Lee, K. M.; Matsushita, Y.; Rehg, J. M.; Hu, Z. Eds. Springer Berlin Heidelberg, 760\u2013774, 2013."},{"key":"262_CR49","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"626","DOI":"10.1007\/978-3-642-33783-3_45","volume-title":"Computer Vision \u2014 ECCV 2012","author":"C Xu","year":"2012","unstructured":"Xu, C.; Xiong, C.; Corso, J. J. Streaming hierarchical video segmentation. In: Computer Vision \u2014 ECCV 2012. Lecture Notes in Computer Science, Vol. 7577. Fitzgibbon, A.; Lazebnik, S.; Perona, P.; Sato, Y.; Schmid, C. Eds. 
Springer Berlin Heidelberg, 626\u2013639, 2012."},{"key":"262_CR50","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"744","DOI":"10.1007\/978-3-030-01252-6_44","volume-title":"Computer Vision \u2014 ECCV 2018","author":"H Song","year":"2018","unstructured":"Song, H.; Wang, W.; Zhao, S.; Shen, J.; Lam, K. M. Pyramid dilated deeper ConvLSTM for video salient object detection. In: Computer Vision \u2014 ECCV 2018. Lecture Notes in Computer Science, Vol. 11215. Ferrari, V.; Hebert, M.; Sminchisescu, C.; Weiss, Y. Eds. Springer Cham, 744\u2013760, 2018."},{"key":"262_CR51","doi-asserted-by":"crossref","unstructured":"Wang, W. G.; Song, H. M.; Zhao, S. Y.; Shen, J. B.; Zhao, S. Y.; Hoi, S. C. H.; Ling, H. Learning unsupervised video object segmentation through visual attention. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 3059\u20133069, 2019.","DOI":"10.1109\/CVPR.2019.00318"},{"key":"262_CR52","doi-asserted-by":"crossref","unstructured":"Zheng, J.; Luo, W. X.; Piao, Z. X. Cascaded ConvLSTMs using semantically-coherent data synthesis for video object segmentation. IEEE Access Vol. 7, 132120\u2013132129, 2019.","DOI":"10.1109\/ACCESS.2019.2940768"},{"key":"262_CR53","doi-asserted-by":"crossref","unstructured":"Tokmakov, P.; Alahari, K.; Schmid, C. Learning motion patterns in videos. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, 531\u2013539, 2017.","DOI":"10.1109\/CVPR.2017.64"},{"key":"262_CR54","doi-asserted-by":"crossref","unstructured":"Siam, M.; Jiang, C.; Lu, S.; Petrich, L.; Gamal, M.; Elhoseiny, M.; Jagersand, M. Video object segmentation using teacher-student adaptation in a human robot interaction (HRI) setting. 
In: Proceedings of the International Conference on Robotics and Automation, 50\u201356, 2019.","DOI":"10.1109\/ICRA.2019.8794254"},{"key":"262_CR55","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"215","DOI":"10.1007\/978-3-030-01219-9_13","volume-title":"Computer Vision \u2014 ECCV 2018","author":"S Li","year":"2018","unstructured":"Li, S.; Seybold, B.; Vorobyov, A.; Lei, X.; Kuo, C. C. J. Unsupervised video object segmentation with motion-based bilateral networks. In: Computer Vision \u2014 ECCV 2018. Lecture Notes in Computer Science, Vol. 11207. Ferrari, V.; Hebert, M.; Sminchisescu, C.; Weiss, Y. Eds. Springer Cham, 215\u2013231, 2018."},{"issue":"1","key":"262_CR56","doi-asserted-by":"publisher","first-page":"20","DOI":"10.1109\/TPAMI.2017.2662005","volume":"40","author":"W Wang","year":"2018","unstructured":"Wang, W.; Shen, J.; Yang, R.; Porikli, F. Saliency-aware video object segmentation. IEEE Transactions on Pattern Analysis and Machine Intelligence Vol. 40, No. 1, 20\u201333, 2018.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"issue":"11","key":"262_CR57","doi-asserted-by":"publisher","first-page":"2993","DOI":"10.1109\/TMM.2018.2829605","volume":"20","author":"X F Zhou","year":"2018","unstructured":"Zhou, X. F.; Liu, Z.; Gong, C.; Liu, W. Improving video saliency detection via localized estimation and spatiotemporal refinement. IEEE Transactions on Multimedia Vol. 20, No. 11, 2993\u20133007, 2018.","journal-title":"IEEE Transactions on Multimedia"},{"issue":"7","key":"262_CR58","first-page":"2191","volume":"30","author":"M Z Xu","year":"2020","unstructured":"Xu, M. Z.; Liu, B.; Fu, P.; Li, J. B.; Hu, Y. H.; Feng, S. Video salient object detection via robust seeds extraction and multi-graphs manifold propagation. IEEE Transactions on Circuits and Systems for Video Technology Vol. 30, No. 
7, 2191\u20132206, 2020.","journal-title":"IEEE Transactions on Circuits and Systems for Video Technology"},{"key":"262_CR59","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"813","DOI":"10.1007\/978-3-030-01246-5_48","volume-title":"Computer Vision \u2014 ECCV 2018","author":"Y T Hu","year":"2018","unstructured":"Hu, Y. T.; Huang, J. B.; Schwing, A. G. Unsupervised video object segmentation using motion saliency-guided spatio-temporal propagation. In: Computer Vision \u2014 ECCV 2018. Lecture Notes in Computer Science, Vol. 11205. Ferrari, V.; Hebert, M.; Sminchisescu, C.; Weiss, Y. Eds. Springer Cham, 813\u2013830, 2018."},{"issue":"1","key":"262_CR60","doi-asserted-by":"publisher","first-page":"38","DOI":"10.1109\/TIP.2017.2754941","volume":"27","author":"W G Wang","year":"2018","unstructured":"Wang, W. G.; Shen, J. B.; Shao, L. Video salient object detection via fully convolutional networks. IEEE Transactions on Image Processing Vol. 27, No. 1, 38\u201349, 2018.","journal-title":"IEEE Transactions on Image Processing"},{"key":"262_CR61","doi-asserted-by":"crossref","unstructured":"Le, T. N.; Sugimoto, A. Deeply supervised 3D recurrent FCN for salient object detection in videos. In: Proceedings of the British Machine Vision Conference, 38.1-38.13, 2017.","DOI":"10.5244\/C.31.38"},{"key":"262_CR62","doi-asserted-by":"crossref","unstructured":"Min, K.; Corso, J. TASED-net: Temporally-aggregating spatial encoder-decoder network for video saliency detection. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, 2394\u20132403, 2019.","DOI":"10.1109\/ICCV.2019.00248"},{"key":"262_CR63","doi-asserted-by":"crossref","unstructured":"Li, G. B.; Xie, Y.; Wei, T. H.; Wang, K. Z.; Lin, L. Flow guided recurrent neural encoder for video salient object detection. 
In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 3243\u20133252, 2018.","DOI":"10.1109\/CVPR.2018.00342"},{"issue":"10","key":"262_CR64","doi-asserted-by":"publisher","first-page":"5002","DOI":"10.1109\/TIP.2018.2849860","volume":"27","author":"T N Le","year":"2018","unstructured":"Le, T. N.; Sugimoto, A. Video salient object detection using spatiotemporal deep features. IEEE Transactions on Image Processing Vol. 27, No. 10, 5002\u20135015, 2018.","journal-title":"IEEE Transactions on Image Processing"},{"issue":"5","key":"262_CR65","doi-asserted-by":"publisher","first-page":"1153","DOI":"10.1109\/TMM.2019.2940851","volume":"22","author":"Y X Li","year":"2020","unstructured":"Li, Y. X.; Li, S.; Chen, C.; Hao, A. M.; Qin, H. Accurate and robust video saliency detection via self-paced diffusion. IEEE Transactions on Multimedia Vol. 22, No. 5, 1153\u20131167, 2020.","journal-title":"IEEE Transactions on Multimedia"},{"issue":"2","key":"262_CR66","doi-asserted-by":"publisher","first-page":"117","DOI":"10.1007\/s41095-019-0149-9","volume":"5","author":"A Borji","year":"2019","unstructured":"Borji, A.; Cheng, M. M.; Hou, Q. B.; Jiang, H. Z.; Li, J. Salient object detection: A survey. Computational Visual Media Vol. 5, No. 2, 117\u2013150, 2019.","journal-title":"Computational Visual Media"},{"issue":"1","key":"262_CR67","doi-asserted-by":"publisher","first-page":"37","DOI":"10.1007\/s41095-020-0199-z","volume":"7","author":"T Zhou","year":"2021","unstructured":"Zhou, T.; Fan, D. P.; Cheng, M. M.; Shen, J. B.; Shao, L. RGB-D salient object detection: A survey. Computational Visual Media Vol. 7, No. 1, 37\u201369, 2021.","journal-title":"Computational Visual Media"},{"key":"262_CR68","doi-asserted-by":"publisher","first-page":"1090","DOI":"10.1109\/TIP.2019.2934350","volume":"29","author":"C Chen","year":"2020","unstructured":"Chen, C.; Wang, G. T.; Peng, C.; Zhang, X. W.; Qin, H. 
Improved robust video saliency detection based on long-term spatial-temporal information. IEEE Transactions on Image Processing Vol. 29, 1090\u20131100, 2020.","journal-title":"IEEE Transactions on Image Processing"},{"key":"262_CR69","doi-asserted-by":"crossref","unstructured":"Yan, P. X.; Li, G. B.; Xie, Y.; Li, Z.; Wang, C.; Chen, T. S.; Lin, L. Semi-supervised video salient object detection using pseudo-labels. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, 7283\u20137292, 2019.","DOI":"10.1109\/ICCV.2019.00738"},{"issue":"7","key":"262_CR70","doi-asserted-by":"publisher","first-page":"1973","DOI":"10.1109\/TCSVT.2018.2859773","volume":"29","author":"Y Tang","year":"2019","unstructured":"Tang, Y.; Zou, W. B.; Jin, Z.; Chen, Y. H.; Hua, Y.; Li, X. Weakly supervised salient object detection with spatiotemporal cascade neural networks. IEEE Transactions on Circuits and Systems for Video Technology Vol. 29, No. 7, 1973\u20131984, 2019.","journal-title":"IEEE Transactions on Circuits and Systems for Video Technology"},{"key":"262_CR71","doi-asserted-by":"crossref","unstructured":"Wang, Z.; Yan, X. Y.; Han, Y. H.; Sun, M. J. Ranking video salient object detection. In: Proceedings of the 27th ACM International Conference on Multimedia, 873\u2013881, 2019.","DOI":"10.1145\/3343031.3350882"},{"key":"262_CR72","doi-asserted-by":"crossref","unstructured":"Zhao, W. B.; Zhang, J.; Li, L.; Barnes, N.; Liu, N.; Han, J. W. Weakly supervised video salient object detection. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 16821\u201316830, 2021.","DOI":"10.1109\/CVPR46437.2021.01655"},{"key":"262_CR73","doi-asserted-by":"crossref","unstructured":"He, K. M.; Zhang, X. Y.; Ren, S. Q.; Sun, J. Deep residual learning for image recognition. 
In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, 770\u2013778, 2016.","DOI":"10.1109\/CVPR.2016.90"},{"issue":"7","key":"262_CR74","doi-asserted-by":"publisher","first-page":"12321","DOI":"10.1609\/aaai.v34i07.6916","volume":"34","author":"J Wei","year":"2020","unstructured":"Wei, J.; Wang, S. H.; Huang, Q. M. F3Net: Fusion, feedback and focus for salient object detection. Proceedings of the AAAI Conference on Artificial Intelligence Vol. 34, No. 7, 12321\u201312328, 2020.","journal-title":"Proceedings of the AAAI Conference on Artificial Intelligence"},{"key":"262_CR75","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"273","DOI":"10.1007\/978-3-030-01249-6_17","volume-title":"Computer Vision \u2014 ECCV 2018","author":"Z Zhang","year":"2018","unstructured":"Zhang, Z.; Zhang, X.; Peng, C.; Xue, X.; Sun, J. ExFuse: Enhancing feature fusion for semantic segmentation. In: Computer Vision \u2014 ECCV 2018. Lecture Notes in Computer Science, Vol. 11214. Ferrari, V.; Hebert, M.; Sminchisescu, C.; Weiss, Y. Eds. Springer Cham, 273\u2013288, 2018."},{"key":"262_CR76","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"281","DOI":"10.1007\/978-3-030-12939-2_20","volume-title":"Pattern Recognition","author":"L Sevilla-Lara","year":"2019","unstructured":"Sevilla-Lara, L.; Liao, Y.; Gu\u00fcney, F.; Jampani, V.; Geiger, A.; Black, M. J. On the integration of optical flow and action recognition. In: Pattern Recognition. Lecture Notes in Computer Science, Vol. 11269. Brox, T.; Bruhn, A.; Fritz, M. Eds. Springer Cham, 281\u2013297, 2019."},{"key":"262_CR77","doi-asserted-by":"crossref","unstructured":"Wu, Z.; Su, L.; Huang, Q. Stacked cross refinement network for edge-aware salient object detection. 
In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, 7263\u20137272, 2019.","DOI":"10.1109\/ICCV.2019.00736"},{"key":"262_CR78","doi-asserted-by":"crossref","unstructured":"Lin, T. Y.; Doll\u00e1r, P.; Girshick, R.; He, K. M.; Hariharan, B.; Belongie, S. Feature pyramid networks for object detection. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, 936\u2013944, 2017.","DOI":"10.1109\/CVPR.2017.106"},{"key":"262_CR79","doi-asserted-by":"crossref","unstructured":"Zhao, H. S.; Shi, J. P.; Qi, X. J.; Wang, X. G.; Jia, J. Y. Pyramid scene parsing network. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, 6230\u20136239, 2017.","DOI":"10.1109\/CVPR.2017.660"},{"key":"262_CR80","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"234","DOI":"10.1007\/978-3-319-24574-4_28","volume-title":"Medical Image Computing and Computer-Assisted Intervention \u2014 MICCAI 2015","author":"O Ronneberger","year":"2015","unstructured":"Ronneberger, O.; Fischer, P.; Brox, T. U-Net: Convolutional networks for biomedical image segmentation. In: Medical Image Computing and Computer-Assisted Intervention \u2014 MICCAI 2015. Lecture Notes in Computer Science, Vol. 9351. Navab, N.; Hornegger, J.; Wells, W.; Frangi, A. Eds. Springer Cham, 234\u2013241, 2015."},{"key":"262_CR81","unstructured":"Paszke, A.; Gross, S.; Massa, F.; Lerer, A.; Bradbury, J.; Chanan, G.; Killeen, T.; Lin, Z.; Gimelshein, N.; Antiga, L. et al. PyTorch: An imperative style, high-performance deep learning library. In: Proceedings of the 33rd International Conference on Neural Information Processing Systems, 8026\u20138037, 2019."},{"issue":"9","key":"262_CR82","doi-asserted-by":"publisher","first-page":"1904","DOI":"10.1109\/TPAMI.2015.2389824","volume":"37","author":"K M He","year":"2015","unstructured":"He, K. M.; Zhang, X. Y.; Ren, S. Q.; Sun, J. 
Spatial pyramid pooling in deep convolutional networks for visual recognition. IEEE Transactions on Pattern Analysis and Machine Intelligence Vol. 37, No. 9, 1904\u20131916, 2015.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"262_CR83","doi-asserted-by":"crossref","unstructured":"Lu, X. K.; Wang, W. G.; Ma, C.; Shen, J. B.; Shao, L.; Porikli, F. See more, know more: Unsupervised video object segmentation with co-attention Siamese networks. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 3618\u20133627, 2019.","DOI":"10.1109\/CVPR.2019.00374"},{"key":"262_CR84","unstructured":"Kr\u00e4henb\u00fchl, P.; Koltun, V. Efficient inference in fully connected CRFs with Gaussian edge potentials. In: Proceedings of the 24th International Conference on Neural Information Processing Systems, 109\u2013117, 2011."},{"issue":"8","key":"262_CR85","doi-asserted-by":"publisher","first-page":"2552","DOI":"10.1109\/TIP.2015.2425544","volume":"24","author":"H Kim","year":"2015","unstructured":"Kim, H.; Kim, Y.; Sim, J. Y.; Kim, C. S. Spatiotemporal saliency detection for video sequences based on random walk with restart. IEEE Transactions on Image Processing Vol. 24, No. 8, 2552\u20132564, 2015.","journal-title":"IEEE Transactions on Image Processing"},{"issue":"6","key":"262_CR86","doi-asserted-by":"publisher","first-page":"1187","DOI":"10.1109\/TPAMI.2013.242","volume":"36","author":"P Ochs","year":"2014","unstructured":"Ochs, P.; Malik, J.; Brox, T. Segmentation of moving objects by long term video analysis. IEEE Transactions on Pattern Analysis and Machine Intelligence Vol. 36, No. 6, 1187\u20131200, 2014.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"262_CR87","doi-asserted-by":"crossref","unstructured":"Wang, L. J.; Lu, H. C.; Wang, Y. F.; Feng, M. Y.; Wang, D.; Yin, B. C.; Ruan, X. Learning to detect salient objects with image-level supervision. 
In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, 3796\u20133805, 2017.","DOI":"10.1109\/CVPR.2017.404"},{"key":"262_CR88","doi-asserted-by":"crossref","unstructured":"Achanta, R.; Hemami, S.; Estrada, F.; Susstrunk, S. Frequency-tuned salient region detection. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, 1597\u20131604, 2009.","DOI":"10.1109\/CVPR.2009.5206596"},{"issue":"3","key":"262_CR89","doi-asserted-by":"publisher","first-page":"569","DOI":"10.1109\/TPAMI.2014.2345401","volume":"37","author":"M M Cheng","year":"2015","unstructured":"Cheng, M. M.; Mitra, N. J.; Huang, X. L.; Torr, P. H. S.; Hu, S. M. Global contrast based salient region detection. IEEE Transactions on Pattern Analysis and Machine Intelligence Vol. 37, No. 3, 569\u2013582, 2015.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"issue":"12","key":"262_CR90","doi-asserted-by":"publisher","first-page":"5706","DOI":"10.1109\/TIP.2015.2487833","volume":"24","author":"A Borji","year":"2015","unstructured":"Borji, A.; Cheng, M. M.; Jiang, H. Z.; Li, J. Salient object detection: A benchmark. IEEE Transactions on Image Processing Vol. 24, No. 12, 5706\u20135722, 2015.","journal-title":"IEEE Transactions on Image Processing"},{"key":"262_CR91","doi-asserted-by":"crossref","unstructured":"Fan, D. P.; Cheng, M. M.; Liu, Y.; Li, T.; Borji, A. Structure-measure: A new way to evaluate foreground maps. In: Proceedings of the IEEE International Conference on Computer Vision, 4558\u20134567, 2017.","DOI":"10.1109\/ICCV.2017.487"},{"key":"262_CR92","doi-asserted-by":"crossref","unstructured":"Wang, W. G.; Lu, X. K.; Shen, J. B.; Crandall, D.; Shao, L. Zero-shot video object segmentation via attentive graph neural networks. 
In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, 9235\u20139244, 2019.","DOI":"10.1109\/ICCV.2019.00933"},{"key":"262_CR93","doi-asserted-by":"crossref","unstructured":"Faisal, M.; Akhter, I.; Ali, M.; Hartley, R. EpO-net: Exploiting geometric constraints on dense trajectories for motion saliency. In: Proceedings of the IEEE Winter Conference on Applications of Computer Vision, 1873\u20131882, 2020.","DOI":"10.1109\/WACV45572.2020.9093589"},{"issue":"3","key":"262_CR94","doi-asserted-by":"publisher","first-page":"282","DOI":"10.1007\/s11263-018-1122-2","volume":"127","author":"P Tokmakov","year":"2019","unstructured":"Tokmakov, P.; Schmid, C.; Alahari, K. Learning to segment moving objects. International Journal of Computer Vision Vol. 127, No. 3, 282\u2013301, 2019.","journal-title":"International Journal of Computer Vision"},{"key":"262_CR95","doi-asserted-by":"crossref","unstructured":"Koh, Y. J.; Kim, C. S. Primary object segmentation in videos based on region augmentation and reduction. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, 7417\u20137425, 2017.","DOI":"10.1109\/CVPR.2017.784"},{"key":"262_CR96","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"441","DOI":"10.1007\/978-3-030-01249-6_27","volume-title":"Computer Vision \u2014 ECCV 2018","author":"D Lao","year":"2018","unstructured":"Lao, D.; Sundaramoorthi, G. Extending layered models to 3D motion. In: Computer Vision \u2014 ECCV 2018. Lecture Notes in Computer Science, Vol. 11214. Ferrari, V.; Hebert, M.; Sminchisescu, C.; Weiss, Y. Eds. Springer Cham, 441\u2013457, 2018."},{"key":"262_CR97","doi-asserted-by":"crossref","unstructured":"Papazoglou, A.; Ferrari, V. Fast object segmentation in unconstrained video. 
In: Proceedings of the IEEE International Conference on Computer Vision, 1777\u20131784, 2013.","DOI":"10.1109\/ICCV.2013.223"},{"key":"262_CR98","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"332","DOI":"10.1007\/978-3-030-58558-7_20","volume-title":"Computer Vision \u2014 ECCV 2020","author":"Z Yang","year":"2020","unstructured":"Yang, Z.; Wei, Y.; Yang, Y. Collaborative video object segmentation by foreground-background integration. In: Computer Vision \u2014 ECCV 2020. Lecture Notes in Computer Science, Vol. 12350. Vedaldi, A.; Bischof, H.; Brox, T.; Frahm, J. M. Eds. Springer Cham, 332\u2013348, 2020."},{"key":"262_CR99","doi-asserted-by":"crossref","unstructured":"Johnander, J.; Danelljan, M.; Brissman, E.; Khan, F. S.; Felsberg, M. A generative appearance model for end-to-end video object segmentation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 8945\u20138954, 2019.","DOI":"10.1109\/CVPR.2019.00916"},{"key":"262_CR100","doi-asserted-by":"crossref","unstructured":"Oh, S. W.; Lee, J. Y.; Sunkavalli, K.; Kim, S. J. Fast video object segmentation by reference-guided mask propagation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 7376\u20137385, 2018.","DOI":"10.1109\/CVPR.2018.00770"},{"key":"262_CR101","doi-asserted-by":"crossref","unstructured":"Voigtlaender, P.; Chai, Y. N.; Schroff, F.; Adam, H.; Leibe, B.; Chen, L. C. FEELVOS: Fast end-to-end embedding learning for video object segmentation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 9473\u20139482, 2019.","DOI":"10.1109\/CVPR.2019.00971"},{"key":"262_CR102","doi-asserted-by":"crossref","unstructured":"Cheng, J. C.; Tsai, Y. H.; Hung, W. C.; Wang, S. J.; Yang, M. H. Fast and accurate online video object segmentation via tracking parts. 
In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 7415\u20137424, 2018.","DOI":"10.1109\/CVPR.2018.00774"},{"key":"262_CR103","doi-asserted-by":"crossref","unstructured":"Caelles, S.; Maninis, K. K.; Pont-Tuset, J.; Leal-Taix\u00e9, L.; Cremers, D.; van Gool, L. One-shot video object segmentation. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, 5320\u20135329, 2017.","DOI":"10.1109\/CVPR.2017.565"},{"key":"262_CR104","doi-asserted-by":"crossref","unstructured":"Perazzi, F.; Khoreva, A.; Benenson, R.; Schiele, B.; Sorkine-Hornung, A. Learning video object segmentation from static images. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, 3491\u20133500, 2017.","DOI":"10.1109\/CVPR.2017.372"},{"issue":"7","key":"262_CR105","doi-asserted-by":"publisher","first-page":"3345","DOI":"10.1109\/TIP.2018.2813165","volume":"27","author":"Y H Chen","year":"2018","unstructured":"Chen, Y. H.; Zou, W. B.; Tang, Y.; Li, X.; Xu, C.; Komodakis, N. SCOM: Spatiotemporal constrained optimization for salient object detection. IEEE Transactions on Image Processing Vol. 27, No. 7, 3345\u20133357, 2018.","journal-title":"IEEE Transactions on Image Processing"},{"issue":"10","key":"262_CR106","doi-asserted-by":"publisher","first-page":"4819","DOI":"10.1109\/TIP.2019.2910377","volume":"28","author":"R M Cong","year":"2019","unstructured":"Cong, R. M.; Lei, J. J.; Fu, H. Z.; Porikli, F.; Huang, Q. M.; Hou, C. P. Video saliency detection via sparsity-based reconstruction and propagation. IEEE Transactions on Image Processing Vol. 28, No. 10, 4819\u20134831, 2019.","journal-title":"IEEE Transactions on Image Processing"},{"issue":"11","key":"262_CR107","doi-asserted-by":"publisher","first-page":"2790","DOI":"10.1109\/TMM.2019.2914889","volume":"21","author":"M Z Xu","year":"2019","unstructured":"Xu, M. Z.; Liu, B.; Fu, P.; Li, J. B.; Hu, Y. H. 
Video saliency detection via graph clustering with motion energy and spatiotemporal objectness. IEEE Transactions on Multimedia Vol. 21, No. 11, 2790\u20132805, 2019.","journal-title":"IEEE Transactions on Multimedia"},{"issue":"7","key":"262_CR108","doi-asserted-by":"publisher","first-page":"10869","DOI":"10.1609\/aaai.v34i07.6718","volume":"34","author":"Y C Gu","year":"2020","unstructured":"Gu, Y. C.; Wang, L. J.; Wang, Z. Q.; Liu, Y.; Cheng, M. M.; Lu, S. P. Pyramid constrained self-attention network for fast video salient object detection. Proceedings of the AAAI Conference on Artificial Intelligence Vol. 34, No. 7, 10869\u201310876, 2020.","journal-title":"Proceedings of the AAAI Conference on Artificial Intelligence"},{"issue":"9","key":"262_CR109","doi-asserted-by":"publisher","first-page":"1475","DOI":"10.1360\/SSI-2020-0370","volume":"51","author":"D-P Fan","year":"2021","unstructured":"Fan, D.-P.; Ji, G.-P.; Qin, X.; Cheng, M.-M. Cognitive vision inspired object segmentation metric and loss function. SCIENTIA SINICA Informationis Vol. 51, No. 9, 1475\u20131489, 2021. (in Chinese)","journal-title":"SCIENTIA SINICA Informationis"},{"key":"262_CR110","unstructured":"Mahadevan, S.; Athar, A.; O\u0161ep, A.; Hennen, S.; Leal-Taix\u00e9, L.; Leibe, B. Making a case for 3D convolutions for object segmentation in videos. In: Proceedings of the 31st British Machine Vision Conference, 2020."},{"key":"262_CR111","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"740","DOI":"10.1007\/978-3-319-10602-1_48","volume-title":"Computer Vision \u2014 ECCV 2014","author":"T-Y Lin","year":"2014","unstructured":"Lin, T.-Y.; Maire, M.; Belongie, S.; Hays, J.; Perona, P.; Ramanan, D.; Doll\u00e1r, P.; Zitnick, C. L. Microsoft COCO: Common objects in context. In: Computer Vision \u2014 ECCV 2014. Lecture Notes in Computer Science, Vol. 8693. Fleet, D.; Pajdla, T.; Schiele, B.; Tuytelaars, T. Eds. 
Springer Cham, 740\u2013755, 2014."},{"key":"262_CR112","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"603","DOI":"10.1007\/978-3-030-01228-1_36","volume-title":"Computer Vision \u2014 ECCV 2018","author":"N Xu","year":"2018","unstructured":"Xu, N.; Yang, L.; Fan, Y.; Yang, J.; Yue, D.; Liang, Y.; Price, B.; Cohen, S.; Huang, T. YouTube-VOS: Sequence-to-sequence video object segmentation. In: Computer Vision \u2014 ECCV 2018. Lecture Notes in Computer Science, Vol. 11209. Ferrari, V.; Hebert, M.; Sminchisescu, C.; Weiss, Y. Eds. Springer Cham, 603\u2013619, 2018."},{"key":"262_CR113","doi-asserted-by":"crossref","unstructured":"Wang, W. H.; Xie, E. Z.; Li, X.; Fan, D. P.; Song, K. T.; Liang, D.; Lu, T.; Luo, P.; Shao, L. Pyramid vision transformer: A versatile backbone for dense prediction without convolutions. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, 548\u2013558, 2021.","DOI":"10.1109\/ICCV48922.2021.00061"},{"key":"262_CR114","doi-asserted-by":"crossref","unstructured":"Zhuge, M. C.; Gao, D. H.; Fan, D. P.; Jin, L. B.; Chen, B.; Zhou, H. M.; Qiu, M.; Shao, L. Kaleido-BERT: Vision-language pre-training on fashion domain. 
In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 12642\u201312652, 2021.","DOI":"10.1109\/CVPR46437.2021.01246"}],"container-title":["Computational Visual Media"],"original-title":[],"link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s41095-021-0262-4.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s41095-021-0262-4\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s41095-021-0262-4.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"},{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/10750449\/10897681\/10897692.pdf?arnumber=10897692","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,11,5]],"date-time":"2025-11-05T18:38:55Z","timestamp":1762367935000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10897692\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,3]]},"references-count":114,"journal-issue":{"issue":"1"},"URL":"https:\/\/doi.org\/10.1007\/s41095-021-0262-4","relation":{},"ISSN":["2096-0662","2096-0433"],"issn-type":[{"value":"2096-0662","type":"electronic"},{"value":"2096-0433","type":"print"}],"subject":[],"published":{"date-parts":[[2023,3]]},"assertion":[{"value":"1 September 2021","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"16 October 2021","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"18 October 2022","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"The authors have no 
competing interests to declare that are relevant to the content of this article.","order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declaration of competing interest"}}]}}