{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,16]],"date-time":"2026-05-16T06:26:21Z","timestamp":1778912781564,"version":"3.51.4"},"reference-count":95,"publisher":"Springer Science and Business Media LLC","issue":"8","license":[{"start":{"date-parts":[[2024,3,7]],"date-time":"2024-03-07T00:00:00Z","timestamp":1709769600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,3,7]],"date-time":"2024-03-07T00:00:00Z","timestamp":1709769600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62276046"],"award-info":[{"award-number":["62276046"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int J Comput Vis"],"published-print":{"date-parts":[[2024,8]]},"DOI":"10.1007\/s11263-024-02024-8","type":"journal-article","created":{"date-parts":[[2024,3,7]],"date-time":"2024-03-07T21:12:48Z","timestamp":1709845968000},"page":"3232-3250","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":13,"title":["Adaptive Multi-Source Predictor for Zero-Shot Video Object Segmentation"],"prefix":"10.1007","volume":"132","author":[{"given":"Xiaoqi","family":"Zhao","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Shijie","family":"Chang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Youwei","family":"Pang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jiaxing","family":"Yang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9241-1688","authenticated-orcid":false,"given":"Lihe","family":"Zhang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Huchuan","family":"Lu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2024,3,7]]},"reference":[{"key":"2024_CR1","doi-asserted-by":"crossref","unstructured":"Achanta, R., Hemami, Sheila, Estrada, F., & S\u00fcsstrunk, S. (2009). Frequency-tuned salient region detection. In CVPR (pp. 1597\u20131604).","DOI":"10.1109\/CVPRW.2009.5206596"},{"key":"2024_CR2","unstructured":"An, N., Zhao, X.-G., & Hou, Z.-G. (2016). Online rgb-d tracking via detection-learning-segmentation. In ICPR (pp. 1231\u20131236)."},{"key":"2024_CR3","doi-asserted-by":"crossref","unstructured":"Chen, Q., Liu, Z., Zhang, Y., Fu, K., Zhao, Q., & Du, H. (2021). Rgb-d salient object detection via 3d convolutional neural networks. In AAAI (pp. 1063\u20131071).","DOI":"10.1609\/aaai.v35i2.16191"},{"key":"2024_CR4","doi-asserted-by":"crossref","unstructured":"Chen, Q., Liu, Z., Zhang, Y., Fu, K., Zhao, Q., & Du, H. (2021). Rgb-d salient object detection via 3d convolutional neural networks. In AAAI (pp. 1063\u20131071).","DOI":"10.1609\/aaai.v35i2.16191"},{"key":"2024_CR5","doi-asserted-by":"crossref","unstructured":"Chen, X., Lin, K.-Y., Wang, J., Wu, W., Qian, C., Li, H., & Zeng, G. (2020). Bi-directional cross-modality feature propagation with separation-and-aggregation gate for rgb-d semantic segmentation. In ECCV (pp. 561\u2013577).","DOI":"10.1007\/978-3-030-58621-8_33"},{"key":"2024_CR6","doi-asserted-by":"crossref","unstructured":"Cheng, J., Tsai, Y.-H., Wang, S., & Yang, M.-H. (2017). Segflow: Joint learning for video object segmentation and optical flow. In ICCV (pp. 686\u2013695).","DOI":"10.1109\/ICCV.2017.81"},{"key":"2024_CR7","doi-asserted-by":"crossref","unstructured":"Cheng, Y., Cai, R., Li, Z., Zhao, X., & Huang, K. (2017). Locality-sensitive deconvolution networks with gated fusion for rgb-d indoor semantic segmentation. In CVPR (pp. 3029\u20133037).","DOI":"10.1109\/CVPR.2017.161"},{"key":"2024_CR8","doi-asserted-by":"crossref","unstructured":"Cheng, Y., Fu, H., Wei, X., Xiao, J., & Cao, X. (2014). Depth enhanced saliency detection method. In ICIMCS (p. 23)","DOI":"10.1145\/2632856.2632866"},{"key":"2024_CR9","doi-asserted-by":"publisher","first-page":"19","DOI":"10.1007\/s10479-005-5724-z","volume":"134","author":"P-T De Boer","year":"2005","unstructured":"De Boer, P.-T., Kroese, D. P., Mannor, S., & Rubinstein, R. Y. (2005). A tutorial on the cross-entropy method. Annals of operations research, 134, 19\u201367.","journal-title":"Annals of operations research"},{"key":"2024_CR10","doi-asserted-by":"crossref","unstructured":"Deng, J., Dong, W., Socher, R., Li, L.-J., Li, K., & Fei-Fei, L. (2009). Imagenet: A large-scale hierarchical image database. In CVPR (pp. 248\u2013255).","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"2024_CR11","doi-asserted-by":"crossref","unstructured":"Deng, Z., Hu, X., Zhu, L., Xu, X., Qin, J., Han, G., & Heng, P.-A. (2018). R3net: Recurrent residual refinement network for saliency detection. In IJCAI (pp. 684\u2013690).","DOI":"10.24963\/ijcai.2018\/95"},{"key":"2024_CR12","doi-asserted-by":"crossref","unstructured":"Faisal, M., Akhter, I., Ali, M., & Hartley, R. (2019). Exploiting geometric constraints on dense trajectories for motion saliency. 3(4). arXiv preprint arXiv:1909.13258","DOI":"10.1109\/WACV45572.2020.9093589"},{"key":"2024_CR13","doi-asserted-by":"crossref","unstructured":"Fan, D.-P., Cheng, M.-M., Liu, Y., Li, T., & Borji, A. (2017). Structure-measure: A new way to evaluate foreground maps. In ICCV (pp. 4548\u20134557).","DOI":"10.1109\/ICCV.2017.487"},{"key":"2024_CR14","doi-asserted-by":"crossref","unstructured":"Fan, D.-P., Gong, C., Cao, Y., Ren, B., Cheng, M.-M., & Borji, A. (2018). Enhanced-alignment measure for binary foreground map evaluation. arXiv preprint arXiv:1805.10421).","DOI":"10.24963\/ijcai.2018\/97"},{"key":"2024_CR15","first-page":"2075","volume":"32","author":"D-P Fan","year":"2020","unstructured":"Fan, D.-P., Lin, Z., Zhang, Z., Zhu, M., & Cheng, M.-M. (2020). Rethinking rgb-d salient object detection: Models, data sets, and large-scale benchmarks. IEEE TNNLS, 32, 2075\u20132089.","journal-title":"IEEE TNNLS"},{"key":"2024_CR16","doi-asserted-by":"crossref","unstructured":"Fan, D.-P., Zhai, Y., Borji, A., Yang, J., & Shao, L. (2020). Bbs-net: Rgb-d salient object detection with a bifurcated backbone strategy network. In ECCV (pp. 275\u2013292).","DOI":"10.1007\/978-3-030-58610-2_17"},{"key":"2024_CR17","doi-asserted-by":"crossref","unstructured":"Fan, D.-P., Zhai, Y., Borji, A., Yang, J., & Shao, L. (2020). Bbs-net: Rgb-d salient object detection with a bifurcated backbone strategy network. In ECCV (pp. 275\u2013292).","DOI":"10.1007\/978-3-030-58610-2_17"},{"key":"2024_CR18","doi-asserted-by":"crossref","unstructured":"Fu, K., Fan, D.-P., Ji, G.-P., & Zhao, Q. (2020). Jl-dcf: Joint learning and densely-cooperative fusion framework for rgb-d salient object detection. In CVPR (pp. 3052\u20133062).","DOI":"10.1109\/CVPR42600.2020.00312"},{"key":"2024_CR19","doi-asserted-by":"crossref","unstructured":"Fu, K., Fan, D.-P., Ji, G.-P., & Zhao, Q. (2020). Jl-dcf: Joint learning and densely-cooperative fusion framework for rgb-d salient object detection. In CVPR (pp. 3052\u20133062).","DOI":"10.1109\/CVPR42600.2020.00312"},{"key":"2024_CR20","doi-asserted-by":"crossref","unstructured":"Fu, K., Fan, D.-P., Ji, G.-P., & Zhao, Q. (2020). Jl-dcf: Joint learning and densely-cooperative fusion framework for rgb-d salient object detection. In CVPR (pp. 3052\u20133062).","DOI":"10.1109\/CVPR42600.2020.00312"},{"key":"2024_CR21","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., & Sun, J. (2015). Delving deep into rectifiers: Surpassing human-level performance on imagenet classification. In ICCV (pp. 1026\u20131034).","DOI":"10.1109\/ICCV.2015.123"},{"key":"2024_CR22","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., & Sun, J. (2016). Deep residual learning for image recognition. In CVPR (pp. 770\u2013778).","DOI":"10.1109\/CVPR.2016.90"},{"key":"2024_CR23","doi-asserted-by":"crossref","unstructured":"Hou, Q., Cheng, M.-M., Hu, X., Borji, A., Tu, Z., & Torr, P. H. S. (2017). Deeply supervised salient object detection with short connections. In CVPR (pp. 3203\u20133212).","DOI":"10.1109\/CVPR.2017.563"},{"key":"2024_CR24","doi-asserted-by":"crossref","unstructured":"Hui, T.-W., Tang, X., & Change\u00a0Loy, C. (2018). Liteflownet: A lightweight convolutional neural network for optical flow estimation. In CVPR (pp. 8981\u20138989).","DOI":"10.1109\/CVPR.2018.00936"},{"key":"2024_CR25","doi-asserted-by":"crossref","unstructured":"Jain, S. D., Xiong, B., and Grauman, K. (2017). Fusionseg: Learning to combine motion and appearance for fully automatic segmentation of generic objects in videos. In CVPR (pp. 2117\u20132126).","DOI":"10.1109\/CVPR.2017.228"},{"key":"2024_CR26","doi-asserted-by":"crossref","unstructured":"Ji, G.-P., Fu, K., Wu, Z., Fan, D.-P., Shen, J., & Shao, L. (2021). Full-duplex strategy for video object segmentation. In ICCV (pp. 4922\u20134933)","DOI":"10.1109\/ICCV48922.2021.00488"},{"key":"2024_CR27","doi-asserted-by":"crossref","unstructured":"Ji, W., Li, J., Yu, S., Zhang, M., Piao, Y., Yao, S., Bi, Q., Ma, K., Zheng, Y., Lu, H., et\u00a0al. (2021). Calibrated rgb-d salient object detection. In CVPR (pp. 9471\u20139481).","DOI":"10.1109\/CVPR46437.2021.00935"},{"key":"2024_CR28","doi-asserted-by":"crossref","unstructured":"Ji, W., Li, J., Yu, S., Zhang, M., Piao, Y., Yao, S., Bi, Q., Ma, K., Zheng, Y., Lu, H., et\u00a0al. (2021). Calibrated rgb-d salient object detection. In CVPR (pp. 9471\u20139481).","DOI":"10.1109\/CVPR46437.2021.00935"},{"key":"2024_CR29","doi-asserted-by":"crossref","unstructured":"Ji, W., Li, J., Zhang, M., Piao, Y., & Lu, H. (2020). Accurate rgb-d salient object detection via collaborative learning. In ECCV (pp. 52\u201369).","DOI":"10.1007\/978-3-030-58523-5_4"},{"key":"2024_CR30","doi-asserted-by":"crossref","unstructured":"Ji, W., Li, J., Zhang, M., Piao, Y., & Lu, H. (2020). Accurate rgb-d salient object detection via collaborative learning. In ECCV (pp. 52\u201369)","DOI":"10.1007\/978-3-030-58523-5_4"},{"key":"2024_CR31","doi-asserted-by":"crossref","unstructured":"Ju, R., Ge, L., Geng, W., Ren, T., & Wu, G. (2014). Depth saliency based on anisotropic center-surround difference. In ICIP (pp. 1115\u20131119).","DOI":"10.1109\/ICIP.2014.7025222"},{"key":"2024_CR32","doi-asserted-by":"crossref","unstructured":"Jun\u00a0Koh, Y., & Kim, C.-S. (2017). Primary object segmentation in videos based on region augmentation and reduction. In CVPR (pp. 3442\u20133450).","DOI":"10.1109\/CVPR.2017.784"},{"key":"2024_CR33","unstructured":"Kingma, D. P., & Ba, J. (2014). Adam: A method for stochastic optimization. arXiv preprint arXiv:1412.6980."},{"key":"2024_CR34","doi-asserted-by":"crossref","unstructured":"Li, S., Seybold, B., Vorobyov, A., Lei, X., & Jay\u00a0Kuo, C.-C. (2018). Unsupervised video object segmentation with motion-based bilateral networks. In ECCV (pp. 207\u2013223).","DOI":"10.1007\/978-3-030-01219-9_13"},{"key":"2024_CR35","doi-asserted-by":"crossref","unstructured":"Lin, T.-Y., Doll\u00e1r, P., Girshick, R., He, K., Hariharan, B., & Belongie, S. (2017). Feature pyramid networks for object detection. In CVPR (pp. 2117\u20132125).","DOI":"10.1109\/CVPR.2017.106"},{"key":"2024_CR36","doi-asserted-by":"publisher","first-page":"22475","DOI":"10.1007\/s11042-018-6056-8","volume":"77","author":"H Liu","year":"2018","unstructured":"Liu, H., Wenshan, W., Wang, X., & Qian, Y. (2018). Rgb-d joint modelling with scene geometric information for indoor semantic segmentation. Multimedia Tools and Applications, 77, 22475\u201322488.","journal-title":"Multimedia Tools and Applications"},{"key":"2024_CR37","doi-asserted-by":"crossref","unstructured":"Liu, J.-J., Hou, Q., Cheng, M.-M., Feng, J., & Jiang, J. (2019). A simple pooling-based design for real-time salient object detection. In CVPR (pp. 3917\u20133926).","DOI":"10.1109\/CVPR.2019.00404"},{"key":"2024_CR38","unstructured":"Liu, W., Rabinovich, A., & Berg, A.C. (2015). Parsenet: Looking wider to see better. arXiv preprint arXiv:1506.04579"},{"key":"2024_CR39","doi-asserted-by":"crossref","unstructured":"Lu, X., Wang, W., Ma, C., Shen, J, Shao, Ling, & Porikli, F. (2019). See more, know more: Unsupervised video object segmentation with co-attention siamese networks. In CVPR (pp. 3623\u20133632).","DOI":"10.1109\/CVPR.2019.00374"},{"key":"2024_CR40","doi-asserted-by":"crossref","unstructured":"Lukezic, A., Kart, U., Kapyla, J., Durmush, A., Kamarainen, J.-K., Matas, J., & Kristan, M. (2019). Cdtb: A color and depth visual object tracking dataset and benchmark. In ICCV (pp. 10013\u201310022).","DOI":"10.1109\/ICCV.2019.01011"},{"key":"2024_CR41","unstructured":"Niu, Y., Geng, Y., Li, X., & Liu, F. (2012). Leveraging stereopsis for saliency analysis. In CVPR (pp. 454\u2013461)."},{"key":"2024_CR42","unstructured":"Ocal, M., & Mustafa, A. (2020). Realmonodepth: Self-supervised monocular depth estimation for general scenes. arXiv preprint arXiv:2004.06267."},{"key":"2024_CR43","doi-asserted-by":"publisher","first-page":"1187","DOI":"10.1109\/TPAMI.2013.242","volume":"36","author":"P Ochs","year":"2013","unstructured":"Ochs, P., Malik, J., & Brox, T. (2013). Segmentation of moving objects by long term video analysis. IEEE TPAMI, 36, 1187\u20131200.","journal-title":"IEEE TPAMI"},{"key":"2024_CR44","doi-asserted-by":"crossref","unstructured":"Pang, Y., Zhang, L., Zhao, X., & Lu, H. (2020). Hierarchical dynamic filtering network for rgb-d salient object detection. In ECCV (pp. 235\u2013252).","DOI":"10.1007\/978-3-030-58595-2_15"},{"key":"2024_CR45","doi-asserted-by":"crossref","unstructured":"Pang, Y., Zhang, L., Zhao, X., & Lu, H. (2020). Hierarchical dynamic filtering network for rgb-d salient object detection. In ECCV (pp. 235\u2013252).","DOI":"10.1007\/978-3-030-58595-2_15"},{"key":"2024_CR46","doi-asserted-by":"crossref","unstructured":"Pang, Y., Zhao, X., Zhang, L., & Lu, H. (2020). Multi-scale interactive network for salient object detection. In CVPR (pp. 9413\u20139422).","DOI":"10.1109\/CVPR42600.2020.00943"},{"key":"2024_CR47","doi-asserted-by":"crossref","unstructured":"Papazoglou, A., & Ferrari, V. (2013). Fast object segmentation in unconstrained video. In ICCV (pp. 1777\u20131784).","DOI":"10.1109\/ICCV.2013.223"},{"key":"2024_CR48","unstructured":"Paszke, A., Gross, S., Massa, F., Lerer, A., Bradbury, J., Chanan, G., Killeen, T., Lin, Z., Gimelshein, N., Antiga, L., Desmaison, A., Kopf, A., Yang, E., DeVito, Z., Raison, M., Tejani, A., Chilamkurthy, S., Steiner, B., Fang, L., Bai, J., & Chintala, S. (2019). Pytorch: An imperative style, high-performance deep learning library. In NeurIPS, (Vol.\u00a032)"},{"key":"2024_CR49","doi-asserted-by":"crossref","unstructured":"Peng, H., Li, B., Xiong, W., Hu, W., & Ji, R. (2014). Rgbd salient object detection: A benchmark and algorithms. In ECCV (pp. 92\u2013109).","DOI":"10.1007\/978-3-319-10578-9_7"},{"key":"2024_CR50","doi-asserted-by":"crossref","unstructured":"Perazzi, F., Kr\u00e4henb\u00fchl, P., Pritch, Y., & Hornung, A. (2012). Saliency filters: Contrast based filtering for salient region detection. In CVPR (pp. 733\u2013740).","DOI":"10.1109\/CVPR.2012.6247743"},{"key":"2024_CR51","doi-asserted-by":"crossref","unstructured":"Perazzi, F., Pont-Tuset, J., McWilliams, B., Van\u00a0Gool, L., Gross, M., & Sorkine-Hornung, A. (2016). A benchmark dataset and evaluation methodology for video object segmentation. In CVPR (pp. 724\u2013732).","DOI":"10.1109\/CVPR.2016.85"},{"key":"2024_CR52","doi-asserted-by":"crossref","unstructured":"Piao, Y., Ji, W., Li, J., Zhang, M., & Lu, H. (2019). Depth-induced multi-scale recurrent attention network for saliency detection. In ICCV (pp. 7254\u20137263).","DOI":"10.1109\/ICCV.2019.00735"},{"key":"2024_CR53","doi-asserted-by":"crossref","unstructured":"Pillai, S., Ambru\u015f, R., & Gaidon, A. (2019). Superdepth: Self-supervised, super-resolved monocular depth estimation. In ICRA (pp. 9250\u20139256).","DOI":"10.1109\/ICRA.2019.8793621"},{"key":"2024_CR54","doi-asserted-by":"crossref","unstructured":"Prest, A., Leistner, C., Civera, J., Schmid, C., & Ferrari, V. (2012). Learning object class detectors from weakly annotated video. In CVPR (pp. 3282\u20133289).","DOI":"10.1109\/CVPR.2012.6248065"},{"key":"2024_CR55","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2020.107404","volume":"106","author":"X Qin","year":"2020","unstructured":"Qin, X., Zhang, Z., Huang, C., Dehghan, M., Zaiane, O. R., & Jagersand, M. (2020). U2-net: Going deeper with nested u-structure for salient object detection. Pattern Recognition, 106, 107404.","journal-title":"Pattern Recognition"},{"key":"2024_CR56","doi-asserted-by":"crossref","unstructured":"Qin, X., Zhang, Z., Huang, C., Gao, C., Dehghan, M., & Jagersand, M. (2019). Basnet: Boundary-aware salient object detection. In CVPR (pp. 7479\u20137489).","DOI":"10.1109\/CVPR.2019.00766"},{"key":"2024_CR57","unstructured":"Ranftl, R., Lasinger, K., Hafner, D., Schindler, K., & Koltun, V. (2020). Towards robust monocular depth estimation: Mixing datasets for zero-shot cross-dataset transfer. In IEEE TPAMI."},{"key":"2024_CR58","doi-asserted-by":"crossref","unstructured":"Ranjan, A., & Black, M.J. (2017). Optical flow estimation using a spatial pyramid network. In CVPR (pp. 4161\u20134170).","DOI":"10.1109\/CVPR.2017.291"},{"key":"2024_CR59","doi-asserted-by":"publisher","first-page":"750","DOI":"10.3390\/s19040750","volume":"19","author":"M Rasoulidanesh","year":"2019","unstructured":"Rasoulidanesh, M., Yadav, S., Herath, S., Vaghei, Y., & Payandeh, S. (2019). Deep attention models for human tracking using rgbd. Sensors, 19, 750.","journal-title":"Sensors"},{"key":"2024_CR60","doi-asserted-by":"crossref","unstructured":"Ren, S., Liu, W., Liu, Y., Chen, H., Han, G., & He, S. (2021). Reciprocal transformations for unsupervised video object segmentation. In CVPR (pp. 15455\u201315464).","DOI":"10.1109\/CVPR46437.2021.01520"},{"key":"2024_CR61","doi-asserted-by":"crossref","unstructured":"Siam, M., Jiang, C., Lu, S., Petrich, L., Gamal, M., Elhoseiny, M., & Jagersand, M. (2019). Video object segmentation using teacher-student adaptation in a human robot interaction (hri) setting. In ICRA (pp. 50\u201356).","DOI":"10.1109\/ICRA.2019.8794254"},{"key":"2024_CR62","doi-asserted-by":"crossref","unstructured":"Song, H., Wang, W., Zhao, S., Shen, J., & Lam, K.-M. (2018). Pyramid dilated deeper convlstm for video salient object detection. In ECCV (pp. 715\u2013731).","DOI":"10.1007\/978-3-030-01252-6_44"},{"key":"2024_CR63","doi-asserted-by":"crossref","unstructured":"Sun, D., Yang, X., Liu, M.Y., & Kautz, J. (2018). Pwc-net: Cnns for optical flow using pyramid, warping, and cost volume. In CVPR (pp. 8934\u20138943).","DOI":"10.1109\/CVPR.2018.00931"},{"key":"2024_CR64","doi-asserted-by":"crossref","unstructured":"Sun, P., Zhang, W., Wang, H., Li, S., & Li, X. (2021). Deep rgb-d saliency detection with depth-sensitive attention and automatic multi-modal fusion. In CVPR (pp. 1407\u20131417).","DOI":"10.1109\/CVPR46437.2021.00146"},{"key":"2024_CR65","doi-asserted-by":"crossref","unstructured":"Sun, P., Zhang, W., Wang, H., Li, S., & Li, X. (2021). Deep rgb-d saliency detection with depth-sensitive attention and automatic multi-modal fusion. In CVPR (pp. 1407\u20131417).","DOI":"10.1109\/CVPR46437.2021.00146"},{"key":"2024_CR66","doi-asserted-by":"crossref","unstructured":"Teed, Z., & Deng, J. (2020). Raft: Recurrent all-pairs field transforms for optical flow. In ECCV (pp. 402\u2013419).","DOI":"10.1007\/978-3-030-58536-5_24"},{"key":"2024_CR67","doi-asserted-by":"crossref","unstructured":"Tokmakov, P., Alahari, K., & Schmid, C. (2017). Learning motion patterns in videos. In CVPR (pp. 3386\u20133394).","DOI":"10.1109\/CVPR.2017.64"},{"key":"2024_CR68","doi-asserted-by":"crossref","unstructured":"Tokmakov, P., Alahari, K., & Schmid, C. (2017). Learning video object segmentation with visual memory. In ICCV (pp. 4481\u20134490).","DOI":"10.1109\/ICCV.2017.480"},{"key":"2024_CR69","doi-asserted-by":"crossref","unstructured":"Tsai, Y.-H., Zhong, G., & Yang, M.-H. (2016). Semantic co-segmentation in videos. In ECCV (pp. 760\u2013775).","DOI":"10.1007\/978-3-319-46493-0_46"},{"key":"2024_CR70","unstructured":"Vaswani, A., Shazeer, N., Parmar, N., Uszkoreit, J., Jones, L., Gomez, A.N., Kaiser, L., & Polosukhin, I. (2017). Attention is all you need. In NeurIPS (pp. 5998-6008)"},{"key":"2024_CR71","doi-asserted-by":"crossref","unstructured":"Wang, W., & Neumann, U. (2018). Depth-aware cnn for rgb-d segmentation. In ECCV (pp. 135\u2013150).","DOI":"10.1007\/978-3-030-01252-6_9"},{"key":"2024_CR72","doi-asserted-by":"crossref","unstructured":"Wang, W., Lu, X., Shen, J., Crandall, D.J., & Shao, L. (2019). Zero-shot video object segmentation via attentive graph neural networks. In ICCV (pp. 9236\u20139245).","DOI":"10.1109\/ICCV.2019.00933"},{"key":"2024_CR73","doi-asserted-by":"crossref","unstructured":"Wang, W., Shen, J., & Porikli, F. (2015). Saliency-aware geodesic video object segmentation. In CVPR (pp. 3395\u20133402).","DOI":"10.1109\/CVPR.2015.7298961"},{"key":"2024_CR74","doi-asserted-by":"crossref","unstructured":"Wang, W., Song, H., Zhao, S., Shen, J., Zhao, S., Hoi, S. C. H., & Ling, H. (2019). Learning unsupervised video object segmentation through visual attention. In CVPR (pp. 3064\u20133074).","DOI":"10.1109\/CVPR.2019.00318"},{"key":"2024_CR75","doi-asserted-by":"crossref","unstructured":"Wang, Z., Simoncelli, E. P., & Bovik, A. C. (2003). Multiscale structural similarity for image quality assessment. In The Thrity-Seventh Asilomar Conference on Signals, Systems & Computers) 2003 (Vol. 2, pp. 1398\u20131402).","DOI":"10.1109\/ACSSC.2003.1292216"},{"key":"2024_CR76","doi-asserted-by":"crossref","unstructured":"Wei, J., Wang, S., & Huang, Q. (2020). $$\\text{F}^3$$net: fusion, feedback and focus for salient object detection. In AAAI (pp. 12321\u201312328).","DOI":"10.1609\/aaai.v34i07.6916"},{"key":"2024_CR77","unstructured":"Yang, G., & Ramanan, D. (2019). Volumetric correspondence networks for optical flow. In NeurIPS (pp. 794\u2013805)."},{"key":"2024_CR78","doi-asserted-by":"crossref","unstructured":"Yang, S., Zhang, L., Qi, J., Lu, H., Wang, S., & Zhang, X. (2021). Learning motion-appearance co-attention for zero-shot video object segmentation. In ICCV (pp. 1564\u20131573).","DOI":"10.1109\/ICCV48922.2021.00159"},{"key":"2024_CR79","doi-asserted-by":"crossref","unstructured":"Zhang, L., Dai, J., Lu, H., He, Y., & Wang, G. (2018). A bi-directional message passing model for salient object detection. In CVPR (pp. 1741\u20131750).","DOI":"10.1109\/CVPR.2018.00187"},{"key":"2024_CR80","doi-asserted-by":"crossref","unstructured":"Zhang, L., Zhang, J., Lin, Z., M\u011bch, R., Lu, H., & He, Y. (2020). Unsupervised video object segmentation with joint hotspot tracking. In ECCV (pp. 490\u2013506).","DOI":"10.1007\/978-3-030-58568-6_29"},{"key":"2024_CR81","doi-asserted-by":"crossref","unstructured":"Zhang, X., Wang, T., Qi, J., Lu, H., & Wang, G. (2018). Progressive attention guided recurrent network for salient object detection. In CVPR (pp. 714\u2013722).","DOI":"10.1109\/CVPR.2018.00081"},{"key":"2024_CR82","doi-asserted-by":"crossref","unstructured":"Zhang, Z., Cui, Z., Xu, C., Yan, Y., Sebe, N., & Yang, J. (2019). Pattern-affinitive propagation across depth, surface normal and semantic segmentation. In CVPR (pp. 4106\u20134115).","DOI":"10.1109\/CVPR.2019.00423"},{"key":"2024_CR83","doi-asserted-by":"crossref","unstructured":"Zhao, J.-X., Liu, J.-J., Fan, D.-P., Cao, Y., Yang, J., & Cheng, M.-M. (2019). Egnet: Edge guidance network for salient object detection. In ICCV (pp. 8779\u20138788).","DOI":"10.1109\/ICCV.2019.00887"},{"key":"2024_CR84","doi-asserted-by":"crossref","unstructured":"Zhao, J., Zhao, Y., Li, J., & Chen, X. (2020). Is depth really necessary for salient object detection? In ACM MM (pp. 1745\u20131754).","DOI":"10.1145\/3394171.3413855"},{"key":"2024_CR85","doi-asserted-by":"crossref","unstructured":"Zhao, J., Zhao, Y., Li, J., & Chen, X. (2020). Is depth really necessary for salient object detection? In ACM MM (pp. 1745\u20131754).","DOI":"10.1145\/3394171.3413855"},{"key":"2024_CR86","doi-asserted-by":"crossref","unstructured":"Zhao, S., Sheng, Y., Dong, Y., Chang, E. I., Xu, Y., et\u00a0al. (2020). Maskflownet: Asymmetric feature matching with learnable occlusion mask. In CVPR (pp. 6278\u20136287).","DOI":"10.1109\/CVPR42600.2020.00631"},{"key":"2024_CR87","doi-asserted-by":"crossref","unstructured":"Zhao, T., & Wu, X. (2019). Pyramid feature attention network for saliency detection. In CVPR (pp. 3085\u20133094).","DOI":"10.1109\/CVPR.2019.00320"},{"key":"2024_CR88","doi-asserted-by":"crossref","unstructured":"Zhao, X., Pang, Y., Yang, J., Zhang, L., & Lu, H. (2021). Multi-source fusion and automatic predictor selection for zero-shot video object segmentation. In ACM MM (pp. 2645\u20132653).","DOI":"10.1145\/3474085.3475192"},{"key":"2024_CR89","doi-asserted-by":"crossref","unstructured":"Zhao, X., Pang, Y., Zhang, L., Lu, H., & Ruan, X. (2022). Self-supervised pretraining for rgb-d salient object detection. In AAAI).","DOI":"10.1609\/aaai.v36i3.20257"},{"key":"2024_CR90","doi-asserted-by":"crossref","unstructured":"Zhao, X., Pang, Y., Zhang, L., Lu, H., & Zhang, L. (2020). Suppress and balance: A simple gated network for salient object detection. In ECCV (pp. 35\u201351).","DOI":"10.1007\/978-3-030-58536-5_3"},{"key":"2024_CR91","doi-asserted-by":"crossref","unstructured":"Zhao, X., Zhang, L., Pang, Y., Lu, H., & Zhang, L. (2020). A single stream network for robust and real-time rgb-d salient object detection. In ECCV (pp. 646\u2013662).","DOI":"10.1007\/978-3-030-58542-6_39"},{"key":"2024_CR92","doi-asserted-by":"crossref","unstructured":"Zhen, M., Li, S., Zhou, L., Shang, J., Feng, H., Fang, T., & Quan, L. (2020). Learning discriminative feature with crf for unsupervised video object segmentation. In ECCV (pp. 445\u2013462).","DOI":"10.1007\/978-3-030-58583-9_27"},{"key":"2024_CR93","doi-asserted-by":"crossref","unstructured":"Zhou, T., Fu, H., Chen, G., Zhou, Y., Fan, D.-P., & Shao, L. (2021). Specificity-preserving rgb-d saliency detection. In ICCV (pp. 4681\u20134691).","DOI":"10.1109\/ICCV48922.2021.00464"},{"key":"2024_CR94","doi-asserted-by":"crossref","unstructured":"Zhou, T., Fu, H., Chen, G., Zhou, Y., Fan, D.-P., & Shao, L. (2021). Specificity-preserving rgb-d saliency detection. In ICCV (pp. 4681\u20134691).","DOI":"10.1109\/ICCV48922.2021.00464"},{"key":"2024_CR95","doi-asserted-by":"crossref","unstructured":"Zhou, T., Wang, S., Zhou, Y., Yao, Y., Li, J., & Shao, L. (2020). Motion-attentive transition for zero-shot video object segmentation. In AAAI (pp. 13066\u201313073).","DOI":"10.1609\/aaai.v34i07.7008"}],"container-title":["International Journal of Computer Vision"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11263-024-02024-8.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11263-024-02024-8\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11263-024-02024-8.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,7,11]],"date-time":"2024-07-11T14:29:12Z","timestamp":1720708152000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11263-024-02024-8"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,3,7]]},"references-count":95,"journal-issue":{"issue":"8","published-print":{"date-parts":[[2024,8]]}},"alternative-id":["2024"],"URL":"https:\/\/doi.org\/10.1007\/s11263-024-02024-8","relation":{},"ISSN":["0920-5691","1573-1405"],"issn-type":[{"value":"0920-5691","type":"print"},{"value":"1573-1405","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,3,7]]},"assertion":[{"value":"7 January 2023","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"29 January 2024","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"7 March 2024","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}