{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,24]],"date-time":"2025-12-24T14:45:39Z","timestamp":1766587539831},"reference-count":44,"publisher":"Springer Science and Business Media LLC","issue":"2","license":[{"start":{"date-parts":[[2023,11,9]],"date-time":"2023-11-09T00:00:00Z","timestamp":1699488000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,11,9]],"date-time":"2023-11-09T00:00:00Z","timestamp":1699488000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["SIViP"],"published-print":{"date-parts":[[2024,3]]},"DOI":"10.1007\/s11760-023-02827-1","type":"journal-article","created":{"date-parts":[[2023,11,9]],"date-time":"2023-11-09T10:01:59Z","timestamp":1699524119000},"page":"1305-1316","update-policy":"http:\/\/dx.doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["SiamMaskAttn: inverted residual attention block fusing multi-scale feature information for multitask visual object tracking networks"],"prefix":"10.1007","volume":"18","author":[{"given":"Xiaofeng","family":"Bian","sequence":"first","affiliation":[]},{"given":"Chenggang","family":"Guo","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2023,11,9]]},"reference":[{"key":"2827_CR1","doi-asserted-by":"crossref","unstructured":"Bao, L., Wu, B., Liu, W.: CNN in MRF: Video object segmentation via inference in a CNN-based higher-order spatio-temporal MRF. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 5977\u20135986 (2018)","DOI":"10.1109\/CVPR.2018.00626"},{"key":"2827_CR2","doi-asserted-by":"crossref","unstructured":"Bertinetto, L., Valmadre, J., Henriques, JF., et\u00a0al.: Fully-convolutional siamese networks for object tracking. In: Computer Vision\u2013ECCV 2016 Workshops: Amsterdam, The Netherlands, October 8\u201310 and 15\u201316, 2016, Proceedings, Part II 14, pp. 850\u2013865 , Springer, Berlin (2016)","DOI":"10.1007\/978-3-319-48881-3_56"},{"key":"2827_CR3","doi-asserted-by":"crossref","unstructured":"Bhat, G., Danelljan, M., Gool, LV., et\u00a0al.: Learning discriminative model prediction for tracking. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 6182\u20136191 (2019)","DOI":"10.1109\/ICCV.2019.00628"},{"key":"2827_CR4","doi-asserted-by":"crossref","unstructured":"Caelles, S., Maninis, KK., Pont-Tuset, J., et\u00a0al.: One-shot video object segmentation. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 221\u2013230 (2017)","DOI":"10.1109\/CVPR.2017.565"},{"key":"2827_CR5","unstructured":"Chen, B.X., Tsotsos, J.K.: Fast visual object tracking with rotated bounding boxes. (2019) arXiv preprint arXiv:1907.03892"},{"key":"2827_CR6","doi-asserted-by":"crossref","unstructured":"Chen, X., Yan, B., Zhu, J., et\u00a0al.: Transformer tracking. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 8126\u20138135 (2021)","DOI":"10.1109\/CVPR46437.2021.00803"},{"key":"2827_CR7","doi-asserted-by":"crossref","unstructured":"Chen, Y., Pont-Tuset, J., Montes, A., et\u00a0al.: Blazingly fast video object segmentation with pixel-wise metric learning. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 1189\u20131198 (2018)","DOI":"10.1109\/CVPR.2018.00130"},{"key":"2827_CR8","doi-asserted-by":"crossref","unstructured":"Cheng, J., Tsai, Y.H., Wang, S., et\u00a0al.: Segflow: joint learning for video object segmentation and optical flow. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 686\u2013695 (2017)","DOI":"10.1109\/ICCV.2017.81"},{"key":"2827_CR9","doi-asserted-by":"crossref","unstructured":"Cheng, J., Tsai, Y.H., Hung, W.C., et\u00a0al.: Fast and accurate online video object segmentation via tracking parts. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 7415\u20137424 (2018)","DOI":"10.1109\/CVPR.2018.00774"},{"key":"2827_CR10","unstructured":"Cho, S., Lee, H., Woo, S., et\u00a0al.: Pmvos: pixel-level matching-based video object segmentation (2020) arXiv preprint arXiv:2009.08855"},{"key":"2827_CR11","doi-asserted-by":"crossref","unstructured":"Chu, Q., Ouyang, W., Li, H., et\u00a0al.: Online multi-object tracking using cnn-based single object tracker with spatial-temporal attention mechanism. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 4836\u20134845 (2017)","DOI":"10.1109\/ICCV.2017.518"},{"key":"2827_CR12","doi-asserted-by":"crossref","unstructured":"Danelljan, M., Bhat, G., Shahbaz\u00a0Khan, F., et\u00a0al.: Eco: efficient convolution operators for tracking. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 6638\u20136646 (2017)","DOI":"10.1109\/CVPR.2017.733"},{"key":"2827_CR13","unstructured":"G\u00fcndo\u011fdu, E., Alatan, A.A.: The visual object tracking vot2016 challenge results (2016)"},{"key":"2827_CR14","doi-asserted-by":"crossref","unstructured":"He, A., Luo, C., Tian. X., et\u00a0al.: Towards a better match in siamese network based visual object tracker. In: Proceedings of the European Conference on Computer Vision (ECCV) Workshops, (2018)","DOI":"10.1007\/978-3-030-11009-3_7"},{"key":"2827_CR15","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., et\u00a0al.: Deep residual learning for image recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 770\u2013778 (2016)","DOI":"10.1109\/CVPR.2016.90"},{"key":"2827_CR16","doi-asserted-by":"crossref","unstructured":"Howard, A., Sandler, M., Chu, G., et\u00a0al.: Searching for mobilenetv3. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 1314\u20131324 (2019)","DOI":"10.1109\/ICCV.2019.00140"},{"key":"2827_CR17","doi-asserted-by":"crossref","unstructured":"Hu, J., Shen, L., Sun, G.: Squeeze-and-excitation networks. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 7132\u20137141 (2018)","DOI":"10.1109\/CVPR.2018.00745"},{"key":"2827_CR18","doi-asserted-by":"crossref","unstructured":"Jampani, V., Gadde, R., Gehler, P.V.: Video propagation networks. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 451\u2013461 (2017)","DOI":"10.1109\/CVPR.2017.336"},{"key":"2827_CR19","unstructured":"Kristan, M., Leonardis, A., Matas, J., et\u00a0al.: The sixth visual object tracking vot2018 challenge results. In: Proceedings of the European Conference on Computer Vision (ECCV) Workshops, (2018)"},{"key":"2827_CR20","doi-asserted-by":"crossref","unstructured":"Li, B., Yan, J., Wu, W., et\u00a0al.: High performance visual tracking with siamese region proposal network. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 8971\u20138980 (2018)","DOI":"10.1109\/CVPR.2018.00935"},{"key":"2827_CR21","doi-asserted-by":"crossref","unstructured":"Li, B., Wu, W., Wang, Q., et\u00a0al.: Siamrpn++: Evolution of siamese visual tracking with very deep networks. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 4282\u20134291 (2019)","DOI":"10.1109\/CVPR.2019.00441"},{"key":"2827_CR22","doi-asserted-by":"crossref","unstructured":"Li, X., Loy, C.C.: Video object segmentation with joint re-identification and attention-aware mask propagation. In: Proceedings of the European Conference on Computer Vision (ECCV), pp. 90\u2013105 (2018)","DOI":"10.1007\/978-3-030-01219-9_6"},{"key":"2827_CR23","doi-asserted-by":"crossref","unstructured":"Lin, T.Y., Maire, M., Belongie, S., et\u00a0al.: Microsoft coco: Common objects in context. In: Computer Vision\u2013ECCV 2014: 13th European Conference, Zurich, Switzerland, September 6\u201312, 2014, Proceedings, Part V 13, pp. 740\u2013755. Springer (2014)","DOI":"10.1007\/978-3-319-10602-1_48"},{"key":"2827_CR24","doi-asserted-by":"crossref","unstructured":"Oh, S.W., Lee, J.Y., Sunkavalli, K., et\u00a0al.: Fast video object segmentation by reference-guided mask propagation. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 7376\u20137385 (2018)","DOI":"10.1109\/CVPR.2018.00770"},{"key":"2827_CR25","doi-asserted-by":"crossref","unstructured":"Oh, S.W., Lee, J.Y., Xu, N., et\u00a0al.: Video object segmentation using space-time memory networks. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 9226\u20139235 (2019)","DOI":"10.1109\/ICCV.2019.00932"},{"key":"2827_CR26","doi-asserted-by":"crossref","unstructured":"Perazzi, F., Pont-Tuset, J., McWilliams, B., et\u00a0al.: A benchmark dataset and evaluation methodology for video object segmentation. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 724\u2013732 (2016)","DOI":"10.1109\/CVPR.2016.85"},{"key":"2827_CR27","doi-asserted-by":"crossref","unstructured":"Perazzi, F., Khoreva, A., Benenson, R., et\u00a0al.: Learning video object segmentation from static images. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 2663\u20132672 (2017a)","DOI":"10.1109\/CVPR.2017.372"},{"key":"2827_CR28","doi-asserted-by":"crossref","unstructured":"Perazzi, F., Khoreva, A., Benenson, R., et\u00a0al.: Learning video object segmentation from static images. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 2663\u20132672 (2017b)","DOI":"10.1109\/CVPR.2017.372"},{"key":"2827_CR29","doi-asserted-by":"crossref","unstructured":"Pinheiro, P.O., Lin, T.Y., Collobert, R., et\u00a0al.: Learning to refine object segments. In: Computer Vision\u2013ECCV 2016: 14th European Conference, Amsterdam, The Netherlands, October 11\u201314, 2016, Proceedings, Part I 14, pp. 75\u201391. Springer (2016)","DOI":"10.1007\/978-3-319-46448-0_5"},{"key":"2827_CR30","unstructured":"Pont-Tuset, J., Perazzi, F., Caelles, S., et\u00a0al.: The 2017 davis challenge on video object segmentation (2017) arXiv preprint arXiv:1704.00675"},{"key":"2827_CR31","doi-asserted-by":"crossref","unstructured":"Ronneberger, O., Fischer, P., Brox, T.: U-net: convolutional networks for biomedical image segmentation. In: Medical Image Computing and Computer-Assisted Intervention - MICCAI, pp. 234\u2013241 (2015)","DOI":"10.1007\/978-3-319-24574-4_28"},{"key":"2827_CR32","doi-asserted-by":"publisher","first-page":"211","DOI":"10.1007\/s11263-015-0816-y","volume":"115","author":"O Russakovsky","year":"2015","unstructured":"Russakovsky, O., Deng, J., Su, H., et al.: Imagenet large scale visual recognition challenge. Int. J. Comput. Vis. 115, 211\u2013252 (2015)","journal-title":"Int. J. Comput. Vis."},{"key":"2827_CR33","doi-asserted-by":"crossref","unstructured":"Shin\u00a0Yoon, J., Rameau, F., Kim, J., et\u00a0al.: Pixel-level matching for video object segmentation using convolutional neural networks. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 2167\u20132176 (2017)","DOI":"10.1109\/ICCV.2017.238"},{"key":"2827_CR34","unstructured":"Vaswani, A., Shazeer, N., Parmar, N., et\u00a0al.: Attention is all you need. Adv. Neural Inf. Process. Syst. 30 (2017)"},{"key":"2827_CR35","doi-asserted-by":"crossref","unstructured":"Voigtlaender, P., Leibe, B.: Online adaptation of convolutional neural networks for the 2017 davis challenge on video object segmentation. In: The 2017 DAVIS Challenge on Video Object Segmentation-CVPR Workshops (2017)","DOI":"10.5244\/C.31.116"},{"key":"2827_CR36","doi-asserted-by":"crossref","unstructured":"Wang, F., Jiang, M., Qian, C., et\u00a0al.: Residual attention network for image classification. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 3156\u20133164 (2017)","DOI":"10.1109\/CVPR.2017.683"},{"key":"2827_CR37","doi-asserted-by":"crossref","unstructured":"Wang, Q., Zhang, L., Bertinetto, L., et\u00a0al.: Fast online object tracking and segmentation: a unifying approach. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 1328\u20131338 (2019)","DOI":"10.1109\/CVPR.2019.00142"},{"key":"2827_CR38","doi-asserted-by":"crossref","unstructured":"Wang, Q., Wu, B., Zhu, P., et\u00a0al.: Eca-net: Efficient channel attention for deep convolutional neural networks. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 11534\u201311542 (2020)","DOI":"10.1109\/CVPR42600.2020.01155"},{"key":"2827_CR39","unstructured":"Xu, N., Yang, L., Fan, Y., et\u00a0al.: Youtube-vos: a large-scale video object segmentation benchmark (2018) arXiv preprint arXiv:1809.03327"},{"key":"2827_CR40","doi-asserted-by":"crossref","unstructured":"Yan, B., Zhang, X., Wang, D., et\u00a0al.: Alpha-refine: boosting tracking performance by precise bounding box estimation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 5289\u20135298 (2021)","DOI":"10.1109\/CVPR46437.2021.00525"},{"key":"2827_CR41","doi-asserted-by":"publisher","DOI":"10.1016\/j.knosys.2020.106079","volume":"203","author":"K Yang","year":"2020","unstructured":"Yang, K., He, Z., Zhou, Z., et al.: Siamatt: Siamese attention network for visual tracking. Knowledge-based systems 203, 106079 (2020)","journal-title":"Knowledge-based systems"},{"key":"2827_CR42","doi-asserted-by":"crossref","unstructured":"Yang, L., Wang, Y., Xiong, X., et\u00a0al.: Efficient video object segmentation via network modulation. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 6499\u20136507 (2018)","DOI":"10.1109\/CVPR.2018.00680"},{"key":"2827_CR43","doi-asserted-by":"crossref","unstructured":"Zhang, Z., Peng, H., Fu, J., et\u00a0al.: Ocean: Object-aware anchor-free tracking. In: Computer Vision\u2013ECCV 2020: 16th European Conference, Glasgow, UK, August 23\u201328, 2020, Proceedings, Part XXI 16, pp. 771\u2013787. Springer (2020)","DOI":"10.1007\/978-3-030-58589-1_46"},{"key":"2827_CR44","doi-asserted-by":"crossref","unstructured":"Zhu, Z., Wang, Q., Li, B., et\u00a0al.: Distractor-aware siamese networks for visual object tracking. In: Proceedings of the European Conference on Computer Vision (ECCV), pp. 101\u2013117 (2018)","DOI":"10.1007\/978-3-030-01240-3_7"}],"container-title":["Signal, Image and Video Processing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11760-023-02827-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11760-023-02827-1\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11760-023-02827-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,2,20]],"date-time":"2024-02-20T07:10:18Z","timestamp":1708413018000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11760-023-02827-1"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,11,9]]},"references-count":44,"journal-issue":{"issue":"2","published-print":{"date-parts":[[2024,3]]}},"alternative-id":["2827"],"URL":"https:\/\/doi.org\/10.1007\/s11760-023-02827-1","relation":{},"ISSN":["1863-1703","1863-1711"],"issn-type":[{"value":"1863-1703","type":"print"},{"value":"1863-1711","type":"electronic"}],"subject":[],"published":{"date-parts":[[2023,11,9]]},"assertion":[{"value":"17 August 2023","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"17 September 2023","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"4 October 2023","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"9 November 2023","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors have no competing interests to declare that are relevant to the content of this article.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}]}}