{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,27]],"date-time":"2026-01-27T02:16:10Z","timestamp":1769480170211,"version":"3.49.0"},"reference-count":39,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["SIViP"],"published-print":{"date-parts":[[2026,1]]},"DOI":"10.1007\/s11760-025-05068-6","type":"journal-article","created":{"date-parts":[[2026,1,14]],"date-time":"2026-01-14T20:17:28Z","timestamp":1768421848000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Visual tracking based on spatiotemporal deformable mamba"],"prefix":"10.1007","volume":"20","author":[{"given":"Ruixu","family":"Wu","sequence":"first","affiliation":[]},{"given":"Yanli","family":"Liu","sequence":"additional","affiliation":[]},{"given":"Xiaogang","family":"Wang","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2026,1,14]]},"reference":[{"issue":"3","key":"5068_CR1","doi-asserted-by":"publisher","first-page":"583","DOI":"10.1109\/TPAMI.2014.2345390","volume":"37","author":"JF Henriques","year":"2015","unstructured":"Henriques, J.F., Caseiro, R., Martins, P., et al.: High-speed tracking with kernelized correlation filters. IEEE Trans. Pattern Anal. Mach. Intell. 37(3), 583\u2013596 (2015)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"issue":"2","key":"5068_CR2","doi-asserted-by":"publisher","first-page":"1201","DOI":"10.1007\/s11042-014-2364-9","volume":"75","author":"I Bouchrika","year":"2016","unstructured":"Bouchrika, I., Carter, J.N., Nixon, M.S.: Towards automated visual surveillance using gait for identity recognition and tracking across multiple non-intersecting cameras. Multimed. Tools Appl. 75(2), 1201\u20131221 (2016)","journal-title":"Multimed. Tools Appl."},{"key":"5068_CR3","doi-asserted-by":"crossref","unstructured":"Chen, X., Yan, B., Zhu, J., et\u00a0al.: Transformer tracking. In: International Conference on Computer Vision and Pattern Recogintion (CVPR), pp 8126\u20138135 (2021)","DOI":"10.1109\/CVPR46437.2021.00803"},{"key":"5068_CR4","doi-asserted-by":"crossref","unstructured":"Yan, B., Peng, H., Fu, J., et\u00a0al.: Learning spatio-temporal transformer for visual tracking. In: International Conference on Computer Vision (ICCV), pp 10448\u201310457 (2021)","DOI":"10.1109\/ICCV48922.2021.01028"},{"key":"5068_CR5","doi-asserted-by":"crossref","unstructured":"Wang, N., Zhou, W., Wang, J., et\u00a0al.: Transformer meets tracker: Exploiting temporal context for robust visual tracking. In: International Conference on Computer Vision and Pattern Recogintion (CVPR), pp 1571\u20131580 (2021)","DOI":"10.1109\/CVPR46437.2021.00162"},{"key":"5068_CR6","first-page":"103031","volume":"37","author":"Y Liu","year":"2024","unstructured":"Liu, Y., Tian, Y., Zhao, Y., et al.: Vmamba: Visual state space model. Adv. Neural. Inf. Process. Syst. 37, 103031\u2013103063 (2024)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"5068_CR7","doi-asserted-by":"crossref","unstructured":"Liu, L., Zhang, M., Yin, J., et\u00a0al.: Defmamba: Deformable visual state space model. In: International Conference on Computer Vision and Pattern Recogintion (CVPR), pp 8838\u20138847 (2025)","DOI":"10.1109\/CVPR52734.2025.00826"},{"key":"5068_CR8","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1007\/s44267-024-00068-5","volume":"2","author":"C Liu","year":"2024","unstructured":"Liu, C., Yuan, Y., Chen, X., et al.: Spatial-temporal initialization dilemma: towards realistic visual tracking. Vis. Intell. 2, 1\u201316 (2024)","journal-title":"Vis. Intell."},{"key":"5068_CR9","doi-asserted-by":"crossref","unstructured":"Guo, D., Wang, J., Cui, Y., et\u00a0al.: Siamcar: Siamese fully convolutional classification and regression for visual tracking. In: International Conference on Computer Vision and Pattern Recogintion (CVPR), pp 6269\u20136277 (2020)","DOI":"10.1109\/CVPR42600.2020.00630"},{"key":"5068_CR10","doi-asserted-by":"crossref","unstructured":"Zhang, Z., Peng, H., Fu, J., et\u00a0al.: Ocean: Object-aware anchor-free tracking. In: European Conference on Computer Vision (ECCV), Springer, pp 771\u2013787 (2020)","DOI":"10.1007\/978-3-030-58589-1_46"},{"key":"5068_CR11","doi-asserted-by":"crossref","unstructured":"Xu, Y., Wang, Z., Li, Z., et\u00a0al.: Siamfc++: Towards robust and accurate visual tracking with target estimation guidelines. In: AAAI Conference on Artificial Intelligence (AAAI), pp 12549\u201312556 (2020)","DOI":"10.1609\/aaai.v34i07.6944"},{"key":"5068_CR12","doi-asserted-by":"crossref","unstructured":"Borsuk, V., Vei, R., Kupyn, O., et\u00a0al.: Fear: Fast, efficient, accurate and robust visual tracker. In: European Conference on Computer Vision (ECCV), Springer, pp 644\u2013663 (2022)","DOI":"10.1007\/978-3-031-20047-2_37"},{"key":"5068_CR13","doi-asserted-by":"crossref","unstructured":"Fu, Z., Fu, Z., Liu, Q., et\u00a0al.: Sparsett: Visual tracking with sparse transformers. arXiv preprint arXiv:2205.03776 (2022)","DOI":"10.24963\/ijcai.2022\/127"},{"key":"5068_CR14","doi-asserted-by":"crossref","unstructured":"Cui, Y., Jiang, C., Wang, L., et\u00a0al.: Mixformer: End-to-end tracking with iterative mixed attention. In: International Conference on Computer Vision and Pattern Recogintion (CVPR), pp 13608\u201313618 (2022)","DOI":"10.1109\/CVPR52688.2022.01324"},{"key":"5068_CR15","doi-asserted-by":"crossref","unstructured":"Gao, S., Zhou, C., Ma, C., et\u00a0al.: Aiatrack: Attention in attention for transformer visual tracking. arXiv preprint arXiv:2207.09603 (2022)","DOI":"10.1007\/978-3-031-20047-2_9"},{"key":"5068_CR16","doi-asserted-by":"crossref","unstructured":"Song, Z., Luo, R., Yu, J., et\u00a0al.: Compact transformer tracker with correlative masked modeling. arXiv preprint arXiv:2301.10938 (2023)","DOI":"10.1609\/aaai.v37i2.25327"},{"key":"5068_CR17","doi-asserted-by":"crossref","unstructured":"Wei, X., Bai, Y., Zheng, Y., et\u00a0al.: Autoregressive visual tracking. In: International Conference on Computer Vision and Pattern Recogintion (CVPR), pp 9697\u20139706 (2023)","DOI":"10.1109\/CVPR52729.2023.00935"},{"key":"5068_CR18","doi-asserted-by":"crossref","unstructured":"Xiao, C., Cao, Q., Luo, Z., et\u00a0al.: Mambatrack: a simple baseline for multiple object tracking with state space model. In: Proceedings of the 32nd ACM international conference on multimedia, pp 4082\u20134091 (2024)","DOI":"10.1145\/3664647.3680944"},{"key":"5068_CR19","doi-asserted-by":"crossref","unstructured":"Chen, X,, Kang, B., Geng, W., et\u00a0al.: Sutrack: Towards simple and unified single object tracking. In: AAAI Conference on Artificial Intelligence (AAAI), pp 2239\u20132247 (2025)","DOI":"10.1609\/aaai.v39i2.32223"},{"key":"5068_CR20","doi-asserted-by":"crossref","unstructured":"Xue, Y., Jin, G., Zhong, B., et\u00a0al.: Fmtrack: Frequency-aware interaction and multi-expert fusion for rgb-t tracking. IEEE Transactions on Circuits and Systems for Video Technology (2025)","DOI":"10.1109\/TCSVT.2025.3601598"},{"key":"5068_CR21","doi-asserted-by":"crossref","unstructured":"Xue, C., Zhong, B., Liang, Q., et\u00a0al.: Similarity-guided layer-adaptive vision transformer for uav tracking. In: International Conference on Computer Vision and Pattern Recogintion (CVPR), pp 6730\u20136740 (2025)","DOI":"10.1109\/CVPR52734.2025.00631"},{"key":"5068_CR22","doi-asserted-by":"publisher","first-page":"150","DOI":"10.1016\/j.neucom.2022.04.043","volume":"492","author":"H Saribas","year":"2022","unstructured":"Saribas, H., Cevikalp, H., Kpkl, O., et al.: Trat: Tracking by attention using spatio-temporal features. Neurocomputing 492, 150\u2013161 (2022)","journal-title":"Neurocomputing"},{"key":"5068_CR23","doi-asserted-by":"crossref","unstructured":"Chen, X., Peng, H., Wang, D., et\u00a0al.: Seqtrack: Sequence to sequence learning for visual object tracking. In: International Conference on Computer Vision and Pattern Recogintion (CVPR), pp 14572\u201314581 (2023)","DOI":"10.1109\/CVPR52729.2023.01400"},{"key":"5068_CR24","doi-asserted-by":"crossref","unstructured":"Xie, J., Zhong, B., Mo, Z., et\u00a0al.: Autoregressive queries for adaptive tracking with spatio-temporal transformers. In: International Conference on Computer Vision and Pattern Recogintion (CVPR), pp 19300\u201319309 (2024)","DOI":"10.1109\/CVPR52733.2024.01826"},{"key":"5068_CR25","doi-asserted-by":"crossref","unstructured":"Zheng, Y., Zhong, B., Liang, Q., et\u00a0al.: Odtrack: Online dense temporal token learning for visual tracking. In: AAAI Conference on Artificial Intelligence (AAAI), pp 7588\u20137596 (2024)","DOI":"10.1609\/aaai.v38i7.28591"},{"key":"5068_CR26","first-page":"1","volume":"62","author":"B Lin","year":"2024","unstructured":"Lin, B., Zheng, J., Xue, C., et al.: Motion-aware correlation filter-based object tracking in satellite videos. IEEE Trans. Geosci. Remote Sens. 62, 1\u201313 (2024)","journal-title":"IEEE Trans. Geosci. Remote Sens."},{"key":"5068_CR27","doi-asserted-by":"crossref","unstructured":"Xue, C., Zhong, B., Liang, Q., et\u00a0al.: Unifying motion and appearance cues for visual tracking via shared queries. IEEE Transactions on Circuits and Systems for Video Technology (2024)","DOI":"10.1109\/TCSVT.2024.3486347"},{"key":"5068_CR28","doi-asserted-by":"publisher","DOI":"10.1016\/j.engappai.2023.107269","volume":"127","author":"R Wu","year":"2024","unstructured":"Wu, R., Wen, X., Yuan, L., et al.: Visual tracking based on deformable transformer and spatiotemporal information. Eng. Appl. Artif. Intell. 127, 107269 (2024)","journal-title":"Eng. Appl. Artif. Intell."},{"key":"5068_CR29","unstructured":"Huang, L., Xin, et\u00a0al.: Got-10k: A large high-diversity benchmark for generic object tracking in the wild. IEEE transactions on pattern analysis and machine intelligence (2019)"},{"key":"5068_CR30","doi-asserted-by":"crossref","unstructured":"Fan, H., Lin, L., Yang, F., et\u00a0al.: Lasot: A high-quality benchmark for large-scale single object tracking. In: International Conference on Computer Vision and Pattern Recogintion (CVPR), pp 5374\u20135383 (2019)","DOI":"10.1109\/CVPR.2019.00552"},{"key":"5068_CR31","doi-asserted-by":"crossref","unstructured":"Muller, M., Bibi, A., Giancola, S., et\u00a0al.: Trackingnet: A large-scale dataset and benchmark for object tracking in the wild. In: European Conference on Computer Vision (ECCV), pp 300\u2013317 (2018)","DOI":"10.1007\/978-3-030-01246-5_19"},{"key":"5068_CR32","doi-asserted-by":"crossref","unstructured":"Lin, TY., Maire, M., Belongie, S., et\u00a0al.: Microsoft coco: Common objects in context. In: European Conference on Computer Vision (ECCV), Springer, pp 740\u2013755 (2014)","DOI":"10.1007\/978-3-319-10602-1_48"},{"key":"5068_CR33","unstructured":"Loshchilov, I., Hutter, F.: Fixing weight decay regularization in adam. arXiv preprint arXiv:1711.05101 (2018)"},{"key":"5068_CR34","doi-asserted-by":"publisher","DOI":"10.1016\/j.knosys.2022.109897","volume":"256","author":"R Wu","year":"2022","unstructured":"Wu, R., Wen, X., Yuan, L., et al.: Dasftot: Dual attention spatiotemporal fused transformer for object tracking. Knowl.-Based Syst. 256, 109897 (2022)","journal-title":"Knowl.-Based Syst."},{"key":"5068_CR35","doi-asserted-by":"crossref","unstructured":"Mayer, C., Danelljan, M., Paudel, DP., et\u00a0al.: Learning target candidate association to keep track of what not to track. In: International Conference on Computer Vision (ICCV), pp 13444\u201313454 (2021)","DOI":"10.1109\/ICCV48922.2021.01319"},{"key":"5068_CR36","doi-asserted-by":"crossref","unstructured":"Chen, B., Li, P., Bai, L., et\u00a0al.: Backbone is all your need: A simplified architecture for visual object tracking. In: European Conference on Computer Vision (ECCV), Springer, pp 375\u2013392 (2022)","DOI":"10.1007\/978-3-031-20047-2_22"},{"key":"5068_CR37","first-page":"16743","volume":"35","author":"L Lin","year":"2022","unstructured":"Lin, L., Fan, H., Zhang, Z., et al.: Swintrack: A simple and strong baseline for transformer tracking. Adv. Neural. Inf. Process. Syst. 35, 16743\u201316754 (2022)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"5068_CR38","unstructured":"Kristan, M., Leonardis, A., Matas, J., et\u00a0al.: The eighth visual object tracking vot2020 challenge results. In: European Conference on Computer Vision (ECCV), Springer, pp 547\u2013601 (2020)"},{"key":"5068_CR39","doi-asserted-by":"crossref","unstructured":"Yan, B., Zhang, X., Wang, D., et\u00a0al.: Alpha-refine: Boosting tracking performance by precise bounding box estimation. In: International Conference on Computer Vision and Pattern Recogintion (CVPR), pp 5289\u20135298 (2021)","DOI":"10.1109\/CVPR46437.2021.00525"}],"container-title":["Signal, Image and Video Processing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11760-025-05068-6.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11760-025-05068-6","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11760-025-05068-6.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,1,26]],"date-time":"2026-01-26T15:44:05Z","timestamp":1769442245000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11760-025-05068-6"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,1]]},"references-count":39,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2026,1]]}},"alternative-id":["5068"],"URL":"https:\/\/doi.org\/10.1007\/s11760-025-05068-6","relation":{},"ISSN":["1863-1703","1863-1711"],"issn-type":[{"value":"1863-1703","type":"print"},{"value":"1863-1711","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026,1]]},"assertion":[{"value":"17 September 2025","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"27 November 2025","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"23 December 2025","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"14 January 2026","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare no competing interests.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Competing interests"}}],"article-number":"11"}}