{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,2]],"date-time":"2026-05-02T21:55:56Z","timestamp":1777758956554,"version":"3.51.4"},"reference-count":64,"publisher":"Springer Science and Business Media LLC","issue":"6","license":[{"start":{"date-parts":[[2026,4,1]],"date-time":"2026-04-01T00:00:00Z","timestamp":1775001600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2026,4,1]],"date-time":"2026-04-01T00:00:00Z","timestamp":1775001600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"name":"the Natural Science Foundation of Shandong Province"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Vis Comput"],"published-print":{"date-parts":[[2026,4]]},"DOI":"10.1007\/s00371-025-04344-3","type":"journal-article","created":{"date-parts":[[2026,4,13]],"date-time":"2026-04-13T12:08:50Z","timestamp":1776082130000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Fetatrack: Foreground-aware dynamic template co-evolution for visual object tracking"],"prefix":"10.1007","volume":"42","author":[{"given":"Yibing","family":"Zhang","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xuan","family":"Wang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yongchao","family":"Song","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Weiqing","family":"Yan","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Aoran","family":"Wang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zhe","family":"Dai","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2026,4,13]]},"reference":[{"issue":"7","key":"4344_CR1","doi-asserted-by":"publisher","first-page":"5519","DOI":"10.1109\/TCSVT.2024.3352573","volume":"34","author":"Y He","year":"2024","unstructured":"He, Y., Ma, Z., Wei, X., Gong, Y.: Knowledge synergy learning for multi-modal tracking. IEEE Trans. Circuits Syst. Video Technol. 34(7), 5519\u20135532 (2024)","journal-title":"IEEE Trans. Circuits Syst. Video Technol."},{"issue":"5","key":"4344_CR2","first-page":"6552","volume":"45","author":"S Javed","year":"2022","unstructured":"Javed, S., Danelljan, M., Khan, F.S., Khan, M.H., Felsberg, M., Matas, J.: Visual object tracking with discriminative filters and siamese networks: a survey and outlook. IEEE Trans. Pattern Anal. Mach. Intell. 45(5), 6552\u20136574 (2022)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"4344_CR3","doi-asserted-by":"crossref","unstructured":"Kiani\u00a0Galoogahi, H., Fagg, A., Lucey, S.: Learning background-aware correlation filters for visual tracking. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 1135\u20131143 (2017)","DOI":"10.1109\/ICCV.2017.129"},{"key":"4344_CR4","doi-asserted-by":"crossref","unstructured":"Li, F., Tian, C., Zuo, W., Zhang, L., Yang, M.-H.: Learning spatial-temporal regularized correlation filters for visual tracking. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 4904\u20134913 (2018)","DOI":"10.1109\/CVPR.2018.00515"},{"key":"4344_CR5","doi-asserted-by":"crossref","unstructured":"Nam, H., Han, B.: Learning multi-domain convolutional neural networks for visual tracking. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 4293\u20134302 (2016)","DOI":"10.1109\/CVPR.2016.465"},{"issue":"1","key":"4344_CR6","doi-asserted-by":"publisher","first-page":"181","DOI":"10.1007\/s00371-023-02774-5","volume":"40","author":"P Liu","year":"2024","unstructured":"Liu, P., Li, G., Zhao, W., Tang, X.: A coupling method of learning structured support correlation filters for visual tracking. Vis. Comput. 40(1), 181\u2013199 (2024)","journal-title":"Vis. Comput."},{"issue":"12","key":"4344_CR7","doi-asserted-by":"publisher","first-page":"8907","DOI":"10.1007\/s00371-024-03283-9","volume":"40","author":"Q Jing","year":"2024","unstructured":"Jing, Q., Zhang, P., Zhang, W., Lei, W.: An improved target tracking method based on extraction of corner points. Vis. Comput. 40(12), 8907\u20138926 (2024)","journal-title":"Vis. Comput."},{"key":"4344_CR8","doi-asserted-by":"crossref","unstructured":"Xie, F., Wang, C., Wang, G., Cao, Y., Yang, W., Zeng, W.: Correlation-aware deep tracking. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 8751\u20138760 (2022)","DOI":"10.1109\/CVPR52688.2022.00855"},{"key":"4344_CR9","doi-asserted-by":"crossref","unstructured":"Chen, X., Yan, B., Zhu, J., Wang, D., Yang, X., Lu, H.: Transformer tracking. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 8126\u20138135 (2021)","DOI":"10.1109\/CVPR46437.2021.00803"},{"key":"4344_CR10","doi-asserted-by":"crossref","unstructured":"Ye, B., Chang, H., Ma, B., Shan, S., Chen, X.: Joint feature learning and relation modeling for tracking: a one-stream framework. In: European Conference on Computer Vision, pp. 341\u2013357. Springer (2022)","DOI":"10.1007\/978-3-031-20047-2_20"},{"key":"4344_CR11","doi-asserted-by":"crossref","unstructured":"Song, Z., Yu, J., Chen, Y.-P.P., Yang, W.: Transformer tracking with cyclic shifting window attention. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 8791\u20138800 (2022)","DOI":"10.1109\/CVPR52688.2022.00859"},{"key":"4344_CR12","doi-asserted-by":"crossref","unstructured":"Mayer, C., Danelljan, M., Bhat, G., Paul, M., Paudel, D.P., Yu, F., Van\u00a0Gool, L.: Transforming model prediction for tracking. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 8731\u20138740 (2022)","DOI":"10.1109\/CVPR52688.2022.00853"},{"key":"4344_CR13","doi-asserted-by":"crossref","unstructured":"Xu, Y., Wang, Z., Li, Z., Yuan, Y., Yu, G.: Siamfc++: towards robust and accurate visual tracking with target estimation guidelines. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol. 34, pp. 12549\u201312556 (2020)","DOI":"10.1609\/aaai.v34i07.6944"},{"key":"4344_CR14","doi-asserted-by":"crossref","unstructured":"Li, B., Wu, W., Wang, Q., Zhang, F., Xing, J., Yan, J.: Siamrpn++: evolution of siamese visual tracking with very deep networks. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 4282\u20134291 (2019)","DOI":"10.1109\/CVPR.2019.00441"},{"key":"4344_CR15","doi-asserted-by":"crossref","unstructured":"Yang, T., Chan, A.B.: Learning dynamic memory networks for object tracking. In: Proceedings of the European Conference on Computer Vision (ECCV), pp. 152\u2013167 (2018)","DOI":"10.1007\/978-3-030-01240-3_10"},{"key":"4344_CR16","doi-asserted-by":"crossref","unstructured":"Yan, B., Peng, H., Fu, J., Wang, D., Lu, H.: Learning spatio-temporal transformer for visual tracking. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 10448\u201310457 (2021)","DOI":"10.1109\/ICCV48922.2021.01028"},{"key":"4344_CR17","doi-asserted-by":"crossref","unstructured":"Fu, Z., Liu, Q., Fu, Z., Wang, Y.: Stmtrack: Template-free visual tracking with space-time memory networks. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 13774\u201313783 (2021)","DOI":"10.1109\/CVPR46437.2021.01356"},{"key":"4344_CR18","doi-asserted-by":"publisher","first-page":"58736","DOI":"10.52202\/075280-2561","volume":"36","author":"Y Cui","year":"2023","unstructured":"Cui, Y., Song, T., Wu, G., Wang, L.: Mixformerv2: efficient fully transformer tracking. Adv. Neural. Inf. Process. Syst. 36, 58736\u201358751 (2023)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"4344_CR19","first-page":"10814","volume":"36","author":"X Zhou","year":"2023","unstructured":"Zhou, X., Guo, P., Hong, L., Li, J., Zhang, W., Ge, W., Zhang, W.: Reading relevant feature from global representation memory for visual object tracking. Adv. Neural. Inf. Process. Syst. 36, 10814\u201310827 (2023)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"4344_CR20","doi-asserted-by":"crossref","unstructured":"Gu, F., Lu, J., Cai, C., Zhu, Q., Ju, Z.: RTSformer: A robust toroidal transformer with spatiotemporal features for visual tracking IEEE Trans. Hum. Mach. Syst. 54(2), 214\u2013225 (2024)","DOI":"10.1109\/THMS.2024.3370582"},{"key":"4344_CR21","doi-asserted-by":"crossref","unstructured":"Wang, N., Zhou, W., Wang, J., Li, H.: Transformer meets tracker: exploiting temporal context for robust visual tracking. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 1571\u20131580 (2021)","DOI":"10.1109\/CVPR46437.2021.00162"},{"key":"4344_CR22","doi-asserted-by":"crossref","unstructured":"Gao, S., Zhou, C., Ma, C., Wang, X., Yuan, J.: Aiatrack: attention in attention for transformer visual tracking. In: European Conference on Computer Vision, pp. 146\u2013164. Springer (2022)","DOI":"10.1007\/978-3-031-20047-2_9"},{"key":"4344_CR23","doi-asserted-by":"crossref","unstructured":"Kugarajeevan, J., Kokul, T., Ramanan, A., Fernando, S.: Optimized information flow for transformer tracking. arXiv preprint arXiv:2402.08195 (2024)","DOI":"10.1016\/j.eswa.2024.125381"},{"key":"4344_CR24","doi-asserted-by":"publisher","first-page":"1432","DOI":"10.1109\/TIP.2024.3364056","volume":"33","author":"Y Chen","year":"2024","unstructured":"Chen, Y., Jiang, R., Zheng, Y., Sheng, B., Yang, Z.-X., Wu, E.: Dual branch multi-level semantic learning for few-shot segmentation. IEEE Trans. Image Process. 33, 1432\u20131447 (2024)","journal-title":"IEEE Trans. Image Process."},{"issue":"1","key":"4344_CR25","doi-asserted-by":"publisher","first-page":"2201","DOI":"10.1002\/cav.2201","volume":"35","author":"X Zhu","year":"2024","unstructured":"Zhu, X., Yao, X., Zhang, J., Zhu, M., You, L., Yang, X., Zhang, J., Zhao, H., Zeng, D.: Tmsdnet: transformer with multi-scale dense network for single and multi-view 3d reconstruction. Comput. Anim. Virtual Worlds 35(1), 2201 (2024)","journal-title":"Comput. Anim. Virtual Worlds"},{"key":"4344_CR26","doi-asserted-by":"publisher","DOI":"10.1016\/j.engappai.2024.107905","volume":"132","author":"Y Zhou","year":"2024","unstructured":"Zhou, Y., He, K., Xu, D., Tao, D., Lin, X., Li, C.: Asfusion: adaptive visual enhancement and structural patch decomposition for infrared and visible image fusion. Eng. Appl. Artif. Intell. 132, 107905 (2024)","journal-title":"Eng. Appl. Artif. Intell."},{"key":"4344_CR27","doi-asserted-by":"crossref","unstructured":"Wei, X., Bai, Y., Zheng, Y., Shi, D., Gong, Y.: Autoregressive visual tracking. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 9697\u20139706 (2023)","DOI":"10.1109\/CVPR52729.2023.00935"},{"key":"4344_CR28","doi-asserted-by":"crossref","unstructured":"Bai, Y., Zhao, Z., Gong, Y., Wei, X.: Artrackv2: prompting autoregressive tracker where to look and how to describe. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 19048\u201319057 (2024)","DOI":"10.1109\/CVPR52733.2024.01802"},{"key":"4344_CR29","doi-asserted-by":"crossref","unstructured":"Chen, X., Peng, H., Wang, D., Lu, H., Hu, H.: Seqtrack: sequence to sequence learning for visual object tracking. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 14572\u201314581 (2023)","DOI":"10.1109\/CVPR52729.2023.01400"},{"key":"4344_CR30","doi-asserted-by":"publisher","DOI":"10.1016\/j.engappai.2025.110482","volume":"149","author":"M Park","year":"2025","unstructured":"Park, M., Song, J., Yoon, S.M.: Visual object tracking using learnable target-aware token emphasis. Eng. Appl. Artif. Intell. 149, 110482 (2025)","journal-title":"Eng. Appl. Artif. Intell."},{"key":"4344_CR31","doi-asserted-by":"crossref","unstructured":"Kang, B., Chen, X., Lai, S., Liu, Y., Liu, Y., Wang, D.: Exploring enhanced contextual information for video-level object tracking. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol. 39, pp. 4194\u20134202 (2025)","DOI":"10.1609\/aaai.v39i4.32440"},{"key":"4344_CR32","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2024.110981","volume":"158","author":"Y Feng","year":"2025","unstructured":"Feng, Y., Chen, F., Yu, J., Ji, Y., Wu, F., Liu, S., Jing, X.-Y.: Homogeneous and heterogeneous relational graph for visible-infrared person re-identification. Pattern Recogn. 158, 110981 (2025)","journal-title":"Pattern Recogn."},{"key":"4344_CR33","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2025.111510","volume":"164","author":"Y Feng","year":"2025","unstructured":"Feng, Y., Chen, F., Sun, G., Wu, F., Ji, Y., Liu, T., Liu, S., Jing, X.-Y., Luo, J.: Learning multi-granularity representation with transformer for visible-infrared person re-identification. Pattern Recogn. 164, 111510 (2025)","journal-title":"Pattern Recogn."},{"key":"4344_CR34","doi-asserted-by":"publisher","first-page":"5096","DOI":"10.1109\/TASE.2024.3416533","volume":"22","author":"W Yan","year":"2024","unstructured":"Yan, W., Chen, Y., Zhou, W., Cong, R.: Mvoxti-dnerf: explicit multi-scale voxel interpolation and temporal encoding network for efficient dynamic neural radiance field. IEEE Trans. Autom. Sci. Eng. 22, 5096\u20135107 (2024)","journal-title":"IEEE Trans. Autom. Sci. Eng."},{"key":"4344_CR35","unstructured":"Dosovitskiy, A., Beyer, L., Kolesnikov, A., Weissenborn, D., Zhai, X., Unterthiner, T., Dehghani, M., Minderer, M., Heigold, G., Gelly, S., et al.: An image is worth 16x16 words: transformers for image recognition at scale. arXiv preprint arXiv:2010.11929 (2020)"},{"key":"4344_CR36","doi-asserted-by":"crossref","unstructured":"He, K., Chen, X., Xie, S., Li, Y., Doll\u00e1r, P., Girshick, R.: Masked autoencoders are scalable vision learners. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 16000\u201316009 (2022)","DOI":"10.1109\/CVPR52688.2022.01553"},{"issue":"1","key":"4344_CR37","doi-asserted-by":"publisher","first-page":"57","DOI":"10.1016\/j.vrih.2022.07.006","volume":"5","author":"M Zhang","year":"2023","unstructured":"Zhang, M., Tian, X.: Transformer architecture based on mutual attention for image-anomaly detection. Virtual Real. Intell. Hardw. 5(1), 57\u201367 (2023)","journal-title":"Virtual Real. Intell. Hardw."},{"key":"4344_CR38","unstructured":"Vaswani, A., Shazeer, N., Parmar, N., Uszkoreit, J., Jones, L., Gomez, A.N., Kaiser, \u0141., Polosukhin, I.: Attention is all you need. Adv. Neural Inf. Process. Syst. 30, 5998\u20136008 (2017)"},{"key":"4344_CR39","unstructured":"Chen, T., Saxena, S., Li, L., Fleet, D.J., Hinton, G.: Pix2seq: a language modeling framework for object detection. arXiv preprint arXiv:2109.10852 (2021)"},{"key":"4344_CR40","doi-asserted-by":"publisher","first-page":"31333","DOI":"10.52202\/068431-2272","volume":"35","author":"T Chen","year":"2022","unstructured":"Chen, T., Saxena, S., Li, L., Lin, T.-Y., Fleet, D.J., Hinton, G.E.: A unified sequence interface for vision tasks. Adv. Neural. Inf. Process. Syst. 35, 31333\u201331346 (2022)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"issue":"3","key":"4344_CR41","doi-asserted-by":"publisher","first-page":"309","DOI":"10.1145\/1015706.1015720","volume":"23","author":"C Rother","year":"2004","unstructured":"Rother, C., Kolmogorov, V., Blake, A.: \u201cgrabcut\u2019\u2019 interactive foreground extraction using iterated graph cuts. ACM Trans. Graphics (TOG) 23(3), 309\u2013314 (2004)","journal-title":"ACM Trans. Graphics (TOG)"},{"key":"4344_CR42","doi-asserted-by":"crossref","unstructured":"Zhang, J., Hu, J.: Image segmentation based on 2d otsu method with histogram analysis. In: 2008 International Conference on Computer Science and Software Engineering, vol. 6, pp. 105\u2013108. IEEE (2008)","DOI":"10.1109\/CSSE.2008.206"},{"key":"4344_CR43","unstructured":"Chen, L.-C., Papandreou, G., Schroff, F., Adam, H.: Rethinking atrous convolution for semantic image segmentation. arXiv preprint arXiv:1706.05587 (2017)"},{"key":"4344_CR44","unstructured":"Gevorgyan, Z.: Siou loss: more powerful learning for bounding box regression. arXiv preprint arXiv:2205.12740 (2022)"},{"key":"4344_CR45","doi-asserted-by":"crossref","unstructured":"Fan, H., Lin, L., Yang, F., Chu, P., Deng, G., Yu, S., Bai, H., Xu, Y., Liao, C., Ling, H.: Lasot: a high-quality benchmark for large-scale single object tracking. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 5374\u20135383 (2019)","DOI":"10.1109\/CVPR.2019.00552"},{"issue":"5","key":"4344_CR46","doi-asserted-by":"publisher","first-page":"1562","DOI":"10.1109\/TPAMI.2019.2957464","volume":"43","author":"L Huang","year":"2019","unstructured":"Huang, L., Zhao, X., Huang, K.: Got-10k: a large high-diversity benchmark for generic object tracking in the wild. IEEE Trans. Pattern Anal. Mach. Intell. 43(5), 1562\u20131577 (2019)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"4344_CR47","doi-asserted-by":"crossref","unstructured":"Muller, M., Bibi, A., Giancola, S., Alsubaihi, S., Ghanem, B.: Trackingnet: a large-scale dataset and benchmark for object tracking in the wild. In: Proceedings of the European Conference on Computer Vision (ECCV), pp. 300\u2013317 (2018)","DOI":"10.1007\/978-3-030-01246-5_19"},{"key":"4344_CR48","doi-asserted-by":"crossref","unstructured":"Lin, T.-Y., Maire, M., Belongie, S., Hays, J., Perona, P., Ramanan, D., Doll\u00e1r, P., Zitnick, C.L.: Microsoft coco: common objects in context. In: Computer vision\u2013ECCV 2014: 13th European Conference, Zurich, Switzerland, September 6\u201312, 2014, Proceedings, Part V 13, pp. 740\u2013755. Springer (2014)","DOI":"10.1007\/978-3-319-10602-1_48"},{"key":"4344_CR49","unstructured":"Loshchilov, I., Hutter, F.: Decoupled weight decay regularization. arXiv preprint arXiv:1711.05101 (2017)"},{"key":"4344_CR50","doi-asserted-by":"crossref","unstructured":"Yang, D., He, J., Ma, Y., Yu, Q., Zhang, T.: Foreground-background distribution modeling transformer for visual object tracking. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 10117\u201310127 (2023)","DOI":"10.1109\/ICCV51070.2023.00928"},{"key":"4344_CR51","doi-asserted-by":"crossref","unstructured":"Wu, Q., Yang, T., Liu, Z., Wu, B., Shan, Y., Chan, A.B.: Dropmae: Masked autoencoders with spatial-attention dropout for tracking tasks. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp. 14561\u201314571 (2023)","DOI":"10.1109\/CVPR52729.2023.01399"},{"key":"4344_CR52","doi-asserted-by":"crossref","unstructured":"Xie, F., Chu, L., Li, J., Lu, Y., Ma, C.: Videotrack: learning to track objects via video transformer. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 22826\u201322835 (2023)","DOI":"10.1109\/CVPR52729.2023.02186"},{"key":"4344_CR53","doi-asserted-by":"crossref","unstructured":"Gao, S., Zhou, C., Zhang, J.: Generalized relation modeling for transformer tracking. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 18686\u201318695 (2023)","DOI":"10.1109\/CVPR52729.2023.01792"},{"key":"4344_CR54","doi-asserted-by":"crossref","unstructured":"Zheng, Y., Zhong, B., Liang, Q., Mo, Z., Zhang, S., Li, X.: Odtrack: online dense temporal token learning for visual tracking. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol. 38, pp. 7588\u20137596 (2024)","DOI":"10.1609\/aaai.v38i7.28591"},{"key":"4344_CR55","doi-asserted-by":"crossref","unstructured":"Xie, J., Zhong, B., Mo, Z., Zhang, S., Shi, L., Song, S., Ji, R.: Autoregressive queries for adaptive tracking with spatio-temporal transformers. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 19300\u201319309 (2024)","DOI":"10.1109\/CVPR52733.2024.01826"},{"key":"4344_CR56","doi-asserted-by":"crossref","unstructured":"Cai, W., Liu, Q., Wang, Y.: Hiptrack: visual tracking with historical prompts. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 19258\u201319267 (2024)","DOI":"10.1109\/CVPR52733.2024.01822"},{"key":"4344_CR57","doi-asserted-by":"crossref","unstructured":"Zhu, J., Tang, H., Chen, X., Wang, X., Wang, D., Lu, H.: Two-stream beats one-stream: asymmetric siamese network for efficient visual tracking. arXiv preprint arXiv:2503.00516 (2025)","DOI":"10.1609\/aaai.v39i10.33191"},{"key":"4344_CR58","doi-asserted-by":"crossref","unstructured":"Bertinetto, L., Valmadre, J., Henriques, J.F., Vedaldi, A., Torr, P.H.: Fully-convolutional siamese networks for object tracking. In: European Conference on Computer Vision, pp. 850\u2013865. Springer (2016)","DOI":"10.1007\/978-3-319-48881-3_56"},{"key":"4344_CR59","doi-asserted-by":"crossref","unstructured":"Bhat, G., Danelljan, M., Gool, L.V., Timofte, R.: Learning discriminative model prediction for tracking. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 6182\u20136191 (2019)","DOI":"10.1109\/ICCV.2019.00628"},{"key":"4344_CR60","first-page":"16743","volume":"35","author":"L Lin","year":"2022","unstructured":"Lin, L., Fan, H., Zhang, Z., Xu, Y., Ling, H.: Swintrack: a simple and strong baseline for transformer tracking. Adv. Neural. Inf. Process. Syst. 35, 16743\u201316754 (2022)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"4344_CR61","doi-asserted-by":"crossref","unstructured":"Fan, H., Bai, H., Lin, L., Yang, F., Deng, G., Yu, S., Harshit, Huang, M., Liu, J., Liu, J., et al.: Lasot: a high-quality large-scale single object tracking benchmark. Int. J. Comput. Vis. 129, 439\u2013461 (2021)","DOI":"10.1007\/s11263-020-01387-y"},{"key":"4344_CR62","doi-asserted-by":"crossref","unstructured":"Kiani\u00a0Galoogahi, H., Fagg, A., Huang, C., Ramanan, D., Lucey, S.: Need for speed: a benchmark for higher frame rate object tracking. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 1125\u20131134 (2017)","DOI":"10.1109\/ICCV.2017.128"},{"key":"4344_CR63","doi-asserted-by":"crossref","unstructured":"Mueller, M., Smith, N., Ghanem, B.: A benchmark and simulator for uav tracking. In: European Conference on Computer Vision, pp. 445\u2013461. Springer (2016)","DOI":"10.1007\/978-3-319-46448-0_27"},{"key":"4344_CR64","unstructured":"Kristan, M., Leonardis, A., Matas, J., Felsberg, M., Pflugfelder, R., K\u00e4m\u00e4r\u00e4inen, J.-K., Chang, H.J., Danelljan, M., Zajc, L.\u010c., Luke\u017ei\u010d, A., et al.: The tenth visual object tracking vot2022 challenge results. In: European Conference on Computer Vision, pp. 431\u2013460. Springer (2022)"}],"container-title":["The Visual Computer"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00371-025-04344-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00371-025-04344-3","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00371-025-04344-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,4,29]],"date-time":"2026-04-29T13:17:27Z","timestamp":1777468647000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00371-025-04344-3"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,4]]},"references-count":64,"journal-issue":{"issue":"6","published-print":{"date-parts":[[2026,4]]}},"alternative-id":["4344"],"URL":"https:\/\/doi.org\/10.1007\/s00371-025-04344-3","relation":{},"ISSN":["0178-2789","1432-2315"],"issn-type":[{"value":"0178-2789","type":"print"},{"value":"1432-2315","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026,4]]},"assertion":[{"value":"30 June 2025","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"27 December 2025","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"13 April 2026","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare no Conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}],"article-number":"250"}}