{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,24]],"date-time":"2026-03-24T20:29:13Z","timestamp":1774384153400,"version":"3.50.1"},"reference-count":48,"publisher":"Springer Science and Business Media LLC","issue":"9","license":[{"start":{"date-parts":[[2025,5,25]],"date-time":"2025-05-25T00:00:00Z","timestamp":1748131200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,5,25]],"date-time":"2025-05-25T00:00:00Z","timestamp":1748131200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"name":"Nature Science Foundation of Hubei Province","award":["No.2023AFB356"],"award-info":[{"award-number":["No.2023AFB356"]}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"crossref","award":["No.62302349"],"award-info":[{"award-number":["No.62302349"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"crossref"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Vis Comput"],"published-print":{"date-parts":[[2025,7]]},"DOI":"10.1007\/s00371-025-03964-z","type":"journal-article","created":{"date-parts":[[2025,5,25]],"date-time":"2025-05-25T02:48:55Z","timestamp":1748141335000},"page":"6631-6644","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["ViT-BF: vision transformer with border-aware features for visual tracking"],"prefix":"10.1007","volume":"41","author":[{"given":"Kai","family":"Yang","sequence":"first","affiliation":[]},{"given":"Wenhao","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"Ping","family":"Li","sequence":"additional","affiliation":[]},{"given":"Jinxing","family":"Liang","sequence":"additional","affiliation":[]},{"given":"Tao","family":"Peng","sequence":"additional","affiliation":[]},{"given":"Jia","family":"Chen","sequence":"additional","affiliation":[]},{"given":"Li","family":"Li","sequence":"additional","affiliation":[]},{"given":"Xinrong","family":"Hu","sequence":"additional","affiliation":[]},{"given":"Junping","family":"Liu","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,5,25]]},"reference":[{"key":"3964_CR1","doi-asserted-by":"crossref","unstructured":"Qiu, H., Ma, Y., Li, Z., Liu, S., Sun, J.: Borderdet: Border feature for dense object detection. In: Proceedings of the European Conference on Computer Vision, pages 549\u2013564. Springer, (2020)","DOI":"10.1007\/978-3-030-58452-8_32"},{"key":"3964_CR2","doi-asserted-by":"publisher","first-page":"8553","DOI":"10.1109\/TIP.2021.3117077","volume":"30","author":"Z Zhang","year":"2021","unstructured":"Zhang, Z., Liu, Y., Li, B., Weiming, H., Peng, H.: Toward accurate pixelwise object tracking via attention retrieval. IEEE Trans. Image Process. 30, 8553\u20138566 (2021)","journal-title":"IEEE Trans. Image Process."},{"key":"3964_CR3","doi-asserted-by":"publisher","first-page":"4814","DOI":"10.1109\/TIP.2021.3076272","volume":"30","author":"S Yao","year":"2021","unstructured":"Yao, S., Han, X., Zhang, H., Wang, X., Cao, X.: Learning deep lucas-kanade siamese network for visual tracking. IEEE Trans. Image Process. 30, 4814\u20134827 (2021)","journal-title":"IEEE Trans. Image Process."},{"issue":"5","key":"3964_CR4","doi-asserted-by":"publisher","first-page":"2976","DOI":"10.1109\/TCSVT.2021.3094645","volume":"32","author":"Z Zhou","year":"2021","unstructured":"Zhou, Z., Li, X., Zhang, T., Wang, H., He, Z.: Object tracking via spatial-temporal memory network. IEEE Trans. Circuits Syst. Video Technol. 32(5), 2976\u20132989 (2021)","journal-title":"IEEE Trans. Circuits Syst. Video Technol."},{"key":"3964_CR5","doi-asserted-by":"publisher","first-page":"2656","DOI":"10.1109\/TIP.2021.3049970","volume":"30","author":"H Tan","year":"2021","unstructured":"Tan, H., Zhang, X., Zhang, Z., Lan, L., Zhang, W., Luo, Z.: Nocal-siam: Refining visual features and response with advanced non-local blocks for real-time siamese tracking. IEEE Trans. Image Process. 30, 2656\u20132668 (2021)","journal-title":"IEEE Trans. Image Process."},{"key":"3964_CR6","doi-asserted-by":"publisher","first-page":"1956","DOI":"10.1109\/TMM.2021.3074239","volume":"24","author":"K Yang","year":"2021","unstructured":"Yang, K., He, Z., Pei, W., Zhou, Z., Li, X., Yuan, D., Zhang, H.: Siamcorners: Siamese corner networks for visual tracking. IEEE Trans. Multimedia 24, 1956\u20131967 (2021)","journal-title":"IEEE Trans. Multimedia"},{"key":"3964_CR7","doi-asserted-by":"crossref","unstructured":"Ma, Z., Wang, L., Zhang, H., Lu, W., Yin, J.: Rpt: Learning point set representation for siamese visual tracking. In: Proceedings of the European Conference on Computer Vision, pages 653\u2013665. Springer, (2020)","DOI":"10.1007\/978-3-030-68238-5_43"},{"key":"3964_CR8","unstructured":"Alexey, D.: An image is worth 16x16 words: Transformers for image recognition at scale. arXiv preprint arXiv: 2010.11929, (2020)"},{"key":"3964_CR9","doi-asserted-by":"crossref","unstructured":"Bhat, G., Danelljan, M., Gool Luc,\u00a0V., Timofte, R.: Learning discriminative model prediction for tracking. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pages 6182\u20136191, (2019)","DOI":"10.1109\/ICCV.2019.00628"},{"key":"3964_CR10","doi-asserted-by":"crossref","unstructured":"Chen, X., Yan, B., Zhu, J., Wang, D., Yang, X., Lu, H.: Transformer tracking. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pages 8126\u20138135, (2021)","DOI":"10.1109\/CVPR46437.2021.00803"},{"key":"3964_CR11","doi-asserted-by":"crossref","unstructured":"Danelljan, M., Bhat, G., Khan Fahad,\u00a0S., Felsberg. M.: Atom: Accurate tracking by overlap maximization. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pages 4660\u20134669, (2019)","DOI":"10.1109\/CVPR.2019.00479"},{"key":"3964_CR12","doi-asserted-by":"crossref","unstructured":"Li, B., Wu, W., Wang, Q., Zhang, F., Xing, J., Yan, Junjie. Siamrpn++: Evolution of siamese visual tracking with very deep networks. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pages 4282\u20134291, (2019)","DOI":"10.1109\/CVPR.2019.00441"},{"key":"3964_CR13","doi-asserted-by":"crossref","unstructured":"Voigtlaender, P., Luiten, J., Torr Philip, H.S., Leibe Bastian. Siam r-cnn: Visual tracking by re-detection. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pages 6578\u20136588, (2020)","DOI":"10.1109\/CVPR42600.2020.00661"},{"issue":"3","key":"3964_CR14","doi-asserted-by":"publisher","first-page":"1165","DOI":"10.1109\/TCE.2011.6018870","volume":"57","author":"JS Kim","year":"2011","unstructured":"Kim, J.S., Yeom, D.H., Joo, Y.H.: Fast and robust algorithm of tracking multiple moving objects for intelligent video surveillance systems. IEEE Trans. Consumer Electron. 57(3), 1165\u20131170 (2011)","journal-title":"IEEE Trans. Consumer Electron."},{"issue":"2","key":"3964_CR15","doi-asserted-by":"publisher","first-page":"255","DOI":"10.1109\/TCE.2012.6227420","volume":"58","author":"J Han","year":"2012","unstructured":"Han, J., Pauwels, E.J., de Zeeuw, P.M., de With, P.H.N.: Employing a rgb-d sensor for real-time tracking of humans across multiple re-entries in a smart environment. IEEE Trans. Consumer Electron. 58(2), 255\u2013263 (2012)","journal-title":"IEEE Trans. Consumer Electron."},{"key":"3964_CR16","unstructured":"Dosovitskiy Alexey. An image is worth 16x16 words: Transformers for image recognition at scale. arXiv preprint arXiv:2010.11929, (2020)"},{"key":"3964_CR17","doi-asserted-by":"crossref","unstructured":"Cui, Y., Jiang, C., Wang, L., Wu, G.: Mixformer: End-to-end tracking with iterative mixed attention. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pages 13608\u201313618, (2022)","DOI":"10.1109\/CVPR52688.2022.01324"},{"key":"3964_CR18","doi-asserted-by":"crossref","unstructured":"Chen, X., Peng, H., Wang, D., Lu, H., Hu, Han.: Seqtrack: Sequence to sequence learning for visual object tracking. In: Proceedings of the Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pages 14572\u201314581, (2023)","DOI":"10.1109\/CVPR52729.2023.01400"},{"key":"3964_CR19","doi-asserted-by":"crossref","unstructured":"Wei, X., Bai, Y., Zheng, Y., Shi, D., Gong, Y.: Autoregressive visual tracking. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pages 9697\u20139706, (2023)","DOI":"10.1109\/CVPR52729.2023.00935"},{"key":"3964_CR20","doi-asserted-by":"crossref","unstructured":"Wu, Q., Yang, T., Liu, Z., Wu, B., Shan, Y., Chan Antoni,\u00a0B.: Dropmae: Masked autoencoders with spatial-attention dropout for tracking tasks. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pages 14561\u201314571, (2023)","DOI":"10.1109\/CVPR52729.2023.01399"},{"key":"3964_CR21","doi-asserted-by":"crossref","unstructured":"Xie, F., Wang, Z., Ma, C.: Diffusiontrack: Point set diffusion model for visual object tracking. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pages 19113\u201319124, (2024)","DOI":"10.1109\/CVPR52733.2024.01808"},{"key":"3964_CR22","doi-asserted-by":"crossref","unstructured":"Wang, N., Zhou, W., Wang, J., Li, H.: Transformer meets tracker: Exploiting temporal context for robust visual tracking. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pages 1571\u20131580, (2021)","DOI":"10.1109\/CVPR46437.2021.00162"},{"key":"3964_CR23","doi-asserted-by":"crossref","unstructured":"Wang, J., Zhang, W., Cao, Y., Chen, K., Pang, J., Gong, T., Shi, J., Loy Chen,\u00a0C., Lin, Dahua.: Side-aware boundary localization for more precise object detection. In: Proceedings of the European Conference on Computer Vision, pages 403\u2013419. Springer, (2020)","DOI":"10.1007\/978-3-030-58548-8_24"},{"key":"3964_CR24","doi-asserted-by":"crossref","unstructured":"Gidaris, S., Komodakis, N.. Locnet: Improving localization accuracy for object detection. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pages 789\u2013798, (2016)","DOI":"10.1109\/CVPR.2016.92"},{"key":"3964_CR25","doi-asserted-by":"crossref","unstructured":"Wang, J., Chen, K., Yang, S., Loy Chen,\u00a0C., Lin, D.: Region proposal by guided anchoring. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pages 2965\u20132974, (2019)","DOI":"10.1109\/CVPR.2019.00308"},{"key":"3964_CR26","unstructured":"Vu, T., Jang, H., Pham Trung,\u00a0X., Yoo, C.: Cascade rpn: Delving into high-quality region proposal network with adaptive convolution. Advances in Neural Information Processing Systems, 32, (2019)"},{"key":"3964_CR27","doi-asserted-by":"crossref","unstructured":"Yang, Z., Liu, S., Hu, H., Wang, L., Lin, S.: Reppoints: Point set representation for object detection. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pages 9657\u20139666, (2019)","DOI":"10.1109\/ICCV.2019.00975"},{"key":"3964_CR28","doi-asserted-by":"crossref","unstructured":"Law, H., Deng, J.: Cornernet: Detecting objects as paired keypoints. In: Proceedings of the European Conference on Computer Vision, pages 734\u2013750, (2018)","DOI":"10.1007\/978-3-030-01264-9_45"},{"key":"3964_CR29","doi-asserted-by":"crossref","unstructured":"Yan, B., Peng, H., Fu, J., Wang, D., Lu, H.: Learning spatio-temporal transformer for visual tracking. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pages 10448\u201310457, (2021)","DOI":"10.1109\/ICCV48922.2021.01028"},{"key":"3964_CR30","unstructured":"Faster RCNN. Towards real-time object detection with region proposal networks. Advances in Neural Information Processing Systems, 9199(10.5555):2969239\u20132969250, (2015)"},{"key":"3964_CR31","doi-asserted-by":"crossref","unstructured":"Fan, H., Lin, L., Yang, F., Chu, P., Deng, G., Yu, S., Bai, H., Xu, Y., Liao, C., Ling, H.: Lasot: A high-quality benchmark for large-scale single object tracking. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pages 5374\u20135383, (2019)","DOI":"10.1109\/CVPR.2019.00552"},{"issue":"5","key":"3964_CR32","doi-asserted-by":"publisher","first-page":"1562","DOI":"10.1109\/TPAMI.2019.2957464","volume":"43","author":"L Huang","year":"2019","unstructured":"Huang, L., Zhao, X., Huang, K.: Got-10k: A large high-diversity benchmark for generic object tracking in the wild. IEEE Trans. Pattern Analy. Machine Intell. 43(5), 1562\u20131577 (2019)","journal-title":"IEEE Trans. Pattern Analy. Machine Intell."},{"key":"3964_CR33","doi-asserted-by":"crossref","unstructured":"Muller, M., Bibi, A., Giancola, S., Alsubaihi, S., Ghanem, B.: Trackingnet: A large-scale dataset and benchmark for object tracking in the wild. In: Proceedings of the European Conference on Computer Vision, pages 300\u2013317, (2018)","DOI":"10.1007\/978-3-030-01246-5_19"},{"key":"3964_CR34","doi-asserted-by":"crossref","unstructured":"Lin, T.Y., Maire, M., Belongie, S., Hays, J., Perona, P., Ramanan, D., Doll\u00e1r Piotr, Zitnick C\u00a0Lawrence.: Microsoft coco: Common objects in context. In: Proceedings of the European Conference on Computer Vision, pages 740\u2013755. Springer, (2014)","DOI":"10.1007\/978-3-319-10602-1_48"},{"key":"3964_CR35","unstructured":"Loshchilov, I., Hutter, F.: Decoupled weight decay regularization. arXiv preprint arXiv:1711.05101, (2017)"},{"key":"3964_CR36","unstructured":"Fisher Ronald,\u00a0A.: Statistical methods for research workers. (1934)"},{"key":"3964_CR37","unstructured":"Kingma Diederik\u00a0P, Ba Jimmy. Adam: A method for stochastic optimization. arXiv preprint arXiv:1412.6980, (2014)"},{"key":"3964_CR38","doi-asserted-by":"crossref","unstructured":"Hong Lingyi, Yan Shilin, Zhang Renrui, Li Wanyun, Zhou Xinyu, Guo Pinxue, Jiang Kaixun, Chen Yiting, Li Jinglun, Chen Zhaoyu, et\u00a0al. Onetracker: Unifying visual object tracking with foundation models and efficient tuning. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pages 19079\u201319091, (2024)","DOI":"10.1109\/CVPR52733.2024.01805"},{"key":"3964_CR39","doi-asserted-by":"crossref","unstructured":"Cai, Y., Liu, J., Tang, J., Wu, G.: Robust object modeling for visual tracking. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pages 9589\u20139600, (2023)","DOI":"10.1109\/ICCV51070.2023.00879"},{"key":"3964_CR40","unstructured":"Wang, J., Chen, D., Wu, Z., Luo, C., Dai, X., Yuan, L., Jiang, Y.G.: Omnitracker: Unifying object tracking by tracking-with-detection. arXiv preprint arXiv:2303.12079, (2023)"},{"key":"3964_CR41","doi-asserted-by":"crossref","unstructured":"Zhao, H., Wang, D., Lu, H.: Representation learning for visual object tracking by masked appearance transfer. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pages 18696\u201318705, (2023)","DOI":"10.1109\/CVPR52729.2023.01793"},{"key":"3964_CR42","doi-asserted-by":"crossref","unstructured":"Chen, B., Li, P., Bai, L., Qiao, L., Shen, Q., Li, B., Gan, W., Wu, W., Ouyang. W.: Backbone is all your need: A simplified architecture for visual object tracking. In: Proceedings of the European Conference on Computer Vision, pages 375\u2013392. Springer, (2022)","DOI":"10.1007\/978-3-031-20047-2_22"},{"key":"3964_CR43","first-page":"16743","volume":"35","author":"L Lin","year":"2022","unstructured":"Lin, L., Fan, H., Zhang, Z., Yong, X., Ling, H.: Swintrack: A simple and strong baseline for transformer tracking. Adv. Neural Inform. Process. Syst. 35, 16743\u201316754 (2022)","journal-title":"Adv. Neural Inform. Process. Syst."},{"key":"3964_CR44","unstructured":"An, X., Deng, J., Yang, K., Li, J., Feng, Z., Guo, J., Yang, J., Liu, T.: Unicom: Universal and compact representation learning for image retrieval. arXiv preprint arXiv:2304.05884, (2023)"},{"key":"3964_CR45","doi-asserted-by":"crossref","unstructured":"Mayer, C., Danelljan, M., Bhat, G., Paul, M., Paudel Danda,\u00a0P., Yu, F., Van\u00a0Gool, L.: Transforming model prediction for tracking. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pages 8731\u20138740, (2022)","DOI":"10.1109\/CVPR52688.2022.00853"},{"key":"3964_CR46","doi-asserted-by":"crossref","unstructured":"Xie, F., Wang, C., Wang, G., Cao, Y., Yang, W., Zeng, W.: Correlation-aware deep tracking. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pages 8751\u20138760, (2022)","DOI":"10.1109\/CVPR52688.2022.00855"},{"key":"3964_CR47","unstructured":"UT\u00a0Benchmark.: A benchmark and simulator for uav tracking. In Proceedings of the European Conference on Computer Vision, (2016)"},{"key":"3964_CR48","doi-asserted-by":"crossref","unstructured":"Zhou, L., Zhou, Z., Mao, K., He, Z.: Joint visual grounding and tracking with natural language specification. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pages 23151\u201323160, (2023)","DOI":"10.1109\/CVPR52729.2023.02217"}],"container-title":["The Visual Computer"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00371-025-03964-z.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00371-025-03964-z\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00371-025-03964-z.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,7,7]],"date-time":"2025-07-07T11:04:16Z","timestamp":1751886256000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00371-025-03964-z"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,5,25]]},"references-count":48,"journal-issue":{"issue":"9","published-print":{"date-parts":[[2025,7]]}},"alternative-id":["3964"],"URL":"https:\/\/doi.org\/10.1007\/s00371-025-03964-z","relation":{},"ISSN":["0178-2789","1432-2315"],"issn-type":[{"value":"0178-2789","type":"print"},{"value":"1432-2315","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,5,25]]},"assertion":[{"value":"24 April 2025","order":1,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"25 May 2025","order":2,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare that they have no known competing financial interests or personal relationships that could have appeared to influence the work reported in this paper.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflicts of Interest"}}]}}