{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,7]],"date-time":"2026-04-07T09:04:56Z","timestamp":1775552696656,"version":"3.50.1"},"reference-count":67,"publisher":"Springer Science and Business Media LLC","issue":"13","license":[{"start":{"date-parts":[[2024,2,17]],"date-time":"2024-02-17T00:00:00Z","timestamp":1708128000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,2,17]],"date-time":"2024-02-17T00:00:00Z","timestamp":1708128000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"name":"Scientific and Technological Innovation Leading Talent Project","award":["2022TSYCLJ0037"],"award-info":[{"award-number":["2022TSYCLJ0037"]}]},{"name":"the Scientific and Technological Innovation 2030 Major Project","award":["2022ZD0115800"],"award-info":[{"award-number":["2022ZD0115800"]}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["U1903213"],"award-info":[{"award-number":["U1903213"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Neural Comput &amp; Applic"],"published-print":{"date-parts":[[2024,5]]},"DOI":"10.1007\/s00521-024-09444-0","type":"journal-article","created":{"date-parts":[[2024,2,17]],"date-time":"2024-02-17T13:01:55Z","timestamp":1708174915000},"page":"7043-7056","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":6,"title":["ATFTrans: attention-weighted token fusion transformer for robust and efficient object tracking"],"prefix":"10.1007","volume":"36","author":[{"given":"Liang","family":"Xu","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0210-2273","authenticated-orcid":false,"given":"Liejun","family":"Wang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zhiqing","family":"Guo","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2024,2,17]]},"reference":[{"key":"9444_CR1","doi-asserted-by":"crossref","unstructured":"Bertinetto L, Valmadre J, Henriques JF, Vedaldi A, Torr PH (2016) Fully-convolutional siamese networks for object tracking. In: Computer Vision\u2013ECCV 2016 Workshops: Amsterdam, The Netherlands, October 8-10 and 15-16, 2016, Proceedings, Part II 14, pp 850\u2013865 Springer","DOI":"10.1007\/978-3-319-48881-3_56"},{"key":"9444_CR2","doi-asserted-by":"publisher","first-page":"8173","DOI":"10.1007\/s00521-022-06911-4","volume":"34","author":"H Huang","year":"2022","unstructured":"Huang H, Liu G, Zhang Y, Xiong R, Zhang S (2022) Ensemble siamese networks for object tracking. Neural Comput Appl 34:8173\u20138191. https:\/\/doi.org\/10.1007\/s00521-022-06911-4","journal-title":"Neural Comput Appl"},{"key":"9444_CR3","doi-asserted-by":"publisher","first-page":"3745","DOI":"10.1007\/s00521-021-06638-8","volume":"34","author":"X Ke","year":"2022","unstructured":"Ke X, Li Y, Guo W, Huang Y (2022) Learning deep convolutional descriptor aggregation for efficient visual tracking. Neural Comput Appl 34:3745\u20133765. https:\/\/doi.org\/10.1007\/s00521-021-06638-8","journal-title":"Neural Comput Appl"},{"key":"9444_CR4","doi-asserted-by":"publisher","first-page":"12611","DOI":"10.1007\/s00521-023-08422-2","volume":"35","author":"F Meng","year":"2023","unstructured":"Meng F, Gong X, Zhang Y (2023) Rhl-track: visual object tracking based on recurrent historical localization. Neural Comput Appl 35:12611\u201312625. https:\/\/doi.org\/10.1007\/s00521-023-08422-2","journal-title":"Neural Comput Appl"},{"key":"9444_CR5","doi-asserted-by":"crossref","unstructured":"Wang Q, Zhang L, Bertinetto L, Hu W, Torr PH (2019) Fast online object tracking and segmentation: A unifying approach. In: 2019 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp 1328\u20131338","DOI":"10.1109\/CVPR.2019.00142"},{"key":"9444_CR6","doi-asserted-by":"crossref","unstructured":"Voigtlaender P, Luiten J, Torr PH, Leibe B (2020) Siam r-cnn: Visual tracking by re-detection. In: 2021 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp 6578\u20136588","DOI":"10.1109\/CVPR42600.2020.00661"},{"key":"9444_CR7","doi-asserted-by":"crossref","unstructured":"Han W, Dong X, Khan FS, Shao L, Shen J (2021) Learning to fuse asymmetric feature maps in siamese trackers. In: 2021 IEEE\/CVF Conference on Computer Vision and Pattern Recognition ((CVPR)), pp 16570\u201316580","DOI":"10.1109\/CVPR46437.2021.01630"},{"key":"9444_CR8","doi-asserted-by":"crossref","unstructured":"Bao J, Chen K, Sun X, Zhao L, Diao W, Yan M (2023) Siamthn: Siamese target highlight network for visual tracking. IEEE Trans Circ Syst Video Technol","DOI":"10.1109\/TCSVT.2023.3266485"},{"key":"9444_CR9","doi-asserted-by":"publisher","first-page":"976","DOI":"10.1109\/TIP.2020.3037518","volume":"30","author":"D Yuan","year":"2021","unstructured":"Yuan D, Chang X, Huang P-Y, Liu Q, He Z (2021) Self-supervised deep correlation tracking. IEEE Trans Image Proc 30:976\u2013985. https:\/\/doi.org\/10.1109\/TIP.2020.3037518","journal-title":"IEEE Trans Image Proc"},{"key":"9444_CR10","doi-asserted-by":"publisher","first-page":"1956","DOI":"10.1109\/TMM.2021.3074239","volume":"24","author":"K Yang","year":"2022","unstructured":"Yang K, He Z, Pei W, Zhou Z, Li X, Yuan D, Zhang H (2022) Siamcorners: siamese corner networks for visual tracking. IEEE Trans Multimed 24:1956\u20131967. https:\/\/doi.org\/10.1109\/TMM.2021.3074239","journal-title":"IEEE Trans Multimed"},{"key":"9444_CR11","doi-asserted-by":"publisher","unstructured":"Xie F, Wang C, Wang G, Yang W, Zeng W (2021) Learning tracking representations via dual-branch fully transformer networks. In: 2021 IEEE\/CVF International Conference on Computer Vision Workshops (ICCVW), pp 2688\u20132697 https:\/\/doi.org\/10.1109\/ICCVW54120.2021.00303","DOI":"10.1109\/ICCVW54120.2021.00303"},{"key":"9444_CR12","doi-asserted-by":"crossref","unstructured":"Yu B, Tang M, Zheng L, Zhu G, Wang J, Feng H, Feng X, Lu H (2021) High-performance discriminative tracking with transformers. In: 2021 IEEE\/CVF International Conference on Computer Vision (CVPR), pp 9856\u20139865","DOI":"10.1109\/ICCV48922.2021.00971"},{"key":"9444_CR13","unstructured":"Zhao M, Okada K, Inaba M (2021) Trtr: Visual tracking with transformer. arXiv preprint arXiv:2105.03817"},{"key":"9444_CR14","doi-asserted-by":"publisher","unstructured":"Fu Z, Liu Q, Cai W, Wang Y (2022) Sparsett: Visual tracking with sparse transformers pp 905\u2013912 https:\/\/doi.org\/10.24963\/ijcai.2022\/127","DOI":"10.24963\/ijcai.2022\/127"},{"key":"9444_CR15","doi-asserted-by":"crossref","unstructured":"Cao Z, Huang Z, Pan L, Zhang S, Liu Z, Fu C (2022) Tctrack: temporal contexts for aerial tracking. In: 2011 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp 14798\u201314808","DOI":"10.1109\/CVPR52688.2022.01438"},{"key":"9444_CR16","doi-asserted-by":"publisher","unstructured":"Zhou X, Yin T, Koltun V, Kr\u00e4henb\u00fchl P (2022) Global tracking transformers. In: 2022 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp 8761\u20138770 https:\/\/doi.org\/10.1109\/CVPR52688.2022.00857","DOI":"10.1109\/CVPR52688.2022.00857"},{"key":"9444_CR17","doi-asserted-by":"crossref","unstructured":"Song Z, Luo R, Yu J, Chen Y-PP, Yang W (2023) Compact transformer tracker with correlative masked modeling. arXiv preprint arXiv:2301.10938","DOI":"10.1609\/aaai.v37i2.25327"},{"key":"9444_CR18","doi-asserted-by":"crossref","unstructured":"Blatter P, Kanakis M, Danelljan M, Van\u00a0Gool L (2023) Efficient visual tracking with exemplar transformers. In: 2022 IEEE\/CVF Winter Conference on Applications of Computer Vision (CVPR), pp 1571\u20131581","DOI":"10.1109\/WACV56688.2023.00162"},{"key":"9444_CR19","doi-asserted-by":"crossref","unstructured":"Ma F, Shou MZ, Zhu L, Fan H, Xu Y, Yang Y, Yan Z (2022) Unified transformer tracker for object tracking. In: 2022 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp 8781\u20138790","DOI":"10.1109\/CVPR52688.2022.00858"},{"key":"9444_CR20","doi-asserted-by":"crossref","unstructured":"Tang W, Kang H, Zhang H, Yu P, Arnold CW, Zhang R (2022) Transformer lesion tracker. In: Medical Image Computing and Computer Assisted Intervention\u2013MICCAI 2022: 25th International Conference, Singapore, September 18\u201322, 2022, Proceedings, Part VI, pp 196\u2013206. Springer","DOI":"10.1007\/978-3-031-16446-0_19"},{"key":"9444_CR21","unstructured":"Vaswani A, Shazeer N, Parmar N, Uszkoreit J, Jones L, Gomez AN, Kaiser \u0141, Polosukhin I (2017) Attention is all you need. Adv Neural Inf Proc Syst 30"},{"key":"9444_CR22","doi-asserted-by":"publisher","unstructured":"Wang N, Zhou W, Wang J, Li H (2021) Transformer meets tracker: Exploiting temporal context for robust visual tracking. In: 2021 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp 1571\u20131580 https:\/\/doi.org\/10.1109\/CVPR46437.2021.00162","DOI":"10.1109\/CVPR46437.2021.00162"},{"key":"9444_CR23","doi-asserted-by":"crossref","unstructured":"Chen X, Yan B, Zhu J, Wang D, Yang X, Lu H (2021) Transformer tracking. In: 2021 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp 8126\u20138135","DOI":"10.1109\/CVPR46437.2021.00803"},{"key":"9444_CR24","unstructured":"Dosovitskiy A, Beyer L, Kolesnikov A, Weissenborn D, Zhai X, Unterthiner T, Dehghani M, Minderer M, Heigold G, Gelly S, et al (2020) An image is worth 16x16 words: Transformers for image recognition at scale. arXiv preprint arXiv:2010.11929"},{"key":"9444_CR25","doi-asserted-by":"publisher","unstructured":"Kim S, Shen S, Thorsley D, Gholami A, Kwon W, Hassoun J, Keutzer K (2022) Learned token pruning for transformers. In: Proceedings of the 28th ACM SIGKDD Conference on Knowledge Discovery and Data Mining, pp 784\u2013794 https:\/\/doi.org\/10.1145\/3534678.3539260","DOI":"10.1145\/3534678.3539260"},{"key":"9444_CR26","doi-asserted-by":"crossref","unstructured":"Yin H, Vahdat A, Alvarez JM, Mallya A, Kautz J, Molchanov P (2022) A-vit: Adaptive tokens for efficient vision transformer. In: 2022 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp 10799\u201310808","DOI":"10.1109\/CVPR52688.2022.01054"},{"key":"9444_CR27","doi-asserted-by":"publisher","unstructured":"Fayyaz M, Kouhpayegani SA, Jafari FR, Sommerlade E, Joze HRV, Pirsiavash H, Gall J (2021) Ats: Adaptive token sampling for efficient vision transformers. arXiv preprint arXiv:2111.15667https:\/\/doi.org\/10.1109\/CVPR52688.2022.01054","DOI":"10.1109\/CVPR52688.2022.01054"},{"issue":"7","key":"9444_CR28","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1007\/s11432-022-3646-6","volume":"66","author":"H Yu","year":"2023","unstructured":"Yu H, Wu J (2023) A unified pruning framework for vision transformers. Sci China Inf Sci 66(7):1\u20132","journal-title":"Sci China Inf Sci"},{"key":"9444_CR29","doi-asserted-by":"publisher","unstructured":"Song Z, Xu Y, He Z, Jiang L, Jing N, Liang X (2022) Cp-vit: Cascade vision transformer pruning via progressive sparsity prediction. CoRR https:\/\/doi.org\/10.48550\/arXiv.2203.04570","DOI":"10.48550\/arXiv.2203.04570"},{"key":"9444_CR30","unstructured":"Liang Y, Ge C, Tong Z, Song Y, Wang J, Xie P (2022) Not all patches are what you need: Expediting vision transformers via token reorganizations. arXiv preprint arXiv:2202.07800"},{"key":"9444_CR31","doi-asserted-by":"crossref","unstructured":"Wei S, Ye T, Zhang S, Tang Y, Liang J (2023) Joint token pruning and squeezing towards more aggressive compression of vision transformers. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp 2092\u20132101","DOI":"10.1109\/CVPR52729.2023.00208"},{"key":"9444_CR32","unstructured":"Thangavel J, Kokul T, Ramanan A, Fernando S (2023) Transformers in single object tracking: an experimental survey. arXiv preprint arXiv:2302.11867"},{"key":"9444_CR33","doi-asserted-by":"crossref","unstructured":"Cui Y, Jiang C, Wang L, Wu G (2022) Mixformer: End-to-end tracking with iterative mixed attention. In: 2022 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp 13608\u201313618","DOI":"10.1109\/CVPR52688.2022.01324"},{"key":"9444_CR34","doi-asserted-by":"crossref","unstructured":"Chen B, Li P, Bai L, Qiao L, Shen Q, Li B, Gan W, Wu W, Ouyang W (2022) Backbone is all your need: a simplified architecture for visual object tracking. In: Computer Vision\u2013ECCV 2022: 17th European Conference, Tel Aviv, Israel, October 23\u201327, 2022, Proceedings, Part XXII, pp 375\u2013392 Springer","DOI":"10.1007\/978-3-031-20047-2_22"},{"key":"9444_CR35","unstructured":"Lin L, Fan H, Xu Y, Ling H (2022) Swintrack: A simple and strong baseline for transformer tracking. In: Advances in Neural Information Processing Systems, vol. 35, pp 16743\u201316754"},{"key":"9444_CR36","doi-asserted-by":"publisher","unstructured":"Liu Z, Lin Y, Cao Y, Hu H, Wei Y, Zhang Z, Lin S, Guo B (2021) Swin transformer: Hierarchical vision transformer using shifted windows. In: 2021 IEEE\/CVF International Conference on Computer Vision (ICCV), pp 9992\u201310002 https:\/\/doi.org\/10.1109\/ICCV48922.2021.00986","DOI":"10.1109\/ICCV48922.2021.00986"},{"key":"9444_CR37","doi-asserted-by":"publisher","unstructured":"Wu Q, Yang T, Liu Z, Wu B, Shan Y, Chan AB (2023) Dropmae: Masked autoencoders with spatial-attention dropout for tracking tasks. In: 2023 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp 14561\u201314571 https:\/\/doi.org\/10.1109\/CVPR52729.2023.01399","DOI":"10.1109\/CVPR52729.2023.01399"},{"key":"9444_CR38","doi-asserted-by":"crossref","unstructured":"Zhao H, Wang D, Lu H (2023) Representation learning for visual object tracking by masked appearance transfer. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp 18696\u201318705","DOI":"10.1109\/CVPR52729.2023.01793"},{"key":"9444_CR39","doi-asserted-by":"crossref","unstructured":"Ye B, Chang H, Ma B, Shan S, Chen X (2022) Joint feature learning and relation modeling for tracking: A one-stream framework. In: Computer Vision\u2013ECCV 2022: 17th European Conference, Tel Aviv, Israel, October 23\u201327, 2022, Proceedings, Part XXII, pp 341\u2013357 Springer","DOI":"10.1007\/978-3-031-20047-2_20"},{"key":"9444_CR40","doi-asserted-by":"publisher","unstructured":"Lan J-P, Cheng Z-Q, He J-Y, Li C, Luo B, Bao X, Xiang W, Geng Y, Xie X (2023) Procontext: Exploring progressive context transformer for tracking. In: ICASSP 2023-2023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp 1\u20135 https:\/\/doi.org\/10.1109\/ICASSP49357.2023.10094971 . IEEE","DOI":"10.1109\/ICASSP49357.2023.10094971"},{"key":"9444_CR41","doi-asserted-by":"crossref","unstructured":"Tang Y, Han K, Wang Y, Xu C, Guo J, Xu C, Tao D (2022) Patch slimming for efficient vision transformers. In: 2022 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp 12165\u201312174","DOI":"10.1109\/CVPR52688.2022.01185"},{"key":"9444_CR42","doi-asserted-by":"publisher","unstructured":"Yin H, Vahdat A, Alvarez JM, Mallya A, Kautz J, Molchanov P (2022) A-vit: Adaptive tokens for efficient vision transformer. In: 2022 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp 10799\u201310808 https:\/\/doi.org\/10.1109\/CVPR52688.2022.01054","DOI":"10.1109\/CVPR52688.2022.01054"},{"key":"9444_CR43","doi-asserted-by":"crossref","unstructured":"Fayyaz M, Koohpayegani SA, Jafari FR, Sengupta S, Joze HRV, Sommerlade E, Pirsiavash H, Gall J (2022) Adaptive token sampling for efficient vision transformers. In: Proceedings of the European Conference on Computer Vision (ECCV), pp 396\u2013414","DOI":"10.1007\/978-3-031-20083-0_24"},{"key":"9444_CR44","doi-asserted-by":"crossref","unstructured":"Xu Y, Zhang Z, Zhang M, Sheng K, Li K, Dong W, Zhang L, Xu C, Sun X (2022) Evo-vit: Slow-fast token evolution for dynamic vision transformer. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol. 36, pp 2964\u20132972","DOI":"10.1609\/aaai.v36i3.20202"},{"key":"9444_CR45","first-page":"13937","volume":"34","author":"Y Rao","year":"2021","unstructured":"Rao Y, Zhao W, Liu B, Lu J, Zhou J, Hsieh C-J (2021) Dynamicvit: Efficient vision transformers with dynamic token sparsification. Adv Neural Inf Proc Syst 34:13937\u201313949","journal-title":"Adv Neural Inf Proc Syst"},{"key":"9444_CR46","doi-asserted-by":"crossref","unstructured":"Meng L, Li H, Chen B-C, Lan S, Wu Z, Jiang Y-G, Lim S-N (2022) Adavit: Adaptive vision transformers for efficient image recognition. In: 2022 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp 12309\u201312318","DOI":"10.1109\/CVPR52688.2022.01199"},{"key":"9444_CR47","doi-asserted-by":"crossref","unstructured":"Kong Z, Dong P, Ma X, Meng X, Sun M, Niu W, Shen X, Yuan G, Ren B, Qin M, et al (2022) Spvit: Enabling faster vision transformers via latency-aware soft token pruning, pp 620\u2013640. Springer","DOI":"10.1007\/978-3-031-20083-0_37"},{"key":"9444_CR48","doi-asserted-by":"crossref","unstructured":"Fan H, Lin L, Yang F, Chu P, Deng G, Yu S, Bai H, Xu Y, Liao C, Ling H (2019) Lasot: A high-quality benchmark for large-scale single object tracking. In: 2019 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp 5374\u20135383","DOI":"10.1109\/CVPR.2019.00552"},{"key":"9444_CR49","doi-asserted-by":"crossref","unstructured":"Muller M, Bibi A, Giancola S, Alsubaihi S, Ghanem B (2018) Trackingnet: A large-scale dataset and benchmark for object tracking in the wild. In: Proceedings of the European Conference on Computer Vision (ECCV), pp 300\u2013317","DOI":"10.1007\/978-3-030-01246-5_19"},{"issue":"5","key":"9444_CR50","doi-asserted-by":"publisher","first-page":"1562","DOI":"10.1109\/TPAMI.2019.2957464","volume":"43","author":"L Huang","year":"2019","unstructured":"Huang L, Zhao X, Huang K (2019) Got-10k: A large high-diversity benchmark for generic object tracking in the wild. IEEE Trans Pattern Anal Mach Intell 43(5):1562\u20131577","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"9444_CR51","doi-asserted-by":"crossref","unstructured":"Lin T-Y, Maire M, Belongie S, Hays J, Perona P, Ramanan D, Doll\u00e1r P, Zitnick CL (2014) Microsoft coco: Common objects in context. In: Computer Vision\u2013ECCV 2014: 13th European Conference, Zurich, Switzerland, September 6-12, 2014, Proceedings, Part V 13, pp 740\u2013755 Springer","DOI":"10.1007\/978-3-319-10602-1_48"},{"key":"9444_CR52","doi-asserted-by":"publisher","unstructured":"He K, Chen X, Xie S, Li Y, Doll\u00e1r P, Girshick R (2022) Masked autoencoders are scalable vision learners. In: 2022 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp 15979\u201315988 https:\/\/doi.org\/10.1109\/CVPR52688.2022.01553","DOI":"10.1109\/CVPR52688.2022.01553"},{"key":"9444_CR53","unstructured":"Loshchilov I, Hutter F (2017) Decoupled weight decay regularization. In: International Conference on Learning Representations"},{"key":"9444_CR54","doi-asserted-by":"publisher","unstructured":"Kiani\u00a0Galoogahi H, Fagg A, Huang C, Ramanan D, Lucey S (2017) Need for speed: A benchmark for higher frame rate object tracking. In: 2017 IEEE International Conference on Computer Vision (ICCV), pp 1134\u20131143 https:\/\/doi.org\/10.1109\/ICCV.2017.128","DOI":"10.1109\/ICCV.2017.128"},{"key":"9444_CR55","doi-asserted-by":"crossref","unstructured":"Wang X, Shu X, Zhang Z, Jiang B, Wang Y, Tian Y, Wu F (2021) Towards more flexible and accurate object tracking with natural language: Algorithms and benchmark. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp 13763\u201313773","DOI":"10.1109\/CVPR46437.2021.01355"},{"key":"9444_CR56","doi-asserted-by":"crossref","unstructured":"Mueller M, Smith N, Ghanem B (2016) A benchmark and simulator for uav tracking. In: Computer Vision\u2013ECCV 2016: 14th European Conference, Amsterdam, The Netherlands, October 11\u201314, 2016, Proceedings, Part I 14, pp 445\u2013461 Springer","DOI":"10.1007\/978-3-319-46448-0_27"},{"key":"9444_CR57","doi-asserted-by":"crossref","unstructured":"He K, Zhang C, Xie S, Li Z, Wang Z (2023) Target-aware tracking with long-term context attention. arXiv preprint arXiv:2302.13840","DOI":"10.1609\/aaai.v37i1.25155"},{"key":"9444_CR58","first-page":"16743","volume":"35","author":"L Lin","year":"2022","unstructured":"Lin L, Fan H, Zhang Z, Xu Y, Ling H (2022) Swintrack: A simple and strong baseline for transformer tracking. Adv Neural Inf Proc Syst 35:16743\u201316754","journal-title":"Adv Neural Inf Proc Syst"},{"key":"9444_CR59","doi-asserted-by":"crossref","unstructured":"Gao S, Zhou C, Ma C, Wang X, Yuan J (2022) Aiatrack: Attention in attention for transformer visual tracking. In: Computer Vision\u2013ECCV 2022: 17th European Conference, Tel Aviv, Israel, October 23\u201327, 2022, Proceedings, Part XXII, pp 146\u2013164 Springer","DOI":"10.1007\/978-3-031-20047-2_9"},{"key":"9444_CR60","doi-asserted-by":"crossref","unstructured":"Mayer C, Danelljan M, Bhat G, Paul M, Paudel DP, Yu F, Van\u00a0Gool L (2022) Transforming model prediction for tracking. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp 8731\u20138740","DOI":"10.1109\/CVPR52688.2022.00853"},{"key":"9444_CR61","doi-asserted-by":"crossref","unstructured":"Song Z, Yu J, Chen Y-PP, Yang W (2022) Transformer tracking with cyclic shifting window attention. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp 8791\u20138800","DOI":"10.1109\/CVPR52688.2022.00859"},{"key":"9444_CR62","doi-asserted-by":"crossref","unstructured":"Yan B, Peng H, Fu J, Wang D, Lu H (2021) Learning spatio-temporal transformer for visual tracking. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp 10448\u201310457","DOI":"10.1109\/ICCV48922.2021.01028"},{"key":"9444_CR63","doi-asserted-by":"crossref","unstructured":"Zhang Z, Peng H, Fu J, Li B, Hu W (2020) Ocean: Object-aware anchor-free tracking. In: Computer Vision\u2013ECCV 2020: 16th European Conference, Glasgow, UK, August 23\u201328, 2020, Proceedings, Part XXI 16, pp 771\u2013787 Springer","DOI":"10.1007\/978-3-030-58589-1_46"},{"key":"9444_CR64","doi-asserted-by":"crossref","unstructured":"Bhat G, Danelljan M, Gool LV, Timofte R (2019) Learning discriminative model prediction for tracking. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp 6182\u20136191","DOI":"10.1109\/ICCV.2019.00628"},{"key":"9444_CR65","doi-asserted-by":"publisher","unstructured":"Mayer C, Danelljan M, Paudel DP, Van\u00a0Gool L (2021) Learning target candidate association to keep track of what not to track. In: 2021 IEEE\/CVF International Conference on Computer Vision (ICCV), pp 13424\u201313434 https:\/\/doi.org\/10.1109\/ICCV48922.2021.01319","DOI":"10.1109\/ICCV48922.2021.01319"},{"key":"9444_CR66","doi-asserted-by":"crossref","unstructured":"Chen X, Peng H, Wang D, Lu H, Hu H (2023) Seqtrack: Sequence to sequence learning for visual object tracking. arXiv preprint arXiv:2304.14394","DOI":"10.1109\/CVPR52729.2023.01400"},{"key":"9444_CR67","doi-asserted-by":"crossref","unstructured":"Gao S, Zhou C, Zhang J (2023) Generalized relation modeling for transformer tracking. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp 18686\u201318695","DOI":"10.1109\/CVPR52729.2023.01792"}],"container-title":["Neural Computing and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00521-024-09444-0.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00521-024-09444-0\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00521-024-09444-0.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,3,21]],"date-time":"2024-03-21T13:14:31Z","timestamp":1711026871000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00521-024-09444-0"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,2,17]]},"references-count":67,"journal-issue":{"issue":"13","published-print":{"date-parts":[[2024,5]]}},"alternative-id":["9444"],"URL":"https:\/\/doi.org\/10.1007\/s00521-024-09444-0","relation":{},"ISSN":["0941-0643","1433-3058"],"issn-type":[{"value":"0941-0643","type":"print"},{"value":"1433-3058","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,2,17]]},"assertion":[{"value":"5 July 2023","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"15 January 2024","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"17 February 2024","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare that they have no conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}]}}