{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,19]],"date-time":"2026-02-19T10:02:53Z","timestamp":1771495373292,"version":"3.50.1"},"reference-count":76,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2026,4,1]],"date-time":"2026-04-01T00:00:00Z","timestamp":1775001600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2026,4,1]],"date-time":"2026-04-01T00:00:00Z","timestamp":1775001600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/legal\/tdmrep-license"},{"start":{"date-parts":[[2026,4,1]],"date-time":"2026-04-01T00:00:00Z","timestamp":1775001600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-017"},{"start":{"date-parts":[[2026,4,1]],"date-time":"2026-04-01T00:00:00Z","timestamp":1775001600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"},{"start":{"date-parts":[[2026,4,1]],"date-time":"2026-04-01T00:00:00Z","timestamp":1775001600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-012"},{"start":{"date-parts":[[2026,4,1]],"date-time":"2026-04-01T00:00:00Z","timestamp":1775001600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2026,4,1]],"date-time":"2026-04-01T00:00:00Z","timestamp":1775001600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-004"}],"funder":[{"DOI":"10.13039\/501100004479","name":"Jiangxi Provincial Natural Science Foundation","doi-asserted-by":"publisher","award":["20242BAB25075"],"award-info":[{"award-number":["20242BAB25075"]}],"id":[{"id":"10.13039\/501100004479","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100004479","name":"Jiangxi Provincial Natural Science Foundation","doi-asserted-by":"publisher","award":["20242BAB25058"],"award-info":[{"award-number":["20242BAB25058"]}],"id":[{"id":"10.13039\/501100004479","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100004479","name":"Jiangxi Provincial Natural Science Foundation","doi-asserted-by":"publisher","award":["20252BAC250017"],"award-info":[{"award-number":["20252BAC250017"]}],"id":[{"id":"10.13039\/501100004479","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Engineering Applications of Artificial Intelligence"],"published-print":{"date-parts":[[2026,4]]},"DOI":"10.1016\/j.engappai.2026.114121","type":"journal-article","created":{"date-parts":[[2026,2,7]],"date-time":"2026-02-07T17:00:42Z","timestamp":1770483642000},"page":"114121","update-policy":"https:\/\/doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":0,"special_numbering":"C","title":["Compact axial attention with detail enhancement for visual object tracking"],"prefix":"10.1016","volume":"169","author":[{"given":"Yuanyun","family":"Wang","sequence":"first","affiliation":[]},{"given":"Geng","family":"Gu","sequence":"additional","affiliation":[]},{"given":"Chao","family":"Tang","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6750-5105","authenticated-orcid":false,"given":"Jun","family":"Wang","sequence":"additional","affiliation":[]}],"member":"78","reference":[{"key":"10.1016\/j.engappai.2026.114121_b1","doi-asserted-by":"crossref","unstructured":"Bertinetto, L., Valmadre, J., Henriques, J.F., Vedaldi, A., Torr, P.H.S., 2016. Fully-Convolutional Siamese Networks for Object Tracking. In: European Conference on Computer Vision. ECCV, pp. 850\u2013865.","DOI":"10.1007\/978-3-319-48881-3_56"},{"key":"10.1016\/j.engappai.2026.114121_b2","doi-asserted-by":"crossref","unstructured":"Bhat, G., Danelljan, M., Gool, L.V., Timofte, R., 2019. Learning discriminative model prediction for tracking. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision. ICCV, pp. 6182\u20136191.","DOI":"10.1109\/ICCV.2019.00628"},{"key":"10.1016\/j.engappai.2026.114121_b3","series-title":"European Conference on Computer Vision","first-page":"205","article-title":"Know your surroundings: Exploiting scene information for object tracking","author":"Bhat","year":"2020"},{"key":"10.1016\/j.engappai.2026.114121_b4","doi-asserted-by":"crossref","unstructured":"Blatter, P., Kanakis, M., Danelljan, M., Gool, L.V., 2023. Efficient Visual Tracking with Exemplar Transformers. In: 2023 IEEE\/CVF Winter Conference on Applications of Computer Vision. WACV, pp. 1571\u20131581.","DOI":"10.1109\/WACV56688.2023.00162"},{"key":"10.1016\/j.engappai.2026.114121_b5","doi-asserted-by":"crossref","unstructured":"Chen, X., Peng, H., Wang, D., Lu, H., Hu, H., 2023. Seqtrack: Sequence to sequence learning for visual object tracking. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. pp. 14572\u201314581.","DOI":"10.1109\/CVPR52729.2023.01400"},{"key":"10.1016\/j.engappai.2026.114121_b6","doi-asserted-by":"crossref","unstructured":"Chen, X., Yan, B., Zhu, J., Wang, D., Yang, X., Lu, H., 2021. Transformer Tracking. In: 2021 IEEE\/CVF Conference on Computer Vision and Pattern Recognition. CVPR, pp. 8122\u20138131.","DOI":"10.1109\/CVPR46437.2021.00803"},{"key":"10.1016\/j.engappai.2026.114121_b7","series-title":"Target transformed regression for accurate tracking","author":"Cui","year":"2021"},{"key":"10.1016\/j.engappai.2026.114121_b8","doi-asserted-by":"crossref","DOI":"10.1016\/j.cviu.2022.103547","article-title":"Fully convolutional online tracking","volume":"224","author":"Cui","year":"2022","journal-title":"Comput. Vis. Image Underst."},{"key":"10.1016\/j.engappai.2026.114121_b9","doi-asserted-by":"crossref","unstructured":"Cui, Y., Jiang, C., Wang, L., Wu, G., 2022b. MixFormer: End-to-End Tracking with Iterative Mixed Attention. In: 2022 IEEE\/CVF Conference on Computer Vision and Pattern Recognition. CVPR, pp. 13598\u201313608.","DOI":"10.1109\/CVPR52688.2022.01324"},{"key":"10.1016\/j.engappai.2026.114121_b10","doi-asserted-by":"crossref","unstructured":"Danelljan, M., Bhat, G., Khan, F.S., Felsberg, M., 2019. Atom: Accurate tracking by overlap maximization. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. CVPR, pp. 4660\u20134669.","DOI":"10.1109\/CVPR.2019.00479"},{"key":"10.1016\/j.engappai.2026.114121_b11","doi-asserted-by":"crossref","unstructured":"Danelljan, M., Gool, L.V., Timofte, R., 2020. Probabilistic regression for visual tracking. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. CVPR, pp. 7183\u20137192.","DOI":"10.1109\/CVPR42600.2020.00721"},{"key":"10.1016\/j.engappai.2026.114121_b12","doi-asserted-by":"crossref","unstructured":"Du, F., Liu, P., Zhao, W., Tang, X., 2020. Correlation-guided attention for corner detection based visual tracking. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. CVPR, pp. 6836\u20136845.","DOI":"10.1109\/CVPR42600.2020.00687"},{"key":"10.1016\/j.engappai.2026.114121_b13","doi-asserted-by":"crossref","unstructured":"Duan, K., Bai, S., Xie, L., Qi, H., Huang, Q., Tian, Q., 2019. CenterNet: Keypoint Triplets for Object Detection. In: 2019 IEEE\/CVF International Conference on Computer Vision. ICCV, pp. 6568\u20136577.","DOI":"10.1109\/ICCV.2019.00667"},{"key":"10.1016\/j.engappai.2026.114121_b14","doi-asserted-by":"crossref","unstructured":"Fan, H., Lin, L., Yang, F., Chu, P., Deng, G., Yu, S., Bai, H., Xu, Y., Liao, C., Ling, H., 2019. Lasot: A high-quality benchmark for large-scale single object tracking. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. CVPR, pp. 5374\u20135383.","DOI":"10.1109\/CVPR.2019.00552"},{"key":"10.1016\/j.engappai.2026.114121_b15","doi-asserted-by":"crossref","unstructured":"Fu, Z., Fu, Z., Liu, Q., Cai, W., Wang, Y., 2022. SparseTT: Visual Tracking with Sparse Transformers. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. CVPR.","DOI":"10.24963\/ijcai.2022\/127"},{"issue":"4","key":"10.1016\/j.engappai.2026.114121_b16","doi-asserted-by":"crossref","first-page":"1612","DOI":"10.1109\/TITS.2019.2930337","article-title":"Manifold siamese network: A novel visual tracking ConvNet for autonomous vehicles","volume":"21","author":"Gao","year":"2020","journal-title":"IEEE Trans. Intell. Transp. Syst."},{"key":"10.1016\/j.engappai.2026.114121_b17","doi-asserted-by":"crossref","unstructured":"Gao, H., Wang, Z., Ji, S., 2020b. Kronecker attention networks. In: Proceedings of the 26th ACM SIGKDD International Conference on Knowledge Discovery & Data Mining. pp. 229\u2013237.","DOI":"10.1145\/3394486.3403065"},{"key":"10.1016\/j.engappai.2026.114121_b18","doi-asserted-by":"crossref","unstructured":"Guo, D., Shao, Y., Cui, Y., Wang, Z., Zhang, L., Shen, C., 2021. Graph attention tracking. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. CVPR, pp. 9543\u20139552.","DOI":"10.1109\/CVPR46437.2021.00942"},{"key":"10.1016\/j.engappai.2026.114121_b19","doi-asserted-by":"crossref","unstructured":"Guo, D., Wang, J., Cui, Y., Wang, Z., Chen, S., 2020. SiamCAR: Siamese fully convolutional classification and regression for visual tracking. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. CVPR, pp. 6269\u20136277.","DOI":"10.1109\/CVPR42600.2020.00630"},{"key":"10.1016\/j.engappai.2026.114121_b20","series-title":"Advances in Neural Information Processing Systems","article-title":"Divert more attention to vision-language tracking","author":"Guo","year":"2022"},{"key":"10.1016\/j.engappai.2026.114121_b21","series-title":"Proceedings of the Thirty-First International Joint Conference on Artificial Intelligence, IJCAI-22","first-page":"927","article-title":"Learning target-aware representation for visual tracking via informative interactions","author":"Guo","year":"2022"},{"key":"10.1016\/j.engappai.2026.114121_b22","doi-asserted-by":"crossref","unstructured":"Hassani, A., Walton, S., Li, J., Li, S., Shi, H., 2023. Neighborhood Attention Transformer. In: 2023 IEEE\/CVF Conference on Computer Vision and Pattern Recognition. CVPR, pp. 6185\u20136194.","DOI":"10.1109\/CVPR52729.2023.00599"},{"key":"10.1016\/j.engappai.2026.114121_b23","series-title":"Axial attention in multidimensional transformers","author":"Ho","year":"2019"},{"key":"10.1016\/j.engappai.2026.114121_b24","doi-asserted-by":"crossref","unstructured":"Hou, Q., Zhang, L., Cheng, M.-M., Feng, J., 2020. Strip pooling: Rethinking spatial pooling for scene parsing. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. pp. 4003\u20134012.","DOI":"10.1109\/CVPR42600.2020.00406"},{"key":"10.1016\/j.engappai.2026.114121_b25","doi-asserted-by":"crossref","unstructured":"Hou, Q., Zhou, D., Feng, J., 2021. Coordinate attention for efficient mobile network design. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. pp. 13713\u201313722.","DOI":"10.1109\/CVPR46437.2021.01350"},{"key":"10.1016\/j.engappai.2026.114121_b26","doi-asserted-by":"crossref","unstructured":"Huang, Z., Wang, X., Huang, L., Huang, C., Wei, Y., Liu, W., 2019. Ccnet: Criss-cross attention for semantic segmentation. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision. pp. 603\u2013612.","DOI":"10.1109\/ICCV.2019.00069"},{"issue":"5","key":"10.1016\/j.engappai.2026.114121_b27","doi-asserted-by":"crossref","first-page":"1562","DOI":"10.1109\/TPAMI.2019.2957464","article-title":"GOT-10k: A large high-diversity benchmark for generic object tracking in the wild","volume":"43","author":"Huang","year":"2021","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"10.1016\/j.engappai.2026.114121_b28","doi-asserted-by":"crossref","DOI":"10.1016\/j.bspc.2025.107866","article-title":"Automatic colorectal cancer detection using machine learning and deep learning based on feature selection in histopathological images","volume":"107","author":"Junaid","year":"2025","journal-title":"Biomed. Signal Process. Control."},{"key":"10.1016\/j.engappai.2026.114121_b29","doi-asserted-by":"crossref","unstructured":"Kang, B., Chen, X., Wang, D., Peng, H., Lu, H., 2023. Exploring lightweight hierarchical vision transformers for efficient visual tracking. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision. ICCV, pp. 9612\u20139621.","DOI":"10.1109\/ICCV51070.2023.00881"},{"key":"10.1016\/j.engappai.2026.114121_b30","doi-asserted-by":"crossref","unstructured":"Kiani Galoogahi, H., Fagg, A., Huang, C., Ramanan, D., Lucey, S., 2017. Need for speed: A benchmark for higher frame rate object tracking. In: Proceedings of the IEEE International Conference on Computer Vision. ICCV, pp. 1125\u20131134.","DOI":"10.1109\/ICCV.2017.128"},{"key":"10.1016\/j.engappai.2026.114121_b31","article-title":"ImageNet classification with deep convolutional neural networks","volume":"vol. 25","author":"Krizhevsky","year":"2012"},{"key":"10.1016\/j.engappai.2026.114121_b32","doi-asserted-by":"crossref","unstructured":"Li, P., Chen, X., Shen, S., 2019a. Stereo R-CNN Based 3D Object Detection for Autonomous Driving. In: 2019 IEEE\/CVF Conference on Computer Vision and Pattern Recognition. CVPR, pp. 7636\u20137644.","DOI":"10.1109\/CVPR.2019.00783"},{"key":"10.1016\/j.engappai.2026.114121_b33","article-title":"Hierarchical siamese network for real-time visual tracking","volume":"238","author":"Li","year":"2024","journal-title":"Expert Syst. Appl."},{"key":"10.1016\/j.engappai.2026.114121_b34","doi-asserted-by":"crossref","unstructured":"Li, B., Wu, W., Wang, Q., Zhang, F., Xing, J., Yan, J., 2019b. Siamrpn++: Evolution of siamese visual tracking with very deep networks. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. CVPR, pp. 4282\u20134291.","DOI":"10.1109\/CVPR.2019.00441"},{"key":"10.1016\/j.engappai.2026.114121_b35","doi-asserted-by":"crossref","unstructured":"Li, B., Yan, J., Wu, W., Zhu, Z., Hu, X., 2018. High Performance Visual Tracking with Siamese Region Proposal Network. In: 2018 IEEE\/CVF Conference on Computer Vision and Pattern Recognition. CVPR, pp. 8971\u20138980.","DOI":"10.1109\/CVPR.2018.00935"},{"key":"10.1016\/j.engappai.2026.114121_b36","first-page":"16743","article-title":"SwinTrack: A simple and strong baseline for transformer tracking","volume":"vol. 35","author":"Lin","year":"2022"},{"key":"10.1016\/j.engappai.2026.114121_b37","series-title":"European Conference on Computer Vision","first-page":"740","article-title":"Microsoft coco: Common objects in context","author":"Lin","year":"2014"},{"key":"10.1016\/j.engappai.2026.114121_b38","doi-asserted-by":"crossref","unstructured":"Liu, Z., Lin, Y., Cao, Y., Hu, H., Wei, Y., Zhang, Z., Lin, S., Guo, B., 2021. Swin Transformer: Hierarchical Vision Transformer using Shifted Windows. In: 2021 IEEE\/CVF International Conference on Computer Vision. ICCV, pp. 9992\u201310002.","DOI":"10.1109\/ICCV48922.2021.00986"},{"key":"10.1016\/j.engappai.2026.114121_b39","unstructured":"Loshchilov, I., Hutter, F., 2019. Decoupled Weight Decay Regularization. In: International Conference on Learning Representations. ICLR."},{"key":"10.1016\/j.engappai.2026.114121_b40","doi-asserted-by":"crossref","unstructured":"Lukezic, A., Matas, J., Kristan, M., 2020. D3s-a discriminative single shot segmentation tracker. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. CVPR, pp. 7133\u20137142.","DOI":"10.1109\/CVPR42600.2020.00716"},{"key":"10.1016\/j.engappai.2026.114121_b41","doi-asserted-by":"crossref","unstructured":"Ma, F., Shou, M.Z., Zhu, L., Fan, H., Xu, Y., Yang, Y., Yan, Z., 2022. Unified transformer tracker for object tracking. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. CVPR, pp. 8781\u20138790.","DOI":"10.1109\/CVPR52688.2022.00858"},{"key":"10.1016\/j.engappai.2026.114121_b42","series-title":"European Conference on Computer Vision","first-page":"445","article-title":"A benchmark and simulator for uav tracking","author":"Mueller","year":"2016"},{"key":"10.1016\/j.engappai.2026.114121_b43","doi-asserted-by":"crossref","unstructured":"Muller, M., Bibi, A., Giancola, S., Alsubaihi, S., Ghanem, B., 2018. Trackingnet: A large-scale dataset and benchmark for object tracking in the wild. In: Proceedings of the European Conference on Computer Vision (ECCV). ECCV, pp. 300\u2013317.","DOI":"10.1007\/978-3-030-01246-5_19"},{"key":"10.1016\/j.engappai.2026.114121_b44","doi-asserted-by":"crossref","first-page":"6194","DOI":"10.1109\/TMM.2022.3206668","article-title":"Learning localization-aware target confidence for siamese visual tracking","volume":"25","author":"Nie","year":"2023","journal-title":"IEEE Trans. Multimed."},{"key":"10.1016\/j.engappai.2026.114121_b45","doi-asserted-by":"crossref","unstructured":"Rezatofighi, H., Tsoi, N., Gwak, J., Sadeghian, A., Reid, I., Savarese, S., 2019. Generalized Intersection Over Union: A Metric and a Loss for Bounding Box Regression. In: 2019 IEEE\/CVF Conference on Computer Vision and Pattern Recognition. CVPR, pp. 658\u2013666.","DOI":"10.1109\/CVPR.2019.00075"},{"key":"10.1016\/j.engappai.2026.114121_b46","doi-asserted-by":"crossref","unstructured":"Shao, Y., He, S., Ye, Q., Feng, Y., Luo, W., Chen, J., 2024. Context-Aware Integration of Language and Visual References for Natural Language Tracking. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. CVPR, pp. 19208\u201319217.","DOI":"10.1109\/CVPR52733.2024.01817"},{"key":"10.1016\/j.engappai.2026.114121_b47","series-title":"Context-aware visual tracking with joint meta-updating","author":"Shen","year":"2022"},{"key":"10.1016\/j.engappai.2026.114121_b48","doi-asserted-by":"crossref","unstructured":"Subakan, C., Ravanelli, M., Cornell, S., Bronzi, M., Zhong, J., 2021. Attention Is All You Need In Speech Separation. In: ICASSP 2021 - 2021 IEEE International Conference on Acoustics, Speech and Signal Processing. ICASSP, pp. 21\u201325.","DOI":"10.1109\/ICASSP39728.2021.9413901"},{"key":"10.1016\/j.engappai.2026.114121_b49","doi-asserted-by":"crossref","unstructured":"Tang, F., Ling, Q., 2022. Ranking-Based Siamese Visual Tracking. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. CVPR, pp. 8741\u20138750.","DOI":"10.1109\/CVPR52688.2022.00854"},{"issue":"6","key":"10.1016\/j.engappai.2026.114121_b50","doi-asserted-by":"crossref","first-page":"3645","DOI":"10.1007\/s11263-025-02345-2","article-title":"SeaFormer++: Squeeze-enhanced axial transformer for mobile visual recognition","volume":"133","author":"Wan","year":"2025","journal-title":"Int. J. Comput. Vis."},{"key":"10.1016\/j.engappai.2026.114121_b51","doi-asserted-by":"crossref","unstructured":"Wang, X., Shu, X., Zhang, Z., Jiang, B., Wang, Y., Tian, Y., Wu, F., 2021a. Towards More Flexible and Accurate Object Tracking With Natural Language: Algorithms and Benchmark. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. CVPR, pp. 13763\u201313773.","DOI":"10.1109\/CVPR46437.2021.01355"},{"key":"10.1016\/j.engappai.2026.114121_b52","doi-asserted-by":"crossref","unstructured":"Wang, W., Xie, E., Li, X., Fan, D.-P., Song, K., Liang, D., Lu, T., Luo, P., Shao, L., 2021b. Pyramid Vision Transformer: A Versatile Backbone for Dense Prediction without Convolutions. In: 2021 IEEE\/CVF International Conference on Computer Vision. ICCV, pp. 548\u2013558.","DOI":"10.1109\/ICCV48922.2021.00061"},{"key":"10.1016\/j.engappai.2026.114121_b53","doi-asserted-by":"crossref","first-page":"326","DOI":"10.1109\/TMM.2023.3264851","article-title":"CMAT: Integrating convolution mixer and self-attention for visual tracking","volume":"26","author":"Wang","year":"2024","journal-title":"IEEE Trans. Multimed."},{"key":"10.1016\/j.engappai.2026.114121_b54","doi-asserted-by":"crossref","DOI":"10.1016\/j.neucom.2025.129444","article-title":"Binding double-head predictor with key context transformer for high performance tracking","volume":"623","author":"Wang","year":"2025","journal-title":"Neurocomputing"},{"key":"10.1016\/j.engappai.2026.114121_b55","doi-asserted-by":"crossref","unstructured":"Wang, Q., Zhang, L., Bertinetto, L., Hu, W., Torr, P.H., 2019. Fast online object tracking and segmentation: A unifying approach. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition. CVPR.","DOI":"10.1109\/CVPR.2019.00142"},{"key":"10.1016\/j.engappai.2026.114121_b56","doi-asserted-by":"crossref","unstructured":"Wang, N., Zhou, W., Wang, J., Li, H., 2021c. Transformer meets tracker: Exploiting temporal context for robust visual tracking. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. CVPR, pp. 1571\u20131580.","DOI":"10.1109\/CVPR46437.2021.00162"},{"key":"10.1016\/j.engappai.2026.114121_b57","series-title":"European Conference on Computer Vision","first-page":"108","article-title":"Axial-deeplab: Stand-alone axial-attention for panoptic segmentation","author":"Wang","year":"2020"},{"issue":"9","key":"10.1016\/j.engappai.2026.114121_b58","doi-asserted-by":"crossref","first-page":"1834","DOI":"10.1109\/TPAMI.2014.2388226","article-title":"Object tracking benchmark","volume":"37","author":"Wu","year":"2015","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"10.1016\/j.engappai.2026.114121_b59","doi-asserted-by":"crossref","unstructured":"Xie, F., Wang, C., Wang, G., Cao, Y., Yang, W., Zeng, W., 2022. Correlation-aware deep tracking. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. CVPR, pp. 8751\u20138760.","DOI":"10.1109\/CVPR52688.2022.00855"},{"key":"10.1016\/j.engappai.2026.114121_b60","doi-asserted-by":"crossref","unstructured":"Xie, F., Wang, C., Wang, G., Yang, W., Zeng, W., 2021. Learning Tracking Representations via Dual-Branch Fully Transformer Networks. In: 2021 IEEE\/CVF International Conference on Computer Vision. ICCV, pp. 2688\u20132697.","DOI":"10.1109\/ICCVW54120.2021.00303"},{"key":"10.1016\/j.engappai.2026.114121_b61","first-page":"8727","article-title":"Robust tracking via mamba-based context-aware token learning","volume":"vol. 39, no. 8","author":"Xie","year":"2025"},{"key":"10.1016\/j.engappai.2026.114121_b62","doi-asserted-by":"crossref","unstructured":"Xie, J., Zhong, B., Mo, Z., Zhang, S., Shi, L., Song, S., Ji, R., 2024. Autoregressive queries for adaptive tracking with spatio-temporal transformers. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. pp. 19300\u201319309.","DOI":"10.1109\/CVPR52733.2024.01826"},{"key":"10.1016\/j.engappai.2026.114121_b63","doi-asserted-by":"crossref","unstructured":"Xing, D., Evangeliou, N., Tsoukalas, A., Tzes, A., 2022. Siamese Transformer Pyramid Networks for Real-Time UAV Tracking. In: 2022 IEEE\/CVF Winter Conference on Applications of Computer Vision. WACV, pp. 1898\u20131907.","DOI":"10.1109\/WACV51458.2022.00196"},{"key":"10.1016\/j.engappai.2026.114121_b64","first-page":"12549","article-title":"Siamfc++: Towards robust and accurate visual tracking with target estimation guidelines","volume":"vol. 34, no. 07","author":"Xu","year":"2020"},{"key":"10.1016\/j.engappai.2026.114121_b65","doi-asserted-by":"crossref","unstructured":"Yan, B., Peng, H., Fu, J., Wang, D., Lu, H., 2021. Learning Spatio-Temporal Transformer for Visual Tracking. In: 2021 IEEE\/CVF International Conference on Computer Vision. ICCV, pp. 10428\u201310437.","DOI":"10.1109\/ICCV48922.2021.01028"},{"key":"10.1016\/j.engappai.2026.114121_b66","doi-asserted-by":"crossref","DOI":"10.1016\/j.patcog.2024.111278","article-title":"Adaptively bypassing vision transformer blocks for efficient visual tracking","volume":"161","author":"Yang","year":"2025","journal-title":"Pattern Recognit."},{"issue":"3","key":"10.1016\/j.engappai.2026.114121_b67","doi-asserted-by":"crossref","first-page":"377","DOI":"10.1109\/TCE.2023.3251407","article-title":"BANDT: A border-aware network with deformable transformers for visual tracking","volume":"69","author":"Yang","year":"2023","journal-title":"IEEE Trans. Consum. Electron."},{"key":"10.1016\/j.engappai.2026.114121_b68","doi-asserted-by":"crossref","unstructured":"Ye, B., Chang, H., Ma, B., Shan, S., Chen, X., 2022. Joint feature learning and relation modeling for tracking: A one-stream framework. In: European Conference on Computer Vision. ECCV, pp. 341\u2013357.","DOI":"10.1007\/978-3-031-20047-2_20"},{"key":"10.1016\/j.engappai.2026.114121_b69","series-title":"Mobile vision transformer-based visual object tracking","first-page":"arXiv","author":"Yelluru Gopal","year":"2023"},{"key":"10.1016\/j.engappai.2026.114121_b70","series-title":"2025 IEEE\/CVF Winter Conference on Applications of Computer Vision","first-page":"9468","article-title":"Improving accuracy and generalization for efficient visual tracking","author":"Zaveri","year":"2025"},{"key":"10.1016\/j.engappai.2026.114121_b71","doi-asserted-by":"crossref","unstructured":"Zhang, Z., Liu, Y., Wang, X., Li, B., Hu, W., 2021. Learn to match: Automatic matching network design for visual tracking. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision. ICCV, pp. 13339\u201313348.","DOI":"10.1109\/ICCV48922.2021.01309"},{"key":"10.1016\/j.engappai.2026.114121_b72","series-title":"European Conference on Computer Vision","first-page":"771","article-title":"Ocean: Object-aware anchor-free tracking","author":"Zhang","year":"2020"},{"key":"10.1016\/j.engappai.2026.114121_b73","doi-asserted-by":"crossref","DOI":"10.1016\/j.engappai.2025.110511","article-title":"Learning adaptive distractor-aware-suppression appearance model for visual tracking","volume":"150","author":"Zhang","year":"2025","journal-title":"Eng. Appl. Artif. Intell."},{"key":"10.1016\/j.engappai.2026.114121_b74","doi-asserted-by":"crossref","unstructured":"Zhou, Z., Pei, W., Li, X., Wang, H., Zheng, F., He, Z., 2021. Saliency-associated object tracking. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision. ICCV, pp. 9866\u20139875.","DOI":"10.1109\/ICCV48922.2021.00972"},{"key":"10.1016\/j.engappai.2026.114121_b75","article-title":"Exploring dynamic transformer for efficient object tracking","author":"Zhu","year":"2025","journal-title":"IEEE Trans. Neural Netw. Learn. Syst."},{"key":"10.1016\/j.engappai.2026.114121_b76","article-title":"SRRT: Exploring search region regulation for visual object tracking","author":"Zhu","year":"2024","journal-title":"IEEE Trans. Circuits Syst. Video Technol."}],"container-title":["Engineering Applications of Artificial Intelligence"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0952197626004021?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0952197626004021?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2026,2,19]],"date-time":"2026-02-19T08:44:49Z","timestamp":1771490689000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S0952197626004021"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,4]]},"references-count":76,"alternative-id":["S0952197626004021"],"URL":"https:\/\/doi.org\/10.1016\/j.engappai.2026.114121","relation":{},"ISSN":["0952-1976"],"issn-type":[{"value":"0952-1976","type":"print"}],"subject":[],"published":{"date-parts":[[2026,4]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"Compact axial attention with detail enhancement for visual object tracking","name":"articletitle","label":"Article Title"},{"value":"Engineering Applications of Artificial Intelligence","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.engappai.2026.114121","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2026 Elsevier Ltd. All rights are reserved, including those for text and data mining, AI training, and similar technologies.","name":"copyright","label":"Copyright"}],"article-number":"114121"}}