{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,4]],"date-time":"2026-05-04T10:19:37Z","timestamp":1777889977948,"version":"3.51.4"},"publisher-location":"Cham","reference-count":40,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031723315","type":"print"},{"value":"9783031723322","type":"electronic"}],"license":[{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024]]},"DOI":"10.1007\/978-3-031-72332-2_29","type":"book-chapter","created":{"date-parts":[[2024,9,16]],"date-time":"2024-09-16T15:04:02Z","timestamp":1726499042000},"page":"447-460","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Transformer Tracker Based on Multi-level Residual Perception Structure"],"prefix":"10.1007","author":[{"given":"Zhenhai","family":"Wang","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Hui","family":"Chen","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Lutao","family":"Yuan","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ying","family":"Ren","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Hongyu","family":"Tian","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2024,9,17]]},"reference":[{"key":"29_CR1","doi-asserted-by":"crossref","unstructured":"Bertinetto, L., Valmadre, J., Henriques, J.F., Vedaldi, A., Torr, P.H.: Fully-Convolutional Siamese Networks for Object Tracking, pp. 850\u2013865. Springer (2016)","DOI":"10.1007\/978-3-319-48881-3_56"},{"key":"29_CR2","doi-asserted-by":"crossref","unstructured":"Li, B., Yan, J., Wu, W., Zhu, Z., Hu, X.: High Performance Visual Tracking with Siamese Region Proposal Network, pp. 8971\u20138980 (2018)","DOI":"10.1109\/CVPR.2018.00935"},{"key":"29_CR3","doi-asserted-by":"crossref","unstructured":"Li, B., Wu, W., Wang, Q., Zhang, F., Xing, J., Yan, J.: \u2018Siamrpn++: Evolution of Siamese Visual Tracking with Very Deep Networks, pp. 4282\u20134291 (2019)","DOI":"10.1109\/CVPR.2019.00441"},{"key":"29_CR4","doi-asserted-by":"crossref","unstructured":"Xu, Y., Wang, Z., Li, Z., Yuan, Y., Yu, G.: Siamfc++: Towards Robust and Accurate Visual Tracking with Target Estimation Guidelines, pp. 12549\u201312556 (2020)","DOI":"10.1609\/aaai.v34i07.6944"},{"key":"29_CR5","doi-asserted-by":"crossref","unstructured":"Chen, X., Yan, B., Zhu, J., Wang, D., Yang, X., Lu, H.: Transformer Tracking, pp. 8126\u20138135 (2021)","DOI":"10.1109\/CVPR46437.2021.00803"},{"key":"29_CR6","doi-asserted-by":"crossref","unstructured":"Yan, B., Peng, H., Fu, J., Wang, D., Lu, H.: Learning Spatio-temporal Transformer For Visual Tracking, pp. 10448\u201310457 (2021)","DOI":"10.1109\/ICCV48922.2021.01028"},{"key":"29_CR7","doi-asserted-by":"crossref","unstructured":"Chen, B., et al.: Backbone is All Your Need: A Simplified Architecture for Visual Object Tracking, pp. 375\u2013392. Springer (2022)","DOI":"10.1007\/978-3-031-20047-2_22"},{"key":"29_CR8","doi-asserted-by":"crossref","unstructured":"Ye, B., Chang, H., Ma, B., Shan, S., Chen, X.: Joint Feature Learning and Relation Modeling for Tracking: A One-Stream Framework, pp. 341\u2013357. Springer (2022)","DOI":"10.1007\/978-3-031-20047-2_20"},{"key":"29_CR9","unstructured":"Vaswani, A., et al.: Attention is all you need. Adv. Neural Inf. Process. Syst. 30 (2017)"},{"key":"29_CR10","doi-asserted-by":"crossref","unstructured":"Yuan, L., et al.: Tokens-to-Token Vit: Training Vision Transformers From Scratch on Imagenet, pp. 558\u2013567 (2021)","DOI":"10.1109\/ICCV48922.2021.00060"},{"key":"29_CR11","doi-asserted-by":"crossref","unstructured":"Yue, X., et al.: Vision Transformer with Progressive Sampling, pp. 387\u2013396 (2021)","DOI":"10.1109\/ICCV48922.2021.00044"},{"key":"29_CR12","unstructured":"Hatamizadeh, A., et al.: FasterViT: Fast Vision Transformers with Hierarchical Attention (2023)"},{"key":"29_CR13","unstructured":"Zhu, X., Su, W., Lu, L., Li, B., Wang, X., Dai, J.: Deformable detr: deformable transformers for end-to-end object detection. arXiv preprint arXiv:2010.04159 (2020)"},{"key":"29_CR14","doi-asserted-by":"crossref","unstructured":"Carion, N., Massa, F., Synnaeve, G., Usunier, N., Kirillov, A., Zagoruyko, S.: \u2018End-to-End Object Detection with Transformers, pp. 213\u2013229. Springer (2020)","DOI":"10.1007\/978-3-030-58452-8_13"},{"key":"29_CR15","unstructured":"Lv, W., et al.: Detrs beat yolos on real-time object detection (2023)"},{"key":"29_CR16","first-page":"12077","volume":"34","author":"E Xie","year":"2021","unstructured":"Xie, E., Wang, W., Yu, Z., Anandkumar, A., Alvarez, J.M., Luo, P.: SegFormer: simple and efficient design for semantic segmentation with transformers. Adv. Neural. Inf. Process. Syst. 34, 12077\u201312090 (2021)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"29_CR17","unstructured":"Dosovitskiy, A., et al.: An image is worth 16x16 words: transformers for image recognition at scale. arXiv preprint arXiv:2010.11929 (2020)"},{"key":"29_CR18","first-page":"16743","volume":"35","author":"L Lin","year":"2022","unstructured":"Lin, L., Fan, H., Zhang, Z., Xu, Y., Ling, H.: Swintrack: a simple and strong baseline for transformer tracking. Adv. Neural. Inf. Process. Syst. 35, 16743\u201316754 (2022)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"29_CR19","doi-asserted-by":"crossref","unstructured":"Liu, Z., et al.: Swin Transformer: Hierarchical Vision Transformer Using Shifted Windows, pp. 10012\u201310022 (2021)","DOI":"10.1109\/ICCV48922.2021.00986"},{"issue":"6","key":"29_CR20","doi-asserted-by":"publisher","first-page":"84","DOI":"10.1145\/3065386","volume":"60","author":"A Krizhevsky","year":"2017","unstructured":"Krizhevsky, A., Sutskever, I., Hinton, G.E.: Imagenet classification with deep convolutional neural networks. Commun. ACM 60(6), 84\u201390 (2017)","journal-title":"Commun. ACM"},{"key":"29_CR21","doi-asserted-by":"crossref","unstructured":"Guo, D., Wang, J., Cui, Y., Wang, Z., Chen, S.: SiamCAR: Siamese Fully Convolutional Classification and Regression for Visual Tracking, pp. 6269\u20136277 (2020)","DOI":"10.1109\/CVPR42600.2020.00630"},{"key":"29_CR22","doi-asserted-by":"crossref","unstructured":"Chen, Z., et al.: SiamBAN: target-aware tracking with siamese box adaptive network. IEEE Trans. Pattern Anal. Mach. Intell. (2022)","DOI":"10.1109\/TPAMI.2022.3195759"},{"key":"29_CR23","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep Residual Learning for Image Recognition, pp. 770\u2013778 (2016)","DOI":"10.1109\/CVPR.2016.90"},{"key":"29_CR24","doi-asserted-by":"crossref","unstructured":"Xing, D., Evangeliou, N., Tsoukalas, A., Tzes, A.: Siamese Transformer Pyramid Networks for Real-Time UAV Tracking, pp. 2139\u20132148 (2022)","DOI":"10.1109\/WACV51458.2022.00196"},{"key":"29_CR25","doi-asserted-by":"crossref","unstructured":"Law, H., and Deng, J.: \u2018Cornernet: Detecting objects as paired keypoints\u2019, (Eds.): \u2018Book Cornernet: Detecting objects as paired keypoints\u2019 (2018, edn.), pp. 734\u2013750","DOI":"10.1007\/978-3-030-01264-9_45"},{"key":"29_CR26","doi-asserted-by":"crossref","unstructured":"Lin, T.-Y., Goyal, P., Girshick, R., He, K., Doll\u00e1r, P.: Focal Loss for Dense Object Detection, pp. 2980\u20132988 (2017)","DOI":"10.1109\/ICCV.2017.324"},{"key":"29_CR27","doi-asserted-by":"crossref","unstructured":"Rezatofighi, H., Tsoi, N., Gwak, J., Sadeghian, A., Reid, I., Savarese, S.: Generalized Intersection Over Union: A Metric and a Loss for Bounding Box Regression, pp. 658\u2013666 (2019)","DOI":"10.1109\/CVPR.2019.00075"},{"key":"29_CR28","doi-asserted-by":"crossref","unstructured":"Deng, J., Dong, W., Socher, R., Li, L.-J., Li, K., Fei-Fei, L.: Imagenet: A Large-Scale Hierarchical Image Database, pp. 248\u2013255. IEEE (2009)","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"29_CR29","unstructured":"Loshchilov, I., Hutter, F.: Decoupled weight decay regularization. arXiv preprint arXiv:1711.05101 (2017)"},{"issue":"5","key":"29_CR30","doi-asserted-by":"publisher","first-page":"1562","DOI":"10.1109\/TPAMI.2019.2957464","volume":"43","author":"L Huang","year":"2019","unstructured":"Huang, L., Zhao, X., Huang, K.: Got-10k: a large high-diversity benchmark for generic object tracking in the wild. IEEE Trans. Pattern Anal. Mach. Intell. 43(5), 1562\u20131577 (2019)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"29_CR31","doi-asserted-by":"crossref","unstructured":"Muller, M., Bibi, A., Giancola, S., Alsubaihi, S., Ghanem, B.: Trackingnet: A Large-Scale Dataset and Benchmark for Object Tracking in the Wild, pp. 300\u2013317 (2018)","DOI":"10.1007\/978-3-030-01246-5_19"},{"key":"29_CR32","doi-asserted-by":"crossref","unstructured":"Fan, H., et al.: Lasot: A High-Quality Benchmark for Large-Scale Single Object Tracking, pp. 5374\u20135383 (2019)","DOI":"10.1109\/CVPR.2019.00552"},{"key":"29_CR33","doi-asserted-by":"crossref","unstructured":"Lin, T.-Y., et al.: Microsoft Coco: Common Objects in Context, pp. 740\u2013755. Springer (2014)","DOI":"10.1007\/978-3-319-10602-1_48"},{"key":"29_CR34","doi-asserted-by":"crossref","unstructured":"Mueller, M., Smith, N., Ghanem, B.: A Benchmark and Simulator for UAV Tracking, pp. 445\u2013461. Springer (2016)","DOI":"10.1007\/978-3-319-46448-0_27"},{"key":"29_CR35","doi-asserted-by":"crossref","unstructured":"Danelljan, M., Bhat, G., Khan, F.S., Felsberg, M.: Atom: Accurate Tracking by Overlap Maximization, pp. 4660\u20134669 (2019)","DOI":"10.1109\/CVPR.2019.00479"},{"key":"29_CR36","doi-asserted-by":"crossref","unstructured":"Bhat, G., Danelljan, M., Gool, L.V., Timofte, R.: Learning Discriminative Model Prediction for Tracking, pp. 6182\u20136191 (2019)","DOI":"10.1109\/ICCV.2019.00628"},{"key":"29_CR37","doi-asserted-by":"crossref","unstructured":"Wang, N., Zhou, W., Wang, J., Li, H.: Transformer Meets Tracker: Exploiting Temporal Context for Robust Visual Tracking, pp. 1571\u20131580 (2021)","DOI":"10.1109\/CVPR46437.2021.00162"},{"key":"29_CR38","doi-asserted-by":"crossref","unstructured":"Cao, Z., Fu, C., Ye, J., Li, B., Li, Y.: SiamAPN++: Siamese Attentional Aggregation Network for Real-Time UAV Tracking, pp. 3086\u20133092. IEEE (2021)","DOI":"10.1109\/IROS51168.2021.9636309"},{"key":"29_CR39","doi-asserted-by":"crossref","unstructured":"Zheng, G., Fu, C., Ye, J., Li, B., Lu, G., Pan, J.: Scale-aware siamese object tracking for vision-based UAM approaching. IEEE Trans. Indust. Inf. (2022)","DOI":"10.1109\/IROS47612.2022.9982189"},{"key":"29_CR40","doi-asserted-by":"crossref","unstructured":"Wang, Q., Zhang, L., Bertinetto, L., Hu, W., Torr, P.H.: Fast Online Object Tracking and Segmentation: A Unifying Approach, pp. 1328\u20131338 (2019)","DOI":"10.1109\/CVPR.2019.00142"}],"container-title":["Lecture Notes in Computer Science","Artificial Neural Networks and Machine Learning \u2013 ICANN 2024"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-72332-2_29","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,9,16]],"date-time":"2024-09-16T15:18:27Z","timestamp":1726499907000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-72332-2_29"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024]]},"ISBN":["9783031723315","9783031723322"],"references-count":40,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-72332-2_29","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024]]},"assertion":[{"value":"17 September 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"It is now necessary to declare any competing interests or to specifically state that the authors have no competing interests.","order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Disclosure of Interests"}},{"value":"ICANN","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Artificial Neural Networks","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Lugano","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Switzerland","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"17 September 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"20 September 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"33","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"icann2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}