{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,25]],"date-time":"2026-03-25T00:32:42Z","timestamp":1774398762461,"version":"3.50.1"},"reference-count":49,"publisher":"Springer Science and Business Media LLC","issue":"21","license":[{"start":{"date-parts":[[2023,7,28]],"date-time":"2023-07-28T00:00:00Z","timestamp":1690502400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,7,28]],"date-time":"2023-07-28T00:00:00Z","timestamp":1690502400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"name":"Natural Science Foundation for the Higher Education Institutions of Anhui Province","award":["No. KJ2021A0044"],"award-info":[{"award-number":["No. KJ2021A0044"]}]},{"name":"Hefei Natural Science Foundation","award":["No. HZ22ZK001"],"award-info":[{"award-number":["No. HZ22ZK001"]}]},{"name":"University Synergy Innovation Program of Anhui Province","award":["No. GXXT-2021-038"],"award-info":[{"award-number":["No. GXXT-2021-038"]}]},{"name":"University Synergy Innovation Program of Anhui Province","award":["GXXT-2022-042"],"award-info":[{"award-number":["GXXT-2022-042"]}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["No. 62076003"],"award-info":[{"award-number":["No. 
62076003"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Appl Intell"],"published-print":{"date-parts":[[2023,11]]},"DOI":"10.1007\/s10489-023-04741-y","type":"journal-article","created":{"date-parts":[[2023,7,28]],"date-time":"2023-07-28T15:01:26Z","timestamp":1690556486000},"page":"24709-24723","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":10,"title":["Siamese transformer RGBT tracking"],"prefix":"10.1007","volume":"53","author":[{"given":"Futian","family":"Wang","sequence":"first","affiliation":[]},{"given":"Wenqi","family":"Wang","sequence":"additional","affiliation":[]},{"given":"Lei","family":"Liu","sequence":"additional","affiliation":[]},{"given":"Chenglong","family":"Li","sequence":"additional","affiliation":[]},{"given":"Jing","family":"Tang","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2023,7,28]]},"reference":[{"key":"4741_CR1","doi-asserted-by":"crossref","unstructured":"Zhu Y, Li C, Luo B, Tang J, Wang X (2019) Dense feature aggregation and pruning for rgbt tracking. In: Proceedings of the ACM International conference on multimedia, pp 465\u2013472","DOI":"10.1145\/3343031.3350928"},{"key":"4741_CR2","doi-asserted-by":"crossref","unstructured":"Gao Y, Li C, Zhu Y, Tang J, He T, Wang F (2019) Deep adaptive fusion network for high performance rgbt tracking. In: Proceedings of the IEEE\/CVF International conference on computer vision workshops, pp 0\u20130","DOI":"10.1109\/ICCVW.2019.00017"},{"key":"4741_CR3","unstructured":"Long Li C, Lu A, Hua Zheng A, Tu Z, Tang J (2019) Multi-adapter rgbt tracking. 
In: Proceedings of the IEEE\/CVF International conference on computer vision workshops, pp 0\u20130"},{"key":"4741_CR4","doi-asserted-by":"crossref","unstructured":"Li C, Liu L, Lu A, Ji Q, Tang J (2020) Challenge-aware rgbt tracking. In: European conference on computer vision, pp 222\u2013237","DOI":"10.1007\/978-3-030-58542-6_14"},{"issue":"9","key":"4741_CR5","doi-asserted-by":"publisher","first-page":"2714","DOI":"10.1007\/s11263-021-01495-3","volume":"129","author":"P Zhang","year":"2021","unstructured":"Zhang P, Wang D, Lu H, Yang X (2021) Learning adaptive attribute-driven representation for real-time rgb-t tracking. Int J Comput Vis 129(9):2714\u20132729","journal-title":"Int J Comput Vis"},{"key":"4741_CR6","doi-asserted-by":"crossref","unstructured":"Xiao Y, Yang M, Li C, Liu L, Tang J (2022) Attribute-based progressive fusion network for rgbt tracking. National Conference on Artificial Intelligence","DOI":"10.1609\/aaai.v36i3.20187"},{"key":"4741_CR7","doi-asserted-by":"crossref","unstructured":"Nam H and Han B (2016) Learning multi-domain convolutional neural networks for visual tracking. In: Proceedings of the IEEE Conference on computer vision and pattern recognition, pp 4293\u20134302","DOI":"10.1109\/CVPR.2016.465"},{"key":"4741_CR8","doi-asserted-by":"crossref","unstructured":"Chen Z, Zhong B, Li G, Zhang S, Ji R (2020) Siamese box adaptive network for visual tracking. In: Proceedings of the IEEE\/CVF Conference on computer vision and pattern recognition, pp 6668\u20136677","DOI":"10.1109\/CVPR42600.2020.00670"},{"key":"4741_CR9","doi-asserted-by":"crossref","unstructured":"Li B, Wu W, Wang Q, Zhang F, Xing J, Yan J (2019) Siamrpn++: Evolution of siamese visual tracking with very deep networks. 
In: Proceedings of the IEEE\/CVF Conference on computer vision and pattern recognition, pp 4282\u20134291","DOI":"10.1109\/CVPR.2019.00441"},{"key":"4741_CR10","doi-asserted-by":"crossref","unstructured":"Lu X, Li F, Zhao Y, Yang W (2022) A robust tracking architecture using tracking failure detection in siamese trackers","DOI":"10.1007\/s10489-022-04154-3"},{"issue":"6","key":"4741_CR11","doi-asserted-by":"publisher","first-page":"3202","DOI":"10.1007\/s10489-020-01992-x","volume":"51","author":"Y Meng","year":"2021","unstructured":"Meng Y, Deng Z, Zhao K, Xu Y, Liu H (2021) Hierarchical correlation siamese network for real-time object tracking. Applied Intell 51(6):3202\u20133211","journal-title":"Applied Intell"},{"key":"4741_CR12","doi-asserted-by":"crossref","unstructured":"Zhang T, Liu X, Zhang Q, Han J (2021) Siamcda: Complementarity-and distractor-aware rgb-t tracking based on siamese network. IEEE Transactions on Circuits and Systems for Video Technology","DOI":"10.1109\/TCSVT.2021.3072207"},{"key":"4741_CR13","doi-asserted-by":"crossref","unstructured":"He F, Chen M, Chen X, Han J, Bai L (2022) Siamdl: Siamese dual-level fusion attention network for rgbt tracking. Available at SSRN 4209345","DOI":"10.2139\/ssrn.4209345"},{"key":"4741_CR14","doi-asserted-by":"crossref","unstructured":"Cui Y, Jiang C, Wang L, Wu G (2022) Mixformer: End-to-end tracking with iterative mixed attention. In: Proceedings of the IEEE\/CVF Conference on computer vision and pattern recognition, pp 13608\u201313618","DOI":"10.1109\/CVPR52688.2022.01324"},{"key":"4741_CR15","doi-asserted-by":"crossref","unstructured":"Li Y, Yu AW, Meng T, Caine B, Ngiam J, Peng D, Shen J, Lu Y, Zhou D, Le QV, et al (2022) Deepfusion: Lidar-camera deep fusion for multi-modal 3d object detection. 
In: Proceedings of the IEEE\/CVF Conference on computer vision and pattern recognition, pp 17182\u201317191","DOI":"10.1109\/CVPR52688.2022.01667"},{"key":"4741_CR16","doi-asserted-by":"crossref","unstructured":"Liang J, Cao J, Sun G, Zhang K, Van Gool L, Timofte R (2021) Swinir: Image restoration using swin transformer. In: Proceedings of the IEEE\/CVF International conference on computer vision, pp 1833\u20131844","DOI":"10.1109\/ICCVW54120.2021.00210"},{"key":"4741_CR17","doi-asserted-by":"crossref","unstructured":"Chen X, Yan B, Zhu J, Wang D, Yang X, Lu H (2021)Transformer tracking. In: Proceedings of the IEEE\/CVF Conference on computer vision and pattern recognition, pp 8126\u20138135","DOI":"10.1109\/CVPR46437.2021.00803"},{"key":"4741_CR18","unstructured":"Dosovitskiy A, Beyer L, Kolesnikov A, Weissenborn D, Zhai X, Unterthiner T, Dehghani M, Minderer M, Heigold G, Gelly S (2021) An image is worth 16x16 words: Transformers for image recognition at scale. In: International conference on learning representations"},{"key":"4741_CR19","unstructured":"Vaswani A, Shazeer N, Parmar N, Uszkoreit J, Jones L, Gomez AN, Kaiser \u0141, Polosukhin I (2017) Attention is all you need. Advances in Neural Information Processing Systems 30"},{"key":"4741_CR20","doi-asserted-by":"crossref","unstructured":"Yan B, Peng H, Fu J, Wang D, Lu H (2021) Learning spatio-temporal transformer for visual tracking. In: Proceedings of the IEEE\/CVF International conference on computer vision, pp 10448\u201310457","DOI":"10.1109\/ICCV48922.2021.01028"},{"key":"4741_CR21","first-page":"16743","volume":"35","author":"L Lin","year":"2022","unstructured":"Lin L, Fan H, Zhang Z, Xu Y, Ling H (2022) Swintrack: A simple and strong baseline for transformer tracking. 
Adv Neural Inf Process Syst 35:16743\u201316754","journal-title":"Adv Neural Inf Process Syst"},{"key":"4741_CR22","doi-asserted-by":"crossref","unstructured":"Liu Z, Lin Y, Cao Y, Hu H, Wei Y, Zhang Z, Lin S, Guo B (2021) Swin transformer: Hierarchical vision transformer using shifted windows. In: Proceedings of the IEEE\/CVF International conference on computer vision, pp 10012\u201310022","DOI":"10.1109\/ICCV48922.2021.00986"},{"key":"4741_CR23","doi-asserted-by":"crossref","unstructured":"Wu H, Xiao B, Codella N, Liu M, Dai X, Yuan L, Zhang L (2021) Cvt: Introducing convolutions to vision transformers. In: Proceedings of the IEEE\/CVF International conference on computer vision, pp. 22\u201331","DOI":"10.1109\/ICCV48922.2021.00009"},{"key":"4741_CR24","unstructured":"Ba JL, Kiros JR, Hinton GE (2016) Layer normalization. arXiv:1607.06450"},{"key":"4741_CR25","doi-asserted-by":"crossref","unstructured":"Chollet F (2017) Xception: Deep learning with depthwise separable convolutions. In: Proceedings of the IEEE Conference on computer vision and pattern recognition, pp 1251\u20131258","DOI":"10.1109\/CVPR.2017.195"},{"key":"4741_CR26","doi-asserted-by":"crossref","unstructured":"He K, Zhang X, Ren S, Sun J (2016) Deep residual learning for image recognition. In: Proceedings of the IEEE Conference on computer vision and pattern recognition, pp 770\u2013778","DOI":"10.1109\/CVPR.2016.90"},{"key":"4741_CR27","unstructured":"Kingma DP and Ba J (2015) Adam: A method for stochastic optimization. ICLR"},{"key":"4741_CR28","doi-asserted-by":"crossref","unstructured":"Rezatofighi H, Tsoi N, Gwak J, Sadeghian A, Reid I, Savarese S (2019) Generalized intersection over union: A metric and a loss for bounding box regression. 
In: Proceedings of the IEEE\/CVF Conference on computer vision and pattern recognition, pp 658\u2013666","DOI":"10.1109\/CVPR.2019.00075"},{"key":"4741_CR29","doi-asserted-by":"crossref","unstructured":"Carion N, Massa F, Synnaeve G, Usunier N, Kirillov A, Zagoruyko S (2020) End-to-end object detection with transformers. In: European conference on computer vision, pp 213\u2013229","DOI":"10.1007\/978-3-030-58452-8_13"},{"key":"4741_CR30","doi-asserted-by":"crossref","unstructured":"Lin T-Y, Maire M, Belongie S, Hays J, Perona P, Ramanan D, Doll\u00e1r P, Zitnick CL (2014) Microsoft coco: Common objects in context. In: European conference on computer vision, pp 740\u2013755","DOI":"10.1007\/978-3-319-10602-1_48"},{"key":"4741_CR31","doi-asserted-by":"crossref","unstructured":"Muller M, Bibi A, Giancola S, Alsubaihi S, Ghanem B (2018) Trackingnet: A large-scale dataset and benchmark for object tracking in the wild. In: Proceedings of the European Conference on Computer Vision (ECCV), pp 300\u2013317","DOI":"10.1007\/978-3-030-01246-5_19"},{"key":"4741_CR32","doi-asserted-by":"crossref","unstructured":"Fan H, Lin L, Yang F, Chu P, Deng G, Yu S, Bai H, Xu Y, Liao C, Ling H (2020) Lasot: A high-quality benchmark for large-scale single object tracking. In: IEEE\/CVF Conference on computer vision and pattern recognition","DOI":"10.1109\/CVPR.2019.00552"},{"key":"4741_CR33","doi-asserted-by":"crossref","unstructured":"Huang L, Zhao X, Huang K (2022) Got-10k: A large high-diversity benchmark for generic object tracking in the wild. IEEE Transactions on Pattern Analysis and Machine Intelligence","DOI":"10.1109\/TPAMI.2019.2957464"},{"key":"4741_CR34","doi-asserted-by":"publisher","unstructured":"Li C, Cheng H, Hu S, Liu X, Tang J, Lin L (2016) Learning collaborative sparse representation for grayscale-thermal tracking. IEEE Transactions on Image Processing, pp 5743\u20135756 . 
https:\/\/doi.org\/10.1109\/tip.2016.2614135","DOI":"10.1109\/tip.2016.2614135"},{"key":"4741_CR35","doi-asserted-by":"crossref","unstructured":"Li C, Zhao N, Lu Y, Zhu C, Tang J (2017) Weighted sparse representation regularized graph learning for rgb-t object tracking. In: Proceedings of the ACM International conference on multimedia, pp 1856\u20131864","DOI":"10.1145\/3123266.3123289"},{"key":"4741_CR36","doi-asserted-by":"publisher","first-page":"106977","DOI":"10.1016\/j.patcog.2019.106977","volume":"96","author":"C Li","year":"2019","unstructured":"Li C, Liang X, Lu Y, Zhao N, Tang J (2019) Rgb-t object tracking: Benchmark and baseline. Pattern Recognit 96:106977","journal-title":"Pattern Recognit"},{"key":"4741_CR37","doi-asserted-by":"publisher","first-page":"392","DOI":"10.1109\/TIP.2021.3130533","volume":"31","author":"C Li","year":"2021","unstructured":"Li C, Xue W, Jia Y, Qu Z, Luo B, Tang J, Sun D (2021) Lasher: A large-scale high-diversity benchmark for rgbt tracking. IEEE Trans Image Process 31:392\u2013404","journal-title":"IEEE Trans Image Process"},{"key":"4741_CR38","unstructured":"Pengyu Z, Zhao J, Wang D, Lu H, Ruan X (2022) Visible-thermal uav tracking: A large-scale benchmark and new baseline. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition"},{"key":"4741_CR39","doi-asserted-by":"crossref","unstructured":"Wang C, Xu C, Cui Z, Zhou L, Zhang T, Zhang X, Yang J (2020) Cross-modal pattern-propagation for rgb-t tracking. In: Proceedings of the IEEE\/CVF Conference on computer vision and pattern recognition, pp 7064\u20137073","DOI":"10.1109\/CVPR42600.2020.00709"},{"key":"4741_CR40","doi-asserted-by":"crossref","unstructured":"Zhang P, Zhao J, Bo C, Wang D, Lu H, Yang X (2021) Jointly modeling motion and appearance cues for robust rgb-t tracking. 
IEEE Transactions on Image Processing","DOI":"10.1109\/TIP.2021.3060862"},{"key":"4741_CR41","doi-asserted-by":"publisher","first-page":"5613","DOI":"10.1109\/TIP.2021.3087341","volume":"30","author":"A Lu","year":"2021","unstructured":"Lu A, Li C, Yan Y, Tang J, Luo B (2021) Rgbt tracking via multi-adapter network with hierarchical divergence loss. IEEE Trans Image Process 30:5613\u20135625","journal-title":"IEEE Trans Image Process"},{"key":"4741_CR42","doi-asserted-by":"crossref","unstructured":"Tu Z, Lin C, Zhao W, Li C, Tang J (2022) M5l: Multi-modal multi-margin metric learning for rgbt tracking. IEEE Transactions on Image Processing","DOI":"10.1109\/TIP.2021.3125504"},{"key":"4741_CR43","doi-asserted-by":"crossref","unstructured":"Danelljan M, Robinson A, Shahbaz Khan F, Felsberg M (2016) Beyond correlation filters: Learning continuous convolution operators for visual tracking. In: European Conference on Computer Vision, pp 472\u2013488","DOI":"10.1007\/978-3-319-46454-1_29"},{"key":"4741_CR44","doi-asserted-by":"crossref","unstructured":"Lu A, Qian C, Li C, Tang J, Wang L (2022) Duality-gated mutual condition network for rgbt tracking. IEEE Transactions on Neural Networks and Learning Systems","DOI":"10.1109\/TNNLS.2022.3157594"},{"issue":"2","key":"4741_CR45","doi-asserted-by":"publisher","first-page":"393","DOI":"10.3390\/s20020393","volume":"20","author":"H Zhang","year":"2020","unstructured":"Zhang H, Zhang L, Zhuo L, Zhang J (2020) Object tracking in rgb-t videos using modal-aware attention network and competitive learning. Sensors 20(2):393","journal-title":"Sensors"},{"key":"4741_CR46","doi-asserted-by":"crossref","unstructured":"Zhang L, Danelljan M, Gonzalez-Garcia A, van de Weijer J, Shahbaz Khan F (2019) Multi-modal fusion for end-to-end rgb-t tracking. 
In: Proceedings of the IEEE\/CVF International conference on computer vision workshops","DOI":"10.1109\/ICCVW.2019.00278"},{"key":"4741_CR47","unstructured":"Kristan M, Matas J, Leonardis A, Felsberg M, Pflugfelder R, Kamarainen J-K, Cehovin Zajc L, Drbohlav O, Lukezic A, Berg A, et al (2019) The seventh visual object tracking VOT2019 challenge results. In: Proceedings of the IEEE\/CVF International conference on computer vision workshops"},{"key":"4741_CR48","doi-asserted-by":"crossref","unstructured":"Feng M and Su J (2022) Learning reliable modal weight with transformer for robust rgbt tracking. Knowledge-Based Systems, 108945","DOI":"10.1016\/j.knosys.2022.108945"},{"key":"4741_CR49","doi-asserted-by":"crossref","unstructured":"Zhang L, Gonzalez-Garcia A, Weijer Jvd, Danelljan M, Khan FS (2019) Learning the model update for siamese trackers. In: The IEEE International Conference on Computer Vision (ICCV)","DOI":"10.1109\/ICCV.2019.00411"}],"container-title":["Applied Intelligence"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10489-023-04741-y.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10489-023-04741-y\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10489-023-04741-y.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,10,23]],"date-time":"2023-10-23T14:08:25Z","timestamp":1698070105000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10489-023-04741-y"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,7,28]]},"references-count":49,"journal-issue":{"issue":"21","published-print":{"date-parts":[[2023,11]]}},"alternative-id":["4741"],"URL":"htt
ps:\/\/doi.org\/10.1007\/s10489-023-04741-y","relation":{},"ISSN":["0924-669X","1573-7497"],"issn-type":[{"value":"0924-669X","type":"print"},{"value":"1573-7497","type":"electronic"}],"subject":[],"published":{"date-parts":[[2023,7,28]]},"assertion":[{"value":"26 May 2023","order":1,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"28 July 2023","order":2,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"All authors certify that they have no affiliations with or involvement in any organization or entity with any financial interest or non-financial interest in the subject matter or materials discussed in this manuscript.","order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}}]}}