{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,11]],"date-time":"2026-03-11T17:43:24Z","timestamp":1773251004419,"version":"3.50.1"},"reference-count":43,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2026,4,1]],"date-time":"2026-04-01T00:00:00Z","timestamp":1775001600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2026,4,1]],"date-time":"2026-04-01T00:00:00Z","timestamp":1775001600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/legal\/tdmrep-license"},{"start":{"date-parts":[[2026,4,1]],"date-time":"2026-04-01T00:00:00Z","timestamp":1775001600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-017"},{"start":{"date-parts":[[2026,4,1]],"date-time":"2026-04-01T00:00:00Z","timestamp":1775001600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"},{"start":{"date-parts":[[2026,4,1]],"date-time":"2026-04-01T00:00:00Z","timestamp":1775001600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-012"},{"start":{"date-parts":[[2026,4,1]],"date-time":"2026-04-01T00:00:00Z","timestamp":1775001600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2026,4,1]],"date-time":"2026-04-01T00:00:00Z","timestamp":1775001600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-004"}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Image and Vision Computing"],"published-print":{"date-parts":[[2026,4]]},"DOI":"10.1016\/j.imavis.2026.105943","type":"journal-article","created":{"date-parts":[[2026,2,24]],"date-time":"2026-02-24T16:19:49Z","timestamp":1771949989000},"page":"105943","update-policy":"https:\/\/doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":0,"special_numbering":"C","title":["TCM-tracking: Real-time cross-modal interactive RGBT tracking based on temporal correlation"],"prefix":"10.1016","volume":"168","author":[{"given":"Yuhan","family":"Zhao","sequence":"first","affiliation":[]},{"given":"Yue","family":"Wu","sequence":"additional","affiliation":[]},{"given":"Kehan","family":"Cao","sequence":"additional","affiliation":[]},{"given":"Dongxiang","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"Bingyou","family":"Liu","sequence":"additional","affiliation":[]},{"given":"Guoyang","family":"Wan","sequence":"additional","affiliation":[]}],"member":"78","reference":[{"key":"10.1016\/j.imavis.2026.105943_b1","doi-asserted-by":"crossref","first-page":"4","DOI":"10.1007\/s10489-024-06057-x","article-title":"Channel enhanced cross-modality relation network for visible-infrared person re-identification","author":"Song","year":"2025","journal-title":"Appl. Intell."},{"key":"10.1016\/j.imavis.2026.105943_b2","doi-asserted-by":"crossref","DOI":"10.1016\/j.inffus.2024.102429","article-title":"Fusion-driven deep feature network for enhanced object detection and tracking in video surveillance systems","author":"Jain","year":"2024","journal-title":"Inf. Fusion"},{"key":"10.1016\/j.imavis.2026.105943_b3","doi-asserted-by":"crossref","first-page":"3187","DOI":"10.1109\/TIP.2024.3393298","article-title":"QueryTrack: Joint-modality query fusion network for RGBT tracking","author":"Fan","year":"2024","journal-title":"IEEE Trans. Image Process."},{"key":"10.1016\/j.imavis.2026.105943_b4","first-page":"393","article-title":"Object tracking in RGB-t videos using modal-aware attention network and competitive learning.","author":"Hui","year":"2020","journal-title":"Sensors (Basel, Switzerland)"},{"key":"10.1016\/j.imavis.2026.105943_b5","doi-asserted-by":"crossref","first-page":"5613","DOI":"10.1109\/TIP.2021.3087341","article-title":"RGBT tracking via multi-adapter network with hierarchical divergence loss","author":"Lu","year":"2021","journal-title":"IEEE Trans. Image Process.: A Publ. IEEE Signal Process. Soc."},{"key":"10.1016\/j.imavis.2026.105943_b6","series-title":"AAAI-22 Technical Tracks 3","article-title":"Attribute-based progressive fusion network for RGBT tracking","author":"Xiao","year":"2022"},{"key":"10.1016\/j.imavis.2026.105943_b7","doi-asserted-by":"crossref","DOI":"10.1016\/j.ymssp.2023.110753","article-title":"Holistically nested edge detection and particle filtering for subtle vibration extraction","volume":"204","author":"Valente","year":"2023","journal-title":"Mech. Syst. Signal Process."},{"key":"10.1016\/j.imavis.2026.105943_b8","doi-asserted-by":"crossref","DOI":"10.1016\/j.ymssp.2022.109233","article-title":"Streamlined particle filtering of phase-based magnified videos for quantified operational deflection shapes","volume":"177","author":"Valente","year":"2022","journal-title":"Mech. Syst. Signal Process."},{"key":"10.1016\/j.imavis.2026.105943_b9","series-title":"Very deep convolutional networks for large-scale image recognition","author":"Simonyan","year":"2014"},{"key":"10.1016\/j.imavis.2026.105943_b10","series-title":"Siamfc++: Towards robust and accurate visual tracking with target estimation guidelines","author":"Xu","year":"2020"},{"key":"10.1016\/j.imavis.2026.105943_b11","series-title":"Siamese infrared and visible light fusion network for RGB-t tracking","author":"Jingchao","year":"2021"},{"key":"10.1016\/j.imavis.2026.105943_b12","doi-asserted-by":"crossref","unstructured":"G. Bhat, M. Danelljan, L.V. Gool, R. Timofte, Learning Discriminative Model Prediction for Tracking, in: Proceedings of the IEEE\/CVF International Conference on Computer Vision, ICCV, 2019.","DOI":"10.1109\/ICCV.2019.00628"},{"key":"10.1016\/j.imavis.2026.105943_b13","first-page":"393","article-title":"Object tracking in RGB-t videos using modal-aware attention network and competitive learning.","author":"Hui","year":"2020","journal-title":"Sensors (Basel, Switzerland)"},{"key":"10.1016\/j.imavis.2026.105943_b14","doi-asserted-by":"crossref","first-page":"5613","DOI":"10.1109\/TIP.2021.3087341","article-title":"RGBT tracking via multi-adapter network with hierarchical divergence loss","author":"Lu","year":"2021","journal-title":"IEEE Trans. Image Process. : A Publ. IEEE Signal Process. Soc."},{"key":"10.1016\/j.imavis.2026.105943_b15","first-page":"1","article-title":"Duality-gated mutual condition network for RGBT tracking","author":"Lu","year":"2022","journal-title":"IEEE Trans. Neural Netw. Learn. Syst."},{"key":"10.1016\/j.imavis.2026.105943_b16","doi-asserted-by":"crossref","unstructured":"Y. Xiao, M. Yang, C. Li, L. Liu, J. Tang, Attribute-Based Progressive Fusion Network for RGB-T Tracking, in: Proceedings of the AAAI Conference on Artificial Intelligence, 2022, pp. 2831\u20132838.","DOI":"10.1609\/aaai.v36i3.20187"},{"key":"10.1016\/j.imavis.2026.105943_b17","series-title":"Dense feature aggregation and pruning for RGBT tracking","author":"Zhu","year":"2019"},{"key":"10.1016\/j.imavis.2026.105943_b18","series-title":"Multi-adapter RGBT tracking","author":"Li","year":"2019"},{"key":"10.1016\/j.imavis.2026.105943_b19","doi-asserted-by":"crossref","unstructured":"C. Wang, C. Xu, Z. Cui, L. Zhou, T. Zhang, X. Zhang, J. Yang, Cross-Modal Pattern-Propagation for RGB-T Tracking, in: IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 2020.","DOI":"10.1109\/CVPR42600.2020.00709"},{"key":"10.1016\/j.imavis.2026.105943_b20","doi-asserted-by":"crossref","first-page":"3335","DOI":"10.1109\/TIP.2021.3060862","article-title":"Jointly modeling motion and appearance cues for robust RGB-t tracking","author":"Zhang","year":"2021","journal-title":"IEEE Trans. Image Process.: A Publ. IEEE Signal Process. Soc."},{"key":"10.1016\/j.imavis.2026.105943_b21","doi-asserted-by":"crossref","first-page":"3822","DOI":"10.1109\/TITS.2022.3229830","article-title":"Dynamic fusion network for RGBT tracking","author":"Peng","year":"2023","journal-title":"IEEE Trans. Intell. Transp. Syst."},{"key":"10.1016\/j.imavis.2026.105943_b22","first-page":"9516","article-title":"Visual prompt multi-modal tracking","author":"Zhu","year":"2023","journal-title":"Proc. IEEE\/CVF Conf. Comput. Vis. Pattern Recognit."},{"key":"10.1016\/j.imavis.2026.105943_b23","series-title":"SDSTrack: Self-distillation symmetric adapter learning for multi-modal visual object tracking","author":"Hou","year":"2024"},{"key":"10.1016\/j.imavis.2026.105943_b24","first-page":"222","article-title":"Challenge-aware RGBT tracking","author":"Li","year":"2020"},{"key":"10.1016\/j.imavis.2026.105943_b25","doi-asserted-by":"crossref","first-page":"121","DOI":"10.1109\/TIV.2020.2980735","article-title":"Quality-aware feature aggregation network for robust RGBT tracking","author":"Zhu","year":"2021","journal-title":"IEEE Transact. Intell. Vehicles"},{"key":"10.1016\/j.imavis.2026.105943_b26","series-title":"Visible-thermal UAV tracking: A large-scale benchmark and new baseline","author":"Zhang","year":"2022"},{"key":"10.1016\/j.imavis.2026.105943_b27","doi-asserted-by":"crossref","unstructured":"T. Hui, Z. Xun, F. Peng, J. Huang, X. Wei, X. Wei, J. Dai, J. Han, S. Liu, Bridging Search Region Interaction with Template for RGB-T Tracking, in: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 2023, pp. 13630\u201313639.","DOI":"10.1109\/CVPR52729.2023.01310"},{"key":"10.1016\/j.imavis.2026.105943_b28","series-title":"AFTER: Attention-based fusion router for RGBT tracking","author":"Lu","year":"2024"},{"key":"10.1016\/j.imavis.2026.105943_b29","doi-asserted-by":"crossref","DOI":"10.1016\/j.image.2019.115756","article-title":"Dsiammft: An RGB-t fusion tracking method via dynamic siamese networks using multi-layer feature fusion.","author":"Zhang","year":"2020","journal-title":"Signal Process., Image Commun."},{"key":"10.1016\/j.imavis.2026.105943_b30","series-title":"Temporal aggregation for adaptive RGBT tracking","author":"Tang","year":"2022"},{"key":"10.1016\/j.imavis.2026.105943_b31","series-title":"Pytorch: An imperative style, high-performance deep learning library","author":"Paszke","year":"2019"},{"key":"10.1016\/j.imavis.2026.105943_b32","series-title":"Joint feature learning and relation modeling for tracking: A one-stream framework","author":"Ye","year":"2022"},{"key":"10.1016\/j.imavis.2026.105943_b33","doi-asserted-by":"crossref","first-page":"439","DOI":"10.1007\/s11263-020-01387-y","article-title":"LaSOT: A high-quality large-scale single object tracking benchmark","author":"Fan","year":"2021","journal-title":"Int. J. Comput. Vis."},{"key":"10.1016\/j.imavis.2026.105943_b34","series-title":"Decoupled weight decay regularization","author":"Loshchilov","year":"2019"},{"issue":"12","key":"10.1016\/j.imavis.2026.105943_b35","doi-asserted-by":"crossref","first-page":"5743","DOI":"10.1109\/TIP.2016.2614135","article-title":"Learning collaborative sparse representation for grayscale-thermal tracking","volume":"25","author":"Li","year":"2016","journal-title":"IEEE Trans. Image Process."},{"key":"10.1016\/j.imavis.2026.105943_b36","doi-asserted-by":"crossref","unstructured":"C. Li, N. Zhao, Y. Lu, C. Zhu, J. Tang, Weighted Sparse Representation Regularized Graph Learning for RGB-T Object Tracking, in: Acm on Multimedia Conference, 2017, pp. 1856\u20131864.","DOI":"10.1145\/3123266.3123289"},{"key":"10.1016\/j.imavis.2026.105943_b37","doi-asserted-by":"crossref","DOI":"10.1016\/j.patcog.2019.106977","article-title":"RGB-t object tracking: Benchmark and baseline.","author":"Li","year":"2019","journal-title":"Pattern Recognit."},{"key":"10.1016\/j.imavis.2026.105943_b38","doi-asserted-by":"crossref","first-page":"392","DOI":"10.1109\/TIP.2021.3130533","article-title":"Lasher: A large-scale high-diversity benchmark for RGB-t tracking","volume":"31","author":"Li","year":"2021","journal-title":"IEEE Trans. Image Process."},{"key":"10.1016\/j.imavis.2026.105943_b39","unstructured":"Z. Pengyu, J. Zhao, D. Wang, H. Lu, X. Ruan, Visible-Thermal UAV Tracking: A Large-Scale Benchmark and New Baseline, in: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, 2022."},{"key":"10.1016\/j.imavis.2026.105943_b40","doi-asserted-by":"crossref","unstructured":"T. Zhang, H. Guo, Q. Jiao, Q. Zhang, J. Han, Efficient RGB-T tracking via cross-modality distillation, in: 2023 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Proceedings, 2023, pp. 5404\u20135413.","DOI":"10.1109\/CVPR52729.2023.00523"},{"key":"10.1016\/j.imavis.2026.105943_b41","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1109\/TIP.2018.2828326","article-title":"RGBT tracking via challenge-based appearance disentanglement and interaction","author":"Liu","year":"2024","journal-title":"IEEE Trans. Image Process."},{"key":"10.1016\/j.imavis.2026.105943_b42","series-title":"OneTracker: Unifying visual object tracking with foundation models and efficient tuning","author":"Hong","year":"2024"},{"key":"10.1016\/j.imavis.2026.105943_b43","series-title":"Single-model and any-modality for video object tracking","author":"Wu","year":"2023"}],"container-title":["Image and Vision Computing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0262885626000491?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0262885626000491?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2026,3,10]],"date-time":"2026-03-10T22:06:32Z","timestamp":1773180392000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S0262885626000491"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,4]]},"references-count":43,"alternative-id":["S0262885626000491"],"URL":"https:\/\/doi.org\/10.1016\/j.imavis.2026.105943","relation":{},"ISSN":["0262-8856"],"issn-type":[{"value":"0262-8856","type":"print"}],"subject":[],"published":{"date-parts":[[2026,4]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"TCM-tracking: Real-time cross-modal interactive RGBT tracking based on temporal correlation","name":"articletitle","label":"Article Title"},{"value":"Image and Vision Computing","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.imavis.2026.105943","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2026 Elsevier B.V. All rights are reserved, including those for text and data mining, AI training, and similar technologies.","name":"copyright","label":"Copyright"}],"article-number":"105943"}}