{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,8]],"date-time":"2026-06-08T15:58:16Z","timestamp":1780934296083,"version":"3.54.1"},"reference-count":45,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2026,12,1]],"date-time":"2026-12-01T00:00:00Z","timestamp":1796083200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2026,12,1]],"date-time":"2026-12-01T00:00:00Z","timestamp":1796083200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/legal\/tdmrep-license"},{"start":{"date-parts":[[2026,12,1]],"date-time":"2026-12-01T00:00:00Z","timestamp":1796083200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-017"},{"start":{"date-parts":[[2026,12,1]],"date-time":"2026-12-01T00:00:00Z","timestamp":1796083200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"},{"start":{"date-parts":[[2026,12,1]],"date-time":"2026-12-01T00:00:00Z","timestamp":1796083200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-012"},{"start":{"date-parts":[[2026,12,1]],"date-time":"2026-12-01T00:00:00Z","timestamp":1796083200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2026,12,1]],"date-time":"2026-12-01T00:00:00Z","timestamp":1796083200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-004"}],"funder":[{"DOI":"10.13039\/501100010814","name":"Anhui Province Department of Education","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100010814","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Pattern Recognition"],"published-print":{"date-parts":[[2026,12]]},"DOI":"10.1016\/j.patcog.2026.113991","type":"journal-article","created":{"date-parts":[[2026,5,22]],"date-time":"2026-05-22T23:29:46Z","timestamp":1779492586000},"page":"113991","update-policy":"https:\/\/doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":0,"special_numbering":"PA","title":["Temporal multimodal knowledge distillation for modality-missing RGBT tracking"],"prefix":"10.1016","volume":"180","author":[{"given":"Rui","family":"Ruan","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Yunlong","family":"Kang","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Lei","family":"Liu","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-7006-8584","authenticated-orcid":false,"given":"Jingpeng","family":"Sun","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Chenglong","family":"Li","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Jin","family":"Tang","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"78","reference":[{"key":"10.1016\/j.patcog.2026.113991_b1","doi-asserted-by":"crossref","unstructured":"T. Hui, Z. Xun, F. Peng, J. Huang, X. Wei, X. Wei, J. Dai, J. Han, S. Liu, Bridging search region interaction with template for RGB-T tracking, in: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 2023, pp. 13630\u201313639.","DOI":"10.1109\/CVPR52729.2023.01310"},{"key":"10.1016\/j.patcog.2026.113991_b2","doi-asserted-by":"crossref","unstructured":"T. Zhang, H. Guo, Q. Jiao, Q. Zhang, J. Han, Efficient rgb-t tracking via cross-modality distillation, in: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 2023, pp. 5404\u20135413.","DOI":"10.1109\/CVPR52729.2023.00523"},{"key":"10.1016\/j.patcog.2026.113991_b3","doi-asserted-by":"crossref","first-page":"112295","DOI":"10.1016\/j.patcog.2025.112295","article-title":"RGBT tracking via supervised mutual guiding","volume":"171","author":"Liu","year":"2026","journal-title":"Pattern Recognit."},{"key":"10.1016\/j.patcog.2026.113991_b4","series-title":"Deep multimodal learning with missing modality: A survey","author":"Wu","year":"2024"},{"key":"10.1016\/j.patcog.2026.113991_b5","first-page":"1","article-title":"Modality-missing RGBT tracking: Invertible prompt learning and high-quality benchmarks","author":"Lu","year":"2024","journal-title":"Int. J. Comput. Vis."},{"key":"10.1016\/j.patcog.2026.113991_b6","doi-asserted-by":"crossref","unstructured":"Y. Zhang, N. He, J. Yang, Y. Li, D. Wei, Y. Huang, Y. Zhang, Z. He, Y. Zheng, mmformer: Multimodal medical transformer for incomplete multimodal learning of brain tumor segmentation, in: Proceedings of the International Conference on Medical Image Computing and Computer-Assisted Intervention, 2022, pp. 107\u2013117.","DOI":"10.1007\/978-3-031-16443-9_11"},{"key":"10.1016\/j.patcog.2026.113991_b7","first-page":"10074","article-title":"A unified self-distillation framework for multimodal sentiment analysis with uncertain missing modalities","volume":"vol. 38","author":"Li","year":"2024"},{"key":"10.1016\/j.patcog.2026.113991_b8","first-page":"927","article-title":"Bi-directional adapter for multimodal tracking","volume":"vol. 38","author":"Cao","year":"2024"},{"key":"10.1016\/j.patcog.2026.113991_b9","series-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","first-page":"11953","article-title":"Decoupled knowledge distillation","author":"Zhao","year":"2022"},{"key":"10.1016\/j.patcog.2026.113991_b10","doi-asserted-by":"crossref","first-page":"106977","DOI":"10.1016\/j.patcog.2019.106977","article-title":"RGB-t object tracking: Benchmark and baseline","volume":"96","author":"Li","year":"2019","journal-title":"Pattern Recognit."},{"key":"10.1016\/j.patcog.2026.113991_b11","doi-asserted-by":"crossref","first-page":"392","DOI":"10.1109\/TIP.2021.3130533","article-title":"LasHeR: A large-scale high-diversity benchmark for RGBT tracking","volume":"31","author":"Li","year":"2021","journal-title":"IEEE Trans. Image Process."},{"key":"10.1016\/j.patcog.2026.113991_b12","series-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","first-page":"8886","article-title":"Visible-thermal UAV tracking: A large-scale benchmark and new baseline","author":"Zhang","year":"2022"},{"key":"10.1016\/j.patcog.2026.113991_b13","series-title":"Proceedings of the International Joint Conference on Artificial Intelligence","first-page":"909","article-title":"Template-based uncertainty multimodal fusion network for RGBT tracking","author":"Ding","year":"2025"},{"key":"10.1016\/j.patcog.2026.113991_b14","doi-asserted-by":"crossref","first-page":"110984","DOI":"10.1016\/j.patcog.2024.110984","article-title":"UniRTL: A universal RGBT and low-light benchmark for object tracking","volume":"158","author":"Zhang","year":"2025","journal-title":"Pattern Recognit."},{"key":"10.1016\/j.patcog.2026.113991_b15","doi-asserted-by":"crossref","unstructured":"H. Nam, B. Han, Learning Multi-domain Convolutional Neural Networks for Visual Tracking, in: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 2016, pp. 4293\u20134302.","DOI":"10.1109\/CVPR.2016.465"},{"key":"10.1016\/j.patcog.2026.113991_b16","doi-asserted-by":"crossref","first-page":"111330","DOI":"10.1016\/j.patcog.2024.111330","article-title":"Visible thermal multiple object tracking: Large-scale video dataset and progressive fusion approach","volume":"161","author":"Zhu","year":"2025","journal-title":"Pattern Recognit."},{"issue":"3","key":"10.1016\/j.patcog.2026.113991_b17","doi-asserted-by":"crossref","first-page":"1403","DOI":"10.1109\/TCSVT.2021.3072207","article-title":"SiamCDA: Complementarity- and distractor-aware RGB-T tracking based on siamese network","volume":"32","author":"Zhang","year":"2022","journal-title":"IEEE Trans. Circuits Syst. Video Technol."},{"key":"10.1016\/j.patcog.2026.113991_b18","doi-asserted-by":"crossref","first-page":"105473","DOI":"10.1016\/j.dsp.2025.105473","article-title":"Cross-modal information interaction of binocular predictive networks for RGBT tracking","volume":"168","author":"Chen","year":"2026","journal-title":"Digit. Signal Process."},{"key":"10.1016\/j.patcog.2026.113991_b19","doi-asserted-by":"crossref","first-page":"1753","DOI":"10.1109\/TIP.2024.3371355","article-title":"RGBT tracking via challenge-based appearance disentanglement and interaction","volume":"33","author":"Liu","year":"2024","journal-title":"IEEE Trans. Image Process."},{"key":"10.1016\/j.patcog.2026.113991_b20","series-title":"Proceedings of the IEEE\/CVF International Conference on Computer Vision Workshops","first-page":"2262","article-title":"Multi-adapter RGBT tracking","author":"Long Li","year":"2019"},{"key":"10.1016\/j.patcog.2026.113991_b21","doi-asserted-by":"crossref","unstructured":"R. Hou, T. Ren, G. Wu, MIRNet: A Robust RGBT Tracking Jointly with Multi-Modal Interaction and Refinement, in: Proceedings of the IEEE International Conference on Multimedia and Expo, 2022, pp. 1\u20136.","DOI":"10.1109\/ICME52920.2022.9860018"},{"key":"10.1016\/j.patcog.2026.113991_b22","doi-asserted-by":"crossref","unstructured":"R. Hou, B. Xu, T. Ren, G. Wu, MTNet: Learning Modality-aware Representation with Transformer for RGBT Tracking, in: Proceedings of the IEEE International Conference on Multimedia and Expo, 2023, pp. 1163\u20131168.","DOI":"10.1109\/ICME55011.2023.00203"},{"key":"10.1016\/j.patcog.2026.113991_b23","doi-asserted-by":"crossref","unstructured":"L. Zhang, M. Danelljan, A. Gonzalez-Garcia, J. Van De Weijer, F. Shahbaz Khan, Multi-modal fusion for end-to-end RGB-T tracking, in: Proceedings of the IEEE\/CVF International Conference on Computer Vision Workshops, 2019, pp. 2252\u20132261.","DOI":"10.1109\/ICCVW.2019.00278"},{"key":"10.1016\/j.patcog.2026.113991_b24","first-page":"17117","article-title":"Incomplete multimodality-diffused emotion recognition","volume":"vol. 36","author":"Wang","year":"2023"},{"key":"10.1016\/j.patcog.2026.113991_b25","first-page":"1657","article-title":"M3AE: Multimodal representation learning for brain tumor segmentation with missing modalities","volume":"vol. 37","author":"Liu","year":"2023"},{"key":"10.1016\/j.patcog.2026.113991_b26","series-title":"Proceedings of the Chinese Conference on Pattern Recognition and Computer Vision","first-page":"671","article-title":"Information lossless multi-modal image generation for RGB-T tracking","author":"Li","year":"2022"},{"key":"10.1016\/j.patcog.2026.113991_b27","series-title":"Proceedings of the Medical Image Computing and Computer Assisted Intervention","first-page":"447","article-title":"Robust multimodal brain tumor segmentation via feature disentanglement and gated fusion","author":"Chen","year":"2019"},{"key":"10.1016\/j.patcog.2026.113991_b28","doi-asserted-by":"crossref","unstructured":"S. Wei, C. Luo, Y. Luo, MMANet: Margin-aware distillation and modality-aware regularization for incomplete multimodal learning, in: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 2023, pp. 20039\u201320049.","DOI":"10.1109\/CVPR52729.2023.01919"},{"key":"10.1016\/j.patcog.2026.113991_b29","doi-asserted-by":"crossref","first-page":"3592","DOI":"10.1109\/TASLP.2021.3129331","article-title":"Multimodal emotion recognition with temporal and semantic consistency","volume":"29","author":"Chen","year":"2021","journal-title":"IEEE Trans. Audio Speech Lang. Process."},{"issue":"4","key":"10.1016\/j.patcog.2026.113991_b30","doi-asserted-by":"crossref","first-page":"1856","DOI":"10.1109\/TAFFC.2024.3378570","article-title":"Contrastive learning based modality-invariant feature acquisition for robust multimodal emotion recognition with missing modalities","volume":"15","author":"Liu","year":"2024","journal-title":"IEEE Trans. Affect. Comput."},{"key":"10.1016\/j.patcog.2026.113991_b31","doi-asserted-by":"crossref","unstructured":"J. Wang, Y. Chen, Z. Zheng, X. Li, M.-M. Cheng, Q. Hou, CrossKD: Cross-head knowledge distillation for object detection, in: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 2024, pp. 16520\u201316530.","DOI":"10.1109\/CVPR52733.2024.01563"},{"key":"10.1016\/j.patcog.2026.113991_b32","first-page":"65299","article-title":"Knowledge diffusion for distillation","volume":"vol. 36","author":"Huang","year":"2023"},{"key":"10.1016\/j.patcog.2026.113991_b33","doi-asserted-by":"crossref","unstructured":"S. Wei, C. Luo, Y. Luo, Scaled decoupled distillation, in: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 2024, pp. 15975\u201315983.","DOI":"10.1109\/CVPR52733.2024.01512"},{"key":"10.1016\/j.patcog.2026.113991_b34","doi-asserted-by":"crossref","unstructured":"A. Lu, J. Zhao, C. Li, Y. Xiao, B. Luo, Breaking modality gap in RGBT tracking: Coupled knowledge distillation, in: Proceedings of the ACM International Conference on Multimedia, 2024, pp. 9291\u20139300.","DOI":"10.1145\/3664647.3680878"},{"key":"10.1016\/j.patcog.2026.113991_b35","doi-asserted-by":"crossref","unstructured":"B. Ye, H. Chang, B. Ma, S. Shan, X. Chen, Joint feature learning and relation modeling for tracking: A one-stream framework, in: Proceedings of the European Conference on Computer Vision, 2022, pp. 341\u2013357.","DOI":"10.1007\/978-3-031-20047-2_20"},{"key":"10.1016\/j.patcog.2026.113991_b36","doi-asserted-by":"crossref","unstructured":"Q. Wu, T. Yang, Z. Liu, B. Wu, Y. Shan, A.B. Chan, Dropmae: Masked autoencoders with spatial-attention dropout for tracking tasks, in: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 2023, pp. 14561\u201314571.","DOI":"10.1109\/CVPR52729.2023.01399"},{"key":"10.1016\/j.patcog.2026.113991_b37","doi-asserted-by":"crossref","unstructured":"C.L. Li, A. Lu, A.H. Zheng, Z. Tu, J. Tang, Multi-adapter RGBT tracking, in: Proceedings of the IEEE\/CVF International Conference on Computer Vision Workshops, 2019, pp. 2262\u20132270.","DOI":"10.1109\/ICCVW.2019.00279"},{"key":"10.1016\/j.patcog.2026.113991_b38","doi-asserted-by":"crossref","unstructured":"C. Li, L. Liu, A. Lu, Q. Ji, J. Tang, Challenge-aware RGBT tracking, in: Proceedings of the European Conference on Computer Vision, 2020, pp. 222\u2013237.","DOI":"10.1007\/978-3-030-58542-6_14"},{"key":"10.1016\/j.patcog.2026.113991_b39","doi-asserted-by":"crossref","first-page":"2714","DOI":"10.1007\/s11263-021-01495-3","article-title":"Learning adaptive attribute-driven representation for real-time RGB-T tracking","volume":"129","author":"Zhang","year":"2021","journal-title":"Int. J. Comput. Vis."},{"issue":"3","key":"10.1016\/j.patcog.2026.113991_b40","doi-asserted-by":"crossref","first-page":"4118","DOI":"10.1109\/TNNLS.2022.3157594","article-title":"Duality-gated mutual condition network for RGBT tracking","volume":"36","author":"Lu","year":"2025","journal-title":"IEEE Trans. Neural Netw. Learn. Syst."},{"key":"10.1016\/j.patcog.2026.113991_b41","first-page":"2831","article-title":"Attribute-based progressive fusion network for rgbt tracking","volume":"vol. 36","author":"Xiao","year":"2022"},{"key":"10.1016\/j.patcog.2026.113991_b42","doi-asserted-by":"crossref","unstructured":"J. Zhu, S. Lai, X. Chen, D. Wang, H. Lu, Visual prompt multi-modal tracking, in: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 2023, pp. 9516\u20139526.","DOI":"10.1109\/CVPR52729.2023.00918"},{"key":"10.1016\/j.patcog.2026.113991_b43","doi-asserted-by":"crossref","unstructured":"X. Hou, J. Xing, Y. Qian, Y. Guo, S. Xin, J. Chen, K. Tang, M. Wang, Z. Jiang, L. Liu, Y. Liu, SDSTrack: Self-Distillation Symmetric Adapter Learning for Multi-Modal Visual Object Tracking, in: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 2024, pp. 26541\u201326551.","DOI":"10.1109\/CVPR52733.2024.02507"},{"key":"10.1016\/j.patcog.2026.113991_b44","first-page":"2239","article-title":"Sutrack: Towards simple and unified single object tracking","volume":"vol. 39","author":"Chen","year":"2025"},{"key":"10.1016\/j.patcog.2026.113991_b45","doi-asserted-by":"crossref","unstructured":"Y. Tan, J. Shao, E. Zamfir, R. Li, Z. An, C. Ma, D. Paudel, L. Van Gool, R. Timofte, Z. Wu, What You Have is What You Track: Adaptive and Robust Multimodal Tracking, in: Proceedings of the IEEE\/CVF International Conference on Computer Vision, 2025, pp. 3455\u20133465.","DOI":"10.1109\/ICCV51701.2025.00330"}],"container-title":["Pattern Recognition"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0031320326009568?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0031320326009568?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2026,6,8]],"date-time":"2026-06-08T15:06:09Z","timestamp":1780931169000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S0031320326009568"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,12]]},"references-count":45,"alternative-id":["S0031320326009568"],"URL":"https:\/\/doi.org\/10.1016\/j.patcog.2026.113991","relation":{},"ISSN":["0031-3203"],"issn-type":[{"value":"0031-3203","type":"print"}],"subject":[],"published":{"date-parts":[[2026,12]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"Temporal multimodal knowledge distillation for modality-missing RGBT tracking","name":"articletitle","label":"Article Title"},{"value":"Pattern Recognition","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.patcog.2026.113991","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2026 Elsevier Ltd. All rights are reserved, including those for text and data mining, AI training, and similar technologies.","name":"copyright","label":"Copyright"}],"article-number":"113991"}}