{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,15]],"date-time":"2026-06-15T14:56:11Z","timestamp":1781535371795,"version":"3.54.5"},"publisher-location":"New York, NY, USA","reference-count":48,"publisher":"ACM","license":[{"start":{"date-parts":[[2026,6,15]],"date-time":"2026-06-15T00:00:00Z","timestamp":1781481600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/legalcode"}],"funder":[{"name":"National Natural Science Foundation of China","award":["62202362"],"award-info":[{"award-number":["62202362"]}]},{"name":"National Natural Science Foundation of China","award":["62302073"],"award-info":[{"award-number":["62302073"]}]},{"name":"Guangdong Natural Science Fund for Distinguished Young Scholar","award":["2023B1515020079"],"award-info":[{"award-number":["2023B1515020079"]}]},{"name":"Guangdong Basic and Applied Basic Research Foundation","award":["2024A1515011626"],"award-info":[{"award-number":["2024A1515011626"]}]},{"name":"Guangdong Basic and Applied Basic Research Foundation","award":["2025A1515012949"],"award-info":[{"award-number":["2025A1515012949"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2026,6,16]]},"DOI":"10.1145\/3805622.3810664","type":"proceedings-article","created":{"date-parts":[[2026,6,15]],"date-time":"2026-06-15T14:42:57Z","timestamp":1781534577000},"page":"1450-1458","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["DINOTrack: Leverage Differential Attention for Noise-Aware Visual Tracking with DINOv3"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0004-3865-6168","authenticated-orcid":false,"given":"Wei","family":"Xu","sequence":"first","affiliation":[{"name":"Guangzhou Institute of Technology, Xidian University, Guangzhou, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0000-9834-524X","authenticated-orcid":false,"given":"Gu","family":"Geng","sequence":"additional","affiliation":[{"name":"Guangzhou Institute of Technology, Xidian University, Guangzhou, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9403-1112","authenticated-orcid":false,"given":"Di","family":"Yuan","sequence":"additional","affiliation":[{"name":"Guangzhou Institute of Technology, Xidian University, Guangzhou, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3690-7902","authenticated-orcid":false,"given":"Rui","family":"Chen","sequence":"additional","affiliation":[{"name":"Guangzhou Institute of Technology, Xidian University, Guangzhou, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0885-7976","authenticated-orcid":false,"given":"Qiao","family":"Liu","sequence":"additional","affiliation":[{"name":"National Center for Applied Mathematics in Chongqing, Chongqing Normal University, Chongqing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2026,6,15]]},"reference":[{"key":"e_1_3_3_1_2_2","unstructured":"Hangbo Bao Li Dong Songhao Piao and Furu Wei. 2021. Beit: Bert pre-training of image transformers. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2106.08254 (2021)."},{"key":"e_1_3_3_1_3_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-48881-3_56"},{"key":"e_1_3_3_1_4_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00628"},{"key":"e_1_3_3_1_5_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00951"},{"key":"e_1_3_3_1_6_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-20047-2_22"},{"key":"e_1_3_3_1_7_2","first-page":"1597","volume-title":"International conference on machine learning","author":"Chen Ting","year":"2020","unstructured":"Ting Chen, Simon Kornblith, Mohammad Norouzi, and Geoffrey Hinton. 2020. A simple framework for contrastive learning of visual representations. In International conference on machine learning. PmLR, 1597\u20131607."},{"key":"e_1_3_3_1_8_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00803"},{"key":"e_1_3_3_1_9_2","doi-asserted-by":"crossref","unstructured":"Yutao Cui Cheng Jiang Gangshan Wu and Limin Wang. 2024. MixFormer: End-to-End Tracking With Iterative Mixed Attention. IEEE Transactions on Pattern Analysis and Machine Intelligence 46 6 (2024) 4129\u20134146.","DOI":"10.1109\/TPAMI.2024.3349519"},{"key":"e_1_3_3_1_10_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.733"},{"key":"e_1_3_3_1_11_2","unstructured":"Alexey Dosovitskiy. 2020. An image is worth 16x16 words: Transformers for image recognition at scale. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2010.11929 (2020)."},{"key":"e_1_3_3_1_12_2","doi-asserted-by":"crossref","unstructured":"Heng Fan Hexin Bai Liting Lin Fan Yang Peng Chu Ge Deng Sijia Yu Harshit Mingzhen Huang Juehuan Liu et\u00a0al. 2021. Lasot: A high-quality large-scale single object tracking benchmark. International Journal of Computer Vision 129 2 (2021) 439\u2013461.","DOI":"10.1007\/s11263-020-01387-y"},{"key":"e_1_3_3_1_13_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00552"},{"key":"e_1_3_3_1_14_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-20047-2_9"},{"key":"e_1_3_3_1_15_2","volume-title":"First conference on language modeling","author":"Gu Albert","year":"2024","unstructured":"Albert Gu and Tri Dao. 2024. Mamba: Linear-time sequence modeling with selective state spaces. In First conference on language modeling."},{"key":"e_1_3_3_1_16_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01553"},{"key":"e_1_3_3_1_17_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00975"},{"key":"e_1_3_3_1_18_2","unstructured":"Edward\u00a0J Hu Yelong Shen Phillip Wallis Zeyuan Allen-Zhu Yuanzhi Li Shean Wang Lu Wang Weizhu Chen et\u00a0al. 2022. Lora: Low-rank adaptation of large language models. ICLR 1 2 (2022) 3."},{"key":"e_1_3_3_1_19_2","doi-asserted-by":"crossref","unstructured":"Lianghua Huang Xin Zhao and Kaiqi Huang. 2019. Got-10k: A large high-diversity benchmark for generic object tracking in the wild. IEEE transactions on pattern analysis and machine intelligence 43 5 (2019) 1562\u20131577.","DOI":"10.1109\/TPAMI.2019.2957464"},{"key":"e_1_3_3_1_20_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19827-4_41"},{"key":"e_1_3_3_1_21_2","doi-asserted-by":"publisher","DOI":"10.1109\/HST47167.2019.9032954"},{"key":"e_1_3_3_1_22_2","doi-asserted-by":"publisher","DOI":"10.1201\/9781420037807"},{"key":"e_1_3_3_1_23_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00441"},{"key":"e_1_3_3_1_24_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00935"},{"key":"e_1_3_3_1_25_2","doi-asserted-by":"crossref","unstructured":"Chenglong Li Wanlin Xue Yaqing Jia Zhichen Qu Bin Luo Jin Tang and Dengdi Sun. 2021. LasHeR: A large-scale high-diversity benchmark for RGBT tracking. IEEE Transactions on Image Processing 31 (2021) 392\u2013404.","DOI":"10.1109\/TIP.2021.3130533"},{"key":"e_1_3_3_1_26_2","doi-asserted-by":"crossref","unstructured":"Donghai Liao Xiu Shu Zhihui Li Qiao Liu Di Yuan Xiaojun Chang and Zhenyu He. 2025. Fine-Grained Feature and Template Reconstruction for TIR Object Tracking. IEEE Transactions on Circuits and Systems for Video Technology 35 9 (2025) 9276\u20139286.","DOI":"10.1109\/TCSVT.2025.3556529"},{"key":"e_1_3_3_1_27_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.324"},{"key":"e_1_3_3_1_28_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-10602-1_48"},{"key":"e_1_3_3_1_29_2","unstructured":"Ilya Loshchilov and Frank Hutter. 2017. Decoupled weight decay regularization. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1711.05101 (2017)."},{"key":"e_1_3_3_1_30_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01319"},{"key":"e_1_3_3_1_31_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01246-5_19"},{"key":"e_1_3_3_1_32_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.465"},{"key":"e_1_3_3_1_33_2","unstructured":"Maxime Oquab Timoth\u00e9e Darcet Th\u00e9o Moutakanni Huy Vo Marc Szafraniec Vasil Khalidov Pierre Fernandez Daniel Haziza Francisco Massa Alaaeldin El-Nouby et\u00a0al. 2023. Dinov2: Learning robust visual features without supervision. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2304.07193 (2023)."},{"key":"e_1_3_3_1_34_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00075"},{"key":"e_1_3_3_1_35_2","unstructured":"Oriane Sim\u00e9oni Huy\u00a0V Vo Maximilian Seitzer Federico Baldassarre Maxime Oquab Cijo Jose Vasil Khalidov Marc Szafraniec Seungeun Yi Micha\u00ebl Ramamonjisoa et\u00a0al. 2025. Dinov3. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2508.10104 (2025)."},{"key":"e_1_3_3_1_36_2","unstructured":"Yutao Sun Li Dong Shaohan Huang Shuming Ma Yuqing Xia Jilong Xue Jianyong Wang and Furu Wei. 2023. Retentive network: A successor to transformer for large language models. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2307.08621 (2023)."},{"key":"e_1_3_3_1_37_2","unstructured":"Ashish Vaswani Noam Shazeer Niki Parmar Jakob Uszkoreit Llion Jones Aidan\u00a0N Gomez \u0141ukasz Kaiser and Illia Polosukhin. 2017. Attention is all you need. Advances in neural information processing systems 30 (2017)."},{"key":"e_1_3_3_1_38_2","unstructured":"Junke Wang Dongdong Chen Zuxuan Wu Chong Luo Xiyang Dai Lu Yuan and Yu-Gang Jiang. 2023. Omnitracker: Unifying object tracking by tracking-with-detection. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2303.12079 (2023)."},{"key":"e_1_3_3_1_39_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00162"},{"key":"e_1_3_3_1_40_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01355"},{"key":"e_1_3_3_1_41_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01028"},{"key":"e_1_3_3_1_42_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-20047-2_20"},{"key":"e_1_3_3_1_43_2","unstructured":"Tianzhu Ye Li Dong Yuqing Xia Yutao Sun Yi Zhu Gao Huang and Furu Wei. 2024. Differential transformer. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2410.05258 (2024)."},{"key":"e_1_3_3_1_44_2","doi-asserted-by":"crossref","unstructured":"Di Yuan Xiaojun Chang Po-Yao Huang Qiao Liu and Zhenyu He. 2021. Self-supervised deep correlation tracking. IEEE Transactions on Image Processing 30 (2021) 976\u2013985.","DOI":"10.1109\/TIP.2020.3037518"},{"key":"e_1_3_3_1_45_2","doi-asserted-by":"crossref","unstructured":"Di Yuan Xiaojun Chang Qiao Liu Yi Yang Dehua Wang Minglei Shu Zhenyu He and Guangming Shi. 2024. Active learning for deep visual tracking. IEEE Transactions on Neural Networks and Learning Systems 35 10 (2024) 13284\u201313296.","DOI":"10.1109\/TNNLS.2023.3266837"},{"key":"e_1_3_3_1_46_2","doi-asserted-by":"crossref","unstructured":"Haoran Zhang Jinyu Wang et\u00a0al. 2024. Query-Based Object Visual Tracking with Parallel Sequence. Sensors 24 15 (2024) 4802.","DOI":"10.3390\/s24154802"},{"key":"e_1_3_3_1_47_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00868"},{"key":"e_1_3_3_1_48_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01309"},{"key":"e_1_3_3_1_49_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58589-1_46"}],"event":{"name":"ICMR '26: International Conference on Multimedia Retrieval","location":"Amsterdam The Netherlands","acronym":"ICMR '26","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["Proceedings of the 2026 International Conference on Multimedia Retrieval"],"original-title":[],"deposited":{"date-parts":[[2026,6,15]],"date-time":"2026-06-15T14:48:34Z","timestamp":1781534914000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3805622.3810664"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,6,15]]},"references-count":48,"alternative-id":["10.1145\/3805622.3810664","10.1145\/3805622"],"URL":"https:\/\/doi.org\/10.1145\/3805622.3810664","relation":{},"subject":[],"published":{"date-parts":[[2026,6,15]]},"assertion":[{"value":"2026-06-15","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}