{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,7,17]],"date-time":"2026-07-17T15:15:33Z","timestamp":1784301333177,"version":"3.55.0"},"publisher-location":"New York, NY, USA","reference-count":46,"publisher":"ACM","content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,6,30]]},"DOI":"10.1145\/3731715.3733473","type":"proceedings-article","created":{"date-parts":[[2025,6,25]],"date-time":"2025-06-25T18:29:43Z","timestamp":1750876183000},"page":"1998-2002","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":4,"title":["DARTer: Dynamic Adaptive Representation Tracker for Nighttime UAV Tracking"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0008-3262-3049","authenticated-orcid":false,"given":"Xuzhao","family":"Li","sequence":"first","affiliation":[{"name":"Beijing Institute of Technology, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-2565-8857","authenticated-orcid":false,"given":"Xuchen","family":"Li","sequence":"additional","affiliation":[{"name":"Zhongguancun Academy, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5872-7566","authenticated-orcid":false,"given":"Shiyu","family":"Hu","sequence":"additional","affiliation":[{"name":"Nanyang Technological University, Singapore, Singapore"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2025,6,30]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.3390\/s19030652"},{"key":"e_1_3_2_1_2_1","volume-title":"An image is worth 16x16 words: Transformers for image recognition at scale. arXiv preprint arXiv","author":"Alexey Dosovitskiy","year":"2010","unstructured":"Dosovitskiy Alexey. 2020. An image is worth 16x16 words: Transformers for image recognition at scale. arXiv preprint arXiv: 2010.11929 (2020)."},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00628"},{"key":"e_1_3_2_1_4_1","volume-title":"HiFT: Hierarchical Feature Transformer for Aerial Tracking. In 2021 IEEE\/CVF International Conference on Computer Vision (ICCV). 15457--15466","author":"Cao Ziang","year":"2021","unstructured":"Ziang Cao, Changhong Fu, Junjie Ye, Bowen Li, and Yiming Li. 2021a. HiFT: Hierarchical Feature Transformer for Aerial Tracking. In 2021 IEEE\/CVF International Conference on Computer Vision (ICCV). 15457--15466."},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1109\/IROS51168.2021.9636309"},{"key":"e_1_3_2_1_6_1","volume-title":"TCTrack: Temporal Contexts for Aerial Tracking. 2022 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR)","author":"Cao Ziang","year":"2022","unstructured":"Ziang Cao, Ziyuan Huang, Liang Pan, Shiwei Zhang, Ziwei Liu, and Changhong Fu. 2022. TCTrack: Temporal Contexts for Aerial Tracking. 2022 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR) (2022), 14778--14788."},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2023.3307174"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01324"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00721"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00552"},{"key":"e_1_3_2_1_11_1","volume-title":"MemVLT: Vision-Language Tracking with Adaptive Memory-based Prompts. In The Thirty-eighth Annual Conference on Neural Information Processing Systems.","author":"Feng Xiaokun","year":"2024","unstructured":"Xiaokun Feng, Xuchen Li, Shiyu Hu, Dailing Zhang, Meiqi Wu, Jing Zhang, Xiaotang Chen, and Kaiqi Huang. 2024. MemVLT: Vision-Language Tracking with Adaptive Memory-based Prompts. In The Thirty-eighth Annual Conference on Neural Information Processing Systems."},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA48506.2021.9560756"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/IROS47612.2022.9981070"},{"key":"e_1_3_2_1_14_1","volume-title":"Prompt-Driven Temporal Domain Adaptation for Nighttime UAV Tracking. In 2024 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS). IEEE, 9706--9713","author":"Fu Changhong","year":"2024","unstructured":"Changhong Fu, Yiheng Wang, Liangliang Yao, Guangze Zheng, Haobo Zuo, and Jia Pan. 2024. Prompt-Driven Temporal Domain Adaptation for Nighttime UAV Tracking. In 2024 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS). IEEE, 9706--9713."},{"key":"e_1_3_2_1_15_1","volume-title":"SIoU loss: More powerful learning for bounding box regression. arXiv preprint arXiv:2205.12740","author":"Gevorgyan Zhora","year":"2022","unstructured":"Zhora Gevorgyan. 2022. SIoU loss: More powerful learning for bounding box regression. arXiv preprint arXiv:2205.12740 (2022)."},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"e_1_3_2_1_17_1","volume-title":"Advances in Neural Information Processing Systems","volume":"36","author":"Hu Shiyu","year":"2024","unstructured":"Shiyu Hu, Dailing Zhang, Xiaokun Feng, Xuchen Li, Xin Zhao, Kaiqi Huang, et al. 2024. A multi-modal global instance tracking benchmark (mgit): Better locating target in complex spatio-temporal and causal relationship. Advances in Neural Information Processing Systems, Vol. 36 (2024)."},{"key":"e_1_3_2_1_18_1","volume-title":"Got-10k: A large high-diversity benchmark for generic object tracking in the wild","author":"Huang Lianghua","year":"2019","unstructured":"Lianghua Huang, Xin Zhao, and Kaiqi Huang. 2019. Got-10k: A large high-diversity benchmark for generic object tracking in the wild. IEEE transactions on pattern analysis and machine intelligence, Vol. 43, 5 (2019), 1562--1577."},{"key":"e_1_3_2_1_19_1","volume-title":"Exploring Lightweight Hierarchical Vision Transformers for Efficient Visual Tracking. 2023 IEEE\/CVF International Conference on Computer Vision (ICCV)","author":"Kang Ben","year":"2023","unstructured":"Ben Kang, Xin Chen, D. Wang, Houwen Peng, and Huchuan Lu. 2023. Exploring Lightweight Hierarchical Vision Transformers for Efficient Visual Tracking. 2023 IEEE\/CVF International Conference on Computer Vision (ICCV) (2023), 9578--9587. https:\/\/api.semanticscholar.org\/CorpusID:260887522"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA48506.2021.9561564"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1109\/TMC.2022.3162892"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00441"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00935"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.01286"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW63382.2024.00724"},{"key":"e_1_3_2_1_26_1","volume-title":"Dtvlt: A multi-modal diverse text benchmark for visual language tracking based on llm. arXiv preprint arXiv:2410.02492","author":"Li Xuchen","year":"2024","unstructured":"Xuchen Li, Shiyu Hu, Xiaokun Feng, Dailing Zhang, Meiqi Wu, Jing Zhang, and Kaiqi Huang. 2024b. Dtvlt: A multi-modal diverse text benchmark for visual language tracking based on llm. arXiv preprint arXiv:2410.02492 (2024)."},{"key":"e_1_3_2_1_27_1","volume-title":"How Texts Help? A Fine-grained Evaluation to Reveal the Role of Language in Vision-Language Tracking. arXiv preprint arXiv:2411.15600","author":"Li Xuchen","year":"2024","unstructured":"Xuchen Li, Shiyu Hu, Xiaokun Feng, Dailing Zhang, Meiqi Wu, Jing Zhang, and Kaiqi Huang. 2024c. How Texts Help? A Fine-grained Evaluation to Reveal the Role of Language in Vision-Language Tracking. arXiv preprint arXiv:2411.15600 (2024)."},{"key":"e_1_3_2_1_28_1","volume-title":"Learning Adaptive and View-Invariant Vision Transformer for Real-Time UAV Tracking. In Forty-first International Conference on Machine Learning.","author":"Li Yongxin","year":"2024","unstructured":"Yongxin Li, Mengyuan Liu, You Wu, Xucheng Wang, Xiangyang Yang, and Shuiwang Li. 2024 d. Learning Adaptive and View-Invariant Vision Transformer for Real-Time UAV Tracking. In Forty-first International Conference on Machine Learning."},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-10602-1_48"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.cviu.2018.10.010"},{"key":"e_1_3_2_1_31_1","volume-title":"Decoupled weight decay regularization. arXiv preprint arXiv:1711.05101","author":"Loshchilov I","year":"2017","unstructured":"I Loshchilov. 2017. Decoupled weight decay regularization. arXiv preprint arXiv:1711.05101 (2017)."},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01246-5_19"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2024.3440330"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.02068"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1109\/ITSC.2011.6083125"},{"key":"e_1_3_2_1_36_1","volume-title":"Attention is all you need. Advances in Neural Information Processing Systems","author":"Vaswani A","year":"2017","unstructured":"A Vaswani. 2017. Attention is all you need. Advances in Neural Information Processing Systems (2017)."},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00935"},{"key":"e_1_3_2_1_38_1","volume-title":"MambaNUT: Nighttime UAV Tracking via Mamba and Adaptive Curriculum Learning. arXiv preprint arXiv:2412.00626","author":"Wu You","year":"2024","unstructured":"You Wu, Xiangyang Yang, Xucheng Wang, Hengzhou Ye, Dan Zeng, and Shuiwang Li. 2024. MambaNUT: Nighttime UAV Tracking via Mamba and Adaptive Curriculum Learning. arXiv preprint arXiv:2412.00626 (2024)."},{"key":"e_1_3_2_1_39_1","unstructured":"Liangliang Yao Changhong Fu and et al. 2023. SGDViT: Saliency-Guided Dynamic Vision Transformer for UAV Tracking. arXiv preprint arXiv:2303.04378 (2023)."},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-20047-2_20"},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2022.3146911"},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00869"},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00271"},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58589-1_46"},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01793"},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA57147.2024.10610544"}],"event":{"name":"ICMR '25: International Conference on Multimedia Retrieval","location":"Chicago IL USA","acronym":"ICMR '25","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["Proceedings of the 2025 International Conference on Multimedia Retrieval"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3731715.3733473","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,21]],"date-time":"2025-08-21T04:11:05Z","timestamp":1755749465000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3731715.3733473"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,6,30]]},"references-count":46,"alternative-id":["10.1145\/3731715.3733473","10.1145\/3731715"],"URL":"https:\/\/doi.org\/10.1145\/3731715.3733473","relation":{},"subject":[],"published":{"date-parts":[[2025,6,30]]},"assertion":[{"value":"2025-06-30","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}