{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,25]],"date-time":"2026-03-25T22:43:36Z","timestamp":1774478616639,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":38,"publisher":"ACM","funder":[{"name":"Key Laboratory of Target Cognition and Application Technology","award":["Grant 2023-CXPT-LC-005"],"award-info":[{"award-number":["Grant 2023-CXPT-LC-005"]}]},{"name":"Science and Disruptive Technology Program","award":["Grant AIRCAS2024-AIRCAS-SDTP-03"],"award-info":[{"award-number":["Grant AIRCAS2024-AIRCAS-SDTP-03"]}]},{"name":"Key Program of Chinese Academy of Sciences","award":["RCJJ-145-24-13 and KGFZD-145-25-38"],"award-info":[{"award-number":["RCJJ-145-24-13 and KGFZD-145-25-38"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,10,27]]},"DOI":"10.1145\/3746027.3758254","type":"proceedings-article","created":{"date-parts":[[2025,10,25]],"date-time":"2025-10-25T07:37:21Z","timestamp":1761377841000},"page":"13046-13052","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":1,"title":["Referring Multi-Object Tracking in Satellite Videos: A New Benchmark and Baseline"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0003-4171-3234","authenticated-orcid":false,"given":"Peirong","family":"Zhang","sequence":"first","affiliation":[{"name":"Aerospace Information Research Institute, Chinese Academy of Sciences, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7466-0234","authenticated-orcid":false,"given":"Yidan","family":"Zhang","sequence":"additional","affiliation":[{"name":"Aerospace Information Research Institute, Chinese Academy of Sciences, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-3409-6369","authenticated-orcid":false,"given":"Hanru","family":"Shi","sequence":"additional","affiliation":[{"name":"Aerospace Information Research Institute, Chinese Academy of Sciences, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-8549-499X","authenticated-orcid":false,"given":"Dianyu","family":"Wang","sequence":"additional","affiliation":[{"name":"Tianjin University, Tianjin, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1426-5056","authenticated-orcid":false,"given":"Xiaoxuan","family":"Liu","sequence":"additional","affiliation":[{"name":"Aerospace Information Research Institute, Chinese Academy of Sciences, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-2414-9642","authenticated-orcid":false,"given":"Lei","family":"Wang","sequence":"additional","affiliation":[{"name":"Aerospace Information Research Institute, Chinese Academy of Sciences, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2025,10,27]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Multi-granularity Localization Transformer with Collaborative Understanding for Referring Multi-Object Tracking","author":"Chen Jiajun","year":"2025","unstructured":"Jiajun Chen, Jiacheng Lin, Guojin Zhong, You Yao, and Zhiyong Li. 2025. Multi-granularity Localization Transformer with Collaborative Understanding for Referring Multi-Object Tracking. IEEE Transactions on Instrumentation and Measurement (2025)."},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.isprsjprs.2024.03.013"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01810"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1109\/TGRS.2022.3230846"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.isprsjprs.2024.06.010"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00371"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1109\/TGRS.2023.3278107","article-title":"CFTracker: Multi-object tracking with cross-frame connections in satellite videos","volume":"61","author":"Kong Lingyu","year":"2023","unstructured":"Lingyu Kong, Zhiyuan Yan, Yidan Zhang, Wenhui Diao, Zining Zhu, and Lei Wang. 2023. CFTracker: Multi-object tracking with cross-frame connections in satellite videos. IEEE Transactions on Geoscience and Remote Sensing, Vol. 61 (2023), 1-14.","journal-title":"IEEE Transactions on Geoscience and Remote Sensing"},{"key":"e_1_3_2_1_9_1","first-page":"1","article-title":"Target-aware transformer for satellite video object tracking","volume":"62","author":"Lai Pujian","year":"2023","unstructured":"Pujian Lai, Meili Zhang, Gong Cheng, Shengyang Li, Xiankai Huang, and Junwei Han. 2023. Target-aware transformer for satellite video object tracking. IEEE Transactions on Geoscience and Remote Sensing, Vol. 62 (2023), 1-10.","journal-title":"IEEE Transactions on Geoscience and Remote Sensing"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00539"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/JSTARS.2023.3296451"},{"key":"e_1_3_2_1_12_1","first-page":"1","article-title":"A multitask benchmark dataset for satellite video: Object detection, tracking, and segmentation","volume":"61","author":"Li Shengyang","year":"2023","unstructured":"Shengyang Li, Zhuang Zhou, Manqi Zhao, Jian Yang, Weilong Guo, Yixuan Lv, Longxuan Kou, Han Wang, and Yanfeng Gu. 2023c. A multitask benchmark dataset for satellite video: Object detection, tracking, and segmentation. IEEE Transactions on Geoscience and Remote Sensing, Vol. 61 (2023), 1-21.","journal-title":"IEEE Transactions on Geoscience and Remote Sensing"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/IGARSS.2019.8900412"},{"key":"e_1_3_2_1_14_1","volume-title":"LaMOT: Language-Guided Multi-Object Tracking. arXiv preprint arXiv:2406.08324","author":"Li Yunhao","year":"2024","unstructured":"Yunhao Li, Xiaoqiong Liu, Luke Liu, Heng Fan, and Libo Zhang. 2024. LaMOT: Language-Guided Multi-Object Tracking. arXiv preprint arXiv:2406.08324 (2024)."},{"key":"e_1_3_2_1_15_1","volume-title":"RoBERTa: A Robustly Optimized BERT Pretraining Approach. arXiv preprint arXiv:1907.11692","author":"Liu Yinhan","year":"2019","unstructured":"Yinhan Liu, Myle Ott, Naman Goyal, Jingfei Du, Mandar Joshi, Danqi Chen, Omer Levy, Mike Lewis, Luke Zettlemoyer, and Veselin Stoyanov. 2019. RoBERTa: A Robustly Optimized BERT Pretraining Approach. arXiv preprint arXiv:1907.11692 (2019)."},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-020-01375-2"},{"key":"e_1_3_2_1_17_1","volume-title":"Mls-track: Multilevel semantic interaction in rmot. arXiv preprint arXiv:2404.12031","author":"Ma Zeliang","year":"2024","unstructured":"Zeliang Ma, Song Yang, Zhe Cui, Zhicheng Zhao, Fei Su, Delong Liu, and Jingyu Wang. 2024. Mls-track: Multilevel semantic interaction in rmot. arXiv preprint arXiv:2404.12031 (2024)."},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00864"},{"key":"e_1_3_2_1_19_1","first-page":"3205","article-title":"Type-to-track: Retrieve any object via prompt-based tracking","volume":"36","author":"Nguyen Pha","year":"2023","unstructured":"Pha Nguyen, Kha Gia Quach, Kris Kitani, and Khoa Luu. 2023. Type-to-track: Retrieve any object via prompt-based tracking. Advances in Neural Information Processing Systems, Vol. 36 (2023), 3205-3219.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_20_1","volume-title":"Motion-Guided Multi-Object Tracking Model for High-Speed Aerial Objects in Satellite Videos","author":"Ren Libo","year":"2024","unstructured":"Libo Ren, Wenxin Yin, Wenhui Diao, Kun Fu, and Xian Sun. 2024. Motion-Guided Multi-Object Tracking Model for High-Speed Aerial Objects in Satellite Videos. IEEE Transactions on Geoscience and Remote Sensing (2024)."},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1109\/LGRS.2022.3158652"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2020.3045634"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1109\/TGRS.2019.2916953"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1109\/ATC63255.2024.10908144"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICIP.2017.8296962"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01406"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.neunet.2024.106539"},{"key":"e_1_3_2_1_28_1","volume-title":"Temporal-Enhanced Multimodal Transformer for Referring Multi-Object Tracking and Segmentation. arXiv preprint arXiv:2410.13437","author":"Xiao Changcheng","year":"2024","unstructured":"Changcheng Xiao, Qiong Cao, Yujie Zhong, Xiang Zhang, Tao Wang, Canqun Yang, and Long Lan. 2024b. Temporal-Enhanced Multimodal Transformer for Referring Multi-Object Tracking and Segmentation. arXiv preprint arXiv:2410.13437 (2024)."},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1109\/LGRS.2021.3124222","article-title":"DSFNet: Dynamic and static fusion network for moving object detection in satellite videos","volume":"19","author":"Xiao Chao","year":"2021","unstructured":"Chao Xiao, Qian Yin, Xinyi Ying, Ruojing Li, Shuanglin Wu, Miao Li, Li Liu, Wei An, and Zhijie Chen. 2021. DSFNet: Dynamic and static fusion network for moving object detection in satellite videos. IEEE Geoscience and Remote Sensing Letters, Vol. 19 (2021), 1-5.","journal-title":"IEEE Geoscience and Remote Sensing Letters"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1109\/TGRS.2019.2943366"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i7.28493"},{"key":"e_1_3_2_1_32_1","first-page":"1","article-title":"Detecting and tracking small and dense moving objects in satellite videos: A benchmark","volume":"60","author":"Yin Qian","year":"2021","unstructured":"Qian Yin, Qingyong Hu, Hao Liu, Feng Zhang, Yingqian Wang, Zaiping Lin, Wei An, and Yulan Guo. 2021. Detecting and tracking small and dense moving objects in satellite videos: A benchmark. IEEE Transactions on Geoscience and Remote Sensing, Vol. 60 (2021), 1-18.","journal-title":"IEEE Transactions on Geoscience and Remote Sensing"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19812-0_38"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-20047-2_1"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-021-01513-4"},{"key":"e_1_3_2_1_36_1","volume-title":"Bootstrapping Referring Multi-Object Tracking. arXiv preprint arXiv:2406.05039","author":"Zhang Yani","year":"2024","unstructured":"Yani Zhang, Dongming Wu, Wencheng Han, and Xingping Dong. 2024. Bootstrapping Referring Multi-Object Tracking. arXiv preprint arXiv:2406.05039 (2024)."},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1109\/TGRS.2022.3230378"},{"key":"e_1_3_2_1_38_1","volume-title":"Deformable DETR: Deformable Transformers for End-to-End Object Detection. arXiv preprint arXiv:2010.04159","author":"Zhu Xizhou","year":"2020","unstructured":"Xizhou Zhu, Weijie Su, Lewei Lu, Bin Li, Xiaogang Wang, and Jifeng Dai. 2020. Deformable DETR: Deformable Transformers for End-to-End Object Detection. arXiv preprint arXiv:2010.04159 (2020)."}],"event":{"name":"MM '25: The 33rd ACM International Conference on Multimedia","location":"Dublin Ireland","acronym":"MM '25","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["Proceedings of the 33rd ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3746027.3758254","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,10]],"date-time":"2025-12-10T04:59:10Z","timestamp":1765342750000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3746027.3758254"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,27]]},"references-count":38,"alternative-id":["10.1145\/3746027.3758254","10.1145\/3746027"],"URL":"https:\/\/doi.org\/10.1145\/3746027.3758254","relation":{},"subject":[],"published":{"date-parts":[[2025,10,27]]},"assertion":[{"value":"2025-10-27","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}