{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,1]],"date-time":"2026-04-01T02:29:30Z","timestamp":1775010570606,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":51,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,10,26]],"date-time":"2023-10-26T00:00:00Z","timestamp":1698278400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,10,26]]},"DOI":"10.1145\/3581783.3612240","type":"proceedings-article","created":{"date-parts":[[2023,10,27]],"date-time":"2023-10-27T07:27:30Z","timestamp":1698391650000},"page":"1997-2005","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":5,"title":["Unambiguous Object Tracking by Exploiting Target Cues"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-7450-5189","authenticated-orcid":false,"given":"Jie","family":"Gao","sequence":"first","affiliation":[{"name":"Huaqiao University &amp; Guangxi Normal University, Xiamen &amp; Guangxi, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3423-1539","authenticated-orcid":false,"given":"Bineng","family":"Zhong","sequence":"additional","affiliation":[{"name":"Guangxi Normal University, Guangxi, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3006-4123","authenticated-orcid":false,"given":"Yan","family":"Chen","sequence":"additional","affiliation":[{"name":"Huaqiao University, Xiamen, China"}]}],"member":"320","published-online":{"date-parts":[[2023,10,27]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Fully-Convolutional Siamese Networks for Object Tracking. In ECCV Workshops (2) (Lecture Notes in Computer Science","volume":"865","author":"Bertinetto Luca","unstructured":"Luca Bertinetto, Jack Valmadre, Jo\u00e3o F. Henriques, Andrea Vedaldi, and Philip H. S. Torr. 2016. Fully-Convolutional Siamese Networks for Object Tracking. In ECCV Workshops (2) (Lecture Notes in Computer Science, Vol. 9914). 850--865."},{"key":"e_1_3_2_1_2_1","volume-title":"Luc Van Gool, and Radu Timofte","author":"Bhat Goutam","year":"2019","unstructured":"Goutam Bhat, Martin Danelljan, Luc Van Gool, and Radu Timofte. 2019. Learning Discriminative Model Prediction for Tracking. In ICCV. IEEE, 6181--6190."},{"key":"e_1_3_2_1_3_1","volume-title":"Luc Van Gool, and Radu Timofte","author":"Bhat Goutam","year":"2020","unstructured":"Goutam Bhat, Martin Danelljan, Luc Van Gool, and Radu Timofte. 2020. Know Your Surroundings: Exploiting Scene Information for Object Tracking. In ECCV (23) (Lecture Notes in Computer Science, Vol. 12368). Springer, 205--221."},{"key":"e_1_3_2_1_4_1","volume-title":"TCTrack: Temporal Contexts for Aerial Tracking","author":"Cao Ziang","unstructured":"Ziang Cao, Ziyuan Huang, Liang Pan, Shiwei Zhang, Ziwei Liu, and Changhong Fu. 2022. TCTrack: Temporal Contexts for Aerial Tracking. In CVPR. IEEE, 14778--14788."},{"key":"e_1_3_2_1_5_1","volume-title":"ECCV (22) (Lecture Notes in Computer Science","author":"Chen Boyu","unstructured":"Boyu Chen, Peixia Li, Lei Bai, Lei Qiao, Qiuhong Shen, Bo Li, Weihao Gan, Wei Wu, and Wanli Ouyang. 2022. Backbone is All Your Need: A Simplified Architecture for Visual Object Tracking. In ECCV (22) (Lecture Notes in Computer Science, Vol. 13682). Springer, 375--392."},{"key":"e_1_3_2_1_6_1","volume-title":"SeqTrack: Sequence to Sequence Learning for Visual Object Tracking. CoRR abs\/2304.14394","author":"Chen Xin","year":"2023","unstructured":"Xin Chen, Houwen Peng, DongWang, Huchuan Lu, and Han Hu. 2023. SeqTrack: Sequence to Sequence Learning for Visual Object Tracking. CoRR abs\/2304.14394 (2023)."},{"key":"e_1_3_2_1_7_1","volume-title":"Transformer Tracking","author":"Chen Xin","unstructured":"Xin Chen, Bin Yan, Jiawen Zhu, Dong Wang, Xiaoyun Yang, and Huchuan Lu. 2021. Transformer Tracking. In CVPR. Computer Vision Foundation \/ IEEE, 8126--8135."},{"key":"e_1_3_2_1_8_1","volume-title":"MixFormer: Endto-End Tracking with Iterative Mixed Attention","author":"Cui Yutao","unstructured":"Yutao Cui, Cheng Jiang, Limin Wang, and Gangshan Wu. 2022. MixFormer: Endto-End Tracking with Iterative Mixed Attention. In CVPR. IEEE, 13598--13608."},{"key":"e_1_3_2_1_9_1","volume-title":"High-Performance Long-Term Tracking With Meta-Updater","author":"Dai Kenan","unstructured":"Kenan Dai, Yunhua Zhang, Dong Wang, Jianhua Li, Huchuan Lu, and Xiaoyun Yang. 2020. High-Performance Long-Term Tracking With Meta-Updater. In CVPR. Computer Vision Foundation \/ IEEE, 6297--6306."},{"key":"e_1_3_2_1_10_1","volume-title":"Fahad Shahbaz Khan, and Michael Felsberg","author":"Danelljan Martin","year":"2017","unstructured":"Martin Danelljan, Goutam Bhat, Fahad Shahbaz Khan, and Michael Felsberg. 2017. ECO: Efficient Convolution Operators for Tracking. In CVPR. IEEE Computer Society, 6931--6939."},{"key":"e_1_3_2_1_11_1","volume-title":"Fahad Shahbaz Khan, and Michael Felsberg","author":"Danelljan Martin","year":"2019","unstructured":"Martin Danelljan, Goutam Bhat, Fahad Shahbaz Khan, and Michael Felsberg. 2019. ATOM: Accurate Tracking by Overlap Maximization. In CVPR. Computer Vision Foundation \/ IEEE, 4660--4669."},{"key":"e_1_3_2_1_12_1","unstructured":"Alexey Dosovitskiy Lucas Beyer Alexander Kolesnikov Dirk Weissenborn Xiaohua Zhai Thomas Unterthiner Mostafa Dehghani Matthias Minderer Georg Heigold Sylvain Gelly Jakob Uszkoreit and Neil Houlsby. 2021. An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale. In ICLR. OpenReview.net."},{"key":"e_1_3_2_1_13_1","volume-title":"LaSOT: A High-Quality Benchmark for Large-Scale Single Object Tracking","author":"Fan Heng","unstructured":"Heng Fan, Liting Lin, Fan Yang, Peng Chu, Ge Deng, Sijia Yu, Hexin Bai, Yong Xu, Chunyuan Liao, and Haibin Ling. 2019. LaSOT: A High-Quality Benchmark for Large-Scale Single Object Tracking. In CVPR. Computer Vision Foundation \/ IEEE, 5374--5383."},{"key":"e_1_3_2_1_14_1","volume-title":"Siamese Natural Language Tracker: Tracking by Natural Language Descriptions With Siamese Trackers","author":"Feng Qi","unstructured":"Qi Feng, Vitaly Ablavsky, Qinxun Bai, and Stan Sclaroff. 2021. Siamese Natural Language Tracker: Tracking by Natural Language Descriptions With Siamese Trackers. In CVPR. Computer Vision Foundation \/ IEEE, 5851--5860."},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"crossref","unstructured":"Zhihong Fu Zehua Fu Qingjie Liu Wenrui Cai and Yunhong Wang. 2022. SparseTT: Visual Tracking with Sparse Transformers. In IJCAI. ijcai.org 905--912.","DOI":"10.24963\/ijcai.2022\/127"},{"key":"e_1_3_2_1_16_1","volume-title":"STMTrack: Template-Free Visual Tracking With Space-Time Memory Networks","author":"Fu Zhihong","unstructured":"Zhihong Fu, Qingjie Liu, Zehua Fu, and Yunhong Wang. 2021. STMTrack: Template-Free Visual Tracking With Space-Time Memory Networks. In CVPR. Computer Vision Foundation \/ IEEE, 13774--13783."},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2022.3221723"},{"key":"e_1_3_2_1_18_1","volume-title":"ECCV (22) (Lecture Notes in Computer Science","author":"Gao Shenyuan","unstructured":"Shenyuan Gao, Chunluan Zhou, Chao Ma, Xinggang Wang, and Junsong Yuan. 2022. AiATrack: Attention in Attention for Transformer Visual Tracking. In ECCV (22) (Lecture Notes in Computer Science, Vol. 13682). Springer, 146--164."},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"crossref","unstructured":"Mingzhe Guo Zhipeng Zhang Heng Fan Liping Jing Yilin Lyu Bing Li and Weiming Hu. 2022. Learning Target-aware Representation for Visual Tracking via Informative Interactions. In IJCAI. ijcai.org 927--934.","DOI":"10.24963\/ijcai.2022\/130"},{"key":"e_1_3_2_1_20_1","volume-title":"Ling Shao, and Jianbing Shen.","author":"Han Wencheng","year":"2021","unstructured":"Wencheng Han, Xingping Dong, Fahad Shahbaz Khan, Ling Shao, and Jianbing Shen. 2021. Learning To Fuse Asymmetric Feature Maps in Siamese Trackers. In CVPR. Computer Vision Foundation \/ IEEE, 16570--16580."},{"key":"e_1_3_2_1_21_1","volume-title":"Girshick","author":"He Kaiming","year":"2022","unstructured":"Kaiming He, Xinlei Chen, Saining Xie, Yanghao Li, Piotr Doll\u00e1r, and Ross B. Girshick. 2022. Masked Autoencoders Are Scalable Vision Learners. In CVPR. IEEE, 15979--15988."},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2019.2957464"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-019-01204-1"},{"key":"e_1_3_2_1_24_1","volume-title":"SiamRPN: Evolution of Siamese Visual Tracking With Very Deep Networks","author":"Li Bo","unstructured":"Bo Li, Wei Wu, Qiang Wang, Fangyi Zhang, Junliang Xing, and Junjie Yan. 2019. SiamRPN: Evolution of Siamese Visual Tracking With Very Deep Networks. In CVPR. Computer Vision Foundation \/ IEEE, 4282--4291."},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2017.11.068"},{"key":"e_1_3_2_1_26_1","volume-title":"ECCV (5) (Lecture Notes in Computer Science","author":"Lin Tsung-Yi","unstructured":"Tsung-Yi Lin, Michael Maire, Serge J. Belongie, James Hays, Pietro Perona, Deva Ramanan, Piotr Doll\u00e1r, and C. Lawrence Zitnick. 2014. Microsoft COCO: Common Objects in Context. In ECCV (5) (Lecture Notes in Computer Science, Vol. 8693). Springer, 740--755."},{"key":"e_1_3_2_1_27_1","unstructured":"Ilya Loshchilov and Frank Hutter. 2019. DecoupledWeight Decay Regularization. In ICLR (Poster). OpenReview.net."},{"key":"e_1_3_2_1_28_1","volume-title":"D3S - A Discriminative Single Shot Segmentation Tracker","author":"Lukezic Alan","unstructured":"Alan Lukezic, Jiri Matas, and Matej Kristan. 2020. D3S - A Discriminative Single Shot Segmentation Tracker. In CVPR. Computer Vision Foundation \/ IEEE, 7131--7140."},{"key":"e_1_3_2_1_29_1","volume-title":"Linchao Zhu, Haoqi Fan, Yilei Xu, Yi Yang, and Zhicheng Yan.","author":"Ma Fan","year":"2022","unstructured":"Fan Ma, Mike Zheng Shou, Linchao Zhu, Haoqi Fan, Yilei Xu, Yi Yang, and Zhicheng Yan. 2022. Unified Transformer Tracker for Object Tracking. In CVPR. IEEE, 8771--8780."},{"key":"e_1_3_2_1_30_1","volume-title":"Fisher Yu, and Luc Van Gool.","author":"Mayer Christoph","year":"2022","unstructured":"Christoph Mayer, Martin Danelljan, Goutam Bhat, Matthieu Paul, Danda Pani Paudel, Fisher Yu, and Luc Van Gool. 2022. Transforming Model Prediction for Tracking. In CVPR. IEEE, 8721--8730."},{"key":"e_1_3_2_1_31_1","volume-title":"Danda Pani Paudel, and Luc Van Gool","author":"Mayer Christoph","year":"2021","unstructured":"Christoph Mayer, Martin Danelljan, Danda Pani Paudel, and Luc Van Gool. 2021. Learning Target Candidate Association to Keep Track of What Not to Track. In ICCV. IEEE, 13424--13434."},{"key":"e_1_3_2_1_32_1","volume-title":"ECCV (1) (Lecture Notes in Computer Science","author":"M\u00fcller Matthias","unstructured":"Matthias M\u00fcller, Adel Bibi, Silvio Giancola, Salman Al-Subaihi, and Bernard Ghanem. 2018. TrackingNet: A Large-Scale Dataset and Benchmark for Object Tracking in the Wild. In ECCV (1) (Lecture Notes in Computer Science, Vol. 11205). Springer, 310--327."},{"key":"e_1_3_2_1_33_1","volume-title":"Learning Multi-domain Convolutional Neural Networks for Visual Tracking","author":"Nam Hyeonseob","unstructured":"Hyeonseob Nam and Bohyung Han. 2016. Learning Multi-domain Convolutional Neural Networks for Visual Tracking. In CVPR. IEEE Computer Society, 4293--4302."},{"key":"e_1_3_2_1_34_1","volume-title":"ECCV (22) (Lecture Notes in Computer Science","author":"Paul Matthieu","unstructured":"Matthieu Paul, Martin Danelljan, Christoph Mayer, and Luc Van Gool. 2022. Robust Visual Tracking by Segmentation. In ECCV (22) (Lecture Notes in Computer Science, Vol. 13682). Springer, 571--588."},{"key":"e_1_3_2_1_35_1","volume-title":"ECCV (22) (Lecture Notes in Computer Science","author":"Paul Matthieu","unstructured":"Matthieu Paul, Martin Danelljan, Christoph Mayer, and Luc Van Gool. 2022. Robust Visual Tracking by Segmentation. In ECCV (22) (Lecture Notes in Computer Science, Vol. 13682). Springer, 571--588."},{"key":"e_1_3_2_1_36_1","volume-title":"Generalized Intersection Over Union: A Metric and a Loss for Bounding Box Regression","author":"Rezatofighi Hamid","unstructured":"Hamid Rezatofighi, Nathan Tsoi, JunYoung Gwak, Amir Sadeghian, Ian D. Reid, and Silvio Savarese. 2019. Generalized Intersection Over Union: A Metric and a Loss for Bounding Box Regression. In CVPR. Computer Vision Foundation \/ IEEE, 658--666."},{"key":"e_1_3_2_1_37_1","volume-title":"Yi-Ping Phoebe Chen, and Wei Yang","author":"Song Zikai","year":"2022","unstructured":"Zikai Song, Junqing Yu, Yi-Ping Phoebe Chen, and Wei Yang. 2022. Transformer Tracking with Cyclic Shifting Window Attention. In CVPR. IEEE, 8781--8790."},{"key":"e_1_3_2_1_38_1","volume-title":"Smeulders","author":"Tao Ran","year":"2016","unstructured":"Ran Tao, Efstratios Gavves, and Arnold W. M. Smeulders. 2016. Siamese Instance Search for Tracking. In CVPR. IEEE Computer Society, 1420--1429."},{"key":"e_1_3_2_1_39_1","volume-title":"Siam R-CNN: Visual Tracking by Re-Detection","author":"Voigtlaender Paul","unstructured":"Paul Voigtlaender, Jonathon Luiten, Philip H. S. Torr, and Bastian Leibe. 2020. Siam R-CNN: Visual Tracking by Re-Detection. In CVPR. Computer Vision Foundation \/ IEEE, 6577--6587."},{"key":"e_1_3_2_1_40_1","volume-title":"Transformer Meets Tracker: Exploiting Temporal Context for Robust Visual Tracking","author":"Wang Ning","unstructured":"Ning Wang, Wengang Zhou, Jie Wang, and Houqiang Li. 2021. Transformer Meets Tracker: Exploiting Temporal Context for Robust Visual Tracking. In CVPR. Computer Vision Foundation \/ IEEE, 1571--1580."},{"key":"e_1_3_2_1_41_1","volume-title":"Towards More Flexible and Accurate Object Tracking With Natural Language: Algorithms and Benchmark","author":"Wang Xiao","unstructured":"Xiao Wang, Xiujun Shu, Zhipeng Zhang, Bo Jiang, Yaowei Wang, Yonghong Tian, and Feng Wu. 2021. Towards More Flexible and Accurate Object Tracking With Natural Language: Algorithms and Benchmark. In CVPR. Computer Vision Foundation \/ IEEE, 13763--13773."},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2014.2388226"},{"key":"e_1_3_2_1_43_1","volume-title":"Correlation-Aware Deep Tracking","author":"Xie Fei","unstructured":"Fei Xie, Chunyu Wang, Guangting Wang, Yue Cao, Wankou Yang, and Wenjun Zeng. 2022. Correlation-Aware Deep Tracking. In CVPR. IEEE, 8741--8750."},{"key":"e_1_3_2_1_44_1","volume-title":"Huang","author":"Xu Ning","year":"2018","unstructured":"Ning Xu, Linjie Yang, Yuchen Fan, Dingcheng Yue, Yuchen Liang, Jianchao Yang, and Thomas S. Huang. 2018. YouTube-VOS: A Large-Scale Video Object Segmentation Benchmark. CoRR abs\/1809.03327 (2018)."},{"key":"e_1_3_2_1_45_1","volume-title":"Learning Spatio-Temporal Transformer for Visual Tracking","author":"Yan Bin","unstructured":"Bin Yan, Houwen Peng, Jianlong Fu, DongWang, and Huchuan Lu. 2021. Learning Spatio-Temporal Transformer for Visual Tracking. In ICCV. IEEE, 10428--10437."},{"key":"e_1_3_2_1_46_1","volume-title":"Alpha-Refine: Boosting Tracking Performance by Precise Bounding Box Estimation","author":"Yan Bin","unstructured":"Bin Yan, Xinyu Zhang, DongWang, Huchuan Lu, and Xiaoyun Yang. 2021. Alpha-Refine: Boosting Tracking Performance by Precise Bounding Box Estimation. In CVPR. Computer Vision Foundation \/ IEEE, 5289--5298."},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"crossref","unstructured":"Jinyu Yang Zhe Li Feng Zheng Ales Leonardis and Jingkuan Song. 2022. Prompting for Multi-Modal Tracking. In ACM Multimedia. ACM 3492--3500.","DOI":"10.1145\/3503161.3547851"},{"key":"e_1_3_2_1_48_1","volume-title":"ECCV (22) (Lecture Notes in Computer Science","author":"Ye Botao","unstructured":"Botao Ye, Hong Chang, Bingpeng Ma, Shiguang Shan, and Xilin Chen. 2022. Joint Feature Learning and Relation Modeling for Tracking: A One-Stream Framework. In ECCV (22) (Lecture Notes in Computer Science, Vol. 13682). Springer, 341--357."},{"key":"e_1_3_2_1_49_1","volume-title":"Learn to Match: Automatic Matching Network Design for Visual Tracking","author":"Zhang Zhipeng","unstructured":"Zhipeng Zhang, Yihao Liu, Xiao Wang, Bing Li, and Weiming Hu. 2021. Learn to Match: Automatic Matching Network Design for Visual Tracking. In ICCV. IEEE, 13319--13328."},{"key":"e_1_3_2_1_50_1","volume-title":"Ocean: Object-Aware Anchor-Free Tracking. In ECCV (21) (Lecture Notes in Computer Science","author":"Zhang Zhipeng","year":"2020","unstructured":"Zhipeng Zhang, Houwen Peng, Jianlong Fu, Bing Li, and Weiming Hu. 2020. Ocean: Object-Aware Anchor-Free Tracking. In ECCV (21) (Lecture Notes in Computer Science, Vol. 12366). Springer, 771--787."},{"key":"e_1_3_2_1_51_1","volume-title":"Luc Van Gool, and Radu Timofte","author":"Zhao Bin","year":"2021","unstructured":"Bin Zhao, Goutam Bhat, Martin Danelljan, Luc Van Gool, and Radu Timofte. 2021. Generating Masks from Boxes by Mining Spatio-Temporal Consistencies in Videos. In ICCV. IEEE, 13536--13546."}],"event":{"name":"MM '23: The 31st ACM International Conference on Multimedia","location":"Ottawa ON Canada","acronym":"MM '23","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["Proceedings of the 31st ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3581783.3612240","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3581783.3612240","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,21]],"date-time":"2025-08-21T23:59:00Z","timestamp":1755820740000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3581783.3612240"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,10,26]]},"references-count":51,"alternative-id":["10.1145\/3581783.3612240","10.1145\/3581783"],"URL":"https:\/\/doi.org\/10.1145\/3581783.3612240","relation":{},"subject":[],"published":{"date-parts":[[2023,10,26]]},"assertion":[{"value":"2023-10-27","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}