{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,28]],"date-time":"2026-02-28T22:51:03Z","timestamp":1772319063867,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":94,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,10,26]],"date-time":"2023-10-26T00:00:00Z","timestamp":1698278400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62176060"],"award-info":[{"award-number":["62176060"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100003399","name":"Science and Technology Commission of Shanghai Municipality","doi-asserted-by":"publisher","award":["22511105000"],"award-info":[{"award-number":["22511105000"]}],"id":[{"id":"10.13039\/501100003399","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,10,26]]},"DOI":"10.1145\/3581783.3611728","type":"proceedings-article","created":{"date-parts":[[2023,10,27]],"date-time":"2023-10-27T07:27:40Z","timestamp":1698391660000},"page":"2734-2743","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":10,"title":["DeNoising-MOT: Towards Multiple Object Tracking with Severe Occlusions"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0009-8413-2185","authenticated-orcid":false,"given":"Teng","family":"Fu","sequence":"first","affiliation":[{"name":"Fudan University, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3735-721X","authenticated-orcid":false,"given":"Xiaocong","family":"Wang","sequence":"additional","affiliation":[{"name":"Fudan University, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1717-0474","authenticated-orcid":false,"given":"Haiyang","family":"Yu","sequence":"additional","affiliation":[{"name":"Fudan University, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-0181-924X","authenticated-orcid":false,"given":"Ke","family":"Niu","sequence":"additional","affiliation":[{"name":"Fudan University, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9633-0033","authenticated-orcid":false,"given":"Bin","family":"Li","sequence":"additional","affiliation":[{"name":"Fudan University, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4897-9209","authenticated-orcid":false,"given":"Xiangyang","family":"Xue","sequence":"additional","affiliation":[{"name":"Fudan University, Shanghai, China"}]}],"member":"320","published-online":{"date-parts":[[2023,10,27]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"BoT-SORT: Robust associations multi-pedestrian tracking. arXiv preprint arXiv:2206.14651","author":"Aharon Nir","year":"2022","unstructured":"Nir Aharon, Roy Orfaig, and Ben-Zion Bobrovsky. 2022. BoT-SORT: Robust associations multi-pedestrian tracking. arXiv preprint arXiv:2206.14651 (2022)."},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.110"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2011.5995311"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00676"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2011.21"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00103"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1155\/2008\/246309"},{"key":"e_1_3_2_1_8_1","first-page":"4","article-title":"Is space-time attention all you need for video understanding?","volume":"2","author":"Bertasius Gedas","year":"2021","unstructured":"Gedas Bertasius, Heng Wang, and Lorenzo Torresani. 2021. Is space-time attention all you need for video understanding?. In ICML, Vol. 2. 4.","journal-title":"ICML"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICIP.2016.7533003"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1109\/AVSS.2017.8078516"},{"key":"e_1_3_2_1_11_1","volume-title":"Yolov4: Optimal speed and accuracy of object detection. arXiv preprint arXiv:2004.10934","author":"Bochkovskiy Alexey","year":"2020","unstructured":"Alexey Bochkovskiy, Chien-Yao Wang, and Hong-Yuan Mark Liao. 2020. Yolov4: Optimal speed and accuracy of object detection. arXiv preprint arXiv:2004.10934 (2020)."},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00792"},{"key":"e_1_3_2_1_13_1","unstructured":"Jinkun Cao Jiangmiao Pang Xinshuo Weng Rawal Khirodkar and Kris M Kitani. [n. d.]. Object Tracking by Hierarchical Part-Whole Attention. ([n. d.])."},{"key":"e_1_3_2_1_14_1","volume-title":"Observation-centric sort: Rethinking sort for robust multi-object tracking. arXiv preprint arXiv:2203.14360","author":"Cao Jinkun","year":"2022","unstructured":"Jinkun Cao, Xinshuo Weng, Rawal Khirodkar, Jiangmiao Pang, and Kris Kitani. 2022. Observation-centric sort: Rethinking sort for robust multi-object tracking. arXiv preprint arXiv:2203.14360 (2022)."},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58452-8_13"},{"key":"e_1_3_2_1_16_1","volume-title":"TR-MOT: Multi-Object Tracking by Reference. arXiv preprint arXiv:2203.16621","author":"Chen Mingfei","year":"2022","unstructured":"Mingfei Chen, Yue Liao, Si Liu, Fei Wang, and Jenq-Neng Hwang. 2022b. TR-MOT: Multi-Object Tracking by Reference. arXiv preprint arXiv:2203.16621 (2022)."},{"key":"e_1_3_2_1_17_1","volume-title":"Seyed Mehdi Iranmanesh, and Kuo-Chin Lien","author":"Chen Xiaotong","year":"2022","unstructured":"Xiaotong Chen, Seyed Mehdi Iranmanesh, and Kuo-Chin Lien. 2022a. Patchtrack: Multiple object tracking using frame patches. arXiv preprint arXiv:2201.00080 (2022)."},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/WACV56688.2023.00485"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00247"},{"key":"e_1_3_2_1_20_1","volume-title":"Mot20: A benchmark for multi object tracking in crowded scenes. arXiv preprint arXiv:2003.09003","author":"Dendorfer Patrick","year":"2020","unstructured":"Patrick Dendorfer, Hamid Rezatofighi, Anton Milan, Javen Shi, Daniel Cremers, Ian Reid, Stefan Roth, Konrad Schindler, and Laura Leal-Taix\u00e9. 2020. Mot20: A benchmark for multi object tracking in crowded scenes. arXiv preprint arXiv:2003.09003 (2020)."},{"key":"e_1_3_2_1_21_1","first-page":"15657","article-title":"Quo Vadis: Is Trajectory Forecasting the Key Towards Long-Term Multi-Object Tracking","volume":"35","author":"Dendorfer Patrick","year":"2022","unstructured":"Patrick Dendorfer, Vladimir Yugay, Aljosa Osep, and Laura Leal-Taix\u00e9. 2022. Quo Vadis: Is Trajectory Forecasting the Key Towards Long-Term Multi-Object Tracking? Advances in Neural Information Processing Systems, Vol. 35 (2022), 15657--15671.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2009.5206631"},{"key":"e_1_3_2_1_23_1","volume-title":"Pedestrian detection: An evaluation of the state of the art","author":"Dollar Piotr","year":"2011","unstructured":"Piotr Dollar, Christian Wojek, Bernt Schiele, and Pietro Perona. 2011. Pedestrian detection: An evaluation of the state of the art. IEEE transactions on pattern analysis and machine intelligence, Vol. 34, 4 (2011), 743--761."},{"key":"e_1_3_2_1_24_1","unstructured":"Alexey Dosovitskiy Lucas Beyer Alexander Kolesnikov Dirk Weissenborn Xiaohua Zhai Thomas Unterthiner Mostafa Dehghani Matthias Minderer Georg Heigold Sylvain Gelly et al. 2020. An image is worth 16x16 words: Transformers for image recognition at scale. arXiv preprint arXiv:2010.11929 (2020)."},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2008.4587581"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.330"},{"key":"e_1_3_2_1_27_1","volume-title":"Object detection with discriminatively trained part-based models","author":"Felzenszwalb Pedro F","year":"2009","unstructured":"Pedro F Felzenszwalb, Ross B Girshick, David McAllester, and Deva Ramanan. 2009. Object detection with discriminatively trained part-based models. IEEE transactions on pattern analysis and machine intelligence, Vol. 32, 9 (2009), 1627--1645."},{"key":"e_1_3_2_1_28_1","volume-title":"Yolox: Exceeding yolo series in","author":"Ge Zheng","year":"2021","unstructured":"Zheng Ge, Songtao Liu, Feng Wang, Zeming Li, and Jian Sun. 2021. Yolox: Exceeding yolo series in 2021. arXiv preprint arXiv:2107.08430 (2021)."},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.322"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"crossref","unstructured":"Rudolph Emil Kalman. 1960. A new approach to linear filtering and prediction problems. (1960).","DOI":"10.1115\/1.3662552"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.533"},{"key":"e_1_3_2_1_33_1","volume-title":"The Hungarian method for the assignment problem. Naval research logistics quarterly","author":"Kuhn Harold W","year":"1955","unstructured":"Harold W Kuhn. 1955. The Hungarian method for the assignment problem. Naval research logistics quarterly, Vol. 2, 1--2 (1955), 83--97."},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01325"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.324"},{"key":"e_1_3_2_1_36_1","volume-title":"Dab-detr: Dynamic anchor boxes are better queries for detr. arXiv preprint arXiv:2201.12329","author":"Liu Shilong","year":"2022","unstructured":"Shilong Liu, Feng Li, Hao Zhang, Xiao Yang, Xianbiao Qi, Hang Su, Jun Zhu, and Lei Zhang. 2022. Dab-detr: Dynamic anchor boxes are better queries for detr. arXiv preprint arXiv:2201.12329 (2022)."},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-46448-0_2"},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00986"},{"key":"e_1_3_2_1_39_1","volume-title":"Decoupled weight decay regularization. arXiv preprint arXiv:1711.05101","author":"Loshchilov Ilya","year":"2017","unstructured":"Ilya Loshchilov and Frank Hutter. 2017. Decoupled weight decay regularization. arXiv preprint arXiv:1711.05101 (2017)."},{"key":"e_1_3_2_1_40_1","volume-title":"Hota: A higher order metric for evaluating multi-object tracking. International journal of computer vision","author":"Luiten Jonathon","year":"2021","unstructured":"Jonathon Luiten, Aljosa Osep, Patrick Dendorfer, Philip Torr, Andreas Geiger, Laura Leal-Taix\u00e9, and Bastian Leibe. 2021. Hota: A higher order metric for evaluating multi-object tracking. International journal of computer vision, Vol. 129 (2021), 548--578."},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00864"},{"key":"e_1_3_2_1_42_1","volume-title":"MOT16: A benchmark for multi-object tracking. arXiv preprint arXiv:1603.00831","author":"Milan Anton","year":"2016","unstructured":"Anton Milan, Laura Leal-Taix\u00e9, Ian Reid, Stefan Roth, and Konrad Schindler. 2016. MOT16: A benchmark for multi-object tracking. arXiv preprint arXiv:1603.00831 (2016)."},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.jvcir.2022.103750"},{"key":"e_1_3_2_1_44_1","volume-title":"Pytorch: An imperative style, high-performance deep learning library. Advances in neural information processing systems","author":"Paszke Adam","year":"2019","unstructured":"Adam Paszke, Sam Gross, Francisco Massa, Adam Lerer, James Bradbury, Gregory Chanan, Trevor Killeen, Zeming Lin, Natalia Gimelshein, Luca Antiga, et al. 2019. Pytorch: An imperative style, high-performance deep learning library. Advances in neural information processing systems, Vol. 32 (2019)."},{"key":"e_1_3_2_1_45_1","volume-title":"MotionTrack: Learning Robust Short-term and Long-term Motions for Multi-Object Tracking. arXiv preprint arXiv:2303.10404","author":"Qin Zheng","year":"2023","unstructured":"Zheng Qin, Sanping Zhou, Le Wang, Jinghai Duan, Gang Hua, and Wei Tang. 2023. MotionTrack: Learning Robust Short-term and Long-term Motions for Multi-Object Tracking. arXiv preprint arXiv:2303.10404 (2023)."},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.91"},{"key":"e_1_3_2_1_47_1","volume-title":"Focus On Details: Online Multi-object Tracking with Diverse Fine-grained Representation. arXiv preprint arXiv:2302.14589","author":"Ren Hao","year":"2023","unstructured":"Hao Ren, Shoudong Han, Huilin Ding, Ziwen Zhang, Hongwei Wang, and Faquan Wang. 2023. Focus On Details: Online Multi-object Tracking with Diverse Fine-grained Representation. arXiv preprint arXiv:2302.14589 (2023)."},{"key":"e_1_3_2_1_48_1","volume-title":"Faster r-cnn: Towards real-time object detection with region proposal networks. Advances in neural information processing systems","author":"Ren Shaoqing","year":"2015","unstructured":"Shaoqing Ren, Kaiming He, Ross Girshick, and Jian Sun. 2015. Faster r-cnn: Towards real-time object detection with region proposal networks. Advances in neural information processing systems, Vol. 28 (2015)."},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00075"},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00632"},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01410"},{"key":"e_1_3_2_1_52_1","volume-title":"Simple Cues Lead to a Strong Multi-Object Tracker. arXiv preprint arXiv:2206.04656","author":"Seidenschwarz Jenny","year":"2022","unstructured":"Jenny Seidenschwarz, Guillem Braso, Ismail Elezi, and Laura Leal-Taixe. 2022. Simple Cues Lead to a Strong Multi-Object Tracker. arXiv preprint arXiv:2206.04656 (2022)."},{"key":"e_1_3_2_1_53_1","volume-title":"Crowdhuman: A benchmark for detecting human in a crowd. arXiv preprint arXiv:1805.00123","author":"Shao Shuai","year":"2018","unstructured":"Shuai Shao, Zijian Zhao, Boxun Li, Tete Xiao, Gang Yu, Xiangyu Zhang, and Jian Sun. 2018. Crowdhuman: A benchmark for detecting human in a crowd. arXiv preprint arXiv:1805.00123 (2018)."},{"key":"e_1_3_2_1_54_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2018.8461018"},{"key":"e_1_3_2_1_55_1","volume-title":"An end-to-end trainable neural network for image-based sequence recognition and its application to scene text recognition","author":"Shi Baoguang","year":"2016","unstructured":"Baoguang Shi, Xiang Bai, and Cong Yao. 2016. An end-to-end trainable neural network for image-based sequence recognition and its application to scene text recognition. IEEE transactions on pattern analysis and machine intelligence, Vol. 39, 11 (2016), 2298--2304."},{"key":"e_1_3_2_1_56_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01219"},{"key":"e_1_3_2_1_57_1","volume-title":"Very deep convolutional networks for large-scale image recognition. arXiv preprint arXiv:1409.1556","author":"Simonyan Karen","year":"2014","unstructured":"Karen Simonyan and Andrew Zisserman. 2014. Very deep convolutional networks for large-scale image recognition. arXiv preprint arXiv:1409.1556 (2014)."},{"key":"e_1_3_2_1_58_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01081"},{"key":"e_1_3_2_1_59_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.255"},{"key":"e_1_3_2_1_60_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.02032"},{"key":"e_1_3_2_1_61_1","volume-title":"Transtrack: Multiple object tracking with transformer. arXiv preprint arXiv:2012.15460","author":"Sun Peize","year":"2020","unstructured":"Peize Sun, Jinkun Cao, Yi Jiang, Rufeng Zhang, Enze Xie, Zehuan Yuan, Changhu Wang, and Ping Luo. 2020. Transtrack: Multiple object tracking with transformer. arXiv preprint arXiv:2012.15460 (2020)."},{"key":"e_1_3_2_1_62_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01422"},{"key":"e_1_3_2_1_63_1","volume-title":"Sequence to sequence learning with neural networks. Advances in neural information processing systems","author":"Sutskever Ilya","year":"2014","unstructured":"Ilya Sutskever, Oriol Vinyals, and Quoc V Le. 2014. Sequence to sequence learning with neural networks. Advances in neural information processing systems, Vol. 27 (2014)."},{"key":"e_1_3_2_1_64_1","volume-title":"Attention is all you need. Advances in neural information processing systems","author":"Vaswani Ashish","year":"2017","unstructured":"Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N Gomez, \u0141ukasz Kaiser, and Illia Polosukhin. 2017. Attention is all you need. Advances in neural information processing systems, Vol. 30 (2017)."},{"key":"e_1_3_2_1_65_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00813"},{"key":"e_1_3_2_1_66_1","volume-title":"OmniTracker: Unifying Object Tracking by Tracking-with-Detection. arXiv preprint arXiv:2303.12079","author":"Wang Junke","year":"2023","unstructured":"Junke Wang, Dongdong Chen, Zuxuan Wu, Chong Luo, Xiyang Dai, Lu Yuan, and Yu-Gang Jiang. 2023. OmniTracker: Unifying Object Tracking by Tracking-with-Detection. arXiv preprint arXiv:2303.12079 (2023)."},{"key":"e_1_3_2_1_67_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00387"},{"key":"e_1_3_2_1_68_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA48506.2021.9561110"},{"key":"e_1_3_2_1_69_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00863"},{"key":"e_1_3_2_1_70_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58621-8_7"},{"key":"e_1_3_2_1_71_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICIP.2017.8296962"},{"key":"e_1_3_2_1_72_1","volume-title":"Referring Multi-Object Tracking. arXiv preprint arXiv:2303.03366","author":"Wu Dongming","year":"2023","unstructured":"Dongming Wu, Wencheng Han, Tiancai Wang, Xingping Dong, Xiangyu Zhang, and Jianbing Shen. 2023. Referring Multi-Object Tracking. arXiv preprint arXiv:2303.03366 (2023)."},{"key":"e_1_3_2_1_73_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01217"},{"key":"e_1_3_2_1_74_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.360"},{"key":"e_1_3_2_1_75_1","volume-title":"Transcenter: Transformers with dense queries for multiple-object tracking. arXiv e-prints","author":"Xu Yihong","year":"2021","unstructured":"Yihong Xu, Yutong Ban, Guillaume Delorme, Chuang Gan, Daniela Rus, and Xavier Alameda-Pineda. 2021. Transcenter: Transformers with dense queries for multiple-object tracking. arXiv e-prints (2021), arXiv--2103."},{"key":"e_1_3_2_1_76_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58452-8_16"},{"key":"e_1_3_2_1_77_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.imavis.2020.104091"},{"key":"e_1_3_2_1_78_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.234"},{"key":"e_1_3_2_1_79_1","doi-asserted-by":"publisher","DOI":"10.1109\/WACV56688.2023.00478"},{"key":"e_1_3_2_1_80_1","volume-title":"Relationtrack: Relation-aware multiple object tracking with decoupled representation","author":"Yu En","year":"2022","unstructured":"En Yu, Zhuoling Li, Shoudong Han, and Hongwei Wang. 2022. Relationtrack: Relation-aware multiple object tracking with decoupled representation. IEEE Transactions on Multimedia (2022)."},{"key":"e_1_3_2_1_81_1","volume-title":"Tel Aviv","author":"Zeng Fangao","year":"2022","unstructured":"Fangao Zeng, Bin Dong, Yuang Zhang, Tiancai Wang, Xiangyu Zhang, and Yichen Wei. 2022. Motr: End-to-end multiple-object tracking with transformer. In Computer Vision-ECCV 2022: 17th European Conference, Tel Aviv, Israel, October 23-27, 2022, Proceedings, Part XXVII. Springer, 659--675."},{"key":"e_1_3_2_1_82_1","volume-title":"Dino: Detr with improved denoising anchor boxes for end-to-end object detection. arXiv preprint arXiv:2203.03605","author":"Zhang Hao","year":"2022","unstructured":"Hao Zhang, Feng Li, Shilong Liu, Lei Zhang, Hang Su, Jun Zhu, Lionel M Ni, and Heung-Yeung Shum. 2022a. Dino: Detr with improved denoising anchor boxes for end-to-end object detection. arXiv preprint arXiv:2203.03605 (2022)."},{"key":"e_1_3_2_1_83_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.474"},{"key":"e_1_3_2_1_84_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2020.2993073"},{"key":"e_1_3_2_1_85_1","volume-title":"Tel Aviv","author":"Zhang Yifu","year":"2022","unstructured":"Yifu Zhang, Peize Sun, Yi Jiang, Dongdong Yu, Fucheng Weng, Zehuan Yuan, Ping Luo, Wenyu Liu, and Xinggang Wang. 2022b. Bytetrack: Multi-object tracking by associating every detection box. In Computer Vision-ECCV 2022: 17th European Conference, Tel Aviv, Israel, October 23-27, 2022, Proceedings, Part XXII. Springer, 1--21."},{"key":"e_1_3_2_1_86_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-021-01513-4"},{"key":"e_1_3_2_1_87_1","volume-title":"MOTRv2: Bootstrapping End-to-End Multi-Object Tracking by Pretrained Object Detectors. arXiv preprint arXiv:2211.09791","author":"Zhang Yuang","year":"2022","unstructured":"Yuang Zhang, Tiancai Wang, and Xiangyu Zhang. 2022c. MOTRv2: Bootstrapping End-to-End Multi-Object Tracking by Pretrained Object Detectors. arXiv preprint arXiv:2211.09791 (2022)."},{"key":"e_1_3_2_1_88_1","volume-title":"Tel Aviv","author":"Zhao Zelin","year":"2022","unstructured":"Zelin Zhao, Ze Wu, Yueqing Zhuang, Boxun Li, and Jiaya Jia. 2022. Tracking objects as pixel-wise distributions. In Computer Vision-ECCV 2022: 17th European Conference, Tel Aviv, Israel, October 23-27, 2022, Proceedings, Part XXII. Springer, 76--94."},{"key":"e_1_3_2_1_89_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.357"},{"key":"e_1_3_2_1_90_1","volume-title":"UK","author":"Zhou Xingyi","year":"2020","unstructured":"Xingyi Zhou, Vladlen Koltun, and Philipp Kr\u00e4henb\u00fchl. 2020. Tracking objects as points. In Computer Vision-ECCV 2020: 16th European Conference, Glasgow, UK, August 23-28, 2020, Proceedings, Part IV. Springer, 474--490."},{"key":"e_1_3_2_1_91_1","volume-title":"Objects as points arXiv preprint arXiv:1904.07850","author":"Zhou Xingyi","year":"2019","unstructured":"Xingyi Zhou, Dequan Wang, and Philipp Kr\u00e4henb\u00fchl. 2019. Objects as points arXiv preprint arXiv:1904.07850 (2019)."},{"key":"e_1_3_2_1_92_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00857"},{"key":"e_1_3_2_1_93_1","volume-title":"Visual Prompt Multi-Modal Tracking. arXiv preprint arXiv:2303.10826","author":"Zhu Jiawen","year":"2023","unstructured":"Jiawen Zhu, Simiao Lai, Xin Chen, Dong Wang, and Huchuan Lu. 2023. Visual Prompt Multi-Modal Tracking. arXiv preprint arXiv:2303.10826 (2023)."},{"key":"e_1_3_2_1_94_1","volume-title":"Deformable detr: Deformable transformers for end-to-end object detection. arXiv preprint arXiv:2010.04159","author":"Zhu Xizhou","year":"2020","unstructured":"Xizhou Zhu, Weijie Su, Lewei Lu, Bin Li, Xiaogang Wang, and Jifeng Dai. 2020. Deformable detr: Deformable transformers for end-to-end object detection. arXiv preprint arXiv:2010.04159 (2020)."}],"event":{"name":"MM '23: The 31st ACM International Conference on Multimedia","location":"Ottawa ON Canada","acronym":"MM '23","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["Proceedings of the 31st ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3581783.3611728","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3581783.3611728","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T00:10:06Z","timestamp":1755821406000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3581783.3611728"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,10,26]]},"references-count":94,"alternative-id":["10.1145\/3581783.3611728","10.1145\/3581783"],"URL":"https:\/\/doi.org\/10.1145\/3581783.3611728","relation":{},"subject":[],"published":{"date-parts":[[2023,10,26]]},"assertion":[{"value":"2023-10-27","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}