{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,15]],"date-time":"2026-06-15T15:56:52Z","timestamp":1781539012530,"version":"3.54.5"},"publisher-location":"New York, NY, USA","reference-count":34,"publisher":"ACM","license":[{"start":{"date-parts":[[2026,6,15]],"date-time":"2026-06-15T00:00:00Z","timestamp":1781481600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/legalcode"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62276141"],"award-info":[{"award-number":["62276141"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2026,6,16]]},"DOI":"10.1145\/3805622.3810875","type":"proceedings-article","created":{"date-parts":[[2026,6,15]],"date-time":"2026-06-15T14:42:57Z","timestamp":1781534577000},"page":"1553-1557","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Robust Exemplar Prompt Learning via Bi-directional Visual-Semantic Alignment for Multi-Object Tracking"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0002-3648-0678","authenticated-orcid":false,"given":"Lingyan","family":"Liang","sequence":"first","affiliation":[{"name":"IEIT SYSTEMS Co., Ltd, China, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6661-1371","authenticated-orcid":false,"given":"Zhibin","family":"Zhang","sequence":"additional","affiliation":[{"name":"Tianjin University of Technology, Tianjin, China, Tianjin, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3147-1011","authenticated-orcid":false,"given":"Gang","family":"Dong","sequence":"additional","affiliation":[{"name":"IEIT SYSTEMS Co., Ltd, China, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7311-1842","authenticated-orcid":false,"given":"Dongchao","family":"Wen","sequence":"additional","affiliation":[{"name":"IEIT SYSTEMS Co., Ltd, China, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1613-3401","authenticated-orcid":false,"given":"Kaihua","family":"Zhang","sequence":"additional","affiliation":[{"name":"Southeast University, Nanjing, China, Nanjing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2026,6,15]]},"reference":[{"key":"e_1_3_3_1_2_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICIP.2016.7533003"},{"key":"e_1_3_3_1_3_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00792"},{"key":"e_1_3_3_1_4_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58452-8_13"},{"key":"e_1_3_3_1_5_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW56347.2022.00393"},{"key":"e_1_3_3_1_6_2","doi-asserted-by":"publisher","DOI":"10.1109\/WACV51458.2022.00347"},{"key":"e_1_3_3_1_7_2","doi-asserted-by":"publisher","DOI":"10.1109\/WACV57701.2024.00672"},{"key":"e_1_3_3_1_8_2","unstructured":"Patrick Dendorfer Hamid Rezatofighi Anton Milan Javen Shi Daniel Cremers Ian Reid Stefan Roth Konrad Schindler and Laura Leal-Taix\u00e9. 2020. Mot20: A benchmark for multi object tracking in crowded scenes. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2003.09003 (2020)."},{"key":"e_1_3_3_1_9_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00908"},{"key":"e_1_3_3_1_10_2","doi-asserted-by":"publisher","DOI":"10.1109\/WACV45572.2020.9093384"},{"key":"e_1_3_3_1_11_2","doi-asserted-by":"crossref","unstructured":"Seong-Ho Lee Dae-Hyeon Park and Seung-Hwan Bae. 2023. Decode-mot: How can we hurdle frames to go beyond tracking-by-detection? IEEE Transactions on Image Processing 32 (2023) 4378\u20134392.","DOI":"10.1109\/TIP.2023.3298538"},{"key":"e_1_3_3_1_12_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-72761-0_16"},{"key":"e_1_3_3_1_13_2","unstructured":"Yunhao Li Xiaoqiong Liu Luke Liu Heng Fan and Libo Zhang. 2024. LaMOT: Language-Guided Multi-Object Tracking. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2406.08324 (2024)."},{"key":"e_1_3_3_1_14_2","first-page":"1546","volume-title":"Proceedings of the AAAI Conference on Artificial Intelligence","author":"Liang Chao","year":"2022","unstructured":"Chao Liang, Zhipeng Zhang, Xue Zhou, Bing Li, and Weiming Hu. 2022. One more check: making \u201cfake background\u201d be tracked again. In Proceedings of the AAAI Conference on Artificial Intelligence. 1546\u20131554."},{"key":"e_1_3_3_1_15_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-46448-0_2"},{"key":"e_1_3_3_1_16_2","doi-asserted-by":"crossref","unstructured":"Jonathon Luiten Aljosa Osep Patrick Dendorfer Philip Torr Andreas Geiger Laura Leal-Taix\u00e9 and Bastian Leibe. 2021. Hota: A higher order metric for evaluating multi-object tracking. International journal of computer vision 129 (2021) 548\u2013578.","DOI":"10.1007\/s11263-020-01375-2"},{"key":"e_1_3_3_1_17_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.352"},{"key":"e_1_3_3_1_18_2","unstructured":"Anton Milan Laura Leal-Taix\u00e9 Ian Reid Stefan Roth and Konrad Schindler. 2016. MOT16: A benchmark for multi-object tracking. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1603.00831 (2016)."},{"key":"e_1_3_3_1_19_2","volume-title":"International Conference on Machine Learning","author":"Radford Alec","year":"2021","unstructured":"Alec Radford, Jong\u00a0Wook Kim, Chris Hallacy, Aditya Ramesh, Gabriel Goh, Sandhini Agarwal, Girish Sastry, Amanda Askell, Pamela Mishkin, Jack Clark, Gretchen Krueger, and Ilya Sutskever. 2021. Learning Transferable Visual Models From Natural Language Supervision. In International Conference on Machine Learning. https:\/\/api.semanticscholar.org\/CorpusID:231591445"},{"key":"e_1_3_3_1_20_2","unstructured":"Shaoqing Ren Kaiming He Ross Girshick and Jian Sun. 2015. Faster r-cnn: Towards real-time object detection with region proposal networks. Advances in neural information processing systems 28 (2015)."},{"key":"e_1_3_3_1_21_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-48881-3_2"},{"key":"e_1_3_3_1_22_2","unstructured":"Shuai Shao Zijian Zhao Boxun Li Tete Xiao Gang Yu Xiangyu Zhang and Jian Sun. 2018. Crowdhuman: A benchmark for detecting human in a crowd. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1805.00123 (2018)."},{"key":"e_1_3_3_1_23_2","unstructured":"Laurens Van\u00a0der Maaten and Geoffrey Hinton. 2008. Visualizing data using t-SNE. Journal of machine learning research 9 11 (2008)."},{"key":"e_1_3_3_1_24_2","doi-asserted-by":"crossref","unstructured":"Linh Van\u00a0Ma Tran Thien\u00a0Dat Nguyen Changbeom Shim Du\u00a0Yong Kim Namkoo Ha and Moongu Jeon. 2024. Visual multi-object tracking with re-identification and occlusion handling using labeled random finite sets. Pattern Recognition 156 (2024) 110785.","DOI":"10.1016\/j.patcog.2024.110785"},{"key":"e_1_3_3_1_25_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00813"},{"key":"e_1_3_3_1_26_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICIP.2017.8296962"},{"key":"e_1_3_3_1_27_2","doi-asserted-by":"crossref","unstructured":"Yihong Xu Yutong Ban Guillaume Delorme Chuang Gan Daniela Rus and Xavier Alameda-Pineda. 2022. TransCenter: Transformers with dense representations for multiple-object tracking. IEEE transactions on pattern analysis and machine intelligence 45 6 (2022) 7820\u20137835.","DOI":"10.1109\/TPAMI.2022.3225078"},{"key":"e_1_3_3_1_28_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00863"},{"key":"e_1_3_3_1_29_2","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v37i3.25437"},{"key":"e_1_3_3_1_30_2","unstructured":"En Yu Tiancai Wang Zhuoling Li Yuang Zhang Xiangyu Zhang and Wenbing Tao. 2024. MOTRv3: Release-Fetch Supervision for End-to-End Multi-Object Tracking. https:\/\/openreview.net\/forum?id=ezPbPoYFME"},{"key":"e_1_3_3_1_31_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19812-0_38"},{"key":"e_1_3_3_1_32_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-20047-2_1"},{"key":"e_1_3_3_1_33_2","doi-asserted-by":"crossref","unstructured":"Yanchao Zhang Wenbo Zhang Jiya Yu Leiying He Jianneng Chen and Yong He. 2022. Complete and accurate holly fruits counting using YOLOX object detection. Computers and Electronics in Agriculture 198 (2022) 107062.","DOI":"10.1016\/j.compag.2022.107062"},{"key":"e_1_3_3_1_34_2","doi-asserted-by":"crossref","unstructured":"Tao Zhou Qi Ye Wenhan Luo Haizhou Ran Zhiguo Shi and Jiming Chen. 2025. Apptracker+: Displacement uncertainty for occlusion handling in low-frame-rate multiple object tracking. International Journal of Computer Vision 133 4 (2025) 2044\u20132069.","DOI":"10.1007\/s11263-024-02237-x"},{"key":"e_1_3_3_1_35_2","unstructured":"Xizhou Zhu Weijie Su Lewei Lu Bin Li Xiaogang Wang and Jifeng Dai. 2020. Deformable detr: Deformable transformers for end-to-end object detection. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2010.04159 (2020)."}],"event":{"name":"ICMR '26: International Conference on Multimedia Retrieval","location":"Amsterdam The Netherlands","acronym":"ICMR '26","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["Proceedings of the 2026 International Conference on Multimedia Retrieval"],"original-title":[],"deposited":{"date-parts":[[2026,6,15]],"date-time":"2026-06-15T15:34:26Z","timestamp":1781537666000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3805622.3810875"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,6,15]]},"references-count":34,"alternative-id":["10.1145\/3805622.3810875","10.1145\/3805622"],"URL":"https:\/\/doi.org\/10.1145\/3805622.3810875","relation":{},"subject":[],"published":{"date-parts":[[2026,6,15]]},"assertion":[{"value":"2026-06-15","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}