{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,4]],"date-time":"2026-05-04T09:31:58Z","timestamp":1777887118695,"version":"3.51.4"},"reference-count":42,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2026,3,1]],"date-time":"2026-03-01T00:00:00Z","timestamp":1772323200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2026,3,1]],"date-time":"2026-03-01T00:00:00Z","timestamp":1772323200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/legal\/tdmrep-license"},{"start":{"date-parts":[[2026,3,1]],"date-time":"2026-03-01T00:00:00Z","timestamp":1772323200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-017"},{"start":{"date-parts":[[2026,3,1]],"date-time":"2026-03-01T00:00:00Z","timestamp":1772323200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"},{"start":{"date-parts":[[2026,3,1]],"date-time":"2026-03-01T00:00:00Z","timestamp":1772323200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-012"},{"start":{"date-parts":[[2026,3,1]],"date-time":"2026-03-01T00:00:00Z","timestamp":1772323200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2026,3,1]],"date-time":"2026-03-01T00:00:00Z","timestamp":1772323200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-004"}],"funder":[{"DOI":"10.13039\/501100010008","name":"Postdoctoral Science Foundation of Guangxi Province of China","doi-asserted-by":"publisher","award":["2025M770535"],"award-info":[{"award-number":["2025M770535"]}],"id":[{"id":"10.13039\/501100010008","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100012226","name":"Fundamental Research Funds for the Central Universities","doi-asserted-by":"publisher","award":["xzy012024068"],"award-info":[{"award-number":["xzy012024068"]}],"id":[{"id":"10.13039\/501100012226","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100002858","name":"China Postdoctoral Science Foundation","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100002858","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100013804","name":"Fundamental Research Funds for the Central Universities","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100013804","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["U24A20291"],"award-info":[{"award-number":["U24A20291"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Computer Vision and Image Understanding"],"published-print":{"date-parts":[[2026,3]]},"DOI":"10.1016\/j.cviu.2026.104682","type":"journal-article","created":{"date-parts":[[2026,2,17]],"date-time":"2026-02-17T20:13:44Z","timestamp":1771359224000},"page":"104682","update-policy":"https:\/\/doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":0,"special_numbering":"C","title":["QB-MOTR: A simple query bootstrapping end-to-end multi-object tracking method with transformer"],"prefix":"10.1016","volume":"265","author":[{"ORCID":"https:\/\/orcid.org\/0009-0006-0572-8377","authenticated-orcid":false,"given":"Zifan","family":"Han","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2772-2700","authenticated-orcid":false,"given":"Xuchong","family":"Zhang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Hang","family":"Wang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Hongbin","family":"Sun","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"78","reference":[{"key":"10.1016\/j.cviu.2026.104682_b1","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1155\/2008\/246309","article-title":"Evaluating multiple object tracking performance: the clear mot metrics","volume":"2008","author":"Bernardin","year":"2008","journal-title":"EURASIP J. Image Video Process."},{"key":"10.1016\/j.cviu.2026.104682_b2","series-title":"Simple online and realtime tracking","first-page":"3464","author":"Bewley","year":"2016"},{"key":"10.1016\/j.cviu.2026.104682_b3","series-title":"Memot: Multi-object tracking with memory","first-page":"8090","author":"Cai","year":"2022"},{"key":"10.1016\/j.cviu.2026.104682_b4","series-title":"Observation-centric sort: Rethinking sort for robust multi-object tracking","first-page":"9686","author":"Cao","year":"2023"},{"key":"10.1016\/j.cviu.2026.104682_b5","series-title":"End-to-end object detection with transformers","first-page":"213","author":"Carion","year":"2020"},{"key":"10.1016\/j.cviu.2026.104682_b6","series-title":"SportsMOT: A large multi-object tracking dataset in multiple sports scenes","author":"Cui","year":"2023"},{"key":"10.1016\/j.cviu.2026.104682_b7","doi-asserted-by":"crossref","DOI":"10.1109\/TPAMI.2023.3301975","article-title":"Qdtrack: Quasi-dense similarity learning for appearance-only multiple object tracking","author":"Fischer","year":"2023","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"10.1016\/j.cviu.2026.104682_b8","series-title":"MeMOTR: Long-term memory-augmented transformer for multi-object tracking","first-page":"9901","author":"Gao","year":"2023"},{"key":"10.1016\/j.cviu.2026.104682_b9","series-title":"Yolox: Exceeding yolo series in 2021","author":"Ge","year":"2021"},{"issue":"11","key":"10.1016\/j.cviu.2026.104682_b10","doi-asserted-by":"crossref","first-page":"1231","DOI":"10.1177\/0278364913491297","article-title":"Vision meets robotics: The kitti dataset","volume":"32","author":"Geiger","year":"2013","journal-title":"Int. J. Robot. Res."},{"key":"10.1016\/j.cviu.2026.104682_b11","series-title":"Deep residual learning for image recognition","first-page":"770","author":"He","year":"2016"},{"key":"10.1016\/j.cviu.2026.104682_b12","series-title":"Iterative scale-up ExpansionIoU and deep features association for multi-object tracking in sports","author":"Huang","year":"2023"},{"key":"10.1016\/j.cviu.2026.104682_b13","doi-asserted-by":"crossref","DOI":"10.1016\/j.cviu.2022.103586","article-title":"Multi-object tracking with robust object regression and association","volume":"227","author":"Li","year":"2023","journal-title":"Comput. Vis. Image Underst."},{"key":"10.1016\/j.cviu.2026.104682_b14","doi-asserted-by":"crossref","unstructured":"Lin,\u00a0T.-Y., Goyal,\u00a0P., Girshick,\u00a0R., He,\u00a0K., Doll\u00e1r,\u00a0P., 2017. Focal loss for dense object detection. In: Proceedings of the IEEE International Conference on Computer Vision. pp. 2980\u20132988.","DOI":"10.1109\/ICCV.2017.324"},{"key":"10.1016\/j.cviu.2026.104682_b15","series-title":"Microsoft coco: Common objects in context","first-page":"740","author":"Lin","year":"2014"},{"key":"10.1016\/j.cviu.2026.104682_b16","series-title":"Dab-detr: Dynamic anchor boxes are better queries for detr","author":"Liu","year":"2022"},{"key":"10.1016\/j.cviu.2026.104682_b17","doi-asserted-by":"crossref","first-page":"548","DOI":"10.1007\/s11263-020-01375-2","article-title":"Hota: A higher order metric for evaluating multi-object tracking","volume":"129","author":"Luiten","year":"2021","journal-title":"Int. J. Comput. Vis."},{"key":"10.1016\/j.cviu.2026.104682_b18","series-title":"Real-time kinematic ground truth for the oxford robotcar dataset","author":"Maddern","year":"2020"},{"key":"10.1016\/j.cviu.2026.104682_b19","series-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","first-page":"8844","article-title":"Trackformer: Multi-object tracking with transformers","author":"Meinhardt","year":"2022"},{"key":"10.1016\/j.cviu.2026.104682_b20","series-title":"MOT16: A benchmark for multi-object tracking","author":"Milan","year":"2016"},{"key":"10.1016\/j.cviu.2026.104682_b21","series-title":"Toward driving scene understanding: A dataset for learning driver behavior and causal reasoning","first-page":"7699","author":"Ramanishka","year":"2018"},{"key":"10.1016\/j.cviu.2026.104682_b22","doi-asserted-by":"crossref","unstructured":"Rezatofighi,\u00a0H., Tsoi,\u00a0N., Gwak,\u00a0J., Sadeghian,\u00a0A., Reid,\u00a0I., Savarese,\u00a0S., 2019. Generalized intersection over union: A metric and a loss for bounding box regression. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. pp. 658\u2013666.","DOI":"10.1109\/CVPR.2019.00075"},{"key":"10.1016\/j.cviu.2026.104682_b23","series-title":"Performance measures and a data set for multi-target, multi-camera tracking","first-page":"17","author":"Ristani","year":"2016"},{"key":"10.1016\/j.cviu.2026.104682_b24","doi-asserted-by":"crossref","DOI":"10.1016\/j.cviu.2022.103569","article-title":"A multi-object tracker using dynamic Bayesian networks and a residual neural network based similarity estimator","volume":"225","author":"Saada","year":"2022","journal-title":"Comput. Vis. Image Underst."},{"key":"10.1016\/j.cviu.2026.104682_b25","series-title":"Crowdhuman: A benchmark for detecting human in a crowd","author":"Shao","year":"2018"},{"key":"10.1016\/j.cviu.2026.104682_b26","series-title":"Dancetrack: Multi-object tracking in uniform appearance and diverse motion","first-page":"20993","author":"Sun","year":"2022"},{"key":"10.1016\/j.cviu.2026.104682_b27","series-title":"Transtrack: Multiple object tracking with transformer","author":"Sun","year":"2020"},{"key":"10.1016\/j.cviu.2026.104682_b28","doi-asserted-by":"crossref","DOI":"10.1016\/j.cviu.2022.103433","article-title":"STURE: Spatial\u2013temporal mutual representation learning for robust data association in online multi-object tracking","volume":"220","author":"Wang","year":"2022","journal-title":"Comput. Vis. Image Underst."},{"key":"10.1016\/j.cviu.2026.104682_b29","series-title":"Simple online and realtime tracking with a deep association metric","first-page":"3645","author":"Wojke","year":"2017"},{"key":"10.1016\/j.cviu.2026.104682_b30","series-title":"Track to detect and segment: An online multi-object tracker","first-page":"12352","author":"Wu","year":"2021"},{"key":"10.1016\/j.cviu.2026.104682_b31","series-title":"Multiple object tracking challenge technical report for team MT_IoT","author":"Yan","year":"2022"},{"key":"10.1016\/j.cviu.2026.104682_b32","series-title":"Hard to track objects with irregular motions and similar appearances? make it easier by buffering the matching space","first-page":"4799","author":"Yang","year":"2023"},{"key":"10.1016\/j.cviu.2026.104682_b33","series-title":"Bdd100k: A diverse driving dataset for heterogeneous multitask learning","first-page":"2636","author":"Yu","year":"2020"},{"key":"10.1016\/j.cviu.2026.104682_b34","series-title":"MOTRv3: Release-fetch supervision for end-to-end multi-object tracking","author":"Yu","year":"2023"},{"key":"10.1016\/j.cviu.2026.104682_b35","series-title":"Motr: End-to-end multiple-object tracking with transformer","first-page":"659","author":"Zeng","year":"2022"},{"key":"10.1016\/j.cviu.2026.104682_b36","series-title":"Bytetrack: Multi-object tracking by associating every detection box","first-page":"1","author":"Zhang","year":"2022"},{"key":"10.1016\/j.cviu.2026.104682_b37","doi-asserted-by":"crossref","first-page":"3069","DOI":"10.1007\/s11263-021-01513-4","article-title":"Fairmot: On the fairness of detection and re-identification in multiple object tracking","volume":"129","author":"Zhang","year":"2021","journal-title":"Int. J. Comput. Vis."},{"key":"10.1016\/j.cviu.2026.104682_b38","series-title":"Motrv2: Bootstrapping end-to-end multi-object tracking by pretrained object detectors","first-page":"22056","author":"Zhang","year":"2023"},{"key":"10.1016\/j.cviu.2026.104682_b39","series-title":"Streaming video model","first-page":"14602","author":"Zhao","year":"2023"},{"key":"10.1016\/j.cviu.2026.104682_b40","series-title":"Tracking objects as points","first-page":"474","author":"Zhou","year":"2020"},{"key":"10.1016\/j.cviu.2026.104682_b41","series-title":"Global tracking transformers","first-page":"8771","author":"Zhou","year":"2022"},{"key":"10.1016\/j.cviu.2026.104682_b42","series-title":"Deformable detr: Deformable transformers for end-to-end object detection","author":"Zhu","year":"2020"}],"container-title":["Computer Vision and Image Understanding"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S1077314226000494?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S1077314226000494?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2026,5,1]],"date-time":"2026-05-01T04:01:41Z","timestamp":1777608101000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S1077314226000494"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,3]]},"references-count":42,"alternative-id":["S1077314226000494"],"URL":"https:\/\/doi.org\/10.1016\/j.cviu.2026.104682","relation":{},"ISSN":["1077-3142"],"issn-type":[{"value":"1077-3142","type":"print"}],"subject":[],"published":{"date-parts":[[2026,3]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"QB-MOTR: A simple query bootstrapping end-to-end multi-object tracking method with transformer","name":"articletitle","label":"Article Title"},{"value":"Computer Vision and Image Understanding","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.cviu.2026.104682","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2026 Elsevier Inc. All rights are reserved, including those for text and data mining, AI training, and similar technologies.","name":"copyright","label":"Copyright"}],"article-number":"104682"}}