{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,2]],"date-time":"2026-06-02T05:03:50Z","timestamp":1780376630683,"version":"3.54.1"},"reference-count":40,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2026,11,1]],"date-time":"2026-11-01T00:00:00Z","timestamp":1793491200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2026,11,1]],"date-time":"2026-11-01T00:00:00Z","timestamp":1793491200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/legal\/tdmrep-license"},{"start":{"date-parts":[[2026,11,1]],"date-time":"2026-11-01T00:00:00Z","timestamp":1793491200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-017"},{"start":{"date-parts":[[2026,11,1]],"date-time":"2026-11-01T00:00:00Z","timestamp":1793491200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"},{"start":{"date-parts":[[2026,11,1]],"date-time":"2026-11-01T00:00:00Z","timestamp":1793491200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-012"},{"start":{"date-parts":[[2026,11,1]],"date-time":"2026-11-01T00:00:00Z","timestamp":1793491200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2026,11,1]],"date-time":"2026-11-01T00:00:00Z","timestamp":1793491200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-004"}],"funder":[{"DOI":"10.13039\/501100025517","name":"Big Data Computing Center, Southeast University","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100025517","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62271143"],"award-info":[{"award-number":["62271143"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Pattern Recognition"],"published-print":{"date-parts":[[2026,11]]},"DOI":"10.1016\/j.patcog.2026.113552","type":"journal-article","created":{"date-parts":[[2026,3,21]],"date-time":"2026-03-21T16:24:49Z","timestamp":1774110289000},"page":"113552","update-policy":"https:\/\/doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":0,"special_numbering":"PA","title":["Tracking by detection and query: An efficient end-to-end framework for multi-object tracking"],"prefix":"10.1016","volume":"179","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-7379-2534","authenticated-orcid":false,"given":"Shukun","family":"Jia","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5872-7566","authenticated-orcid":false,"given":"Shiyu","family":"Hu","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2997-4012","authenticated-orcid":false,"given":"Yichao","family":"Cao","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3495-5257","authenticated-orcid":false,"given":"Feng","family":"Yang","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-7669-1674","authenticated-orcid":false,"given":"Xin","family":"Lu","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7707-7538","authenticated-orcid":false,"given":"Xiaobo","family":"Lu","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"78","reference":[{"key":"10.1016\/j.patcog.2026.113552_bib0001","doi-asserted-by":"crossref","DOI":"10.1016\/j.patcog.2025.111623","article-title":"Indoor scene multi-object tracking based on region search and memory buffer pool","volume":"165","author":"Li","year":"2025","journal-title":"Pattern Recognit."},{"key":"10.1016\/j.patcog.2026.113552_bib0002","doi-asserted-by":"crossref","first-page":"1462","DOI":"10.1109\/TMM.2023.3234822","article-title":"Robust multi-drone multi-target tracking to resolve target occlusion: a benchmark","volume":"25","author":"Liu","year":"2023","journal-title":"IEEE Trans. Multimed."},{"key":"10.1016\/j.patcog.2026.113552_bib0003","series-title":"2016\u202fIEEE International Conference on Image Processing (ICIP)","first-page":"3464","article-title":"Simple online and realtime tracking","author":"Bewley","year":"2016"},{"key":"10.1016\/j.patcog.2026.113552_bib0004","series-title":"European Conference on Computer Vision","first-page":"1","article-title":"ByteTrack: multi-object tracking by associating every detection box","author":"Zhang","year":"2022"},{"key":"10.1016\/j.patcog.2026.113552_bib0005","doi-asserted-by":"crossref","DOI":"10.1016\/j.patcog.2022.109107","article-title":"Detection confidence driven multi-object tracking to recover reliable tracks from unreliable detections","volume":"135","author":"Mandel","year":"2023","journal-title":"Pattern Recognit."},{"key":"10.1016\/j.patcog.2026.113552_bib0006","series-title":"European Conference on Computer Vision","first-page":"659","article-title":"MOTR: end-to-end multiple-object tracking with transformer","author":"Zeng","year":"2022"},{"key":"10.1016\/j.patcog.2026.113552_bib0007","series-title":"Proceedings of the IEEE\/CVF International Conference on Computer Vision","first-page":"9901","article-title":"MeMOTR: long-term memory-augmented transformer for multi-object tracking","author":"Gao","year":"2023"},{"key":"10.1016\/j.patcog.2026.113552_bib0008","series-title":"International Conference on Learning Representations","article-title":"CO-MOT: Boosting end-to-end transformer-based multi-object tracking via coopetition label assignment and shadow sets","author":"yan","year":"2025"},{"key":"10.1016\/j.patcog.2026.113552_bib0009","series-title":"European Conference on Computer Vision","first-page":"474","article-title":"Tracking objects as points","author":"Zhou","year":"2020"},{"key":"10.1016\/j.patcog.2026.113552_bib0010","unstructured":"Z. Ge, S. Liu, F. Wang, Z. Li, J. Sun, YOLOX: Exceeding YOLO series in 2021, (2021) arXiv: 2107.08430."},{"key":"10.1016\/j.patcog.2026.113552_bib0011","series-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","first-page":"27883","article-title":"Multiple object tracking as id prediction","author":"Gao","year":"2025"},{"key":"10.1016\/j.patcog.2026.113552_bib0012","series-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","first-page":"22056","article-title":"MOTRv2: bootstrapping end-to-end multi-object tracking by pretrained object detectors","author":"Zhang","year":"2023"},{"key":"10.1016\/j.patcog.2026.113552_bib0013","series-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","first-page":"20993","article-title":"DanceTrack: multi-object tracking in uniform appearance and diverse motion","author":"Sun","year":"2022"},{"key":"10.1016\/j.patcog.2026.113552_bib0014","series-title":"Proceedings of the IEEE\/CVF International Conference on Computer Vision","first-page":"9921","article-title":"SportsMOT: a large multi-object tracking dataset in multiple sports scenes","author":"Cui","year":"2023"},{"key":"10.1016\/j.patcog.2026.113552_bib0015","unstructured":"P. Dendorfer, MOT20: A benchmark for multi object tracking in crowded scenes, (2020) arXiv: 2003.09003."},{"key":"10.1016\/j.patcog.2026.113552_bib0016","series-title":"Proceedings of the AAAI Conference on Artificial Intelligence","first-page":"6504","article-title":"Hybrid-SORT: weak cues matter for online multi-object tracking","volume":"38","author":"Yang","year":"2024"},{"key":"10.1016\/j.patcog.2026.113552_bib0017","doi-asserted-by":"crossref","first-page":"3069","DOI":"10.1007\/s11263-021-01513-4","article-title":"FairMOT: on the fairness of detection and re-identification in multiple object tracking","volume":"129","author":"Zhang","year":"2021","journal-title":"Int. J. Comput. Vis."},{"key":"10.1016\/j.patcog.2026.113552_bib0018","series-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","first-page":"9686","article-title":"Observation-Centric SORT: rethinking SORT for robust multi-object tracking","author":"Cao","year":"2023"},{"key":"10.1016\/j.patcog.2026.113552_bib0019","series-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","first-page":"11289","article-title":"Focus on details: online multi-object tracking with diverse fine-grained representation","author":"Ren","year":"2023"},{"key":"10.1016\/j.patcog.2026.113552_bib0020","doi-asserted-by":"crossref","first-page":"165","DOI":"10.1109\/TCE.2025.3541839","article-title":"PD-SORT: occlusion-robust multi-object tracking using pseudo-depth cues","volume":"71","author":"Wang","year":"2025","journal-title":"IEEE Trans. Consum. Electron."},{"issue":"4","key":"10.1016\/j.patcog.2026.113552_bib0021","doi-asserted-by":"crossref","DOI":"10.3390\/app15041907","article-title":"ReTrackVLM: transformer-enhanced multi-object tracking with cross-modal embeddings and zero-shot re-Identification integration","volume":"15","author":"Bayraktar","year":"2025","journal-title":"Appl. Sci."},{"key":"10.1016\/j.patcog.2026.113552_bib0022","doi-asserted-by":"crossref","first-page":"3009","DOI":"10.1109\/TMM.2025.3557619","article-title":"Open-vocabulary multi-object tracking with domain generalized and temporally adaptive features","volume":"27","author":"Li","year":"2025","journal-title":"IEEE Trans. Multimed."},{"key":"10.1016\/j.patcog.2026.113552_bib0023","doi-asserted-by":"crossref","DOI":"10.1016\/j.patcog.2024.111091","article-title":"SeaTrack: rethinking observation-centric SORT for robust nearshore multiple object tracking","volume":"159","author":"Ding","year":"2025","journal-title":"Pattern Recognit."},{"key":"10.1016\/j.patcog.2026.113552_bib0024","series-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","first-page":"8844","article-title":"TrackFormer: multi-object tracking with transformers","author":"Meinhardt","year":"2022"},{"issue":"11","key":"10.1016\/j.patcog.2026.113552_bib0025","doi-asserted-by":"crossref","first-page":"12783","DOI":"10.1109\/TPAMI.2022.3213073","article-title":"Looking beyond two frames: end-to-end multi-object tracking using spatial and temporal transformers","volume":"45","author":"Zhu","year":"2022","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"10.1016\/j.patcog.2026.113552_bib0026","series-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","first-page":"8090","article-title":"MeMOT: multi-object tracking with memory","author":"Cai","year":"2022"},{"key":"10.1016\/j.patcog.2026.113552_bib0027","series-title":"International Conference on Learning Representations","article-title":"DAB-DETR: dynamic anchor boxes are better queries for DETR","author":"Liu","year":"2022"},{"key":"10.1016\/j.patcog.2026.113552_bib0028","series-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","first-page":"16965","article-title":"DETRs beat YOLOs on real-time object detection","author":"Zhao","year":"2024"},{"key":"10.1016\/j.patcog.2026.113552_bib0029","first-page":"5998","article-title":"Attention is all you need","volume":"30","author":"Vaswani","year":"2017","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.patcog.2026.113552_bib0030","doi-asserted-by":"crossref","first-page":"548","DOI":"10.1007\/s11263-020-01375-2","article-title":"HOTA: a higher order metric for evaluating multi-object tracking","volume":"129","author":"Luiten","year":"2021","journal-title":"Int. J. Comput. Vis."},{"key":"10.1016\/j.patcog.2026.113552_bib0031","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1155\/2008\/246309","article-title":"Evaluating multiple object tracking performance: the CLEAR MOT metrics","volume":"2008","author":"Bernardin","year":"2008","journal-title":"EURASIP J. Image Video Process."},{"key":"10.1016\/j.patcog.2026.113552_bib0032","series-title":"European Conference on Computer Vision","first-page":"17","article-title":"Performance measures and a data set for multi-target, multi-camera tracking","author":"Ristani","year":"2016"},{"key":"10.1016\/j.patcog.2026.113552_bib0033","doi-asserted-by":"crossref","DOI":"10.1016\/j.patcog.2026.113242","article-title":"DecoderTracker: decoder-only end-to-end method for multiple-object tracking","volume":"177","author":"Liao","year":"2026","journal-title":"Pattern Recognit."},{"issue":"1","key":"10.1016\/j.patcog.2026.113552_bib0034","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1007\/s10489-024-05866-4","article-title":"ETTrack: enhanced temporal motion predictor for multi-object tracking","volume":"55","author":"Han","year":"2025","journal-title":"Appl. Intell."},{"key":"10.1016\/j.patcog.2026.113552_bib0035","article-title":"MambaMOT: state-space model as motion predictor for multi-object tracking","author":"Huang","year":"2024","journal-title":"ICASSP 2025 - 2025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)"},{"key":"10.1016\/j.patcog.2026.113552_bib0036","doi-asserted-by":"crossref","DOI":"10.1016\/j.patcog.2024.111169","article-title":"Learning data association for multi-object tracking using only coordinates","volume":"160","author":"Miah","year":"2025","journal-title":"Pattern Recognit."},{"key":"10.1016\/j.patcog.2026.113552_bib0037","doi-asserted-by":"crossref","first-page":"3182","DOI":"10.1109\/TIP.2022.3165376","article-title":"Rethinking the competition between detection and reid in multiobject tracking","volume":"31","author":"Liang","year":"2022","journal-title":"IEEE Trans. Image Process."},{"key":"10.1016\/j.patcog.2026.113552_bib0038","doi-asserted-by":"crossref","first-page":"4473","DOI":"10.1109\/TCSVT.2023.3339609","article-title":"One-shot multiple object tracking with robust id preservation","volume":"34","author":"Lv","year":"2023","journal-title":"IEEE Trans. Circuits Syst. Video Technol."},{"key":"10.1016\/j.patcog.2026.113552_bib0039","doi-asserted-by":"crossref","DOI":"10.1016\/j.patcog.2024.110369","article-title":"Motion-guided and occlusion-aware multi-object tracking with hierarchical matching","volume":"151","author":"Zheng","year":"2024","journal-title":"Pattern Recognit."},{"key":"10.1016\/j.patcog.2026.113552_bib0040","doi-asserted-by":"crossref","DOI":"10.1016\/j.patcog.2024.110785","article-title":"Visual multi-object tracking with re-identification and occlusion handling using labeled random finite sets","volume":"156","author":"Van Ma","year":"2024","journal-title":"Pattern Recognit."}],"container-title":["Pattern Recognition"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0031320326005182?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0031320326005182?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2026,6,2]],"date-time":"2026-06-02T04:13:54Z","timestamp":1780373634000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S0031320326005182"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,11]]},"references-count":40,"alternative-id":["S0031320326005182"],"URL":"https:\/\/doi.org\/10.1016\/j.patcog.2026.113552","relation":{},"ISSN":["0031-3203"],"issn-type":[{"value":"0031-3203","type":"print"}],"subject":[],"published":{"date-parts":[[2026,11]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"Tracking by detection and query: An efficient end-to-end framework for multi-object tracking","name":"articletitle","label":"Article Title"},{"value":"Pattern Recognition","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.patcog.2026.113552","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2026 Elsevier Ltd. All rights are reserved, including those for text and data mining, AI training, and similar technologies.","name":"copyright","label":"Copyright"}],"article-number":"113552"}}