{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,6]],"date-time":"2026-05-06T16:05:25Z","timestamp":1778083525658,"version":"3.51.4"},"publisher-location":"Cham","reference-count":33,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031728471","type":"print"},{"value":"9783031728488","type":"electronic"}],"license":[{"start":{"date-parts":[[2024,11,29]],"date-time":"2024-11-29T00:00:00Z","timestamp":1732838400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,11,29]],"date-time":"2024-11-29T00:00:00Z","timestamp":1732838400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-72848-8_10","type":"book-chapter","created":{"date-parts":[[2024,11,28]],"date-time":"2024-11-28T13:37:54Z","timestamp":1732801074000},"page":"161-177","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":3,"title":["JDT3D: Addressing the\u00a0Gaps in\u00a0LiDAR-Based Tracking-by-Attention"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0009-0000-4110-6504","authenticated-orcid":false,"given":"Brian","family":"Cheong","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0009-0006-1234-1883","authenticated-orcid":false,"given":"Jiachen","family":"Zhou","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4217-4415","authenticated-orcid":false,"given":"Steven","family":"Waslander","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,11,29]]},"reference":[{"key":"10_CR1","doi-asserted-by":"crossref","unstructured":"Bai, X., et al.: Transfusion: robust lidar-camera fusion for 3D object detection with transformers. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 1090\u20131099 (2022)","DOI":"10.1109\/CVPR52688.2022.00116"},{"key":"10_CR2","doi-asserted-by":"crossref","unstructured":"Caesar, H., et al.: nuScenes: a multimodal dataset for autonomous driving. 2020 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 11618\u201311628 (2019). https:\/\/api.semanticscholar.org\/CorpusID:85517967","DOI":"10.1109\/CVPR42600.2020.01164"},{"key":"10_CR3","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"213","DOI":"10.1007\/978-3-030-58452-8_13","volume-title":"Computer Vision \u2013 ECCV 2020","author":"N Carion","year":"2020","unstructured":"Carion, N., Massa, F., Synnaeve, G., Usunier, N., Kirillov, A., Zagoruyko, S.: End-to-end object detection with transformers. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.-M. (eds.) ECCV 2020. LNCS, vol. 12346, pp. 213\u2013229. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58452-8_13"},{"key":"10_CR4","doi-asserted-by":"crossref","unstructured":"Chen, Y., et al.: Focalformer3d : focusing on hard instance for 3D object detection. ArXiv abs\/2308.04556 (2023). https:\/\/api.semanticscholar.org\/CorpusID:260736073","DOI":"10.1109\/ICCV51070.2023.00771"},{"key":"10_CR5","doi-asserted-by":"crossref","unstructured":"Chiu, H.k., Prioletti, A., Li, J., Bohg, J.: Probabilistic 3D multi-object tracking for autonomous driving. arXiv preprint arXiv:2001.05673 (2020)","DOI":"10.1109\/ICRA48506.2021.9561754"},{"key":"10_CR6","unstructured":"Hahner, M., Dai, D., Liniger, A., Gool, L.V.: Quantifying data augmentation for lidar based 3d object detection. CoRR abs\/2004.01643 (2020). https:\/\/arxiv.org\/abs\/2004.01643"},{"key":"10_CR7","doi-asserted-by":"crossref","unstructured":"Hu, Y., et\u00a0al.: Planning-oriented autonomous driving. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 17853\u201317862 (2023)","DOI":"10.1109\/CVPR52729.2023.01712"},{"key":"10_CR8","doi-asserted-by":"crossref","unstructured":"Huang, T.E., Liu, Y., Van\u00a0Gool, L., Yu, F.: Video task decathlon: Unifying image and video tasks in autonomous driving. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 8647\u20138657 (2023)","DOI":"10.1109\/ICCV51070.2023.00794"},{"key":"10_CR9","doi-asserted-by":"publisher","unstructured":"Koh, J., Kim, J., Yoo, J.H., Kim, Y., Kum, D., Choi, J.W.: Joint 3D object detection and tracking using spatio-temporal representation of camera image and lidar point clouds. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol. 36. no. 1, pp. 1210\u20131218 (2022). https:\/\/doi.org\/10.1609\/aaai.v36i1.20007, https:\/\/ojs.aaai.org\/index.php\/AAAI\/article\/view\/20007","DOI":"10.1609\/aaai.v36i1.20007"},{"key":"10_CR10","unstructured":"Kuhn, H.W.: The hungarian method for the assignment problem. Naval Res. Logistics (NRL) 52 (1955). https:\/\/api.semanticscholar.org\/CorpusID:9426884"},{"issue":"3","key":"10_CR11","doi-asserted-by":"publisher","first-page":"642","DOI":"10.1007\/S11263-019-01204-1","volume":"128","author":"H Law","year":"2020","unstructured":"Law, H., Deng, J.: CornerNet: detecting objects as paired keypoints. Int. J. Comput. Vis. 128(3), 642\u2013656 (2020). https:\/\/doi.org\/10.1007\/S11263-019-01204-1","journal-title":"Int. J. Comput. Vis."},{"key":"10_CR12","doi-asserted-by":"crossref","unstructured":"Li, X., et al.: Poly-mot: a polyhedral framework for 3d multi-object tracking. ArXiv abs\/2307.16675 (2023). https:\/\/api.semanticscholar.org\/CorpusID:260334560","DOI":"10.1109\/IROS55552.2023.10341778"},{"key":"10_CR13","doi-asserted-by":"crossref","unstructured":"Li, Y., et al.: End-to-end 3D tracking with decoupled queries. In: IEEE\/CVF International Conference on Computer Vision (ICCV) (2023)","DOI":"10.1109\/ICCV51070.2023.01678"},{"key":"10_CR14","doi-asserted-by":"crossref","unstructured":"Lin, T.Y., Goyal, P., Girshick, R.B., He, K., Doll\u00e1r, P.: Focal loss for dense object detection. 2017 IEEE International Conference on Computer Vision (ICCV), pp. 2999\u20133007 (2017). https:\/\/api.semanticscholar.org\/CorpusID:47252984","DOI":"10.1109\/ICCV.2017.324"},{"key":"10_CR15","doi-asserted-by":"crossref","unstructured":"Liu, Z., et al.: BEVFusion: multi-task multi-sensor fusion with unified bird\u2019s-eye view representation. In: 2023 IEEE International Conference on Robotics and Automation (ICRA), pp. 2774\u20132781 (2022). https:\/\/api.semanticscholar.org\/CorpusID:249097415","DOI":"10.1109\/ICRA48891.2023.10160968"},{"key":"10_CR16","doi-asserted-by":"crossref","unstructured":"Luo, C., Yang, X., Yuille, A.: Exploring simple 3D multi-object tracking for autonomous driving. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV), pp. 10488\u201310497 (2021)","DOI":"10.1109\/ICCV48922.2021.01032"},{"key":"10_CR17","doi-asserted-by":"crossref","unstructured":"Mahmoud, A., Hu, J.S.K., Waslander, S.L.: Dense voxel fusion for 3D object detection. In: Proceedings of the IEEE\/CVF Winter Conference on Applications of Computer Vision (WACV), pp. 663\u2013672 (2023)","DOI":"10.1109\/WACV56688.2023.00073"},{"key":"10_CR18","doi-asserted-by":"publisher","unstructured":"Meinhardt, T., Kirillov, A., Leal-Taix\u00e9, L., Feichtenhofer, C.: Trackformer: multi-object tracking with transformers. In: 2022 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 8834\u20138844 (2022). https:\/\/doi.org\/10.1109\/CVPR52688.2022.00864","DOI":"10.1109\/CVPR52688.2022.00864"},{"key":"10_CR19","doi-asserted-by":"crossref","unstructured":"Pang, Z., Li, J., Tokmakov, P., Chen, D., Zagoruyko, S., Wang, Y.X.: Standing between past and future: Spatio-temporal modeling for multi-camera 3D multi-object tracking. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 17928\u201317938 (2023)","DOI":"10.1109\/CVPR52729.2023.01719"},{"key":"10_CR20","series-title":"LNCS","doi-asserted-by":"publisher","first-page":"680","DOI":"10.1007\/978-3-031-25056-9_43","volume-title":"ECCV 2022","author":"Z Pang","year":"2021","unstructured":"Pang, Z., Li, Z., Wang, N.: SimpleTrack: understanding and Rethinking 3D Multi-object Tracking. In: Karlinsky, L., Michaeli, T., Nishino, K. (eds.) ECCV 2022. LNCS, vol. 13801, pp. 680\u2013696. Springer, Cham (2021). https:\/\/doi.org\/10.1007\/978-3-031-25056-9_43"},{"key":"10_CR21","unstructured":"Sun, P., et al.: TransTrack: multiple-object tracking with transformer. arXiv preprint arXiv: 2012.15460 (2020)"},{"key":"10_CR22","unstructured":"Vaswani, A., et al.: Attention is all you need. In: Guyon, I., Luxburg, U.V., Bengio, S., Wallach, H., Fergus, R., Vishwanathan, S., Garnett, R. (eds.) Advances in Neural Information Processing Systems, vol.\u00a030. Curran Associates, Inc. (2017). https:\/\/proceedings.neurips.cc\/paper\/2017\/file\/3f5ee243547dee91fbd053c1c4a845aa-Paper.pdf"},{"key":"10_CR23","doi-asserted-by":"crossref","unstructured":"Wang, L., et al.: Camo-mot: combined appearance-motion optimization for 3D multi-object tracking with camera-lidar fusion. IEEE Trans. Intell. Transp. Syst. 24, 11981\u201311996 (2022). https:\/\/api.semanticscholar.org\/CorpusID:252089635","DOI":"10.1109\/TITS.2023.3285651"},{"key":"10_CR24","doi-asserted-by":"publisher","unstructured":"Weng, X., Wang, J., Held, D., Kitani, K.: 3D multi-object tracking: a baseline and new evaluation metrics. In: 2020 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS), pp. 10359\u201310366 (2020). https:\/\/doi.org\/10.1109\/IROS45743.2020.9341164","DOI":"10.1109\/IROS45743.2020.9341164"},{"key":"10_CR25","doi-asserted-by":"crossref","unstructured":"Yan, Y., Mao, Y., Li, B.: Second: sparsely embedded convolutional detection. Sensors (Basel, Switzerland) 18 (2018). https:\/\/api.semanticscholar.org\/CorpusID:52957856","DOI":"10.3390\/s18103337"},{"key":"10_CR26","doi-asserted-by":"crossref","unstructured":"Yin, T., Zhou, X., Krahenbuhl, P.: Center-based 3D object detection and tracking. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 11784\u201311793 (2021)","DOI":"10.1109\/CVPR46437.2021.01161"},{"key":"10_CR27","unstructured":"Zeng, F., Dong, B., Wang, T., Chen, C., Zhang, X., Wei, Y.: Motr: end-to-end multiple-object tracking with transformer. ArXiv abs\/2105.03247 (2021). https:\/\/api.semanticscholar.org\/CorpusID:234096063"},{"key":"10_CR28","doi-asserted-by":"crossref","unstructured":"Zeng, Y., Ma, C., Zhu, M., Fan, Z., Yang, X.: Cross-modal 3D object detection and tracking for auto-driving. In: IEEE\/RSJ International Conference on Intelligent Robots and Systems, IROS 2021, Prague, Czech Republic, September 2021. IEEE (2021)","DOI":"10.1109\/IROS51168.2021.9636498"},{"key":"10_CR29","doi-asserted-by":"crossref","unstructured":"Zhang, C., Zhang, C., Guo, Y., Chen, L., Happold, M.: MotionTrack: end-to-end transformer-based multi-object tracking with lidar-camera fusion. IN: 2023 IEEE\/CVF Conference on Computer Vision and Pattern Recognition Workshops (CVPRW), pp. 151\u2013160 (2023). https:\/\/api.semanticscholar.org\/CorpusID:259286967","DOI":"10.1109\/CVPRW59228.2023.00020"},{"key":"10_CR30","doi-asserted-by":"publisher","unstructured":"Zhang, T., Chen, X., Wang, Y., Wang, Y., Zhao, H.: Mutr3d: a multi-camera tracking framework via 3d-to-2d queries. In: 2022 IEEE\/CVF Conference on Computer Vision and Pattern Recognition Workshops (CVPRW), pp. 4536\u20134545 (2022). https:\/\/doi.org\/10.1109\/CVPRW56347.2022.00500","DOI":"10.1109\/CVPRW56347.2022.00500"},{"key":"10_CR31","doi-asserted-by":"crossref","unstructured":"Zhang, Y., Wang, T., Zhang, X.: MOTRv2: bootstrapping end-to-end multi-object tracking by pretrained object detectors. In: 2023 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 22056\u201322065 (2022). https:\/\/api.semanticscholar.org\/CorpusID:253581846","DOI":"10.1109\/CVPR52729.2023.02112"},{"key":"10_CR32","doi-asserted-by":"crossref","unstructured":"Zhou, Y., Tuzel, O.: Voxelnet: end-to-end learning for point cloud based 3D object detection. In: 2018 IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 4490\u20134499 (2017). https:\/\/api.semanticscholar.org\/CorpusID:42427078","DOI":"10.1109\/CVPR.2018.00472"},{"key":"10_CR33","unstructured":"Zhu, X., Su, W., Lu, L., Li, B., Wang, X., Dai, J.: Deformable DETR: deformable transformers for end-to-end object detection. In: 9th International Conference on Learning Representations, ICLR 2021, Virtual Event, Austria, May 3-7, 2021 (2021)"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2024"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-72848-8_10","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,11,28]],"date-time":"2024-11-28T14:05:42Z","timestamp":1732802742000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-72848-8_10"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,11,29]]},"ISBN":["9783031728471","9783031728488"],"references-count":33,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-72848-8_10","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,11,29]]},"assertion":[{"value":"29 November 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Milan","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 September 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4 October 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2024.ecva.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}