{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,20]],"date-time":"2026-05-20T21:15:42Z","timestamp":1779311742286,"version":"3.51.4"},"publisher-location":"Cham","reference-count":58,"publisher":"Springer International Publishing","isbn-type":[{"value":"9783030585471","type":"print"},{"value":"9783030585488","type":"electronic"}],"license":[{"start":{"date-parts":[[2020,1,1]],"date-time":"2020-01-01T00:00:00Z","timestamp":1577836800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2020,1,1]],"date-time":"2020-01-01T00:00:00Z","timestamp":1577836800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2020]]},"DOI":"10.1007\/978-3-030-58548-8_28","type":"book-chapter","created":{"date-parts":[[2020,10,28]],"date-time":"2020-10-28T23:02:42Z","timestamp":1603926162000},"page":"474-490","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1052,"title":["Tracking Objects as Points"],"prefix":"10.1007","author":[{"given":"Xingyi","family":"Zhou","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Vladlen","family":"Koltun","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Philipp","family":"Kr\u00e4henb\u00fchl","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2020,10,29]]},"reference":[{"key":"28_CR1","doi-asserted-by":"crossref","unstructured":"Bergmann, P., Meinhardt, T., Leal-Taixe, L.: Tracking without bells and whistles. In: ICCV (2019)","DOI":"10.1109\/ICCV.2019.00103"},{"key":"28_CR2","doi-asserted-by":"crossref","unstructured":"Bewley, A., Ge, Z., Ott, L., Ramos, F., Upcroft, B.: Simple online and realtime tracking. In: ICIP (2016)","DOI":"10.1109\/ICIP.2016.7533003"},{"key":"28_CR3","doi-asserted-by":"crossref","unstructured":"Caesar, H., et al.: nuScenes: a multimodal dataset for autonomous driving. In: CVPR (2020)","DOI":"10.1109\/CVPR42600.2020.01164"},{"key":"28_CR4","doi-asserted-by":"crossref","unstructured":"Carreira, J., Agrawal, P., Fragkiadaki, K., Malik, J.: Human pose estimation with iterative error feedback. In: CVPR (2016)","DOI":"10.1109\/CVPR.2016.512"},{"key":"28_CR5","doi-asserted-by":"crossref","unstructured":"Choi, W., Savarese, S.: Multiple target tracking in world coordinate with single, minimally calibrated camera. In: ECCV (2010)","DOI":"10.1007\/978-3-642-15561-1_40"},{"issue":"10","key":"28_CR6","doi-asserted-by":"publisher","first-page":"1858","DOI":"10.1109\/TPAMI.2008.113","volume":"30","author":"GD Evangelidis","year":"2008","unstructured":"Evangelidis, G.D., Psarakis, E.Z.: Parametric image alignment using enhanced correlation coefficient maximization. IEEE Trans. Pattern Anal. Mach. Intell. 30(10), 1858\u20131865 (2008)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"28_CR7","doi-asserted-by":"crossref","unstructured":"Fang, K., Xiang, Y., Li, X., Savarese, S.: Recurrent autoregressive networks for online multi-object tracking. In: WACV (2018)","DOI":"10.1109\/WACV.2018.00057"},{"key":"28_CR8","doi-asserted-by":"crossref","unstructured":"Feichtenhofer, C., Pinz, A., Zisserman, A.: Detect to track and track to detect. In: ICCV (2017)","DOI":"10.1109\/ICCV.2017.330"},{"key":"28_CR9","doi-asserted-by":"crossref","unstructured":"Felzenszwalb, P.F., Girshick, R.B., McAllester, D., Ramanan, D.: Object detection with discriminatively trained part-based models. In: TPAMI (2009)","DOI":"10.1109\/TPAMI.2009.167"},{"key":"28_CR10","unstructured":"Feng, W., Hu, Z., Wu, W., Yan, J., Ouyang, W.: Multi-object tracking with multiple cues and switcher-aware classification. arXiv:1901.06129 (2019)"},{"key":"28_CR11","doi-asserted-by":"crossref","unstructured":"Fieraru, M., Khoreva, A., Pishchulin, L., Schiele, B.: Learning to refine human pose estimation. In: CVPR Workshops (2018)","DOI":"10.1109\/CVPRW.2018.00058"},{"key":"28_CR12","doi-asserted-by":"crossref","unstructured":"Geiger, A., Lenz, P., Urtasun, R.: Are we ready for autonomous driving? The KITTI vision benchmark suite. In: CVPR (2012)","DOI":"10.1109\/CVPR.2012.6248074"},{"key":"28_CR13","doi-asserted-by":"crossref","unstructured":"Held, D., Thrun, S., Savarese, S.: Learning to track at 100 FPS with deep regression networks. In: ECCV (2016)","DOI":"10.1007\/978-3-319-46448-0_45"},{"key":"28_CR14","doi-asserted-by":"crossref","unstructured":"Hu, H.N., et al.: Joint monocular 3D detection and tracking. In: ICCV (2019)","DOI":"10.1109\/ICCV.2019.00549"},{"key":"28_CR15","doi-asserted-by":"crossref","unstructured":"Ilg, E., Mayer, N., Saikia, T., Keuper, M., Dosovitskiy, A., Brox, T.: FlowNet 2.0: evolution of optical flow estimation with deep networks. In: CVPR (2017)","DOI":"10.1109\/CVPR.2017.179"},{"key":"28_CR16","doi-asserted-by":"crossref","unstructured":"Kang, K., et al.: Object detection in videos with tubelet proposal networks. In: CVPR (2017)","DOI":"10.1109\/CVPR.2017.101"},{"issue":"10","key":"28_CR17","doi-asserted-by":"publisher","first-page":"2896","DOI":"10.1109\/TCSVT.2017.2736553","volume":"28","author":"K Kang","year":"2017","unstructured":"Kang, K., et al.: T-CNN: tubelets with convolutional neural networks for object detection from videos. Circuits Syst. Video Technol. 28(10), 2896\u20132907 (2017)","journal-title":"Circuits Syst. Video Technol."},{"key":"28_CR18","doi-asserted-by":"publisher","first-page":"104423","DOI":"10.1109\/ACCESS.2019.2932301","volume":"7","author":"H Karunasekera","year":"2019","unstructured":"Karunasekera, H., Wang, H., Zhang, H.: Multiple object tracking with attention to appearance, structure, motion and size. IEEE Access 7, 104423\u2013104434 (2019)","journal-title":"IEEE Access"},{"issue":"1","key":"28_CR19","doi-asserted-by":"publisher","first-page":"140","DOI":"10.1109\/TPAMI.2018.2876253","volume":"42","author":"M Keuper","year":"2018","unstructured":"Keuper, M., Tang, S., Andres, B., Brox, T., Schiele, B.: Motion segmentation and multiple object tracking by correlation co-clustering. IEEE Trans. Pattern Anal. Mach. Intell. 42(1), 140\u2013153 (2018)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"28_CR20","unstructured":"Kingma, D.P., Ba, J.: Adam: a method for stochastic optimization. In: ICLR (2015)"},{"key":"28_CR21","doi-asserted-by":"crossref","unstructured":"Law, H., Deng, J.: CornerNet: detecting objects as paired keypoints. In: ECCV (2018)","DOI":"10.1007\/978-3-030-01264-9_45"},{"key":"28_CR22","doi-asserted-by":"crossref","unstructured":"Leal-Taix\u00e9, L., Canton-Ferrer, C., Schindler, K.: Learning by tracking: Siamese CNN for robust target association. In: CVPR Workshops (2016)","DOI":"10.1109\/CVPRW.2016.59"},{"key":"28_CR23","unstructured":"Leal-Taix\u00e9, L., Milan, A., Schindler, K., Cremers, D., Reid, I., Roth, S.: Tracking the trackers: an analysis of the state of the art in multiple object tracking. arXiv:1704.02781 (2017)"},{"key":"28_CR24","doi-asserted-by":"crossref","unstructured":"Lin, T.Y., Goyal, P., Girshick, R., He, K., Doll\u00e1r, P.: Focal loss for dense object detection. In: ICCV (2017)","DOI":"10.1109\/ICCV.2017.324"},{"key":"28_CR25","unstructured":"Long, C., Haizhou, A., Zijie, Z., Chong, S.: Real-time multiple people tracking with deeply learned candidate selection and person re-identification. In: ICME (2018)"},{"key":"28_CR26","doi-asserted-by":"crossref","unstructured":"Luiten, J., Fischer, T., Leibe, B.: Track to reconstruct and reconstruct to track. arXiv:1910.00130 (2019)","DOI":"10.1109\/LRA.2020.2969183"},{"key":"28_CR27","unstructured":"Milan, A., Leal-Taix\u00e9, L., Reid, I., Roth, S., Schindler, K.: MOT16: a benchmark for multi-object tracking. arXiv:1603.00831 (2016)"},{"key":"28_CR28","doi-asserted-by":"crossref","unstructured":"Moon, G., Chang, J., Lee, K.M.: PoseFix: model-agnostic general human pose refinement network. In: CVPR (2019)","DOI":"10.1109\/CVPR.2019.00796"},{"key":"28_CR29","doi-asserted-by":"crossref","unstructured":"Ren, J., et al.: Accurate single stage detector using recurrent rolling convolution. In: CVPR (2017)","DOI":"10.1109\/CVPR.2017.87"},{"key":"28_CR30","unstructured":"Ren, S., He, K., Girshick, R., Sun, J.: Faster R-CNN: towards real-time object detection with region proposal networks. In: NIPS (2015)"},{"key":"28_CR31","doi-asserted-by":"crossref","unstructured":"Sadeghian, A., Alahi, A., Savarese, S.: Tracking the untrackable: learning to track multiple cues with long-term dependencies. In: ICCV (2017)","DOI":"10.1109\/ICCV.2017.41"},{"key":"28_CR32","doi-asserted-by":"crossref","unstructured":"Schulter, S., Vernaza, P., Choi, W., Chandraker, M.: Deep network flow for multi-object tracking. In: CVPR (2017)","DOI":"10.1109\/CVPR.2017.292"},{"key":"28_CR33","unstructured":"Shao, S., et al.: CrowdHuman: a benchmark for detecting human in a crowd. arXiv:1805.00123 (2018)"},{"key":"28_CR34","doi-asserted-by":"crossref","unstructured":"Sharma, S., Ansari, J.A., Murthy, J.K., Krishna, K.M.: Beyond pixels: leveraging geometry and shape cues for online multi-object tracking. In: ICRA (2018)","DOI":"10.1109\/ICRA.2018.8461018"},{"key":"28_CR35","unstructured":"Shi, J., Tomasi, C.: Good features to track. In: CVPR (1994)"},{"key":"28_CR36","doi-asserted-by":"crossref","unstructured":"Shi, S., Wang, X., Li, H.: PointRCNN: 3D object proposal generation and detection from point cloud. In: CVPR (2019)","DOI":"10.1109\/CVPR.2019.00086"},{"key":"28_CR37","doi-asserted-by":"crossref","unstructured":"Simonelli, A., Bul\u00f2, S.R.R., Porzi, L., L\u00f3pez-Antequera, M., Kontschieder, P.: Disentangling monocular 3D object detection. In: ICCV (2019)","DOI":"10.1109\/ICCV.2019.00208"},{"key":"28_CR38","doi-asserted-by":"crossref","unstructured":"Son, J., Baek, M., Cho, M., Han, B.: Multi-object tracking with quadruplet convolutional neural networks. In: CVPR (2017)","DOI":"10.1109\/CVPR.2017.403"},{"key":"28_CR39","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1007\/978-3-540-69568-4_1","volume-title":"Multimodal Technologies for Perception of Humans","author":"R Stiefelhagen","year":"2007","unstructured":"Stiefelhagen, R., Bernardin, K., Bowers, R., Garofolo, J., Mostefa, D., Soundararajan, P.: The CLEAR 2006 evaluation. In: Stiefelhagen, R., Garofolo, J. (eds.) CLEAR 2006. LNCS, vol. 4122, pp. 1\u201344. Springer, Heidelberg (2007). https:\/\/doi.org\/10.1007\/978-3-540-69568-4_1"},{"key":"28_CR40","doi-asserted-by":"crossref","unstructured":"Tang, S., Andriluka, M., Andres, B., Schiele, B.: Multiple people tracking by lifted multicut and person re-identification. In: CVPR (2017)","DOI":"10.1109\/CVPR.2017.394"},{"key":"28_CR41","doi-asserted-by":"crossref","unstructured":"Tian, Z., Shen, C., Chen, H., He, T.: FCOS: fully convolutional one-stage object detection. In: ICCV (2019)","DOI":"10.1109\/ICCV.2019.00972"},{"key":"28_CR42","unstructured":"Tomasi, C., Kanade, T.: Detection and tracking of point features. Technical report CMU-CS-91-132, Carnegie Mellon University (1991)"},{"key":"28_CR43","unstructured":"Tu, Z.: Auto-context and its application to high-level vision tasks. In: CVPR (2008)"},{"key":"28_CR44","doi-asserted-by":"crossref","unstructured":"Voigtlaender, P., et al.: MOTS: multi-object tracking and segmentation. In: CVPR (2019)","DOI":"10.1109\/CVPR.2019.00813"},{"key":"28_CR45","unstructured":"Weng, X., Kitani, K.: A baseline for 3D multi-object tracking. arXiv:1907.03961 (2019)"},{"key":"28_CR46","doi-asserted-by":"crossref","unstructured":"Wojke, N., Bewley, A., Paulus, D.: Simple online and realtime tracking with a deep association metric. In: ICIP (2017)","DOI":"10.1109\/ICIP.2017.8296962"},{"key":"28_CR47","doi-asserted-by":"crossref","unstructured":"Xiang, Y., Alahi, A., Savarese, S.: Learning to track: online multi-object tracking by decision making. In: ICCV (2015)","DOI":"10.1109\/ICCV.2015.534"},{"key":"28_CR48","doi-asserted-by":"crossref","unstructured":"Xiao, B., Wu, H., Wei, Y.: Simple baselines for human pose estimation and tracking. In: ECCV (2018)","DOI":"10.1007\/978-3-030-01231-1_29"},{"key":"28_CR49","doi-asserted-by":"crossref","unstructured":"Xu, J., Cao, Y., Zhang, Z., Hu, H.: Spatial-temporal relation networks for multi-object tracking. In: ICCV (2019)","DOI":"10.1109\/ICCV.2019.00409"},{"key":"28_CR50","doi-asserted-by":"crossref","unstructured":"Yang, F., Choi, W., Lin, Y.: Exploit all the layers: fast and accurate CNN object detector with scale dependent pooling and cascaded rejection classifiers. In: CVPR (2016)","DOI":"10.1109\/CVPR.2016.234"},{"key":"28_CR51","doi-asserted-by":"crossref","unstructured":"Yu, F., Li, W., Li, Q., Liu, Y., Shi, X., Yan, J.: POI: multiple object tracking with high performance detection and appearance feature. In: ECCV Workshops (2016)","DOI":"10.1007\/978-3-319-48881-3_3"},{"key":"28_CR52","doi-asserted-by":"crossref","unstructured":"Yu, F., Wang, D., Shelhamer, E., Darrell, T.: Deep layer aggregation. In: CVPR (2018)","DOI":"10.1109\/CVPR.2018.00255"},{"key":"28_CR53","doi-asserted-by":"crossref","unstructured":"Zhang, W., Zhou, H., Sun, S., Wang, Z., Shi, J., Loy, C.C.: Robust multi-modality multi-object tracking. In: ICCV (2019)","DOI":"10.1109\/ICCV.2019.00245"},{"key":"28_CR54","unstructured":"Zhang, Z., Cheng, D., Zhu, X., Lin, S., Dai, J.: Integrated object detection and tracking with tracklet-conditioned detection. arXiv:1811.11167 (2018)"},{"key":"28_CR55","unstructured":"Zhou, X., Wang, D., Kr\u00e4henb\u00fchl, P.: Objects as points. arXiv:1904.07850 (2019)"},{"key":"28_CR56","unstructured":"Zhu, B., Jiang, Z., Zhou, X., Li, Z., Yu, G.: Class-balanced grouping and sampling for point cloud 3D object detection. arXiv:1908.09492 (2019)"},{"key":"28_CR57","doi-asserted-by":"crossref","unstructured":"Zhu, J., Yang, H., Liu, N., Kim, M., Zhang, W., Yang, M.H.: Online multi-object tracking with dual matching attention networks. In: ECCV (2018)","DOI":"10.1007\/978-3-030-01228-1_23"},{"key":"28_CR58","doi-asserted-by":"crossref","unstructured":"Zhu, X., Wang, Y., Dai, J., Yuan, L., Wei, Y.: Flow-guided feature aggregation for video object detection. In: ICCV (2017)","DOI":"10.1109\/ICCV.2017.52"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2020"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-58548-8_28","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,10,29]],"date-time":"2024-10-29T00:14:07Z","timestamp":1730160847000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-030-58548-8_28"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020]]},"ISBN":["9783030585471","9783030585488"],"references-count":58,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-58548-8_28","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2020]]},"assertion":[{"value":"29 October 2020","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Glasgow","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"United Kingdom","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2020","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"23 August 2020","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"28 August 2020","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"16","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2020","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2020.eu\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"OpenReview","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"5025","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"1360","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"27% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"7","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"The conference was held virtually due to the COVID-19 pandemic.","order":10,"name":"additional_info_on_review_process","label":"Additional Info on Review Process","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}