{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,1]],"date-time":"2026-05-01T17:27:07Z","timestamp":1777656427971,"version":"3.51.4"},"publisher-location":"Cham","reference-count":67,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031200731","type":"print"},{"value":"9783031200748","type":"electronic"}],"license":[{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2022]]},"DOI":"10.1007\/978-3-031-20074-8_29","type":"book-chapter","created":{"date-parts":[[2022,11,11]],"date-time":"2022-11-11T20:23:11Z","timestamp":1668198191000},"page":"504-521","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":14,"title":["Large Scale Real-World Multi-person Tracking"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-5811-9950","authenticated-orcid":false,"given":"Bing","family":"Shuai","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Alessandro","family":"Bergamo","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2827-8667","authenticated-orcid":false,"given":"Uta","family":"B\u00fcchler","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0074-8559","authenticated-orcid":false,"given":"Andrew","family":"Berneshawi","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2019-1639","authenticated-orcid":false,"given":"Alyssa","family":"Boden","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0716-8119","authenticated-orcid":false,"given":"Joseph","family":"Tighe","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2022,11,12]]},"reference":[{"key":"29_CR1","unstructured":"Fillerstock. http:\/\/fillerstock.com\/"},{"key":"29_CR2","unstructured":"Pexels. http:\/\/www.pexels.com\/"},{"key":"29_CR3","unstructured":"Pixabay. http:\/\/pixabay.com\/"},{"key":"29_CR4","doi-asserted-by":"crossref","unstructured":"Bai, H., Cheng, W., Chu, P., Liu, J., Zhang, K., Ling, H.: GMOT-40: a benchmark for generic multiple object tracking. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 6719\u20136728 (2021)","DOI":"10.1109\/CVPR46437.2021.00665"},{"issue":"41","key":"29_CR5","doi-asserted-by":"publisher","first-page":"30509","DOI":"10.1007\/s11042-020-09004-3","volume":"79","author":"DR Beddiar","year":"2020","unstructured":"Beddiar, D.R., Nini, B., Sabokrou, M., Hadid, A.: Vision-based human activity recognition: a survey. Multimed. Tools Appl. 79(41), 30509\u201330555 (2020). https:\/\/doi.org\/10.1007\/s11042-020-09004-3","journal-title":"Multimed. Tools Appl."},{"key":"29_CR6","doi-asserted-by":"publisher","unstructured":"Bernardin, K., Stiefelhagen, R.: Evaluating multiple object tracking performance: the CLEAR MOT metrics. EURASIP J. Image Video Process. 2008 (2008). https:\/\/doi.org\/10.1155\/2008\/246309","DOI":"10.1155\/2008\/246309"},{"key":"29_CR7","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"850","DOI":"10.1007\/978-3-319-48881-3_56","volume-title":"Computer Vision \u2013 ECCV 2016 Workshops","author":"L Bertinetto","year":"2016","unstructured":"Bertinetto, L., Valmadre, J., Henriques, J.F., Vedaldi, A., Torr, P.H.S.: Fully-convolutional siamese networks for object tracking. In: Hua, G., J\u00e9gou, H. (eds.) ECCV 2016. LNCS, vol. 9914, pp. 850\u2013865. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-48881-3_56"},{"key":"29_CR8","doi-asserted-by":"crossref","unstructured":"Bewley, A., Ge, Z., Ott, L., Ramos, F., Upcroft, B.: Simple online and realtime tracking. In: 2016 IEEE International Conference on Image Processing (ICIP), pp. 3464\u20133468. IEEE (2016)","DOI":"10.1109\/ICIP.2016.7533003"},{"key":"29_CR9","doi-asserted-by":"crossref","unstructured":"Caesar, H., et al.: nuScenes: a multimodal dataset for autonomous driving. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 11621\u201311631 (2020)","DOI":"10.1109\/CVPR42600.2020.01164"},{"key":"29_CR10","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"213","DOI":"10.1007\/978-3-030-58452-8_13","volume-title":"Computer Vision \u2013 ECCV 2020","author":"N Carion","year":"2020","unstructured":"Carion, N., Massa, F., Synnaeve, G., Usunier, N., Kirillov, A., Zagoruyko, S.: End-to-end object detection with transformers. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.-M. (eds.) ECCV 2020. LNCS, vol. 12346, pp. 213\u2013229. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58452-8_13"},{"key":"29_CR11","unstructured":"Carreira, J., Noland, E., Hillier, C., Zisserman, A.: A short note on the Kinetics-700 human action dataset. arXiv preprint arXiv:1907.06987 (2019)"},{"issue":"3","key":"29_CR12","first-page":"16","volume":"7","author":"M Chandrajit","year":"2016","unstructured":"Chandrajit, M., Girisha, R., Vasudev, T.: Multiple objects tracking in surveillance video using color and hu moments. Sig. Image Process. Int. J. (SIPIJ) 7(3), 16\u201327 (2016)","journal-title":"Sig. Image Process. Int. J. (SIPIJ)"},{"key":"29_CR13","doi-asserted-by":"publisher","first-page":"116306","DOI":"10.1016\/j.eswa.2021.116306","volume":"191","author":"R Chandrakar","year":"2022","unstructured":"Chandrakar, R., Raja, R., Miri, R., Sinha, U., Kushwaha, A.K.S., Raja, H.: Enhanced the moving object detection and object tracking for traffic surveillance using RBF-FDLNN and CBF algorithm. Expert Syst. Appl. 191, 116306 (2022)","journal-title":"Expert Syst. Appl."},{"key":"29_CR14","doi-asserted-by":"crossref","unstructured":"Chang, M.F., et al.: Argoverse: 3D tracking and forecasting with rich maps. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 8748\u20138757 (2019)","DOI":"10.1109\/CVPR.2019.00895"},{"key":"29_CR15","doi-asserted-by":"crossref","unstructured":"Chang, S., et al.: Towards accurate human pose estimation in videos of crowded scenes. In: Proceedings of the 28th ACM International Conference on Multimedia, pp. 4630\u20134634 (2020)","DOI":"10.1145\/3394171.3416299"},{"key":"29_CR16","doi-asserted-by":"crossref","unstructured":"Cordts, M., et al.: The cityscapes dataset for semantic urban scene understanding. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 3213\u20133223 (2016)","DOI":"10.1109\/CVPR.2016.350"},{"key":"29_CR17","doi-asserted-by":"crossref","unstructured":"Corona, K., Osterdahl, K., Collins, R., Hoogs, A.: MEVA: a large-scale multiview, multimodal video dataset for activity detection. In: Proceedings of the IEEE\/CVF Winter Conference on Applications of Computer Vision (WACV), pp. 1060\u20131068, January 2021","DOI":"10.1109\/WACV48630.2021.00110"},{"key":"29_CR18","doi-asserted-by":"crossref","unstructured":"Datta, A., Shah, M., Lobo, N.D.V.: Person-on-person violence detection in video data. In: Object Recognition Supported by User Interaction for Service Robots, vol. 1, pp. 433\u2013438. IEEE (2002)","DOI":"10.1109\/ICPR.2002.1044748"},{"key":"29_CR19","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"436","DOI":"10.1007\/978-3-030-58558-7_26","volume-title":"Computer Vision \u2013 ECCV 2020","author":"A Dave","year":"2020","unstructured":"Dave, A., Khurana, T., Tokmakov, P., Schmid, C., Ramanan, D.: TAO: a large-scale benchmark for tracking any object. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.-M. (eds.) ECCV 2020. LNCS, vol. 12350, pp. 436\u2013454. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58558-7_26"},{"key":"29_CR20","unstructured":"Dendorfer, P., et al.: MOT20: a benchmark for multi object tracking in crowded scenes. arXiv preprint arXiv:2003.09003 (2020)"},{"key":"29_CR21","doi-asserted-by":"crossref","unstructured":"Deng, J., Dong, W., Socher, R., Li, L.J., Li, K., Fei-Fei, L.: ImageNet: a large-scale hierarchical image database. In: CVPR 2009 (2009)","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"29_CR22","doi-asserted-by":"crossref","unstructured":"Doll\u00e1r, P., Wojek, C., Schiele, B., Perona, P.: Pedestrian detection: a benchmark. In: 2009 IEEE Conference on Computer Vision and Pattern Recognition, pp. 304\u2013311. IEEE (2009)","DOI":"10.1109\/CVPR.2009.5206631"},{"issue":"14","key":"29_CR23","doi-asserted-by":"publisher","first-page":"1707","DOI":"10.1177\/0278364910365417","volume":"29","author":"A Ess","year":"2010","unstructured":"Ess, A., Schindler, K., Leibe, B., Van Gool, L.: Object detection and tracking for autonomous navigation in dynamic environments. Int. J. Robot. Res. 29(14), 1707\u20131725 (2010)","journal-title":"Int. J. Robot. Res."},{"issue":"2","key":"29_CR24","doi-asserted-by":"publisher","first-page":"303","DOI":"10.1007\/s11263-009-0275-4","volume":"88","author":"M Everingham","year":"2010","unstructured":"Everingham, M., Van Gool, L., Williams, C.K.I., Winn, J., Zisserman, A.: The Pascal visual object classes (VOC) challenge. Int. J. Comput. Vis. 88(2), 303\u2013338 (2010). https:\/\/doi.org\/10.1007\/s11263-009-0275-4","journal-title":"Int. J. Comput. Vis."},{"key":"29_CR25","doi-asserted-by":"crossref","unstructured":"Fabbri, M., et al.: MOTSynth: how can synthetic data help pedestrian detection and tracking? In: International Conference on Computer Vision (ICCV) (2021)","DOI":"10.1109\/ICCV48922.2021.01067"},{"key":"29_CR26","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"450","DOI":"10.1007\/978-3-030-01225-0_27","volume-title":"Computer Vision \u2013 ECCV 2018","author":"M Fabbri","year":"2018","unstructured":"Fabbri, M., Lanzi, F., Calderara, S., Palazzi, A., Vezzani, R., Cucchiara, R.: Learning to detect and track visible and occluded body joints in a virtual world. In: Ferrari, V., Hebert, M., Sminchisescu, C., Weiss, Y. (eds.) ECCV 2018. LNCS, vol. 11208, pp. 450\u2013466. Springer, Cham (2018). https:\/\/doi.org\/10.1007\/978-3-030-01225-0_27"},{"key":"29_CR27","doi-asserted-by":"crossref","unstructured":"Geiger, A., Lenz, P., Urtasun, R.: Are we ready for autonomous driving? The KITTI vision benchmark suite. In: Conference on Computer Vision and Pattern Recognition (CVPR) (2012)","DOI":"10.1109\/CVPR.2012.6248074"},{"key":"29_CR28","doi-asserted-by":"crossref","unstructured":"Guo, D., Wang, J., Cui, Y., Wang, Z., Chen, S.: SiamCAR: siamese fully convolutional classification and regression for visual tracking. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 6269\u20136277 (2020)","DOI":"10.1109\/CVPR42600.2020.00630"},{"key":"29_CR29","unstructured":"Han, X., et al.: MMPTRACK: large-scale densely annotated multi-camera multiple people tracking benchmark (2021)"},{"key":"29_CR30","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"749","DOI":"10.1007\/978-3-319-46448-0_45","volume-title":"Computer Vision \u2013 ECCV 2016","author":"D Held","year":"2016","unstructured":"Held, D., Thrun, S., Savarese, S.: Learning to track at 100 FPS with deep regression networks. In: Leibe, B., Matas, J., Sebe, N., Welling, M. (eds.) ECCV 2016. LNCS, vol. 9905, pp. 749\u2013765. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-46448-0_45"},{"key":"29_CR31","unstructured":"Houston, J., et al.: One thousand and one hours: self-driving motion prediction dataset. arXiv preprint arXiv:2006.14480 (2020)"},{"key":"29_CR32","doi-asserted-by":"crossref","unstructured":"Leal-Taix\u00e9, L., Canton-Ferrer, C., Schindler, K.: Learning by tracking: siamese CNN for robust target association. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition Workshops, pp. 33\u201340 (2016)","DOI":"10.1109\/CVPRW.2016.59"},{"key":"29_CR33","doi-asserted-by":"crossref","unstructured":"Li, B., Wu, W., Wang, Q., Zhang, F., Xing, J., Yan, J.: SiamRPN++: evolution of siamese visual tracking with very deep networks. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 4282\u20134291 (2019)","DOI":"10.1109\/CVPR.2019.00441"},{"key":"29_CR34","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"740","DOI":"10.1007\/978-3-319-10602-1_48","volume-title":"Computer Vision \u2013 ECCV 2014","author":"T-Y Lin","year":"2014","unstructured":"Lin, T.-Y., et al.: Microsoft COCO: common objects in context. In: Fleet, D., Pajdla, T., Schiele, B., Tuytelaars, T. (eds.) ECCV 2014. LNCS, vol. 8693, pp. 740\u2013755. Springer, Cham (2014). https:\/\/doi.org\/10.1007\/978-3-319-10602-1_48"},{"key":"29_CR35","unstructured":"Lin, W., et al.: Human in events: a large-scale benchmark for human-centric video analysis in complex events. arXiv preprint arXiv:2005.04490 (2020)"},{"key":"29_CR36","doi-asserted-by":"crossref","unstructured":"Liu, W., Bao, Q., Sun, Y., Mei, T.: Recent advances in monocular 2D and 3D human pose estimation: a deep learning perspective. arXiv preprint arXiv:2104.11536 (2021)","DOI":"10.1145\/3524497"},{"key":"29_CR37","doi-asserted-by":"crossref","unstructured":"Manen, S., Gygli, M., Dai, D., Gool, L.V.: PathTrack: fast trajectory annotation with path supervision. In: 2017 IEEE International Conference on Computer Vision (ICCV), pp. 290\u2013299 (2017)","DOI":"10.1109\/ICCV.2017.40"},{"key":"29_CR38","doi-asserted-by":"crossref","unstructured":"Mathur, G., Somwanshi, D., Bundele, M.M.: Intelligent video surveillance based on object tracking. In: 2018 3rd International Conference and Workshops on Recent Advances and Innovations in Engineering (ICRAIE), pp. 1\u20136. IEEE (2018)","DOI":"10.1109\/ICRAIE.2018.8710421"},{"key":"29_CR39","unstructured":"Milan, A., Leal-Taix\u00e9, L., Reid, I., Roth, S., Schindler, K.: MOT16: a benchmark for multi-object tracking. arXiv preprint arXiv:1603.00831 (2016)"},{"key":"29_CR40","doi-asserted-by":"crossref","unstructured":"Oh, S., et al.: A large-scale benchmark dataset for event recognition in surveillance video. In: CVPR 2011, pp. 3153\u20133160. IEEE (2011)","DOI":"10.1109\/CVPR.2011.5995586"},{"key":"29_CR41","doi-asserted-by":"crossref","unstructured":"Pang, B., Li, Y., Zhang, Y., Li, M., Lu, C.: TubeTK: adopting tubes to track multi-object in a one-step training model. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 6308\u20136318 (2020)","DOI":"10.1109\/CVPR42600.2020.00634"},{"key":"29_CR42","doi-asserted-by":"crossref","unstructured":"Pang, J., et al.: Quasi-dense similarity learning for multiple object tracking. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 164\u2013173 (2021)","DOI":"10.1109\/CVPR46437.2021.00023"},{"issue":"4","key":"29_CR43","doi-asserted-by":"publisher","first-page":"588","DOI":"10.1109\/TIV.2019.2938110","volume":"4","author":"A Rangesh","year":"2019","unstructured":"Rangesh, A., Trivedi, M.M.: No blind spots: full-surround multi-object tracking for autonomous vehicles using cameras and lidars. IEEE Trans. Intell. Veh. 4(4), 588\u2013599 (2019)","journal-title":"IEEE Trans. Intell. Veh."},{"key":"29_CR44","unstructured":"Ren, S., He, K., Girshick, R., Sun, J.: Faster R-CNN: towards real-time object detection with region proposal networks. In: Advances in Neural Information Processing Systems, vol. 28 (2015)"},{"key":"29_CR45","doi-asserted-by":"crossref","unstructured":"Rezaei, M., Azarmi, M., Mir, F.M.P.: Traffic-Net: 3D traffic monitoring using a single camera. arXiv preprint arXiv:2109.09165 (2021)","DOI":"10.21203\/rs.3.rs-1820244\/v1"},{"key":"29_CR46","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"17","DOI":"10.1007\/978-3-319-48881-3_2","volume-title":"Computer Vision \u2013 ECCV 2016 Workshops","author":"E Ristani","year":"2016","unstructured":"Ristani, E., Solera, F., Zou, R., Cucchiara, R., Tomasi, C.: Performance measures and a data set for\u00a0multi-target, multi-camera tracking. In: Hua, G., J\u00e9gou, H. (eds.) ECCV 2016. LNCS, vol. 9914, pp. 17\u201335. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-48881-3_2"},{"key":"29_CR47","doi-asserted-by":"crossref","unstructured":"Ristani, E., Tomasi, C.: Features for multi-target multi-camera tracking and re-identification. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 6036\u20136046 (2018)","DOI":"10.1109\/CVPR.2018.00632"},{"key":"29_CR48","unstructured":"Shao, S., et al.: CrowdHuman: a benchmark for detecting human in a crowd. arXiv preprint arXiv:1805.00123 (2018)"},{"key":"29_CR49","doi-asserted-by":"crossref","unstructured":"Shuai, B., Li, X., Kundu, K., Tighe, J.: Id-free person similarity learning. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (2022)","DOI":"10.1109\/CVPR52688.2022.01428"},{"key":"29_CR50","doi-asserted-by":"crossref","unstructured":"Shuai, B., Berneshawi, A., Li, X., Modolo, D., Tighe, J.: SiamMOT: siamese multi-object tracking. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 12372\u201312382 (2021)","DOI":"10.1109\/CVPR46437.2021.01219"},{"key":"29_CR51","doi-asserted-by":"publisher","first-page":"103055","DOI":"10.1016\/j.jvcir.2021.103055","volume":"76","author":"L Song","year":"2021","unstructured":"Song, L., Yu, G., Yuan, J., Liu, Z.: Human pose estimation and its application to action recognition: a survey. J. Vis. Commun. Image Represent. 76, 103055 (2021)","journal-title":"J. Vis. Commun. Image Represent."},{"key":"29_CR52","doi-asserted-by":"crossref","unstructured":"Sun, P., et al.: Scalability in perception for autonomous driving: Waymo open dataset. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 2446\u20132454 (2020)","DOI":"10.1109\/CVPR42600.2020.00252"},{"key":"29_CR53","doi-asserted-by":"crossref","unstructured":"Sundararaman, R., De Almeida Braga, C., Marchand, E., Pettre, J.: Tracking pedestrian heads in dense crowd. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 3865\u20133875 (2021)","DOI":"10.1109\/CVPR46437.2021.00386"},{"key":"29_CR54","doi-asserted-by":"crossref","unstructured":"Tian, Z., Shen, C., Chen, H., He, T.: FCOS: fully convolutional one-stage object detection. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 9627\u20139636 (2019)","DOI":"10.1109\/ICCV.2019.00972"},{"key":"29_CR55","doi-asserted-by":"crossref","unstructured":"Wang, G., Wang, Y., Zhang, H., Gu, R., Hwang, J.N.: Exploit the connectivity: multi-object tracking with trackletnet. In: Proceedings of the 27th ACM International Conference on Multimedia, pp. 482\u2013490 (2019)","DOI":"10.1145\/3343031.3350853"},{"key":"29_CR56","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"107","DOI":"10.1007\/978-3-030-58621-8_7","volume-title":"Computer Vision \u2013 ECCV 2020","author":"Z Wang","year":"2020","unstructured":"Wang, Z., Zheng, L., Liu, Y., Li, Y., Wang, S.: Towards real-time multi-object tracking. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.-M. (eds.) ECCV 2020. LNCS, vol. 12356, pp. 107\u2013122. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58621-8_7"},{"key":"29_CR57","doi-asserted-by":"crossref","unstructured":"Wojke, N., Bewley, A., Paulus, D.: Simple online and realtime tracking with a deep association metric. In: 2017 IEEE International Conference on Image Processing (ICIP), pp. 3645\u20133649. IEEE (2017)","DOI":"10.1109\/ICIP.2017.8296962"},{"key":"29_CR58","doi-asserted-by":"crossref","unstructured":"Wu, J., Osuntogun, A., Choudhury, T., Philipose, M., Rehg, J.M.: A scalable approach to activity recognition based on object use. In: 2007 IEEE 11th International Conference on Computer Vision, pp. 1\u20138. IEEE (2007)","DOI":"10.1109\/ICCV.2007.4408865"},{"key":"29_CR59","doi-asserted-by":"crossref","unstructured":"Xu, J., Cao, Y., Zhang, Z., Hu, H.: Spatial-temporal relation networks for multi-object tracking. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 3988\u20133998 (2019)","DOI":"10.1109\/ICCV.2019.00409"},{"key":"29_CR60","doi-asserted-by":"crossref","unstructured":"Xu, Y., Osep, A., Ban, Y., Horaud, R., Leal-Taix\u00e9, L., Alameda-Pineda, X.: How to train your deep multi-object tracker. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 6787\u20136796 (2020)","DOI":"10.1109\/CVPR42600.2020.00682"},{"key":"29_CR61","doi-asserted-by":"crossref","unstructured":"Gan, Y., Han, R., Yin, L., Feng, W., Wang, S.: Self-supervised multi-view multi-human association and tracking. In: ACM MM (2021)","DOI":"10.1145\/3474085.3475177"},{"key":"29_CR62","doi-asserted-by":"crossref","unstructured":"Yu, F., et al.: BDD100K: a diverse driving dataset for heterogeneous multitask learning. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 2636\u20132645 (2020)","DOI":"10.1109\/CVPR42600.2020.00271"},{"key":"29_CR63","unstructured":"Yu, F., Seff, A., Zhang, Y., Song, S., Funkhouser, T., Xiao, J.: LSUN: construction of a large-scale image dataset using deep learning with humans in the loop. arXiv preprint arXiv:1506.03365 (2015)"},{"key":"29_CR64","doi-asserted-by":"crossref","unstructured":"Yu, F., Wang, D., Shelhamer, E., Darrell, T.: Deep layer aggregation. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 2403\u20132412 (2018)","DOI":"10.1109\/CVPR.2018.00255"},{"issue":"11","key":"29_CR65","doi-asserted-by":"publisher","first-page":"3069","DOI":"10.1007\/s11263-021-01513-4","volume":"129","author":"Y Zhang","year":"2021","unstructured":"Zhang, Y., Wang, C., Wang, X., Zeng, W., Liu, W.: FairMOT: on the fairness of detection and re-identification in multiple object tracking. Int. J. Comput. Vis. 129(11), 3069\u20133087 (2021). https:\/\/doi.org\/10.1007\/s11263-021-01513-4","journal-title":"Int. J. Comput. Vis."},{"key":"29_CR66","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"474","DOI":"10.1007\/978-3-030-58548-8_28","volume-title":"Computer Vision \u2013 ECCV 2020","author":"X Zhou","year":"2020","unstructured":"Zhou, X., Koltun, V., Kr\u00e4henb\u00fchl, P.: Tracking objects as points. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.-M. (eds.) ECCV 2020. LNCS, vol. 12349, pp. 474\u2013490. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58548-8_28"},{"key":"29_CR67","unstructured":"Zhou, X., Wang, D., Kr\u00e4henb\u00fchl, P.: Objects as points. arXiv preprint arXiv:1904.07850 (2019)"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2022"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-20074-8_29","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,10,8]],"date-time":"2024-10-08T00:53:31Z","timestamp":1728348811000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-20074-8_29"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022]]},"ISBN":["9783031200731","9783031200748"],"references-count":67,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-20074-8_29","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2022]]},"assertion":[{"value":"12 November 2022","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Tel Aviv","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Israel","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2022","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"23 October 2022","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"27 October 2022","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"17","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2022","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2022.ecva.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"CMT","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"5804","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"1645","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"28% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3.21","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3.91","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}