{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,10]],"date-time":"2026-03-10T11:38:03Z","timestamp":1773142683238,"version":"3.50.1"},"publisher-location":"Cham","reference-count":98,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031729423","type":"print"},{"value":"9783031729430","type":"electronic"}],"license":[{"start":{"date-parts":[[2024,11,29]],"date-time":"2024-11-29T00:00:00Z","timestamp":1732838400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,11,29]],"date-time":"2024-11-29T00:00:00Z","timestamp":1732838400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-72943-0_2","type":"book-chapter","created":{"date-parts":[[2024,11,28]],"date-time":"2024-11-28T13:43:17Z","timestamp":1732801397000},"page":"20-40","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":6,"title":["CountFormer: Multi-view Crowd Counting Transformer"],"prefix":"10.1007","author":[{"given":"Hong","family":"Mo","sequence":"first","affiliation":[]},{"given":"Xiong","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"Jianchao","family":"Tan","sequence":"additional","affiliation":[]},{"given":"Cheng","family":"Yang","sequence":"additional","affiliation":[]},{"given":"Qiong","family":"Gu","sequence":"additional","affiliation":[]},{"given":"Bo","family":"Hang","sequence":"additional","affiliation":[]},{"given":"Wenqi","family":"Ren","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,11,29]]},"reference":[{"key":"2_CR1","doi-asserted-by":"crossref","unstructured":"Sam, D.B., Surya, S., Babu, R.V.: Switching convolutional neural network for crowd counting. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR). IEEE (2017)","DOI":"10.1109\/CVPR.2017.429"},{"issue":"8","key":"2_CR2","doi-asserted-by":"publisher","first-page":"585","DOI":"10.1016\/S0262-8856(97)00093-0","volume":"16","author":"M Bertozz","year":"1998","unstructured":"Bertozz, M., Broggi, A., Fascioli, A.: Stereo inverse perspective mapping: theory and applications. Image Vision Comput. (IVC) 16(8), 585\u2013590 (1998)","journal-title":"Image Vision Comput. (IVC)"},{"key":"2_CR3","doi-asserted-by":"crossref","unstructured":"Boominathan, L., Kruthiventi, S.S.S., Babu, R.V.: Crowdnet: a deep convolutional network for dense crowd counting. In: Proceedings of the International Conference on Multimedia (MM), pp. 640\u2013644. ACM (2016)","DOI":"10.1145\/2964284.2967300"},{"key":"2_CR4","doi-asserted-by":"crossref","unstructured":"Caesar, H., et al.: nuscenes: a multimodal dataset for autonomous driving. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 11621\u201311631. IEEE (2020)","DOI":"10.1109\/CVPR42600.2020.01164"},{"key":"2_CR5","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"757","DOI":"10.1007\/978-3-030-01228-1_45","volume-title":"Computer Vision \u2013 ECCV 2018","author":"X Cao","year":"2018","unstructured":"Cao, X., Wang, Z., Zhao, Y., Su, F.: Scale aggregation network for accurate and efficient crowd counting. In: Ferrari, V., Hebert, M., Sminchisescu, C., Weiss, Y. (eds.) ECCV 2018. LNCS, vol. 11209, pp. 757\u2013773. Springer, Cham (2018). https:\/\/doi.org\/10.1007\/978-3-030-01228-1_45"},{"key":"2_CR6","doi-asserted-by":"crossref","unstructured":"Cheng, Z.Q., Dai, Q., Li, H., Song, J., Wu, X., Hauptmann, A.G.: Rethinking spatial invariance of convolutional networks for object counting. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 19638\u201319648. IEEE (2022)","DOI":"10.1109\/CVPR52688.2022.01902"},{"key":"2_CR7","unstructured":"Dosovitskiy, A., et\u00a0al.: An image is worth 16x16 words: transformers for image recognition at scale. In: International Conference on Learning Representations (ICLR) (2020)"},{"key":"2_CR8","doi-asserted-by":"crossref","unstructured":"Du, Z., Shi, M., Deng, J., Zafeiriou, S.: Redesigning multi-scale neural network for crowd counting. IEEE Trans. Image Process. (2023)","DOI":"10.1109\/TIP.2023.3289290"},{"key":"2_CR9","doi-asserted-by":"publisher","first-page":"98","DOI":"10.1016\/j.neucom.2020.01.087","volume":"392","author":"Y Fang","year":"2020","unstructured":"Fang, Y., Gao, S., Li, J., Luo, W., He, L., Bo, H.: Multi-level feature fusion based locality-constrained spatial transformer network for video crowd counting. Neurocomputing 392, 98\u2013107 (2020)","journal-title":"Neurocomputing"},{"key":"2_CR10","doi-asserted-by":"crossref","unstructured":"Ferryman, J., Shahrokni, A.: Pets2009: dataset and challenge. In: IEEE International Workshop on Performance Evaluation of Tracking and Surveillance, pp. 1\u20136. IEEE (2009)","DOI":"10.1109\/PETS-WINTER.2009.5399556"},{"key":"2_CR11","doi-asserted-by":"publisher","first-page":"94","DOI":"10.1016\/j.neucom.2022.09.113","volume":"513","author":"J Gao","year":"2022","unstructured":"Gao, J., Gong, M., Li, X.: Congested crowd instance localization with dilated convolutional swin transformer. Neurocomputing 513, 94\u2013103 (2022)","journal-title":"Neurocomputing"},{"issue":"2","key":"2_CR12","doi-asserted-by":"publisher","first-page":"187","DOI":"10.1631\/FITEE.2200380","volume":"24","author":"J Gao","year":"2023","unstructured":"Gao, J., et al.: Forget less, count better: a domain-incremental self-distillation learning benchmark for lifelong crowd counting. Front. Inf. Technol. Electron. Eng. 24(2), 187\u2013202 (2023)","journal-title":"Front. Inf. Technol. Electron. Eng."},{"issue":"10","key":"2_CR13","doi-asserted-by":"publisher","first-page":"3486","DOI":"10.1109\/TCSVT.2019.2919139","volume":"30","author":"J Gao","year":"2019","unstructured":"Gao, J., Wang, Q., Li, X.: Pcc net: perspective crowd counting via spatial convolutional network. IEEE Trans. Circ. Syst. Video Technol. (TCSVT) 30(10), 3486\u20133498 (2019)","journal-title":"IEEE Trans. Circ. Syst. Video Technol. (TCSVT)"},{"key":"2_CR14","doi-asserted-by":"crossref","unstructured":"Ghiasi, G., Lin, T.Y., Le, Q.V.: Nas-fpn: learning scalable feature pyramid architecture for object detection. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 7036\u20137045. IEEE (2019)","DOI":"10.1109\/CVPR.2019.00720"},{"key":"2_CR15","doi-asserted-by":"crossref","unstructured":"Hu, A., et al.: Fiery: future instance prediction in bird\u2019s-eye view from surround monocular cameras. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV), pp. 15273\u201315282. IEEE (2021)","DOI":"10.1109\/ICCV48922.2021.01499"},{"key":"2_CR16","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"747","DOI":"10.1007\/978-3-030-58542-6_45","volume-title":"Computer Vision \u2013 ECCV 2020","author":"Y Hu","year":"2020","unstructured":"Hu, Y.: NAS-count: counting-by-density with neural architecture search. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.-M. (eds.) ECCV 2020. LNCS, vol. 12367, pp. 747\u2013766. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58542-6_45"},{"key":"2_CR17","unstructured":"Huang, J., Huang, G., Zhu, Z., Ye, Y., Du, D.: Bevdet: high-performance multi-camera 3d object detection in bird-eye-view (2021)"},{"key":"2_CR18","doi-asserted-by":"crossref","unstructured":"Huang, Y., Zheng, W., Zhang, Z., Zhou, J., Lu, J.: Tri-perspective view for vision-based 3d semantic occupancy prediction. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 9223\u20139232. IEEE (2023)","DOI":"10.1109\/CVPR52729.2023.00890"},{"key":"2_CR19","doi-asserted-by":"crossref","unstructured":"Huang, Z.-K., Chen, W.T., Kuo, S.Y., Yang, M.H., Chiang, Y.C.: Counting crowds in bad weather (2023)","DOI":"10.1109\/ICCV51070.2023.02130"},{"key":"2_CR20","unstructured":"Jaderberg, M., Simonyan, J., Zisserman, A., et\u00a0al.: Spatial transformer networks. Adv. Neural Inf. Process. Syst. (NeurIPS) 28 (2015)"},{"key":"2_CR21","doi-asserted-by":"crossref","unstructured":"Jiang, X., et al.: Attention scaling for crowd counting. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 4706\u20134715. IEEE (2020)","DOI":"10.1109\/CVPR42600.2020.00476"},{"key":"2_CR22","doi-asserted-by":"crossref","unstructured":"Jiang, Y., et al.: Polarformer: multi-camera 3d object detection with polar transformer. In: Proceedings of the AAAI Conference on Artificial Intelligence (AAAI), vol. 1, pp. 1042\u20131050 (2023)","DOI":"10.1609\/aaai.v37i1.25185"},{"key":"2_CR23","unstructured":"Kim, J.H., On, K.W., Lim, W., Kim, J., Ha, J.W., Zhang, B.T.: Hadamard product for low-rank bilinear pooling (2016)"},{"key":"2_CR24","doi-asserted-by":"crossref","unstructured":"Kirillov, A., Girshick, R., He, K., Doll\u00e1r, P.: Panoptic feature pyramid networks. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 6399\u20136408. IEEE (2019)","DOI":"10.1109\/CVPR.2019.00656"},{"key":"2_CR25","doi-asserted-by":"crossref","unstructured":"Lei, Y., Liu, Y., Zhang, P., Liu, L.: Towards using count-level weak supervision for crowd counting. Pattern Recogn. (PR) 109, 107616 (2021)","DOI":"10.1016\/j.patcog.2020.107616"},{"key":"2_CR26","unstructured":"Li, T., et al.: Lanesegnet: map learning with lane segment perception for autonomous driving (2023)"},{"key":"2_CR27","doi-asserted-by":"crossref","unstructured":"Li, Y., et al.: Bevdepth: acquisition of reliable depth for multi-view 3d object detection. In: Proceedings of the AAAI Conference on Artificial Intelligence (AAAI), vol. 37, pp. 1477\u20131485 (2023)","DOI":"10.1609\/aaai.v37i2.25233"},{"key":"2_CR28","doi-asserted-by":"crossref","unstructured":"Li, Y., Zhang, X., Chen, D.: Csrnet: dilated convolutional neural networks for understanding the highly congested scenes. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR). IEEE (2018)","DOI":"10.1109\/CVPR.2018.00120"},{"key":"2_CR29","doi-asserted-by":"publisher","unstructured":"Li, Z., et al.: Bevformer: learning bird\u2019s-eye-view representation from multi-camera images via spatiotemporal transformers. In: Proceedings of the European Conference on Computer Vision (ECCV), pp. 1\u201318. Springer, Heidelberg (2022). https:\/\/doi.org\/10.1007\/978-3-031-20077-9_1","DOI":"10.1007\/978-3-031-20077-9_1"},{"key":"2_CR30","doi-asserted-by":"crossref","unstructured":"Li, Z., Yu, Z., Wang, W., Anandkumar, A., Lu, T., Alvarez, J.M.: Fb-bev: bev representation from forward-backward view transformations. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV), pp. 6919\u20136928. IEEE (2023)","DOI":"10.1109\/ICCV51070.2023.00637"},{"issue":"12","key":"2_CR31","doi-asserted-by":"publisher","first-page":"9056","DOI":"10.1109\/TPAMI.2021.3124956","volume":"44","author":"D Lian","year":"2021","unstructured":"Lian, D., Chen, X., Li, J., Luo, W., Gao, S.: Locating and counting heads in crowds with a depth prior. IEEE Trans. Pattern Anal. Mach. Intell. (TPAMI) 44(12), 9056\u20139072 (2021)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell. (TPAMI)"},{"issue":"6","key":"2_CR32","doi-asserted-by":"publisher","DOI":"10.1007\/s11432-021-3445-y","volume":"65","author":"D Liang","year":"2022","unstructured":"Liang, D., Chen, X., Wei, X., Zhou, Yu., Bai, X.: Transcrowd: weakly-supervised crowd counting with transformers. Sci. China Inf. Sci. 65(6), 160104 (2022)","journal-title":"Sci. China Inf. Sci."},{"key":"2_CR33","doi-asserted-by":"crossref","unstructured":"Liang, D., Xu, W., Bai, X.: An end-to-end transformer model for crowd localization. In: Proceedings of the European Conference on Computer Vision (ECCV), pp. 38\u201354. Springer, Heidelberg (2022)","DOI":"10.1007\/978-3-031-19769-7_3"},{"key":"2_CR34","first-page":"10421","volume":"35","author":"T Liang","year":"2022","unstructured":"Liang, T., et al.: Bevfusion: a simple and robust lidar-camera fusion framework. Adv. Neural Inf. Process. Syst. (NeurIPS) 35, 10421\u201310434 (2022)","journal-title":"Adv. Neural Inf. Process. Syst. (NeurIPS)"},{"key":"2_CR35","unstructured":"Liao, B., et al.: Maptr: structured modeling and learning for online vectorized hd map construction. In: International Conference on Learning Representations (ICLR) (2022)"},{"key":"2_CR36","doi-asserted-by":"crossref","unstructured":"Lin, T.Y., Doll\u00e1r, P., Girshick, R., He, K., Hariharan, B., Belongie, S.: Feature pyramid networks for object detection. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 2117\u20132125. IEEE (2017)","DOI":"10.1109\/CVPR.2017.106"},{"key":"2_CR37","doi-asserted-by":"crossref","unstructured":"Liu, C., Lu, H., Cao, Z., Liu, T.: Point-query quadtree for crowd counting, localization, and more (2023)","DOI":"10.1109\/ICCV51070.2023.00161"},{"key":"2_CR38","doi-asserted-by":"crossref","unstructured":"Liu, H., Teng, Y., Lu, T., Wang, H., Wang, L.: Sparsebev: high-performance sparse 3d object detection from multi-camera videos. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV), pp. 18580\u201318590 (2023)","DOI":"10.1109\/ICCV51070.2023.01703"},{"key":"2_CR39","doi-asserted-by":"crossref","unstructured":"Liu, J., Gao, C., Meng, D., Hauptmann, A.G.: Decidenet: counting varying density crowds through attention guided detection and density estimation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 5197\u20135206. IEEE (2018)","DOI":"10.1109\/CVPR.2018.00545"},{"key":"2_CR40","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"164","DOI":"10.1007\/978-3-030-58607-2_10","volume-title":"Computer Vision \u2013 ECCV 2020","author":"L Liu","year":"2020","unstructured":"Liu, L., Lu, H., Zou, H., Xiong, H., Cao, Z., Shen, C.: Weighing counts: sequential crowd counting by reinforcement learning. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.-M. (eds.) ECCV 2020. LNCS, vol. 12355, pp. 164\u2013181. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58607-2_10"},{"key":"2_CR41","doi-asserted-by":"crossref","unstructured":"Liu, L., Qiu, Z., Li, G., Liu, S., Ouyang, W., Lin, L.: Crowd counting with deep structured scale integration network. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV). IEEE (2019)","DOI":"10.1109\/ICCV.2019.00186"},{"key":"2_CR42","doi-asserted-by":"crossref","unstructured":"Liu, N., Long, Y., Zou, C., Niu, Q., Pan, L., Wu, H.: Adcrowdnet: an attention-injective deformable convolutional network for crowd understanding. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR). IEEE (2019)","DOI":"10.1109\/CVPR.2019.00334"},{"key":"2_CR43","doi-asserted-by":"crossref","unstructured":"Liu, W., Durasov, N., Fua, P.: Leveraging self-supervision for cross-domain crowd counting. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 5341\u20135352. IEEE (2022)","DOI":"10.1109\/CVPR52688.2022.00527"},{"key":"2_CR44","doi-asserted-by":"crossref","unstructured":"Liu, W., Salzmann, M., Fua, P.: Context-aware crowd counting. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 5099\u20135108. IEEE (2019)","DOI":"10.1109\/CVPR.2019.00524"},{"key":"2_CR45","doi-asserted-by":"publisher","unstructured":"Liu, Y., Wang, T., Zhang, X., Sun, J.: Petr: position embedding transformation for multi-view 3d object detection. In: Proceedings of the European Conference on Computer Vision (ECCV), pp. 531\u2013548. Springer, Heidelberg (2022). https:\/\/doi.org\/10.1007\/978-3-031-19812-0_31","DOI":"10.1007\/978-3-031-19812-0_31"},{"key":"2_CR46","doi-asserted-by":"crossref","unstructured":"Liu, Z., et al.: Swin transformer: hierarchical vision transformer using shifted windows. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV), pp. 10012\u201310022 (2021)","DOI":"10.1109\/ICCV48922.2021.00986"},{"key":"2_CR47","doi-asserted-by":"crossref","unstructured":"Liu, Z., et al.: Bevfusion: multi-task multi-sensor fusion with unified bird\u2019s-eye view representation. In: Proceddings of the IEEE International Conference on Robotics and Automation (ICRA), pp. 2774\u20132781. IEEE (2023)","DOI":"10.1109\/ICRA48891.2023.10160968"},{"key":"2_CR48","doi-asserted-by":"crossref","unstructured":"Ma, Y., Sanchez, V., Guha, T.: Fusioncount: efficient crowd counting via multiscale feature fusion. In: International Conference on Image Processing (ICIP), pp. 3256\u20133260. IEEE (2022)","DOI":"10.1109\/ICIP46576.2022.9897322"},{"key":"2_CR49","doi-asserted-by":"crossref","unstructured":"Ma, Z., Hong, X., Wei, X., Qiu, Y., Gong, Y.: Towards a universal model for cross-dataset crowd counting. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV), pp. 3205\u20133214. IEEE (2021)","DOI":"10.1109\/ICCV48922.2021.00319"},{"key":"2_CR50","doi-asserted-by":"crossref","unstructured":"Ma, Z., Wei, X., Hong, X., Gong, Y.: Bayesian loss for crowd count estimation with point supervision. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV), pp. 6142\u20136151. IEEE (2019)","DOI":"10.1109\/ICCV.2019.00624"},{"key":"2_CR51","doi-asserted-by":"crossref","unstructured":"Man, Y., Gui, L.Y., Wang, Y.X.: Bev-guided multi-modality fusion for driving perception. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 21960\u201321969 (2023)","DOI":"10.1109\/CVPR52729.2023.02103"},{"key":"2_CR52","doi-asserted-by":"publisher","first-page":"8199","DOI":"10.1109\/TIP.2020.3009030","volume":"29","author":"H Mo","year":"2020","unstructured":"Mo, H., et al.: Background noise filtering and distribution dividing for crowd counting. IEEE Trans. Image Process. (TIP) 29, 8199\u20138212 (2020)","journal-title":"IEEE Trans. Image Process. (TIP)"},{"key":"2_CR53","doi-asserted-by":"publisher","first-page":"6306","DOI":"10.1109\/TIP.2022.3207584","volume":"31","author":"H Mo","year":"2022","unstructured":"Mo, H., et al.: Attention-guided collaborative counting. IEEE Trans. Image Process. (TIP) 31, 6306\u20136319 (2022)","journal-title":"IEEE Trans. Image Process. (TIP)"},{"key":"2_CR54","doi-asserted-by":"crossref","unstructured":"Pan, X., Mo, H., Zhou, Z., Wu, W.: Attention guided region division for crowd counting. In: Proceedings of the IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 2568\u20132572. IEEE (2020)","DOI":"10.1109\/ICASSP40776.2020.9053761"},{"key":"2_CR55","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"194","DOI":"10.1007\/978-3-030-58568-6_12","volume-title":"Computer Vision \u2013 ECCV 2020","author":"J Philion","year":"2020","unstructured":"Philion, J., Fidler, S.: Lift, splat, shoot: encoding images from arbitrary camera rigs by implicitly unprojecting to 3D. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.-M. (eds.) ECCV 2020. LNCS, vol. 12359, pp. 194\u2013210. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58568-6_12"},{"key":"2_CR56","doi-asserted-by":"crossref","unstructured":"Qiu, H., Wang, C., Wang, J., Wang, N., Zeng, W.: Cross view fusion for 3d human pose estimation. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV), pp. 4342\u20134351. IEEE (2019)","DOI":"10.1109\/ICCV.2019.00444"},{"key":"2_CR57","unstructured":"Ranasinghe, Y., Nair, N.G., Bandara, W.G.C., Patel, N.M.: Diffuse-denoise-count: accurate crowd-counting with diffusion models (2023)"},{"key":"2_CR58","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"17","DOI":"10.1007\/978-3-319-48881-3_2","volume-title":"Computer Vision \u2013 ECCV 2016 Workshops","author":"E Ristani","year":"2016","unstructured":"Ristani, E., Solera, F., Zou, R., Cucchiara, R., Tomasi, C.: Performance measures and a data set for\u00a0multi-target, multi-camera tracking. In: Hua, G., J\u00e9gou, H. (eds.) ECCV 2016. LNCS, vol. 9914, pp. 17\u201335. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-48881-3_2"},{"key":"2_CR59","doi-asserted-by":"crossref","unstructured":"Shi, M., Yang, Z., Xu, C., Chen, Q.: Revisiting perspective information for efficient crowd counting. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 7279\u20137288. IEEE (2019)","DOI":"10.1109\/CVPR.2019.00745"},{"key":"2_CR60","doi-asserted-by":"crossref","unstructured":"Shi, X., Li, X., Wu, C., Kong, S., Yang, J., He, L.: A real-time deep network for crowd counting. In: Proceedings of the IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 2328\u20132332. IEEE (2020)","DOI":"10.1109\/ICASSP40776.2020.9053780"},{"key":"2_CR61","doi-asserted-by":"crossref","unstructured":"Sindagi, V.A., Patel, V.M.: Cnn-based cascaded multi-task learning of high-level prior and density estimation for crowd counting. In: Proceedings of the International Conference on Advanced Video and Signal based Surveillance (AVSS), pp. 1\u20136. IEEE (2017)","DOI":"10.1109\/AVSS.2017.8078491"},{"key":"2_CR62","doi-asserted-by":"crossref","unstructured":"Song, Q., et al.: Rethinking counting and localization in crowds: a purely point-based framework. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV), pp. 3365\u20133374. IEEE (2021)","DOI":"10.1109\/ICCV48922.2021.00335"},{"key":"2_CR63","doi-asserted-by":"crossref","unstructured":"Song, Q., et al.: To choose or to fuse? scale selection for crowd counting. In: Proceedings of the AAAI Conference on Artificial Intelligence (AAAI), vol. 35, pp. 2576\u20132583 (2021)","DOI":"10.1609\/aaai.v35i3.16360"},{"key":"2_CR64","unstructured":"Sun, G., Liu, Y., Probst, T., Paudel, D.P., Popovic, N., Van Gool, L.: Boosting crowd counting with transformers (2021)"},{"key":"2_CR65","doi-asserted-by":"crossref","unstructured":"Sun, P., et\u00a0al.: Scalability in perception for autonomous driving: waymo open dataset. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 2446\u20132454. IEEE (2020)","DOI":"10.1109\/CVPR42600.2020.00252"},{"key":"2_CR66","unstructured":"Tian, Y., Chu, X., Wang, H.: Cctrans: simplifying and improving crowd counting with transformer (2021)"},{"key":"2_CR67","doi-asserted-by":"crossref","unstructured":"Tong, W., et\u00a0al.: Scene as occupancy. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV), pp. 8406\u20138415 (2023)","DOI":"10.1109\/ICCV51070.2023.00772"},{"key":"2_CR68","unstructured":"Vaswani, A., et al.: Attention is all you need. Adv. Neural Inf. Process. Syst. (NeurIPS) 30 (2017)"},{"key":"2_CR69","unstructured":"Wang, B., Liu, H., Samaras, D., Nguyen, M.H.: Distribution matching for crowd counting. Adv. Neural Inf. Process. Syst. (NeurIPS), 1595\u20131607 (2020)"},{"key":"2_CR70","doi-asserted-by":"publisher","first-page":"292","DOI":"10.1016\/j.neucom.2020.05.056","volume":"407","author":"P Wang","year":"2020","unstructured":"Wang, P., Gao, C., Wang, Y., Li, H., Gao, Y.: Mobilecount: an efficient encoder-decoder framework for real-time crowd counting. Neurocomputing 407, 292\u2013299 (2020)","journal-title":"Neurocomputing"},{"key":"2_CR71","doi-asserted-by":"crossref","unstructured":"Wang, X., et al.: Openoccupancy: a large scale benchmark for surrounding semantic occupancy perception (2023)","DOI":"10.1109\/ICCV51070.2023.01636"},{"key":"2_CR72","doi-asserted-by":"crossref","unstructured":"Wang, Y., Chen, Y., Zhang, Z.: Frustumformer: adaptive instance-aware resampling for multi-view 3d detection. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 5096\u20135105 (2023)","DOI":"10.1109\/CVPR52729.2023.00493"},{"key":"2_CR73","doi-asserted-by":"crossref","unstructured":"Wei, X., et al.: Scene-adaptive attention network for crowd counting (2021)","DOI":"10.1155\/2021\/5596488"},{"key":"2_CR74","doi-asserted-by":"crossref","unstructured":"Wei, X., Qiu, Y., Ma, Z., Hong, X., Gong, Y.: Semi-supervised crowd counting via multiple representation learning. IEEE Trans. Image Process. (2023)","DOI":"10.1109\/TIP.2023.3313490"},{"key":"2_CR75","doi-asserted-by":"crossref","unstructured":"Wei, Y., Zhao, L., Zheng, W., Zhu, Z., Zhou, J., Lu, J.: Surroundocc: multi-camera 3d occupancy prediction for autonomous driving. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV), pp. 21729\u201321740. IEEE (2023)","DOI":"10.1109\/ICCV51070.2023.01986"},{"key":"2_CR76","unstructured":"Xu, R., et al.: Cobevt: cooperative bird\u2019s eye view semantic segmentation with sparse transformers (2022)"},{"key":"2_CR77","doi-asserted-by":"crossref","unstructured":"Yang, C., et\u00a0al.: Bevformer v2: adapting modern image backbones to bird\u2019s-eye-view recognition via perspective supervision. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 17830\u201317839. IEEE (2023)","DOI":"10.1109\/CVPR52729.2023.01710"},{"key":"2_CR78","doi-asserted-by":"crossref","unstructured":"Yang, S., Guo, W., Ren, Y.: Crowdformer: an overlap patching vision transformer for top-down crowd counting. In: Proceedings of the AAAI Conference on Artificial Intelligence (AAAI), pp. 23\u201329 (2022)","DOI":"10.24963\/ijcai.2022\/215"},{"key":"2_CR79","doi-asserted-by":"crossref","unstructured":"Yuan, K., Guo, Z., Wang, Z.J.: Rggnet: tolerance aware lidar-camera online calibration with geometric deep learning and generative model. IEEE Rob. Autom. Lett. (RA-L) 5(4), 6956\u20136963 (2020)","DOI":"10.1109\/LRA.2020.3026958"},{"key":"2_CR80","doi-asserted-by":"publisher","unstructured":"Yuan, M., Wang, Y., Wei, X.: Translation, scale and rotation: cross-modal alignment meets rgb-infrared vehicle detection. In: Proceedings of the European Conference on Computer Vision (ECCV), pp. 509\u2013525. Springer, Heidelberg (2022). https:\/\/doi.org\/10.1007\/978-3-031-20077-9_30","DOI":"10.1007\/978-3-031-20077-9_30"},{"key":"2_CR81","doi-asserted-by":"crossref","unstructured":"Zeng, L., Xu, X., Cai, B., Qiu, S., Zhang, T.: Multi-scale convolutional neural networks for crowd counting. In: Proceedings of the IEEE International Conference on Image Processing (ICIP). IEEE (2017)","DOI":"10.1109\/ICIP.2017.8296324"},{"key":"2_CR82","doi-asserted-by":"publisher","first-page":"5813","DOI":"10.1109\/TMM.2022.3199555","volume":"25","author":"Q Zhai","year":"2022","unstructured":"Zhai, Q., Yang, F., Li, X., Xie, G.-S., Cheng, H., Liu, Z.: Co-communication graph convolutional network for multi-view crowd counting. IEEE Trans. Multimedia (TMM) 25, 5813\u20135825 (2022)","journal-title":"IEEE Trans. Multimedia (TMM)"},{"key":"2_CR83","doi-asserted-by":"crossref","unstructured":"Zhang, A., et al.: Attentional neural fields for crowd counting. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV), pp. 5714\u20135723. IEEE (2019)","DOI":"10.1109\/ICCV.2019.00581"},{"key":"2_CR84","unstructured":"Zhang, C., Li, H., Wang, X., Yang, X.: Cross-scene crowd counting via deep convolutional neural networks. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 833\u2013841. IEEE (2015)"},{"key":"2_CR85","doi-asserted-by":"crossref","unstructured":"Zhang, L., Shi, M., Chen, Q.: Crowd counting via scale-adaptive convolutional neural network. In: Winter Conference on Applications of Computer Vision (WACV), pp. 1113\u20131121. IEEE (2018)","DOI":"10.1109\/WACV.2018.00127"},{"key":"2_CR86","doi-asserted-by":"crossref","unstructured":"Zhang, Q., Chan, A.B.: Wide-area crowd counting via ground-plane density maps and multi-view fusion cnns. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 8297\u20138306. IEEE (2019)","DOI":"10.1109\/CVPR.2019.00849"},{"key":"2_CR87","doi-asserted-by":"crossref","unstructured":"Zhang, Q., Chan, A.B.: 3d crowd counting via multi-view fusion with 3d gaussian kernels. In: Proceedings of the AAAI Conference on Artificial Intelligence (AAAI), vol. 34, pp. 12837\u201312844 (2020)","DOI":"10.1609\/aaai.v34i07.6980"},{"issue":"12","key":"2_CR88","doi-asserted-by":"publisher","first-page":"3123","DOI":"10.1007\/s11263-022-01685-7","volume":"130","author":"Q Zhang","year":"2022","unstructured":"Zhang, Q., Chan, A.B.: 3D crowd counting via geometric attention-guided multi-view fusion. Int. J. Comput. Vision 130(12), 3123\u20133139 (2022)","journal-title":"Int. J. Comput. Vision"},{"key":"2_CR89","doi-asserted-by":"publisher","unstructured":"Zhang, Q., Chan, A.B.: Calibration-free multi-view crowd counting. In: Proceedings of the European Conference on Computer Vision (ECCV), pp. 227\u2013244. Springer, Heidelberg (2022). https:\/\/doi.org\/10.1007\/978-3-031-20077-9_14","DOI":"10.1007\/978-3-031-20077-9_14"},{"issue":"8","key":"2_CR90","doi-asserted-by":"publisher","first-page":"1938","DOI":"10.1007\/s11263-022-01626-4","volume":"130","author":"Q Zhang","year":"2022","unstructured":"Zhang, Q., Chan, A.B.: Wide-area crowd counting: multi-view fusion networks for counting in large scenes. Int. J. Comput. Vision (IJCV) 130(8), 1938\u20131960 (2022)","journal-title":"Int. J. Comput. Vision (IJCV)"},{"key":"2_CR91","doi-asserted-by":"crossref","unstructured":"Zhang, Q., Lin, W., Chan, A.B.: Cross-view cross-scene multi-view crowd counting. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 557\u2013567. IEEE (2021)","DOI":"10.1109\/CVPR46437.2021.00062"},{"key":"2_CR92","doi-asserted-by":"crossref","unstructured":"Zhang, X., et al.: Dcnas: densely connected neural architecture search for semantic image segmentation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 13956\u201313967. IEEE (2021)","DOI":"10.1109\/CVPR46437.2021.01374"},{"key":"2_CR93","doi-asserted-by":"crossref","unstructured":"Zhang, Y., Zhou, D., Chen, S., Gao, S., Ma, Y.: Single-image crowd counting via multi-column convolutional neural network. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 589\u2013597. IEEE (2016)","DOI":"10.1109\/CVPR.2016.70"},{"key":"2_CR94","doi-asserted-by":"crossref","unstructured":"Zhang, Y., Zhu, Z., Du, D.: Occformer: dual-path transformer for vision-based 3d semantic occupancy prediction (2023)","DOI":"10.1109\/ICCV51070.2023.00865"},{"key":"2_CR95","doi-asserted-by":"crossref","unstructured":"Zheng, L., Li, Y., Mu, Y.: Learning factorized cross-view fusion for multi-view crowd counting. In: Proceedings of the IEEE International Conference on Multimedia and Expo (ICME), pp. 1\u20136. IEEE (2021)","DOI":"10.1109\/ICME51207.2021.9428284"},{"key":"2_CR96","doi-asserted-by":"crossref","unstructured":"Zhou, B., Kr\u00e4henb\u00fchl, P.: Cross-view transformers for real-time map-view semantic segmentation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 13760\u201313769. IEEE (2022)","DOI":"10.1109\/CVPR52688.2022.01339"},{"key":"2_CR97","doi-asserted-by":"crossref","unstructured":"Zhu, H., Yuan, J., Zhong, X., Yang, Z., Wang, Z., He, S.: Daot: domain-agnostically aligned optimal transport for domain-adaptive crowd counting (2023)","DOI":"10.1145\/3581783.3611793"},{"key":"2_CR98","unstructured":"Zhu, X., Su, W., Lu, L., Li, B., Wang, X., Dai, J.: Deformable detr: deformable transformers for end-to-end object detection. In: International Conference on Learning Representations (ICLR) (2020)"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2024"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-72943-0_2","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,11,28]],"date-time":"2024-11-28T14:15:27Z","timestamp":1732803327000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-72943-0_2"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,11,29]]},"ISBN":["9783031729423","9783031729430"],"references-count":98,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-72943-0_2","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,11,29]]},"assertion":[{"value":"29 November 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Milan","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 September 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4 October 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2024.ecva.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}