{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,25]],"date-time":"2026-03-25T16:04:29Z","timestamp":1774454669646,"version":"3.50.1"},"reference-count":79,"publisher":"Springer Science and Business Media LLC","issue":"7","license":[{"start":{"date-parts":[[2022,4,23]],"date-time":"2022-04-23T00:00:00Z","timestamp":1650672000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2022,4,23]],"date-time":"2022-04-23T00:00:00Z","timestamp":1650672000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Vis Comput"],"published-print":{"date-parts":[[2023,7]]},"DOI":"10.1007\/s00371-022-02485-3","type":"journal-article","created":{"date-parts":[[2022,4,23]],"date-time":"2022-04-23T12:02:57Z","timestamp":1650715377000},"page":"2671-2682","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":41,"title":["CCST: crowd counting with swin transformer"],"prefix":"10.1007","volume":"39","author":[{"given":"Bo","family":"Li","sequence":"first","affiliation":[]},{"given":"Yong","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"Haihui","family":"Xu","sequence":"additional","affiliation":[]},{"given":"Baocai","family":"Yin","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2022,4,23]]},"reference":[{"key":"2485_CR1","doi-asserted-by":"crossref","unstructured":"Wang, Y., Zou, Y.: Fast visual object counting via example-based density estimation. In: 2016 IEEE International Conference on Image Processing (ICIP), pp.\u00a03653\u20133657, IEEE, 2016","DOI":"10.1109\/ICIP.2016.7533041"},{"key":"2485_CR2","doi-asserted-by":"crossref","unstructured":"Walach, C., Wolf, L.: Learning to count with cnn boosting. In: European conference on computer vision, pp.\u00a0660\u2013676, Springer, 2016","DOI":"10.1007\/978-3-319-46475-6_41"},{"key":"2485_CR3","first-page":"1324","volume":"23","author":"V Lempitsky","year":"2010","unstructured":"Lempitsky, V., Zisserman, A.: Learning to count objects in images. Adv. Neural Inf. Process. Syst. 23, 1324\u20131332 (2010)","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"2485_CR4","doi-asserted-by":"crossref","unstructured":"Onoro-Rubio, D., L\u00f3pez-Sastre, R.J.: Towards perspective-free object counting with deep learning. In: European conference on computer vision, pp.\u00a0615\u2013629, Springer, 2016","DOI":"10.1007\/978-3-319-46478-7_38"},{"key":"2485_CR5","doi-asserted-by":"crossref","unstructured":"Zhang, H., Kyaw, Z., Chang, S.-F., Chua, T.-S.: Visual translation embedding network for visual relation detection. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp.\u00a05532\u20135540, 2017","DOI":"10.1109\/CVPR.2017.331"},{"key":"2485_CR6","doi-asserted-by":"crossref","unstructured":"Guerrero-G\u00f3mez-Olmedo, R., Torre-Jim\u00e9nez, B., L\u00f3pez-Sastre, R., Maldonado-Basc\u00f3n, S., Onoro-Rubio, D.: Extremely overlapping vehicle counting. In: Iberian Conference on Pattern Recognition and Image Analysis, pp.\u00a0423\u2013431, Springer, 2015","DOI":"10.1007\/978-3-319-19390-8_48"},{"key":"2485_CR7","doi-asserted-by":"crossref","unstructured":"Li, B., Shu, X., Yan, R.: Storyboard relational model for group activity recognition. In: Proceedings of the 2nd ACM International Conference on Multimedia in Asia, pp.\u00a01\u20137, 2021","DOI":"10.1145\/3444685.3446255"},{"key":"2485_CR8","doi-asserted-by":"crossref","unstructured":"Shu, X., Zhang, L., Qi, G.-J., Liu, W., Tang, J.: Spatiotemporal co-attention recurrent neural networks for human-skeleton motion prediction. IEEE Transactions on Pattern Analysis and Machine Intelligence, 2021","DOI":"10.1109\/TPAMI.2021.3050918"},{"key":"2485_CR9","doi-asserted-by":"crossref","unstructured":"Shu, X., Yang, J., Yan, R., Song, Y.: Expansion-squeeze-excitation fusion network for elderly activity recognition. In: IEEE Transactions on Circuits and Systems for Video Technology, 2022","DOI":"10.1109\/TCSVT.2022.3142771"},{"key":"2485_CR10","doi-asserted-by":"crossref","unstructured":"Yan, R., Tang, J., Shu, X., Li, Z., Tian, Q.: Participation-contributed temporal dynamic model for roup activity recognition. In: Proceedings of the 26th ACM international conference on Multimedia, pp.\u00a01292\u20131300, 2018","DOI":"10.1145\/3240508.3240572"},{"key":"2485_CR11","unstructured":"Yan, R., Xie, L., Tang, J., Shu, X., Tian, Q.: Higcin: hierarchical graph-based cross inference network for group activity recognition. In: IEEE Transactions on Pattern Analysis and Machine Intelligence, 2020"},{"key":"2485_CR12","doi-asserted-by":"crossref","unstructured":"Yan, R., Xie, L., Tang, J., Shu, X., Tian, Q.: Social adaptive module for weakly-supervised group activity recognition. In: European Conference on Computer Vision, pp.\u00a0208\u2013224, Springer, 2020","DOI":"10.1007\/978-3-030-58598-3_13"},{"key":"2485_CR13","unstructured":"Wu, B., Nevatia, R.: Detection of multiple, partially occluded humans in a single image by bayesian combination of edgelet part detectors. In: Tenth IEEE International Conference on Computer Vision (ICCV\u201905) Volume 1, pp.\u00a090\u201397, IEEE, 2005"},{"key":"2485_CR14","doi-asserted-by":"crossref","unstructured":"Sabzmeydani, P., Mori, G.: Detecting pedestrians by learning shapelet features. In: 2007 IEEE Conference on Computer Vision and Pattern Recognition, pp.\u00a01\u20138, IEEE, 2007","DOI":"10.1109\/CVPR.2007.383134"},{"key":"2485_CR15","unstructured":"Dalal, N., Triggs, B.: Histograms of oriented gradients for human detection. In: 2005 IEEE computer society conference on computer vision and pattern recognition (CVPR\u201905), vol.\u00a01, pp.\u00a0886\u2013893, Ieee, 2005"},{"key":"2485_CR16","doi-asserted-by":"crossref","unstructured":"Zhang, Y., Zhou, D., Chen, S., Gao, S., Ma, Y.: Single-image crowd counting via multi-column convolutional neural network. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp.\u00a0589\u2013597, 2016","DOI":"10.1109\/CVPR.2016.70"},{"key":"2485_CR17","doi-asserted-by":"publisher","first-page":"107616","DOI":"10.1016\/j.patcog.2020.107616","volume":"109","author":"Y Lei","year":"2021","unstructured":"Lei, Y., Liu, Y., Zhang, P., Liu, L.: Towards using count-level weak supervision for crowd counting. Pattern Recognit. 109, 107616 (2021)","journal-title":"Pattern Recognit."},{"key":"2485_CR18","doi-asserted-by":"crossref","unstructured":"Yang, Y., Li, G., Wu, Z., Su, L., Huang, Q., Sebe, N.: Weakly-supervised crowd counting learns from sorting rather than locations. In: Computer Vision\u2013ECCV 2020: 16th European Conference, Glasgow, UK, August 23\u201328, 2020, Proceedings, Part VIII 16, pp.\u00a01\u201317, Springer, 2020","DOI":"10.1007\/978-3-030-58598-3_1"},{"key":"2485_CR19","first-page":"8868","volume":"33","author":"DB Sam","year":"2019","unstructured":"Sam, D.B., Sajjan, N.N., Maurya, H., Babu, R.V.: Almost unsupervised learning for dense crowd counting. Proc. AAAI Conf. Artif. Intell. 33, 8868\u20138875 (2019)","journal-title":"Proc. AAAI Conf. Artif. Intell."},{"key":"2485_CR20","doi-asserted-by":"crossref","unstructured":"von Borstel, M., Kandemir, M., Schmidt, P., Rao, K., Rajamani, K., Hamprecht, F.A.: Gaussian process density counting from weak supervision. In: European Conference on Computer Vision, pp.\u00a0365\u2013380, Springer, 2016","DOI":"10.1007\/978-3-319-46448-0_22"},{"key":"2485_CR21","doi-asserted-by":"crossref","unstructured":"Boominathan, L., Kruthiventi, S., Babu, R.V.: Crowdnet: A deep convolutional network for dense crowd counting. In: Proceedings of the 24th ACM international conference on Multimedia, pp.\u00a0640\u2013644, 2016","DOI":"10.1145\/2964284.2967300"},{"key":"2485_CR22","doi-asserted-by":"crossref","unstructured":"Zhang, L., Shi, M., Chen, Q.: Crowd counting via scale-adaptive convolutional neural network. In: 2018 IEEE Winter Conference on Applications of Computer Vision (WACV), pp.\u00a01113\u20131121, IEEE, 2018","DOI":"10.1109\/WACV.2018.00127"},{"key":"2485_CR23","doi-asserted-by":"crossref","unstructured":"Cao, X., Wang, Z., Zhao, Y., Su, F.: Scale aggregation network for accurate and efficient crowd counting. In: Proceedings of the European Conference on Computer Vision (ECCV), pp.\u00a0734\u2013750, 2018","DOI":"10.1007\/978-3-030-01228-1_45"},{"key":"2485_CR24","doi-asserted-by":"crossref","unstructured":"Jiang, X., Xiao, Z., Zhang, B., Zhen, X., Cao, X., Doermann, D., Shao, L.: Crowd counting and density estimation by trellis encoder-decoder networks. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp.\u00a06133\u20136142, 2019","DOI":"10.1109\/CVPR.2019.00629"},{"key":"2485_CR25","doi-asserted-by":"publisher","first-page":"79","DOI":"10.1016\/j.ins.2020.04.001","volume":"528","author":"L Dong","year":"2020","unstructured":"Dong, L., Zhang, H., Ji, Y., Ding, Y.: Crowd counting by using multi-level density-based spatial information: a multi-scale cnn framework. Inf. Sci. 528, 79\u201391 (2020)","journal-title":"Inf. Sci."},{"issue":"8","key":"2485_CR26","doi-asserted-by":"publisher","first-page":"2127","DOI":"10.1007\/s00371-020-01974-7","volume":"37","author":"SD Khan","year":"2021","unstructured":"Khan, S.D., Basalamah, S.: Scale and density invariant head detection deep model for crowd counting in pedestrian crowds. Vis. Comput. 37(8), 2127\u20132137 (2021)","journal-title":"Vis. Comput."},{"key":"2485_CR27","doi-asserted-by":"crossref","unstructured":"Li, Z., Lu, S., Dong, Y., Guo, J.: Msffa: a multi-scale feature fusion and attention mechanism network for crowd counting. Vis. Comput., pp.\u00a01\u201312, (2022)","DOI":"10.1007\/s00371-021-02383-0"},{"key":"2485_CR28","unstructured":"Gao, J., Han, T., Yuan, Y., Wang, Q.: Learning independent instance maps for crowd localization. arXiv preprint arXiv:2012.04164, 2020"},{"key":"2485_CR29","doi-asserted-by":"crossref","unstructured":"Gao, J., Gong, M., Li, X.: Congested crowd instance localization with dilated convolutional swin transformer. arXiv preprint arXiv:2108.00584, 2021","DOI":"10.1016\/j.neucom.2022.09.113"},{"key":"2485_CR30","doi-asserted-by":"crossref","unstructured":"Liang, D., Chen, X., Xu, W., Zhou, Y., Bai, X.: Transcrowd: weakly-supervised crowd counting with transformer. arXiv preprint arXiv:2104.09116, 2021","DOI":"10.1007\/s11432-021-3445-y"},{"key":"2485_CR31","doi-asserted-by":"crossref","unstructured":"Amirgholipour\u00a0Kasmani, S., He, X., Jia, W., Wang, D., Zeibots, M.: A-ccnn: adaptive ccnn for density estimation and crowd counting. arXiv e-prints, pp.\u00a0arXiv\u20131804, 2018","DOI":"10.1109\/ICIP.2018.8451399"},{"key":"2485_CR32","doi-asserted-by":"crossref","unstructured":"Babu\u00a0Sam, D., Surya, S., Venkatesh\u00a0Babu, R.: Switching convolutional neural network for crowd counting. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp.\u00a05744\u20135752, 2017","DOI":"10.1109\/CVPR.2017.429"},{"key":"2485_CR33","doi-asserted-by":"publisher","first-page":"2714","DOI":"10.1109\/TIP.2019.2952083","volume":"29","author":"Y Tian","year":"2019","unstructured":"Tian, Y., Lei, Y., Zhang, J., Wang, J.Z.: Padnet: pan-density crowd counting. IEEE Trans. Image Process. 29, 2714\u20132727 (2019)","journal-title":"IEEE Trans. Image Process."},{"key":"2485_CR34","doi-asserted-by":"crossref","unstructured":"Sam, D.B., Sajjan, N.N., Babu, R.V., Srinivasan, M.: Divide and grow: capturing huge diversity in crowd images with incrementally growing cnn. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp.\u00a03618\u20133626, 2018","DOI":"10.1109\/CVPR.2018.00381"},{"issue":"4","key":"2485_CR35","doi-asserted-by":"publisher","first-page":"632","DOI":"10.20965\/jaciii.2017.p0632","volume":"21","author":"K Han","year":"2017","unstructured":"Han, K., Wan, W., Yao, H., Hou, L.: Image crowd counting using convolutional neural network and markov random field. J. Adv. Comput. Intell. Intell. Inf. 21(4), 632\u2013638 (2017)","journal-title":"J. Adv. Comput. Intell. Intell. Inf."},{"key":"2485_CR36","doi-asserted-by":"crossref","unstructured":"Liu, Z., Lin, Y., Cao, Y., Hu, H., Wei, Y., Zhang, Z., Lin, S., Guo, B.: Swin transformer: hierarchical vision transformer using shifted windows. arXiv preprint arXiv:2103.14030, 2021","DOI":"10.1109\/ICCV48922.2021.00986"},{"key":"2485_CR37","unstructured":"Dosovitskiy, A., Beyer, L., Kolesnikov, A., Weissenborn, D., Zhai, X., Unterthiner, T., Dehghani, M., Minderer, M., Heigold, G., Gelly, S., et\u00a0al.: An image is worth 16x16 words: Transformers for image recognition at scale. arXiv preprint arXiv:2010.11929, 2020"},{"key":"2485_CR38","doi-asserted-by":"crossref","unstructured":"Carion, N., Massa, F., Synnaeve, G., Usunier, N., Kirillov, A., Zagoruyko, S.: End-to-end object detection with transformers. In: European Conference on Computer Vision, pp.\u00a0213\u2013229, Springer, 2020","DOI":"10.1007\/978-3-030-58452-8_13"},{"key":"2485_CR39","unstructured":"Li, B., Huang, H., Zhang, A., Liu, P., Liu, C.: Approaches on crowd counting and density estimation: a review. Pattern Anal. Appl., pp.\u00a01\u201322, (2021)"},{"key":"2485_CR40","unstructured":"Kang, D., Chan, A.: Crowd counting by adaptively fusing predictions from an image pyramid. arXiv preprint arXiv:1805.06115, 2018"},{"key":"2485_CR41","doi-asserted-by":"crossref","unstructured":"Lin, T.-Y., Doll\u00e1r, P., Girshick, R., He, K., Hariharan, B., Belongie, S.: Feature pyramid networks for object detection. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp.\u00a02117\u20132125, 2017","DOI":"10.1109\/CVPR.2017.106"},{"key":"2485_CR42","doi-asserted-by":"publisher","first-page":"175","DOI":"10.1016\/j.procs.2019.08.155","volume":"157","author":"TW Cenggoro","year":"2019","unstructured":"Cenggoro, T.W., Aslamiah, A.H., Yunanto, A.: Feature pyramid networks for crowd counting. Proc. Comput. Sci. 157, 175\u2013182 (2019)","journal-title":"Proc. Comput. Sci."},{"key":"2485_CR43","doi-asserted-by":"crossref","unstructured":"Kalyani, G. Janakiramaiah, B. LV, N.\u00a0P. Karuna, A., et\u00a0al.: Efficient crowd counting model using feature pyramid network and resnext 2021","DOI":"10.1007\/s00500-021-05993-x"},{"key":"2485_CR44","doi-asserted-by":"crossref","unstructured":"Wang, W., Liu, Q., Wang, W.: Pyramid-dilated deep convolutional neural network for crowd counting. Appl. Intell., pp.\u00a01\u201313, (2021)","DOI":"10.3390\/sym13040703"},{"key":"2485_CR45","doi-asserted-by":"crossref","unstructured":"Lei, T., Zhang, D., Wang, R., Li, S., Zhang, W., Nandi, A.K.: Mfp-net: multi-scale feature pyramid network for crowd counting. IET Image Process., 2021","DOI":"10.1049\/ipr2.12230"},{"key":"2485_CR46","unstructured":"Varior, R.R., Shuai, B., Tighe, J., Modolo, D.: Multi-scale attention network for crowd counting. arXiv preprint arXiv:1901.06026, 2019"},{"key":"2485_CR47","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1016\/j.neucom.2019.08.018","volume":"363","author":"J Gao","year":"2019","unstructured":"Gao, J., Wang, Q., Yuan, Y.: Scar: spatial-\/channel-wise attention regression networks for crowd counting. Neurocomputing 363, 1\u20138 (2019)","journal-title":"Neurocomputing"},{"key":"2485_CR48","unstructured":"Zhu, L., Zhao, Z., Lu, C., Lin, Y., Peng, Y., Yao, T.: Dual path multi-scale fusion networks with attention for crowd counting. arXiv preprint arXiv:1902.01115, 2019"},{"key":"2485_CR49","doi-asserted-by":"crossref","unstructured":"Jiang, X., Zhang, L., Xu, M., Zhang, T., Lv, P., Zhou, B., Yang, X., Pang, Y.: Attention scaling for crowd counting. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp.\u00a04706\u20134715, 2020","DOI":"10.1109\/CVPR42600.2020.00476"},{"key":"2485_CR50","unstructured":"Tian, Y., Chu, X., Wang, H.: Cctrans: simplifying and improving crowd counting with transformer. In: arXiv preprint arXiv:2109.14483, 2021"},{"key":"2485_CR51","unstructured":"Sun, G., Liu, Y., Probst, T., Paudel, D.P., Popovic, N., Van\u00a0Gool, L.: Boosting crowd counting with transformers. arXiv preprint arXiv:2105.10926, 2021"},{"key":"2485_CR52","doi-asserted-by":"crossref","unstructured":"Sajid, U., Chen, X., Sajid, H., Kim, T., Wang, G.: Audio-visual transformer based crowd counting. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp.\u00a02249\u20132259, 2021","DOI":"10.1109\/ICCVW54120.2021.00254"},{"key":"2485_CR53","doi-asserted-by":"crossref","unstructured":"Dai, Y., Gieseke, F., Oehmcke, S., Wu, Y., Barnard, K.: Attentional feature fusion. In: Proceedings of the IEEE\/CVF Winter Conference on Applications of Computer Vision, pp.\u00a03560\u20133569, 2021","DOI":"10.1109\/WACV48630.2021.00360"},{"key":"2485_CR54","doi-asserted-by":"crossref","unstructured":"Zhu, C., He, Y., Savvides, M.: Feature selective anchor-free module for single-shot object detection. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp.\u00a0840\u2013849, 2019","DOI":"10.1109\/CVPR.2019.00093"},{"key":"2485_CR55","unstructured":"Liu, S., Huang, D., Wang, Y.: Learning spatial fusion for single-shot object detection. arXiv preprint arXiv:1911.09516, 2019"},{"key":"2485_CR56","doi-asserted-by":"crossref","unstructured":"Zhu, C., Chen, F., Shen, Z. Savvides, M.: Soft anchor-point object detection. In: Computer Vision\u2013ECCV 2020: 16th European Conference, Glasgow, UK, August 23\u201328, 2020, Proceedings, Part IX 16, pp.\u00a091\u2013107, Springer, 2020","DOI":"10.1007\/978-3-030-58545-7_6"},{"key":"2485_CR57","doi-asserted-by":"crossref","unstructured":"Chen, Q., Wang, Y., Yang, T., Zhang, X., Cheng, J., Sun, J.: You only look one-level feature. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp.\u00a013039\u201313048, 2021","DOI":"10.1109\/CVPR46437.2021.01284"},{"key":"2485_CR58","doi-asserted-by":"crossref","unstructured":"Hu, J., Shen, L., Sun, G.: Squeeze-and-excitation networks. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp.\u00a07132\u20137141, 2018","DOI":"10.1109\/CVPR.2018.00745"},{"key":"2485_CR59","doi-asserted-by":"crossref","unstructured":"Sindagi, V., Yasarla, R., Patel, V.M.: Jhu-crowd++: Large-scale crowd counting dataset and a benchmark method. In: IEEE Transactions on Pattern Analysis and Machine Intelligence, 2020","DOI":"10.1109\/TPAMI.2020.3035969"},{"key":"2485_CR60","doi-asserted-by":"crossref","unstructured":"Sindagi, V.A., Yasarla, R., Patel, V.M.: Pushing the frontiers of unconstrained crowd counting: New dataset and benchmark method. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp.\u00a01221\u20131231, 2019","DOI":"10.1109\/ICCV.2019.00131"},{"key":"2485_CR61","doi-asserted-by":"crossref","unstructured":"Idrees, H., Tayyab, M., Athrey, K., Zhang, D., Al-Maadeed, S., Rajpoot, N., Shah, M.: Composition loss for counting, density map estimation and localization in dense crowds. In: Proceedings of the European Conference on Computer Vision (ECCV), pp.\u00a0532\u2013546, 2018","DOI":"10.1007\/978-3-030-01216-8_33"},{"key":"2485_CR62","doi-asserted-by":"crossref","unstructured":"Idrees, H., Saleemi, I., Seibert, C., Shah, M.: Multi-source multi-scale counting in extremely dense crowd images. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp.\u00a02547\u20132554, 2013","DOI":"10.1109\/CVPR.2013.329"},{"key":"2485_CR63","unstructured":"Kingma D.P., Ba, J.: Adam: a method for stochastic optimization. arXiv preprint arXiv:1412.6980, 2014"},{"key":"2485_CR64","doi-asserted-by":"crossref","unstructured":"Sindagi V.A., Patel, V.M.: Cnn-based cascaded multi-task learning of high-level prior and density estimation for crowd counting. In: 2017 14th IEEE International Conference on Advanced Video and Signal Based Surveillance (AVSS), pp.\u00a01\u20136, IEEE, 2017","DOI":"10.1109\/AVSS.2017.8078491"},{"key":"2485_CR65","doi-asserted-by":"crossref","unstructured":"Liu, L., Qiu, Z., Li, G., Liu, S., Ouyang, W., Lin, L.: Crowd counting with deep structured scale integration network. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp.\u00a01774\u20131783, 2019","DOI":"10.1109\/ICCV.2019.00186"},{"key":"2485_CR66","doi-asserted-by":"crossref","unstructured":"Liu, W., Salzmann, M., Fua, P.: Context-aware crowd counting. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp.\u00a05099\u20135108, 2019","DOI":"10.1109\/CVPR.2019.00524"},{"key":"2485_CR67","doi-asserted-by":"crossref","unstructured":"Li, Y., Zhang, X., Chen, D.: Csrnet: dilated convolutional neural networks for understanding the highly congested scenes. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp.\u00a01091\u20131100, 2018","DOI":"10.1109\/CVPR.2018.00120"},{"key":"2485_CR68","doi-asserted-by":"crossref","unstructured":"Sindagi V.A., Patel, V.M.: Multi-level bottom-top and top-bottom feature fusion for crowd counting. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp.\u00a01002\u20131012, 2019","DOI":"10.1109\/ICCV.2019.00109"},{"key":"2485_CR69","doi-asserted-by":"crossref","unstructured":"Wang, Q., Gao, J., Lin, W., Yuan, Y.: Learning from synthetic data for crowd counting in the wild. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp.\u00a08198\u20138207, 2019","DOI":"10.1109\/CVPR.2019.00839"},{"key":"2485_CR70","doi-asserted-by":"crossref","unstructured":"Ma, Z., Wei, X., Hong, X., Gong, Y.: Bayesian loss for crowd count estimation with point supervision. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp.\u00a06142\u20136151, 2019","DOI":"10.1109\/ICCV.2019.00624"},{"key":"2485_CR71","doi-asserted-by":"crossref","unstructured":"Xu, C., Liang, D., Xu, Y., Bai, S., Zhan, W., Bai, X., Tomizuka, M.: Autoscale: learning to scale for crowd counting. Int. J. Comput. Vis., pp.\u00a01\u201330, (2022)","DOI":"10.1007\/s11263-021-01542-z"},{"key":"2485_CR72","doi-asserted-by":"crossref","unstructured":"Wan, J., Liu, Z., Chan, A.B.: A generalized loss function for crowd counting and localization. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp.\u00a01974\u20131983, 2021","DOI":"10.1109\/CVPR46437.2021.00201"},{"key":"2485_CR73","doi-asserted-by":"crossref","unstructured":"Abousamra, S., Hoai, M., Samaras, D., Chen, C.: Localization in the crowd with topological constraints. In: Proceedings of AAAI Conference on Artificial Intelligence, 2021","DOI":"10.1609\/aaai.v35i2.16170"},{"key":"2485_CR74","doi-asserted-by":"crossref","unstructured":"Liang, D., Xu, W., Bai, X.: An end-to-end transformer model for crowd localization. arXiv preprint arXiv:2202.13065, 2022","DOI":"10.1007\/978-3-031-19769-7_3"},{"key":"2485_CR75","doi-asserted-by":"crossref","unstructured":"Shi, Z., Mettes, P., Snoek, C.G.: Counting with focus for free. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp.\u00a04200\u20134209, 2019","DOI":"10.1109\/ICCV.2019.00430"},{"key":"2485_CR76","doi-asserted-by":"crossref","unstructured":"Zhang, A., Shen, J., Xiao, Z., Zhu, F., Zhen, X., Cao, X., Shao, L.: Relational attention network for crowd counting. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp.\u00a06788\u20136797, 2019","DOI":"10.1109\/ICCV.2019.00689"},{"key":"2485_CR77","doi-asserted-by":"crossref","unstructured":"Bai, S., He, Z., Qiao, Y., Hu, H., Wu, W., Yan, J.: Adaptive dilated network with self-correction supervision for counting. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp.\u00a04594\u20134603, 2020","DOI":"10.1109\/CVPR42600.2020.00465"},{"key":"2485_CR78","doi-asserted-by":"crossref","unstructured":"Zheng, S., Lu, J., Zhao, H., Zhu, X., Luo, Z., Wang, Y., Fu, Y., Feng, J., Xiang, T., Torr, P.H., et\u00a0al.: Rethinking semantic segmentation from a sequence-to-sequence perspective with transformers. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp.\u00a06881\u20136890, 2021","DOI":"10.1109\/CVPR46437.2021.00681"},{"key":"2485_CR79","first-page":"2576","volume":"35","author":"Q Song","year":"2021","unstructured":"Song, Q., Wang, C., Wang, Y., Tai, Y., Wang, C., Li, J., Wu, J., Ma, J.: To choose or to fuse? scale selection for crowd counting. Proc. AAAI Conf. Artif. Intell. 35, 2576\u20132583 (2021)","journal-title":"Proc. AAAI Conf. Artif. Intell."}],"container-title":["The Visual Computer"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00371-022-02485-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00371-022-02485-3\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00371-022-02485-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,6,29]],"date-time":"2023-06-29T10:04:49Z","timestamp":1688033089000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00371-022-02485-3"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,4,23]]},"references-count":79,"journal-issue":{"issue":"7","published-print":{"date-parts":[[2023,7]]}},"alternative-id":["2485"],"URL":"https:\/\/doi.org\/10.1007\/s00371-022-02485-3","relation":{},"ISSN":["0178-2789","1432-2315"],"issn-type":[{"value":"0178-2789","type":"print"},{"value":"1432-2315","type":"electronic"}],"subject":[],"published":{"date-parts":[[2022,4,23]]},"assertion":[{"value":"27 March 2022","order":1,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"23 April 2022","order":2,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"All authors declare that they have no conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}]}}