{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,29]],"date-time":"2026-03-29T20:24:29Z","timestamp":1774815869781,"version":"3.50.1"},"reference-count":115,"publisher":"Springer Science and Business Media LLC","issue":"2","license":[{"start":{"date-parts":[[2022,1,4]],"date-time":"2022-01-04T00:00:00Z","timestamp":1641254400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2022,1,4]],"date-time":"2022-01-04T00:00:00Z","timestamp":1641254400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int J Comput Vis"],"published-print":{"date-parts":[[2022,2]]},"DOI":"10.1007\/s11263-021-01542-z","type":"journal-article","created":{"date-parts":[[2022,1,4]],"date-time":"2022-01-04T12:02:41Z","timestamp":1641297761000},"page":"405-434","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":108,"title":["AutoScale: Learning to Scale for Crowd Counting"],"prefix":"10.1007","volume":"130","author":[{"given":"Chenfeng","family":"Xu","sequence":"first","affiliation":[]},{"given":"Dingkang","family":"Liang","sequence":"additional","affiliation":[]},{"given":"Yongchao","family":"Xu","sequence":"additional","affiliation":[]},{"given":"Song","family":"Bai","sequence":"additional","affiliation":[]},{"given":"Wei","family":"Zhan","sequence":"additional","affiliation":[]},{"given":"Xiang","family":"Bai","sequence":"additional","affiliation":[]},{"given":"Masayoshi","family":"Tomizuka","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2022,1,4]]},"reference":[{"key":"1542_CR1","doi-asserted-by":"crossref","unstructured":"Arteta, C., Lempitsky, V., Noble, J. A. & Zisserman, A. (2014). Interactive object counting. In Proceedings of European conference on computer vision (pp. 504\u2013518). Springer.","DOI":"10.1007\/978-3-319-10578-9_33"},{"key":"1542_CR2","doi-asserted-by":"crossref","unstructured":"Arteta, C., Lempitsky, V., & Zisserman, A. (2016). Counting in the wild. In Proceedings of European conference on computer vision (pp. 483\u2013498). Springer","DOI":"10.1007\/978-3-319-46478-7_30"},{"key":"1542_CR3","doi-asserted-by":"crossref","unstructured":"Babu\u00a0Sam, D., Sajjan, N. N., Venkatesh\u00a0Babu, R. & Srinivasan, M. (2018). Divide and grow: Capturing huge diversity in crowd images with incrementally growing cnn. In Proceedings of IEEE international conferences on computer vision and pattern recognition (pp. 3618\u20133626).","DOI":"10.1109\/CVPR.2018.00381"},{"issue":"12","key":"1542_CR4","doi-asserted-by":"publisher","first-page":"2481","DOI":"10.1109\/TPAMI.2016.2644615","volume":"39","author":"V Badrinarayanan","year":"2017","unstructured":"Badrinarayanan, V., Kendall, A., & Cipolla, R. (2017). Segnet: A deep convolutional encoder-decoder architecture for image segmentation. IEEE Transactions on Pattern Analysis and Machine Intelligence, 39(12), 2481\u20132495.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"1542_CR5","doi-asserted-by":"crossref","unstructured":"Bai, S., He, Z., Qiao, Y., Hu, H., Wu, W. & Yan, J. (2020). Adaptive dilated network with self-correction supervision for counting. In Proceedings of IEEE international conferences on computer vision and pattern recognition (pp. 4594\u20134603).","DOI":"10.1109\/CVPR42600.2020.00465"},{"key":"1542_CR6","volume-title":"Digital image processing: Principles and applications","author":"GA Baxes","year":"1994","unstructured":"Baxes, G. A. (1994). Digital image processing: Principles and applications. New York: Wiley."},{"key":"1542_CR7","doi-asserted-by":"crossref","unstructured":"Brostow, G. J., & Cipolla, R. (2006). Unsupervised Bayesian detection of independent motion in crowds. In Proceedings of IEEE international conference on computer vision and pattern recognition (vol. 1, pp. 594\u2013601).","DOI":"10.1109\/CVPR.2006.320"},{"key":"1542_CR8","unstructured":"Cao, K., Wei, C., Gaidon, A., Arechiga, N., & Ma, T. (2019). Learning imbalanced datasets with label-distribution-aware margin loss. In Advances in neural information processing systems (pp. 1565\u20131576)."},{"key":"1542_CR9","doi-asserted-by":"crossref","unstructured":"Cao, X., Wang, Z., Zhao, Y. & Su, F. (2018). Scale aggregation network for accurate and efficient crowd counting. In Proceedings of European conference on computer vision (pp. 734\u2013750).","DOI":"10.1007\/978-3-030-01228-1_45"},{"key":"1542_CR10","doi-asserted-by":"crossref","unstructured":"Chan, A. B., Liang, Z. S. J. & Vasconcelos, N. (2008). Privacy preserving crowd monitoring: Counting people without people models or tracking. In Proceedings of IEEE international conference on computer vision and pattern recognition (pp. 1\u20137).","DOI":"10.1109\/CVPR.2008.4587569"},{"key":"1542_CR11","doi-asserted-by":"crossref","unstructured":"Chen, K., Loy, C. C., Gong, S. & Xiang, T. (2012). Feature mining for localised crowd counting. In Proceedings of BMVC (p.\u00a03).","DOI":"10.5244\/C.26.21"},{"key":"1542_CR12","unstructured":"Chen, T. Y., Chen, C. H., Wang, D. J. & Kuo, Y. L. (2010). A people counting system based on face-detection. In Proceedings of international conference on genetic and evolutionary computing (pp. 699\u2013702)."},{"key":"1542_CR13","doi-asserted-by":"crossref","unstructured":"Cheng, Z. Q., Li, J. X., Dai, Q., Wu, X. & Hauptmann, A. G. (2019). Learning spatial awareness to improve crowd counting. In Proceedings of IEEE international conference on computer vision (pp. 6152\u20136161).","DOI":"10.1109\/ICCV.2019.00625"},{"key":"1542_CR14","doi-asserted-by":"crossref","unstructured":"Cui, Y., Song, Y., Sun, C., Howard, A., & Belongie, S. (2018). Large scale fine-grained categorization and domain-specific transfer learning. In Proceedings of IEEE international conference on computer vision and pattern recognition (pp. 4109\u20134118).","DOI":"10.1109\/CVPR.2018.00432"},{"key":"1542_CR15","doi-asserted-by":"crossref","unstructured":"Dong, Q., Gong, S., & Zhu, X. (2017). Class rectification hard mining for imbalanced deep learning. In Proceedings of IEEE international conference on computer vision (pp. 1851\u20131860).","DOI":"10.1109\/ICCV.2017.205"},{"key":"1542_CR16","doi-asserted-by":"crossref","unstructured":"Fu, H., Gong, M., Wang, C., Batmanghelich, K., & Tao, D. (2018). Deep ordinal regression network for monocular depth estimation. In Proceedings of the IEEE conference on computer vision and pattern recognition (pp. 2002\u20132011).","DOI":"10.1109\/CVPR.2018.00214"},{"key":"1542_CR17","unstructured":"Gao, J., Han, T., Wang, Q., & Yuan, Y. (2019). Domain-adaptive crowd counting via inter-domain features segregation and gaussian-prior reconstruction. arXiv preprint arXiv:1912.03677"},{"key":"1542_CR18","unstructured":"Gao, J., Lin, W., Zhao, B., Wang, D., Gao, C., & Wen, J. (2019). C 3 framework: An open-source pytorch code for crowd counting. arXiv:1907.02724."},{"key":"1542_CR19","doi-asserted-by":"crossref","unstructured":"Ge, W., & Collins, R. T. (2009). Marked point processes for crowd counting. In Proceedings of IEEE international conference on computer vision and pattern recognition (pp. 2913\u20132920).","DOI":"10.1109\/CVPR.2009.5206621"},{"key":"1542_CR20","doi-asserted-by":"crossref","unstructured":"Geng, C., Huang, S. J., & Chen, S. (2020). Recent advances in open set recognition: A survey. IEEE Transactions on Pattern Analysis and Machine Intelligence.","DOI":"10.1109\/TPAMI.2020.2981604"},{"key":"1542_CR21","doi-asserted-by":"crossref","unstructured":"Girshick, R. (2015). Fast R-CNN. In Proceedings of IEEE international conference on computer vision (pp. 1440\u20131448).","DOI":"10.1109\/ICCV.2015.169"},{"key":"1542_CR22","doi-asserted-by":"crossref","unstructured":"Guerrero-G\u00f3mez-Olmedo, R., Torre-Jim\u00e9nez, B., L\u00f3pez-Sastre, R., Maldonado-Basc\u00f3n, S., & Onoro-Rubio, D. (2015). Extremely overlapping vehicle counting. In Iberian conference on pattern recognition and image analysis (pp. 423\u2013431). Springer.","DOI":"10.1007\/978-3-319-19390-8_48"},{"key":"1542_CR23","unstructured":"Ha, D., Dai, A., & Le, Q. V. (2016). Hypernetworks. arXiv preprint arXiv:1609.09106"},{"issue":"9","key":"1542_CR24","doi-asserted-by":"publisher","first-page":"1263","DOI":"10.1109\/TKDE.2008.239","volume":"21","author":"H He","year":"2009","unstructured":"He, H., & Garcia, E. A. (2009). Learning from imbalanced data. IEEE Transactions on knowledge and data engineering, 21(9), 1263\u20131284.","journal-title":"IEEE Transactions on knowledge and data engineering"},{"key":"1542_CR25","doi-asserted-by":"crossref","unstructured":"He, K., Gkioxari, G., Doll\u00e1r, P., & Girshick, R. (2017). Mask r-cnn. In Proceedings of IEEE international conference on computer vision (pp. 2961\u20132969).","DOI":"10.1109\/ICCV.2017.322"},{"key":"1542_CR26","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., & Sun, J. (2016). Deep residual learning for image recognition. In Proceedings of IEEE international conference on computer vision and pattern recognition (pp. 770\u2013778).","DOI":"10.1109\/CVPR.2016.90"},{"key":"1542_CR27","doi-asserted-by":"crossref","unstructured":"Hossain, M., Hosseinzadeh, M., Chanda, O., & Wang, Y. (2019). Crowd counting using scale-aware attention networks. In 2019 IEEE winter conference on applications of computer vision (WACV) (pp. 1280\u20131288). IEEE.","DOI":"10.1109\/WACV.2019.00141"},{"key":"1542_CR28","doi-asserted-by":"crossref","unstructured":"Hu, P., & Ramanan, D. (2017). Finding tiny faces. In Proceedings of IEEE international conference on computer vision and pattern recognition (pp. 951\u2013959).","DOI":"10.1109\/CVPR.2017.166"},{"key":"1542_CR29","doi-asserted-by":"crossref","unstructured":"Hu, Y., Jiang, X., Liu, X., Zhang, B., Han, J., Cao, X., & Doermann, D. (2020). Nas-count: Counting-by-density with neural architecture search. In Proceedings of European conference on computer vision. Springer.","DOI":"10.1007\/978-3-030-58542-6_45"},{"key":"1542_CR30","doi-asserted-by":"crossref","unstructured":"Huang, G., Liu, Z., Van Der\u00a0Maaten, L., & Weinberger, K. Q. (2017). Densely connected convolutional networks. In Proceedings of IEEE international conference on computer vision and pattern recognition (pp. 4700\u20134708).","DOI":"10.1109\/CVPR.2017.243"},{"issue":"10","key":"1542_CR31","doi-asserted-by":"publisher","first-page":"1986","DOI":"10.1109\/TPAMI.2015.2396051","volume":"37","author":"H Idrees","year":"2015","unstructured":"Idrees, H., Soomro, K., & Shah, M. (2015). Detecting humans in dense crowds using locally-consistent scale prior and global occlusion reasoning. IEEE Transactions on Pattern Analysis and Machine Intelligence, 37(10), 1986\u20131998.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"1542_CR32","doi-asserted-by":"crossref","unstructured":"Idrees, H., Tayyab, M., Athrey, K., Zhang, D., Al-Maadeed, S., Rajpoot, N., & Shah, M. (2018). Composition loss for counting, density map estimation and localization in dense crowds. In Proceedings of European conference on computer vision. Springer.","DOI":"10.1007\/978-3-030-01216-8_33"},{"key":"1542_CR33","unstructured":"Jaderberg, M., Simonyan, K., Zisserman, A., et\u00a0al. (2015). Spatial transformer networks. In Proceedings of advances in neural information processing systems (pp. 2017\u20132025)."},{"key":"1542_CR34","doi-asserted-by":"crossref","unstructured":"Jiang, S., Lu, X., Lei, Y., & Liu, L. (2019). Mask-aware networks for crowd counting. IEEE Transactions on Circuits and Systems for Video Technology.","DOI":"10.1109\/TCSVT.2019.2934989"},{"key":"1542_CR35","doi-asserted-by":"crossref","unstructured":"Jiang, X., Xiao, Z., Zhang, B., Zhen, X., Cao, X., Doermann, D., & Shao, L. (2019). Crowd counting and density estimation by trellis encoder-decoder networks. In Proceedings of IEEE international conference on computer vision and pattern recognition (pp. 6133\u20136142).","DOI":"10.1109\/CVPR.2019.00629"},{"key":"1542_CR36","doi-asserted-by":"crossref","unstructured":"Jiang, X., Zhang, L., Lv, P., Guo, Y., Zhu, R., Li, Y., Pang, Y., Li, X., Zhou, B., & Xu, M. (2019). Learning multi-level density maps for crowd counting. IEEE Transactions on Neural Networks and Learning Systems.","DOI":"10.1109\/TNNLS.2019.2933920"},{"key":"1542_CR37","doi-asserted-by":"crossref","unstructured":"Jiang, X., Zhang, L., Xu, M., Zhang, T., Lv, P., Zhou, B., Yang, X., & Pang, Y. (2020). Attention scaling for crowd counting. In Proceedings of IEEE international conference on computer vision and pattern recognition (pp. 4706\u20134715).","DOI":"10.1109\/CVPR42600.2020.00476"},{"key":"1542_CR38","unstructured":"Kang, D., & Chan, A. (2018). Crowd counting by adaptively fusing predictions from an image pyramid. In Proceedings of BMVC."},{"issue":"5","key":"1542_CR39","doi-asserted-by":"publisher","first-page":"1408","DOI":"10.1109\/TCSVT.2018.2837153","volume":"29","author":"D Kang","year":"2018","unstructured":"Kang, D., Ma, Z., & Chan, A. B. (2018). Beyond counting: Comparisons of density maps for crowd analysis tasks-counting, detection, and tracking. IEEE Transactions on Circuits and Systems for Video Technology, 29(5), 1408\u20131422.","journal-title":"IEEE Transactions on Circuits and Systems for Video Technology"},{"key":"1542_CR40","unstructured":"Kingma, D. P., & Ba, J. (2014). Adam: A method for stochastic optimization. In Proceedings of international conference on learning representations."},{"key":"1542_CR41","doi-asserted-by":"crossref","unstructured":"Laradji, I. H., Rostamzadeh, N., Pinheiro, P. O., Vazquez, D., & Schmidt, M. (2018). Where are the blobs: Counting by localization with point supervision. In Proceedings of European conference on computer vision (pp. 547\u2013562).","DOI":"10.1007\/978-3-030-01216-8_34"},{"key":"1542_CR42","doi-asserted-by":"crossref","unstructured":"Li, Y., Zhang, X., & Chen, D. (2018). CSRNet: Dilated convolutional neural networks for understanding the highly congested scenes. In Proceedings of IEEE international conference on computer vision and pattern recognition (pp. 1091\u20131100).","DOI":"10.1109\/CVPR.2018.00120"},{"key":"1542_CR43","doi-asserted-by":"crossref","unstructured":"Lian, D., Li, J., Zheng, J., Luo, W., & Gao, S. (2019). Density map regression guided detection network for RGB-D crowd counting and localization. In Proceedings of IEEE international conference on computer vision and pattern recognition (pp. 1821\u20131830).","DOI":"10.1109\/CVPR.2019.00192"},{"key":"1542_CR44","doi-asserted-by":"crossref","unstructured":"Lin, T. Y., Doll\u00e1r, P., Girshick, R. B., He, K., Hariharan, B., & Belongie, S. J. (2017). Feature pyramid networks for object detection. In Proceedings of IEEE international conference on computer vision and pattern recognition (Vol.\u00a01, p.\u00a04).","DOI":"10.1109\/CVPR.2017.106"},{"key":"1542_CR45","doi-asserted-by":"crossref","unstructured":"Lin, T. Y., Goyal, P., Girshick, R., He, K., & Doll\u00e1r, P. (2017). Focal loss for dense object detection. In Proceedings of IEEE international conference on computer vision (pp. 2980\u20132988).","DOI":"10.1109\/ICCV.2017.324"},{"key":"1542_CR46","doi-asserted-by":"crossref","unstructured":"Liu, B., & Vasconcelos, N. (2015). Bayesian model adaptation for crowd counts. In Proceedings of IEEE international conference on computer vision (pp. 4175\u20134183).","DOI":"10.1109\/ICCV.2015.475"},{"key":"1542_CR47","doi-asserted-by":"crossref","unstructured":"Liu, C., Weng, X., & Mu, Y. (2019). Recurrent attentive zooming for joint crowd counting and precise localization. In Proceedings of IEEE international conference on computer vision and pattern recognition (pp. 1217\u20131226).","DOI":"10.1109\/CVPR.2019.00131"},{"key":"1542_CR48","doi-asserted-by":"crossref","unstructured":"Liu, J., Gao, C., Meng, D., & Hauptmann, A. G. (2018). Decidenet: counting varying density crowds through attention guided detection and density estimation. In Proceedings of IEEE international conference on computer vision and pattern recognition (pp. 5197\u20135206).","DOI":"10.1109\/CVPR.2018.00545"},{"key":"1542_CR49","doi-asserted-by":"crossref","unstructured":"Liu, L., Lu, H., Zou, H., Xiong, H., Cao, Z., & Chun, H. (2020). Weighing counts: Sequential crowd counting by reinforcement learning.","DOI":"10.1007\/978-3-030-58607-2_10"},{"key":"1542_CR50","doi-asserted-by":"crossref","unstructured":"Liu, L., Qiu, Z., Li, G., Liu, S., Ouyang, W., & Lin, L. (2019). Crowd counting with deep structured scale integration network. In Proceedings of IEEE international conference on computer vision (pp. 1774\u20131783).","DOI":"10.1109\/ICCV.2019.00186"},{"key":"1542_CR51","doi-asserted-by":"crossref","unstructured":"Liu, L., Wang, H., Li, G., Ouyang, W., & Lin, L. (2018). Crowd counting using deep recurrent spatial-aware network. IJCAI.","DOI":"10.24963\/ijcai.2018\/118"},{"key":"1542_CR52","doi-asserted-by":"crossref","unstructured":"Liu, N., Long, Y., Zou, C., Niu, Q., Pan, L., & Wu, H. (2019). Adcrowdnet: An attention-injective deformable convolutional network for crowd understanding. In Proceedings of IEEE international conference on computer vision and pattern recognition (pp. 3225\u20133234).","DOI":"10.1109\/CVPR.2019.00334"},{"key":"1542_CR53","doi-asserted-by":"crossref","unstructured":"Liu, W., Salzmann, M., & Fua, P. (2019). Context-aware crowd counting. In Proceedings of IEEE international conference on computer vision and pattern recognition (pp. 5099\u20135108).","DOI":"10.1109\/CVPR.2019.00524"},{"key":"1542_CR54","doi-asserted-by":"crossref","unstructured":"Liu, X., Van De Weijer, J., & Bagdanov, A. D. (2019). Exploiting unlabeled data in cnns by self-supervised learning to rank. IEEE Transactions on Pattern Analysis and Machine Intelligence.","DOI":"10.1109\/TPAMI.2019.2899857"},{"key":"1542_CR55","doi-asserted-by":"crossref","unstructured":"Liu, X., van\u00a0de Weijer, J., & Bagdanov, A. D. (2018). Leveraging unlabeled data for crowd counting by learning to rank. In Proceedings of IEEE international conference on computer vision and pattern recognition.","DOI":"10.1109\/CVPR.2018.00799"},{"key":"1542_CR56","doi-asserted-by":"crossref","unstructured":"Liu, X., Yang, J., & Ding, W. (2020). Adaptive mixture regression network with local counting map for crowd counting. In Proceedings of European conference on computer vision. Springer.","DOI":"10.1007\/978-3-030-58586-0_15"},{"key":"1542_CR57","doi-asserted-by":"crossref","unstructured":"Liu, Y., Shi, M., Zhao, Q., & Wang, X. (2019). Point in, box out: Beyond counting persons in crowds. In Proceedings of IEEE international conference on computer vision and pattern recognition (pp. 6469\u20136478).","DOI":"10.1109\/CVPR.2019.00663"},{"key":"1542_CR58","doi-asserted-by":"crossref","unstructured":"Luo, A., Yang, F., Li, X., Nie, D., Jiao, Z., Zhou, S., & Cheng, H. (2020). Hybrid graph neural networks for crowd counting. In Proceedings of the AAAI conference on artificial intelligence.","DOI":"10.1609\/aaai.v34i07.6839"},{"key":"1542_CR59","doi-asserted-by":"crossref","unstructured":"Ma, Z., Wei, X., Hong, X., & Gong, Y. (2019). Bayesian loss for crowd count estimation with point supervision. In Proceedings of IEEE international conference on computer vision (pp. 6142\u20136151).","DOI":"10.1109\/ICCV.2019.00624"},{"key":"1542_CR60","doi-asserted-by":"crossref","unstructured":"Miao, Y., Lin, Z., Ding, G., & Han, J. (2020). Shallow feature based dense attention network for crowd counting. In Proceedings of the AAAI conference on artificial intelligence.","DOI":"10.1609\/aaai.v34i07.6848"},{"key":"1542_CR61","doi-asserted-by":"crossref","unstructured":"Najibi, M., Singh, B., & Davis, L. S. (2019) Autofocus: Efficient multi-scale inference. In Proceedings of IEEE international conference on computer vision.","DOI":"10.1109\/ICCV.2019.00984"},{"key":"1542_CR62","doi-asserted-by":"crossref","unstructured":"Oh, M. H., Olsen, P. A., & Ramamurthy, K. N. (2020). Crowd counting with decomposed uncertainty. In Proceedings of the AAAI conference on artificial intelligence.","DOI":"10.1609\/aaai.v34i07.6852"},{"key":"1542_CR63","doi-asserted-by":"crossref","unstructured":"Oh\u00a0Song, H., Xiang, Y., Jegelka, S., & Savarese, S. (2016). Deep metric learning via lifted structured feature embedding. In Proceedings of IEEE international conference on computer vision and pattern recognition (pp. 4004\u20134012).","DOI":"10.1109\/CVPR.2016.434"},{"key":"1542_CR64","doi-asserted-by":"crossref","unstructured":"Olmschenk, G., Tang, H., & Zhu, Z. (2019). Improving dense crowd counting convolutional neural networks using inverse k-nearest neighbor maps and multiscale upsampling. arXiv preprint arXiv:1902.05379.","DOI":"10.5220\/0009156201850195"},{"key":"1542_CR65","doi-asserted-by":"crossref","unstructured":"Onoro-Rubio, D., & L\u00f3pez-Sastre, R. J. (2016). Towards perspective-free object counting with deep learning. In Proceedings of European conference on computer vision (pp. 615\u2013629).","DOI":"10.1007\/978-3-319-46478-7_38"},{"key":"1542_CR66","doi-asserted-by":"crossref","unstructured":"Ouyang, W., Wang, X., Zhang, C., & Yang, X. (2016). Factors in finetuning deep model for object detection with long-tail distribution. In Proceedings of IEEE international conference on computer vision and pattern recognition (pp. 864\u2013873).","DOI":"10.1109\/CVPR.2016.100"},{"key":"1542_CR67","doi-asserted-by":"crossref","unstructured":"Ranjan, V., Le, H., & Hoai, M. (2018). Iterative crowd counting. In Proceedings of European conference on computer vision.","DOI":"10.1007\/978-3-030-01234-2_17"},{"key":"1542_CR68","doi-asserted-by":"crossref","unstructured":"Recasens, A., Kellnhofer, P., Stent, S., Matusik, W., & Torralba, A. (2018) Learning to zoom: A saliency-based sampling layer for neural networks. In Proceedings of European conference on computer vision (pp. 51\u201366).","DOI":"10.1007\/978-3-030-01240-3_4"},{"key":"1542_CR69","unstructured":"Ren, S., He, K., Girshick, R., & Sun, J. (2015). Faster r-cnn: Towards real-time object detection with region proposal networks. In Proceedings of advances in neural information processing systems (pp. 91\u201399)."},{"key":"1542_CR70","doi-asserted-by":"crossref","unstructured":"Ribera, J., G\u00fcera, D., Chen, Y., & Delp, E. J. (2019). Locating objects without bounding boxes. In Proceedings of IEEE international conference on computer vision and pattern recognition, Long Beach, CA.","DOI":"10.1109\/CVPR.2019.00664"},{"key":"1542_CR71","doi-asserted-by":"crossref","unstructured":"Rodriguez, M., Laptev, I., Sivic, J., & Audibert, J. Y. (2011) Density-aware person detection and tracking in crowds. In Proceedings of IEEE international conference on computer vision (pp. 2423\u20132430).","DOI":"10.1109\/ICCV.2011.6126526"},{"key":"1542_CR72","doi-asserted-by":"crossref","unstructured":"Sajid, U., Sajid, H., Wang, H., & Wang, G. (2020). Zoomcount: A zooming mechanism for crowd counting in static images. IEEE Transactions on Circuits and Systems for Video Technology.","DOI":"10.1109\/TCSVT.2020.2978717"},{"key":"1542_CR73","doi-asserted-by":"crossref","unstructured":"Salakhutdinov, R., Torralba, A., & Tenenbaum, J. (2011). Learning to share visual appearance for multiclass object detection. In CVPR 2011 (pp. 1481\u20131488). IEEE.","DOI":"10.1109\/CVPR.2011.5995720"},{"key":"1542_CR74","unstructured":"Sam, D. B., Peri, S. V., Sundararaman, M. N., Kamath, A., & Radhakrishnan, V. B. (2020). Locate, size and count: Accurately resolving people in dense crowds via detection. IEEE Transactions on Pattern Analysis and Machine Intelligence."},{"key":"1542_CR75","doi-asserted-by":"crossref","unstructured":"Sam, D. B., Surya, S., & Babu, R. V. (2017). Switching convolutional neural network for crowd counting. In Proceedings of IEEE international conference on computer vision and pattern recognition (vol.\u00a01, p.\u00a06).","DOI":"10.1109\/CVPR.2017.429"},{"key":"1542_CR76","doi-asserted-by":"crossref","unstructured":"Shi, M., Yang, Z., Xu, C., & Chen, Q. (2019). Revisiting perspective information for efficient crowd counting. In Proceedings of IEEE international conference on computer vision and pattern recognition (pp. 7279\u20137288).","DOI":"10.1109\/CVPR.2019.00745"},{"key":"1542_CR77","doi-asserted-by":"crossref","unstructured":"Shi, Z., Mettes, P., & Snoek, C. G. (2019). Counting with focus for free. In Proceedings of IEEE international conference on computer vision (pp. 4200\u20134209).","DOI":"10.1109\/ICCV.2019.00430"},{"key":"1542_CR78","doi-asserted-by":"crossref","unstructured":"Shi, Z., Zhang, L., Liu, Y., Cao, X., Ye, Y., Cheng, M. M., & Zheng, G. (2018). Crowd counting with deep negative correlation learning. In Proceedings of IEEE international conference on computer vision and pattern recognition (pp. 5382\u20135390).","DOI":"10.1109\/CVPR.2018.00564"},{"key":"1542_CR79","unstructured":"Simonyan, K., & Zisserman, A. (2015). Very deep convolutional networks for large-scale image recognition. In Proceedings of international conference on learning representations."},{"key":"1542_CR80","doi-asserted-by":"crossref","unstructured":"Sindagi, V. A., & Patel, V. M. (2017). Cnn-based cascaded multi-task learning of high-level prior and density estimation for crowd counting. In Proceedings of IEEE international conference on advanced video and signal based surveillance (pp. 1\u20136).","DOI":"10.1109\/AVSS.2017.8078491"},{"key":"1542_CR81","doi-asserted-by":"crossref","unstructured":"Sindagi, V. A., & Patel, V. M. (2017). Generating high-quality crowd density maps using contextual pyramid cnns. In Proceedings of IEEE international conference on computer vision.","DOI":"10.1109\/ICCV.2017.206"},{"key":"1542_CR82","doi-asserted-by":"publisher","first-page":"3","DOI":"10.1016\/j.patrec.2017.07.007","volume":"107","author":"VA Sindagi","year":"2018","unstructured":"Sindagi, V. A., & Patel, V. M. (2018). A survey of recent advances in cnn-based single image crowd counting and density estimation. Pattern Recognition Letters, 107, 3\u201316.","journal-title":"Pattern Recognition Letters"},{"key":"1542_CR83","doi-asserted-by":"crossref","unstructured":"Sindagi, V. A. & Patel, V. M. (2019, Accepted). HA-CCN: Hierarchical attention-based crowd counting network. IEEE Transactions on Image Processing.","DOI":"10.1109\/AVSS.2019.8909889"},{"key":"1542_CR84","doi-asserted-by":"crossref","unstructured":"Sindagi, V. A., & Patel, V. M. (2019). Multi-level bottom-top and top-bottom feature fusion for crowd counting. In Proceedings of IEEE international conference on computer vision (pp. 1002\u20131012).","DOI":"10.1109\/ICCV.2019.00109"},{"key":"1542_CR85","doi-asserted-by":"crossref","unstructured":"Sindagi, V. A., Yasarla, R., & Patel, V. M. (2019). Pushing the frontiers of unconstrained crowd counting: New dataset and benchmark method. In Proceedings of IEEE international conference on computer vision (pp. 1221\u20131231).","DOI":"10.1109\/ICCV.2019.00131"},{"key":"1542_CR86","doi-asserted-by":"crossref","unstructured":"Sindagi, V. A., Yasarla, R., & Patel, V. M. (2020). Jhu-crowd++: Large-scale crowd counting dataset and a benchmark method. IEEE Transactions on Pattern Analysis and Machine Intelligence.","DOI":"10.1109\/TPAMI.2020.3035969"},{"key":"1542_CR87","doi-asserted-by":"crossref","unstructured":"Singh, B., & Davis, L. S. (2018). An analysis of scale invariance in object detection\u2013snip. In Proceedings of IEEE international conference on computer vision and pattern recognition (pp. 3578\u20133587).","DOI":"10.1109\/CVPR.2018.00377"},{"key":"1542_CR88","unstructured":"Singh, B., Najibi, M., & Davis, L. S. (2018). Sniper: Efficient multi-scale training. In Proceedings of advances in neural information processing systems (pp. 9310\u20139320)."},{"key":"1542_CR89","doi-asserted-by":"crossref","unstructured":"Tian, Y., Lei, Y., Zhang, J., & Wang, J. Z. (2019). Padnet: Pan-density crowd counting. IEEE Transactions on Image Processing.","DOI":"10.1109\/TIP.2019.2952083"},{"key":"1542_CR90","unstructured":"Van\u00a0Horn, G., & Perona, P. (2017). The devil is in the tails: Fine-grained classification in the wild. arXiv preprint arXiv:1709.01450."},{"issue":"2","key":"1542_CR91","doi-asserted-by":"publisher","first-page":"153","DOI":"10.1007\/s11263-005-6644-8","volume":"63","author":"P Viola","year":"2005","unstructured":"Viola, P., Jones, M. J., & Snow, D. (2005). Detecting pedestrians using patterns of motion and appearance. International Journal of Computer Vision, 63(2), 153\u2013161.","journal-title":"International Journal of Computer Vision"},{"key":"1542_CR92","doi-asserted-by":"crossref","unstructured":"Wan, J., & Chan, A. (2019). Adaptive density map generation for crowd counting. In Proceedings of IEEE international conference on computer vision (pp. 1130\u20131139).","DOI":"10.1109\/ICCV.2019.00122"},{"key":"1542_CR93","doi-asserted-by":"crossref","unstructured":"Wan, J., Luo, W., Wu, B., Chan, A. B., & Liu, W. (2019). Residual regression with semantic prior for crowd counting. In Proceedings of IEEE international conference on computer vision and pattern recognition (pp. 4036\u20134045).","DOI":"10.1109\/CVPR.2019.00416"},{"key":"1542_CR94","doi-asserted-by":"crossref","unstructured":"Wang, M., & Wang, X. (2011). Automatic adaptation of a generic pedestrian detector to a specific traffic scene. In Proceedings of IEEE international conference on computer vision and pattern recognition (pp. 3401\u20133408).","DOI":"10.1109\/CVPR.2011.5995698"},{"key":"1542_CR95","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2020.3013269","author":"Q Wang","year":"2020","unstructured":"Wang, Q., Gao, J., Lin, W., & Li, X. (2020). Nwpu-crowd: A large-scale benchmark for crowd counting and localization. IEEE Transactions on Pattern Analysis and Machine Intelligence. https:\/\/doi.org\/10.1109\/TPAMI.2020.3013269","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"1542_CR96","doi-asserted-by":"crossref","unstructured":"Wang, Q., Gao, J., Lin, W., & Yuan, Y. (2019). Learning from synthetic data for crowd counting in the wild. In Proceedings of IEEE international conference on computer vision and pattern recognition (pp. 8198\u20138207).","DOI":"10.1109\/CVPR.2019.00839"},{"key":"1542_CR97","unstructured":"Wang, Y. X., Ramanan, D., & Hebert, M. (2017). Learning to model the tail. In Advances in neural information processing systems (pp. 7029\u20137039)."},{"key":"1542_CR98","doi-asserted-by":"crossref","unstructured":"Xiong, H., Lu, H., Liu, C., Liu, L., Cao, Z., & Shen, C. (2019). From open set to closed set: Counting objects by spatial divide-and-conquer. In Proceedings of IEEE international conference on computer vision (pp. 8362\u20138371).","DOI":"10.1109\/ICCV.2019.00845"},{"key":"1542_CR99","doi-asserted-by":"crossref","unstructured":"Xu, C., Qiu, K., Fu, J., Bai, S., Xu, Y., & Bai, X. (2019). Learn to scale: Generating multipolar normalized density map for crowd counting. In Proceedings of IEEE international conference on computer vision.","DOI":"10.1109\/ICCV.2019.00847"},{"key":"1542_CR100","doi-asserted-by":"crossref","unstructured":"Yan, Z., Yuan, Y., Zuo, W., Tan, X., Wang, Y., Wen, S., & Ding, E. (2019). Perspective-guided convolution networks for crowd counting. In Proceedings of IEEE international conference on computer vision (pp. 952\u2013961).","DOI":"10.1109\/ICCV.2019.00104"},{"key":"1542_CR101","doi-asserted-by":"crossref","unstructured":"Yang, Y., Li, G., Wu, Z., Su, L., Huang, Q., & Sebe, N. (2020). Reverse perspective network for perspective-aware object counting. In Proceedings of IEEE international conference on computer vision and pattern recognition (pp. 4374\u20134383).","DOI":"10.1109\/CVPR42600.2020.00443"},{"key":"1542_CR102","doi-asserted-by":"crossref","unstructured":"Zhang, A., Shen, J., Xiao, Z., Zhu, F., Zhen, X., Cao, X., & Shao, L. (2019). Relational attention network for crowd counting. In Proceedings of IEEE international conference on computer vision (pp. 6788\u20136797).","DOI":"10.1109\/ICCV.2019.00689"},{"key":"1542_CR103","doi-asserted-by":"crossref","unstructured":"Zhang, A., Yue, L., Shen, J., Zhu, F., Zhen, X., Cao, X., & Shao, L. (2019). Attentional neural fields for crowd counting. In Proceedings of IEEE international conference on computer vision (pp. 5714\u20135723).","DOI":"10.1109\/ICCV.2019.00581"},{"key":"1542_CR104","doi-asserted-by":"crossref","unstructured":"Zhang, C., Li, H., Wang, X., & Yang, X. (2015). Cross-scene crowd counting via deep convolutional neural networks. In Proceedings of IEEE international conference on computer vision and pattern recognition (pp. 833\u2013841).","DOI":"10.1109\/CVPR.2015.7298684"},{"key":"1542_CR105","unstructured":"Zhang, L., Shi, Z., Cheng, M. M., Liu, Y., Bian, J. W., Zhou, J. T., Zheng, G., & Zeng, Z. (2019, Accepted). Nonlinear regression via deep negative correlation learning. t IEEE Transactions on Pattern Analysis and Machine Intelligence."},{"key":"1542_CR106","doi-asserted-by":"crossref","unstructured":"Zhang, Q., & Chan, A. B. (2019). Wide-area crowd counting via ground-plane density maps and multi-view fusion cnns. In Proceedings of IEEE international conference on computer vision and pattern recognition (pp. 8297\u20138306).","DOI":"10.1109\/CVPR.2019.00849"},{"key":"1542_CR107","doi-asserted-by":"crossref","unstructured":"Zhang, Q., & Chan, A. B. (2020). 3d crowd counting via multi-view fusion with 3d gaussian kernels. Proceedings of the AAAI conference on artificial intelligence.","DOI":"10.1609\/aaai.v34i07.6980"},{"key":"1542_CR108","doi-asserted-by":"crossref","unstructured":"Zhang, X., Fang, Z., Wen, Y., Li, Z., & Qiao, Y. (2017). Range loss for deep face recognition with long-tailed training data. In Proceedings of IEEE international conference on computer vision (pp. 5409\u20135418).","DOI":"10.1109\/ICCV.2017.578"},{"key":"1542_CR109","doi-asserted-by":"crossref","unstructured":"Zhang, Y., Zhou, D., Chen, S., Gao, S., & Ma, Y. (2016). Single-image crowd counting via multi-column convolutional neural network. In Proceedings of IEEE conference on computer vision and pattern recognition (pp. 589\u2013597).","DOI":"10.1109\/CVPR.2016.70"},{"key":"1542_CR110","doi-asserted-by":"crossref","unstructured":"Zhao, M., Zhang, J., Zhang, C., & Zhang, W. (2019). Leveraging heterogeneous auxiliary tasks to assist crowd counting. In Proceedings of IEEE international conference on computer vision and pattern recognition (pp. 12736\u201312745).","DOI":"10.1109\/CVPR.2019.01302"},{"key":"1542_CR111","doi-asserted-by":"crossref","unstructured":"Zhao, X., Delleandrea, E., & Chen, L. (2009). A people counting system based on face detection and tracking in a video. In Proceedings of IEEE international conference on advanced video and signal based surveillance (pp. 67\u201372).","DOI":"10.1109\/AVSS.2009.45"},{"key":"1542_CR112","doi-asserted-by":"crossref","unstructured":"Zhao, Z., Li, H., Zhao, R., & Wang, X. (2016). Crossing-line crowd counting with two-phase deep neural networks. In Proceedings of European conference on computer vision (pp. 712\u2013726). Springer.","DOI":"10.1007\/978-3-319-46484-8_43"},{"key":"1542_CR113","doi-asserted-by":"crossref","unstructured":"Zheng, H., Fu, J., Mei, T., & Luo, J. (2017). Learning multi-attention convolutional neural network for fine-grained image recognition. In Proceedings of IEEE international conference on computer vision (pp. 5209\u20135217).","DOI":"10.1109\/ICCV.2017.557"},{"key":"1542_CR114","doi-asserted-by":"crossref","unstructured":"Zhu, X., Anguelov, D., & Ramanan, D. (2014). Capturing long-tail distributions of object subcategories. In Proceedings of IEEE international conference on computer vision and pattern recognition (pp. 915\u2013922).","DOI":"10.1109\/CVPR.2014.122"},{"issue":"1","key":"1542_CR115","doi-asserted-by":"publisher","first-page":"76","DOI":"10.1007\/s11263-015-0812-2","volume":"119","author":"X Zhu","year":"2016","unstructured":"Zhu, X., Vondrick, C., Fowlkes, C. C., & Ramanan, D. (2016). Do we need more training data? International Journal of Computer Vision, 119(1), 76\u201392.","journal-title":"International Journal of Computer Vision"}],"container-title":["International Journal of Computer Vision"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11263-021-01542-z.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11263-021-01542-z\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11263-021-01542-z.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,2,16]],"date-time":"2022-02-16T10:19:50Z","timestamp":1645006790000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11263-021-01542-z"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,1,4]]},"references-count":115,"journal-issue":{"issue":"2","published-print":{"date-parts":[[2022,2]]}},"alternative-id":["1542"],"URL":"https:\/\/doi.org\/10.1007\/s11263-021-01542-z","relation":{},"ISSN":["0920-5691","1573-1405"],"issn-type":[{"value":"0920-5691","type":"print"},{"value":"1573-1405","type":"electronic"}],"subject":[],"published":{"date-parts":[[2022,1,4]]},"assertion":[{"value":"16 June 2020","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"18 October 2021","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"4 January 2022","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}