{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,7]],"date-time":"2025-10-07T05:37:21Z","timestamp":1759815441604,"version":"3.37.3"},"reference-count":42,"publisher":"Springer Science and Business Media LLC","issue":"2","license":[{"start":{"date-parts":[[2023,5,30]],"date-time":"2023-05-30T00:00:00Z","timestamp":1685404800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,5,30]],"date-time":"2023-05-30T00:00:00Z","timestamp":1685404800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Multimed Tools Appl"],"published-print":{"date-parts":[[2024,1]]},"DOI":"10.1007\/s11042-023-15366-1","type":"journal-article","created":{"date-parts":[[2023,5,30]],"date-time":"2023-05-30T07:02:47Z","timestamp":1685430167000},"page":"5165-5180","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Scale-aware local difference attention on pyramidal features for crowd counting"],"prefix":"10.1007","volume":"83","author":[{"given":"Qian","family":"Zhang","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5914-7109","authenticated-orcid":false,"given":"Shizhou","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"Xinyao","family":"Liu","sequence":"additional","affiliation":[]},{"given":"Yanning","family":"Zhang","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2023,5,30]]},"reference":[{"key":"15366_CR1","doi-asserted-by":"crossref","unstructured":"Chen K, Gong S, Xiang T, et al. (2013) Cumulative attribute space for age and crowd density estimation. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 2467\u20132474","DOI":"10.1109\/CVPR.2013.319"},{"key":"15366_CR2","doi-asserted-by":"publisher","first-page":"210","DOI":"10.1016\/j.neucom.2019.11.064","volume":"382","author":"J Chen","year":"2020","unstructured":"Chen J, Su W, Wang Z (2020) Crowd counting with crowd attention convolutional neural network. Neurocomputing 382:210\u2013220","journal-title":"Neurocomputing"},{"key":"15366_CR3","doi-asserted-by":"crossref","unstructured":"Ge W, Collins R T (2009) Marked point processes for crowd counting. In: 2009 IEEE Conference on computer vision and pattern recognition. IEEE, pp 2913\u20132920","DOI":"10.1109\/CVPR.2009.5206621"},{"key":"15366_CR4","doi-asserted-by":"crossref","unstructured":"Hossain M, Hosseinzadeh M, Chanda O, et al. (2019) Crowd counting using scale-aware attention networks. In: 2019 IEEE Winter conference on applications of computer vision (WACV). IEEE, pp 1280\u20131288","DOI":"10.1109\/WACV.2019.00141"},{"key":"15366_CR5","doi-asserted-by":"crossref","unstructured":"Idrees H, Saleemi I, Seibert C, et al. (2013) Multi-source multi-scale counting in extremely dense crowd images. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 2547\u20132554","DOI":"10.1109\/CVPR.2013.329"},{"key":"15366_CR6","doi-asserted-by":"crossref","unstructured":"Idrees H, Tayyab M, Athrey K, et al. (2018) Composition loss for counting, density map estimation and localization in dense crowds. In: Proceedings of the European conference on computer vision (ECCV), pp 532\u2013546","DOI":"10.1007\/978-3-030-01216-8_33"},{"key":"15366_CR7","doi-asserted-by":"publisher","first-page":"182050","DOI":"10.1109\/ACCESS.2019.2960292","volume":"7","author":"N Ilyas","year":"2019","unstructured":"Ilyas N, Ahmad A, Kim K (2019) Casa-crowd: a context-aware scale aggregation cnn-based crowd counting technique. IEEE Access 7:182050\u2013182059","journal-title":"IEEE Access"},{"key":"15366_CR8","doi-asserted-by":"crossref","unstructured":"Jiang X, Zhang L, Xu M, et al. (2020) Attention scaling for crowd counting. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 4706\u20134715","DOI":"10.1109\/CVPR42600.2020.00476"},{"key":"15366_CR9","unstructured":"Lempitsky V, Zisserman A (2010) Learning to count objects in images. In: Advances in neural information processing systems, pp 1324\u20131332"},{"key":"15366_CR10","doi-asserted-by":"crossref","unstructured":"Li M, Zhang Z, Huang K, et al. (2008) Estimating the number of people in crowded scenes by mid based foreground segmentation and head-shoulder detection. In: 2008 19th International conference on pattern recognition. IEEE, pp 1\u20134","DOI":"10.1109\/ICPR.2008.4761705"},{"key":"15366_CR11","doi-asserted-by":"crossref","unstructured":"Li Y, Zhang X, Chen D (2018) Csrnet: dilated convolutional neural networks for understanding the highly congested scenes. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 1091\u20131100","DOI":"10.1109\/CVPR.2018.00120"},{"key":"15366_CR12","doi-asserted-by":"crossref","unstructured":"Lin T Y, Doll\u00e1r P, Girshick R, et al. (2017) Feature pyramid networks for object detection. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 2117\u20132125","DOI":"10.1109\/CVPR.2017.106"},{"key":"15366_CR13","doi-asserted-by":"crossref","unstructured":"Liu J, Gao C, Meng D, et al. (2018) Decidenet: counting varying density crowds through attention guided detection and density estimation. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 5197\u20135206","DOI":"10.1109\/CVPR.2018.00545"},{"key":"15366_CR14","doi-asserted-by":"crossref","unstructured":"Liu L, Wang H, Li G, et al. (2018b) Crowd counting using deep recurrent spatial-aware network. arXiv:180700601","DOI":"10.24963\/ijcai.2018\/118"},{"key":"15366_CR15","doi-asserted-by":"crossref","unstructured":"Liu L, Qiu Z, Li G, et al. (2019) Crowd counting with deep structured scale integration network. In: Proceedings of the IEEE international conference on computer vision, pp 1774\u20131783","DOI":"10.1109\/ICCV.2019.00186"},{"key":"15366_CR16","doi-asserted-by":"crossref","unstructured":"Liu W, Salzmann M, Fua P (2019) Context-aware crowd counting. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 5099\u20135108","DOI":"10.1109\/CVPR.2019.00524"},{"key":"15366_CR17","doi-asserted-by":"publisher","first-page":"2188","DOI":"10.1109\/TMM.2021.3065580","volume":"23","author":"S Liu","year":"2021","unstructured":"Liu S, Wang S, Liu X et al (2021) Human memory update strategy: a multi-layer template update mechanism for remote visual monitoring. IEEE Trans Multimed 23:2188\u20132198","journal-title":"IEEE Trans Multimed"},{"key":"15366_CR18","doi-asserted-by":"crossref","unstructured":"Liu S, Wang S, Liu X et al (2022) Human inertial thinking strategy: a novel fuzzy reasoning mechanism for iot-assisted visual monitoring. IEEE Internet Things J","DOI":"10.1109\/JIOT.2022.3142115"},{"key":"15366_CR19","doi-asserted-by":"crossref","unstructured":"Liu S, Xu X, Zhang Y et al (2022) A reliable sample selection strategy for weakly supervised visual tracking. IEEE Trans Reliab","DOI":"10.1109\/TR.2022.3162346"},{"key":"15366_CR20","doi-asserted-by":"crossref","unstructured":"Ma Z, Wei X, Hong X et al (2019) Bayesian loss for crowd count estimation with point supervision. In: Proceedings of the IEEE international conference on computer vision, pp 6142\u20136151","DOI":"10.1109\/ICCV.2019.00624"},{"key":"15366_CR21","doi-asserted-by":"crossref","unstructured":"Ma Z, Wei X, Hong X et al (2021) Learning to count via unbalanced optimal transport. In: Proceedings of the AAAI conference on artificial intelligence","DOI":"10.1609\/aaai.v35i3.16332"},{"key":"15366_CR22","doi-asserted-by":"crossref","unstructured":"Oh M h, Olsen P, Ramamurthy K N (2020) Crowd counting with decomposed uncertainty. In: Proceedings of the AAAI conference on artificial intelligence, pp 11799\u201311806","DOI":"10.1609\/aaai.v34i07.6852"},{"key":"15366_CR23","doi-asserted-by":"crossref","unstructured":"Sam D B, Surya S, Babu R V (2017) Switching convolutional neural network for crowd counting. In: 2017 IEEE conference on computer vision and pattern recognition (CVPR). IEEE, pp 4031\u20134039","DOI":"10.1109\/CVPR.2017.429"},{"key":"15366_CR24","doi-asserted-by":"crossref","unstructured":"Shen Z, Xu Y, Ni B et al (2018) Crowd counting via adversarial cross-scale consistency pursuit. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 5245\u20135254","DOI":"10.1109\/CVPR.2018.00550"},{"key":"15366_CR25","doi-asserted-by":"crossref","unstructured":"Sindagi V A, Patel V M (2017) Cnn-based cascaded multi-task learning of high-level prior and density estimation for crowd counting. In: 2017 14th IEEE international conference on advanced video and signal based surveillance (AVSS). IEEE, pp 1\u20136","DOI":"10.1109\/AVSS.2017.8078491"},{"key":"15366_CR26","doi-asserted-by":"crossref","unstructured":"Sindagi V A, Patel V M (2017) Generating high-quality crowd density maps using contextual pyramid cnns. In: Proceedings of the IEEE international conference on computer vision, pp 1861\u20131870","DOI":"10.1109\/ICCV.2017.206"},{"key":"15366_CR27","doi-asserted-by":"publisher","first-page":"3","DOI":"10.1016\/j.patrec.2017.07.007","volume":"107","author":"VA Sindagi","year":"2018","unstructured":"Sindagi V A, Patel V M (2018) A survey of recent advances in cnn-based single image crowd counting and density estimation. Pattern Recogn Lett 107:3\u201316","journal-title":"Pattern Recogn Lett"},{"key":"15366_CR28","doi-asserted-by":"crossref","unstructured":"Sindagi V A, Patel V M (2019) Multi-level bottom-top and top-bottom feature fusion for crowd counting. In: Proceedings of the IEEE international conference on computer vision, pp 1002\u20131012","DOI":"10.1109\/ICCV.2019.00109"},{"key":"15366_CR29","doi-asserted-by":"crossref","unstructured":"Wang C, Zhang H, Yang L et al (2015) Deep people counting in extremely dense crowds. In: Proceedings of the 23rd ACM international conference on multimedia, pp 1299\u20131302","DOI":"10.1145\/2733373.2806337"},{"key":"15366_CR30","doi-asserted-by":"crossref","unstructured":"Wang X, Cai Z, Gao D et al (2019) Towards universal object detection by domain attention. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 7289\u20137298","DOI":"10.1109\/CVPR.2019.00746"},{"key":"15366_CR31","doi-asserted-by":"crossref","unstructured":"Wang Y, Zhang J, Kan M et al (2020) Self-supervised equivariant attention mechanism for weakly supervised semantic segmentation. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 12275\u201312284","DOI":"10.1109\/CVPR42600.2020.01229"},{"key":"15366_CR32","doi-asserted-by":"crossref","unstructured":"Wang Y, Hou J, Houa X et al (2021) A self-training approach for point-supervised object detection and counting in crowds. IEEE Trans Image Process PP(99)","DOI":"10.1109\/TIP.2021.3055632"},{"key":"15366_CR33","doi-asserted-by":"crossref","unstructured":"Xiong H, Lu H, Liu C et al (2019) From open set to closed set: counting objects by spatial divide-and-conquer. In: Proceedings of the IEEE International Conference on Computer Vision, pp 8362\u20138371","DOI":"10.1109\/ICCV.2019.00845"},{"key":"15366_CR34","doi-asserted-by":"publisher","first-page":"1395","DOI":"10.1109\/TIP.2020.3043122","volume":"30","author":"Y Yang","year":"2020","unstructured":"Yang Y, Li G, Du D et al (2020) Embedding perspective analysis into multi-column convolutional neural network for crowd counting. IEEE Trans Image Process 30:1395\u20131407","journal-title":"IEEE Trans Image Process"},{"key":"15366_CR35","doi-asserted-by":"crossref","unstructured":"Yang Y, Li G, Wu Z et al (2020) Weakly-supervised crowd counting learns from sorting rather than locations. In: Computer vision\u2013ECCV 2020: 16th European conference, Glasgow, UK, August 23\u201328, 2020, Proceedings, Part VIII 16. Springer, pp 1\u201317","DOI":"10.1007\/978-3-030-58598-3_1"},{"key":"15366_CR36","doi-asserted-by":"crossref","unstructured":"Zhang Y, Zhou D, Chen S et al (2016) Single-image crowd counting via multi-column convolutional neural network. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 589\u2013597","DOI":"10.1109\/CVPR.2016.70"},{"key":"15366_CR37","doi-asserted-by":"crossref","unstructured":"Zhang L, Shi M, Chen Q (2018) Crowd counting via scale-adaptive convolutional neural network. In: 2018 IEEE Winter conference on applications of computer vision (WACV). IEEE, pp 1113\u20131121","DOI":"10.1109\/WACV.2018.00127"},{"key":"15366_CR38","unstructured":"Zhang S, Yang Y, Wang P et al (2019) Attend to the difference: cross-modality person re-identification via contrastive correlation. arXiv:191011656"},{"key":"15366_CR39","unstructured":"Zhang F, Jiao L, Li L et al (2020) Multiresolution attention extractor for small object detection. arXiv:200605941"},{"key":"15366_CR40","doi-asserted-by":"crossref","unstructured":"Zhao M, Zhang J, Zhang C et al (2019) Leveraging heterogeneous auxiliary tasks to assist crowd counting. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 12736\u201312745","DOI":"10.1109\/CVPR.2019.01302"},{"key":"15366_CR41","unstructured":"Zhu L, Zhao Z, Lu C et al (2019) Dual path multi-scale fusion networks with attention for crowd counting. arXiv:190201115"},{"key":"15366_CR42","doi-asserted-by":"publisher","first-page":"279","DOI":"10.1016\/j.patrec.2020.05.009","volume":"135","author":"M Zhu","year":"2020","unstructured":"Zhu M, Wang X, Tang J et al (2020) Attentive multi-stage convolutional neural network for crowd counting. Pattern Recogn Lett 135:279\u2013285","journal-title":"Pattern Recogn Lett"}],"container-title":["Multimedia Tools and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11042-023-15366-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11042-023-15366-1\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11042-023-15366-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,1,6]],"date-time":"2024-01-06T05:36:03Z","timestamp":1704519363000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11042-023-15366-1"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,5,30]]},"references-count":42,"journal-issue":{"issue":"2","published-print":{"date-parts":[[2024,1]]}},"alternative-id":["15366"],"URL":"https:\/\/doi.org\/10.1007\/s11042-023-15366-1","relation":{},"ISSN":["1380-7501","1573-7721"],"issn-type":[{"type":"print","value":"1380-7501"},{"type":"electronic","value":"1573-7721"}],"subject":[],"published":{"date-parts":[[2023,5,30]]},"assertion":[{"value":"20 October 2021","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"10 March 2023","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"15 April 2023","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"30 May 2023","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"The manuscript has not been published before and is not being considered for publication elsewhere. All authors have contributed to the creation of this manuscript for important intellectual content and read and approved the final manuscript. We declare there is no conflict of interest.","order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}}]}}