{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,5,2]],"date-time":"2025-05-02T04:10:10Z","timestamp":1746159010287,"version":"3.40.4"},"reference-count":50,"publisher":"Springer Science and Business Media LLC","issue":"13","license":[{"start":{"date-parts":[[2024,5,21]],"date-time":"2024-05-21T00:00:00Z","timestamp":1716249600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,5,21]],"date-time":"2024-05-21T00:00:00Z","timestamp":1716249600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Multimed Tools Appl"],"DOI":"10.1007\/s11042-024-19326-1","type":"journal-article","created":{"date-parts":[[2024,5,21]],"date-time":"2024-05-21T06:01:36Z","timestamp":1716271296000},"page":"11269-11294","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["A multi-scale fusion and dual attention network for crowd counting"],"prefix":"10.1007","volume":"84","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-1415-855X","authenticated-orcid":false,"given":"De","family":"Zhang","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yiting","family":"Wang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xiaoping","family":"Zhou","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Liangliang","family":"Su","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2024,5,21]]},"reference":[{"key":"19326_CR1","doi-asserted-by":"publisher","first-page":"2645","DOI":"10.1007\/s11554-023-01286-8","volume":"20","author":"MA Khan","year":"2023","unstructured":"Khan MA, Menouar H, Hamila R (2023) LCDNet: a lightweight crowd density estimation model for real-time video surveillance. J Real-Time Image Proc 20:2645\u20132654","journal-title":"J Real-Time Image Proc"},{"issue":"7","key":"19326_CR2","doi-asserted-by":"publisher","first-page":"9654","DOI":"10.1109\/TITS.2022.3178848","volume":"23","author":"X Yu","year":"2022","unstructured":"Yu X, Liang Y, Lin X, Wan J, Wang T, Dai H (2022) Frequency feature pyramid network with global-local consistency loss for crowd-and-vehicle counting in congested scenes. IEEE Trans Intell Transp Syst 23(7):9654\u20139664","journal-title":"IEEE Trans Intell Transp Syst"},{"key":"19326_CR3","doi-asserted-by":"publisher","first-page":"13637","DOI":"10.1007\/s11042-022-13957-y","volume":"82","author":"G Yang","year":"2023","unstructured":"Yang G, Zhu D (2023) Survey on algorithms of people counting in dense crowd and crowd density estimation. Multimed Tools Appl 82:13637\u201313648","journal-title":"Multimed Tools Appl"},{"key":"19326_CR4","doi-asserted-by":"publisher","first-page":"27895","DOI":"10.1007\/s11042-022-12833-z","volume":"81","author":"MR Bhyiyan","year":"2022","unstructured":"Bhyiyan MR, Abdullah J, Hashim N, Farid FA (2022) Video analytics using deep learning for crowd analysis: a review. Multimed Tools Appl 81:27895\u201327922","journal-title":"Multimed Tools Appl"},{"issue":"4","key":"19326_CR5","doi-asserted-by":"publisher","first-page":"743","DOI":"10.1109\/TPAMI.2011.155","volume":"34","author":"P Dollar","year":"2011","unstructured":"Dollar P, Wojek C, Schiele B, Perona P (2011) Pedestrian detection: An evaluation of the state of the art. IEEE Trans Pattern Anal Mach Intell 34(4):743\u2013761","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"19326_CR6","doi-asserted-by":"crossref","unstructured":"Topkaya IS, Erdogan H, Porikli F (2014) Counting people by clustering person detector outputs. In: Proceedings of IEEE International Conference on Advanced Video and Signal Based Surveillance,\u00a0IEEE, Seoul, pp 313\u2013318","DOI":"10.1109\/AVSS.2014.6918687"},{"issue":"10","key":"19326_CR7","doi-asserted-by":"publisher","first-page":"1986","DOI":"10.1109\/TPAMI.2015.2396051","volume":"37","author":"H Idrees","year":"2015","unstructured":"Idrees H, Soomro K, Shah M (2015) Detecting humans in dense crowds using locally-consistent scale prior and global occlusion reasoning. IEEE Trans Pattern Anal Mach Intell 37(10):1986\u20131998","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"19326_CR8","doi-asserted-by":"crossref","unstructured":"Idrees H, Saleemi I, Seibert C, Shah M (2013) Multi-source multi-scale counting in extremely dense crowd images. In: Proceedings of IEEE Conference on Computer Vision and Pattern Recognition, IEEE, Portland, pp 2547\u20132554","DOI":"10.1109\/CVPR.2013.329"},{"key":"19326_CR9","doi-asserted-by":"crossref","unstructured":"Chan AB, Vasconcelos N (2009) Bayesian poisson regression for crowd counting. In: Proceedings of IEEE International Conference on Computer Vision, IEEE, Kyoto, pp 545\u2013551","DOI":"10.1109\/ICCV.2009.5459191"},{"key":"19326_CR10","doi-asserted-by":"crossref","unstructured":"Tian Y, Sigal L, Badino H, Torre FD, Liu Y (2010) Latent gaussian mixture regression for human pose estimation. In: Proceedings of Asian Conference on Computer Vision, Springer, Queenstown, pp 533\u2013542","DOI":"10.1007\/978-3-642-19318-7_53"},{"key":"19326_CR11","doi-asserted-by":"publisher","first-page":"8776","DOI":"10.1109\/TIP.2020.3019185","volume":"29","author":"XH Dong","year":"2020","unstructured":"Dong XH, Zhou HY, Dong JY (2020) Texture classification using pair-wise difference pooling-based bilinear convolutional neural networks. IEEE Trans Image Process 29:8776\u20138790","journal-title":"IEEE Trans Image Process"},{"key":"19326_CR12","doi-asserted-by":"publisher","DOI":"10.1016\/j.displa.2023.102468","volume":"79","author":"QX Zhu","year":"2023","unstructured":"Zhu QX, Kuang WL, Li ZX (2023) A collaborative gated attention network for fine-grained visual classification. Displays 79:102468","journal-title":"Displays"},{"issue":"19","key":"19326_CR13","doi-asserted-by":"publisher","first-page":"21817","DOI":"10.1109\/JSEN.2021.3103042","volume":"21","author":"Y Zhang","year":"2021","unstructured":"Zhang Y, Jiu B, Wang PH, Liu HW, Liang SY (2021) An end-to-end anti-jamming target detection method based on CNN. IEEE Sens J 21(19):21817\u201321828","journal-title":"IEEE Sens J"},{"key":"19326_CR14","doi-asserted-by":"publisher","first-page":"2695","DOI":"10.1109\/TIP.2022.3160399","volume":"31","author":"X Zhang","year":"2022","unstructured":"Zhang X, Zhao WQ, Zhang W, Peng JY, Fan JP (2022) Guided filter network for semantic image segmentation. IEEE Trans Image Process 31:2695\u20132709","journal-title":"IEEE Trans Image Process"},{"key":"19326_CR15","doi-asserted-by":"publisher","first-page":"2637","DOI":"10.1016\/j.knosys.2023.110541","volume":"271","author":"T Wang","year":"2023","unstructured":"Wang T, Zhang T, Zhang KB, Wang HK, Li MQ, Lu J (2023) Context attention fusion network for crowd counting. Knowl-Based Syst 271:2637\u20132648","journal-title":"Knowl-Based Syst"},{"key":"19326_CR16","doi-asserted-by":"publisher","first-page":"224","DOI":"10.1016\/j.neucom.2021.02.103","volume":"472","author":"ZZ Fan","year":"2022","unstructured":"Fan ZZ, Zhang H, Zhang Z, Lu GM, Zhang YD, Wang YW (2022) A survey of crowd counting and density estimation based on convolutional neural network. Neurocomputing 472:224\u2013251","journal-title":"Neurocomputing"},{"key":"19326_CR17","doi-asserted-by":"publisher","first-page":"104597","DOI":"10.1016\/j.imavis.2022.104597","volume":"129","author":"MA Khan","year":"2023","unstructured":"Khan MA, Menouar H, Hamila R (2023) Revisiting crowd counting: State-of-the-art, trends, and future perspectives. Image Vis Comput 129:104597\u2013104612","journal-title":"Image Vis Comput"},{"key":"19326_CR18","doi-asserted-by":"publisher","unstructured":"Cao XK, Wang ZP, Zhao YY, Su F (2018) Scale aggregation network for accurate and efficient crowd counting. In: Proceedings of European Conference on Computer Vision, pp 757\u2013773 https:\/\/doi.org\/10.1007\/978-3-030-01228-1_45","DOI":"10.1007\/978-3-030-01228-1_45"},{"key":"19326_CR19","doi-asserted-by":"crossref","unstructured":"Cheng ZQ, Li JX, Dai Q, Wu X, He JY, Hauptmann AG (2019) Improving the learning of multi-column convolutional neural network for crowd counting. In: Proceedings of ACM International Conference on Multimedia, ACM, Nice, pp 1897\u20131906","DOI":"10.1145\/3343031.3350898"},{"issue":"10","key":"19326_CR20","doi-asserted-by":"publisher","first-page":"3486","DOI":"10.1109\/TCSVT.2019.2919139","volume":"30","author":"JY Gao","year":"2020","unstructured":"Gao JY, Wang Q, Li XL (2020) PCC net: Perspective crowd counting via spatial convolutional network. IEEE Trans Circuits Syst Video Technol 30(10):3486\u20133498","journal-title":"IEEE Trans Circuits Syst Video Technol"},{"key":"19326_CR21","doi-asserted-by":"crossref","unstructured":"Wu ZY, Sang J, Shi Y, Liu Q, Sang N, Liu XY (2021) CRANet: Cascade residual attention network for crowd counting. In: Proceedings of IEEE International Conference on Multimedia and Expo, IEEE, Virtual, pp 176\u2013185","DOI":"10.1109\/ICME51207.2021.9428236"},{"key":"19326_CR22","doi-asserted-by":"crossref","unstructured":"Zhang Y, Zhou D, Chen S, Gao S, Ma Y (2016) Single-image crowd counting via multi-column convolutional neural network. In: Proceedings of IEEE Conference on Computer Vision and Pattern Recognition, IEEE, Las Vegas, pp 589\u2013597","DOI":"10.1109\/CVPR.2016.70"},{"key":"19326_CR23","doi-asserted-by":"crossref","unstructured":"Boominathan L, Kruthiventi SS, Venkatesh Babu R (2016) CrowdNet: A deep convolutional network for dense crowd counting. In: Proceedings of ACM International Conference on Multimedia, ACM, Amsterdam, pp 640\u2013644","DOI":"10.1145\/2964284.2967300"},{"key":"19326_CR24","doi-asserted-by":"crossref","unstructured":"Li YH, Zhang XF, Chen DM (2018) CSRNet: Dilated convolutional neural networks for understanding the highly congested scenes. In: Proceedings of IEEE Conference on Computer Vision and Pattern Recognition, IEEE, Salt Lake City, pp 1091\u20131100","DOI":"10.1109\/CVPR.2018.00120"},{"key":"19326_CR25","doi-asserted-by":"crossref","unstructured":"Sam DB, Surya S, Babu RV (2017) Switching convolutional neural network for crowd counting. In: Proceedings of IEEE Conference on Computer Vision and Pattern Recognition, IEEE, Honolulu, pp 4031\u20134039","DOI":"10.1109\/CVPR.2017.429"},{"key":"19326_CR26","doi-asserted-by":"crossref","unstructured":"Jiang XL, Xiao ZH, Zhang BC, Zhen XT, Cao XB, Doermann D, Shao L (2019) Crowd counting and density estimation by trellis encoder-decoder networks. In: Proceedings of IEEE Conference on Computer Vision and Pattern Recognition, IEEE, Long Beach, pp 6126\u20136135","DOI":"10.1109\/CVPR.2019.00629"},{"key":"19326_CR27","doi-asserted-by":"publisher","first-page":"279","DOI":"10.1016\/j.patrec.2020.05.009","volume":"135","author":"M Zhu","year":"2020","unstructured":"Zhu M, Wang XQ, Tang J, Wang N, Qu L (2020) Attentive multi-stage convolutional neural network for crowd counting. Pattern Recogn Lett 135:279\u2013285","journal-title":"Pattern Recogn Lett"},{"key":"19326_CR28","doi-asserted-by":"publisher","first-page":"323","DOI":"10.1155\/2022\/3277995","volume":"2022","author":"LY Xiong","year":"2022","unstructured":"Xiong LY, Yi H, Huang XH, Huang WC (2022) SCFFNet: Spatial context feature fusion network for understanding the highly congested scenes. Math Probl Eng 2022:323\u2013339","journal-title":"Math Probl Eng"},{"key":"19326_CR29","doi-asserted-by":"publisher","first-page":"79","DOI":"10.1016\/j.ins.2020.04.001","volume":"528","author":"L Dong","year":"2020","unstructured":"Dong L, Zhang HJ, Ji Y, Ding YX (2020) Crowd counting by using multi-level density-based spatial information: A multi-scale CNN framework. Inf Sci 528:79\u201391","journal-title":"Inf Sci"},{"key":"19326_CR30","first-page":"2856","volume":"325","author":"ZJ Chen","year":"2020","unstructured":"Chen ZJ, Cheng JH, Yuan YC, Liao DP, Li YZ, Lv JC (2020) Deep density-aware count regressor. Frontiers Artificial Intelligence Appl 325:2856\u20132863","journal-title":"Frontiers Artificial Intelligence Appl"},{"key":"19326_CR31","doi-asserted-by":"crossref","unstructured":"Deb D, Ventura J (2018) An aggregated multicolumn dilated convolution network for perspective-free counting. In: Proceedings of IEEE Conference on Computer Vision and Pattern Recognition Workshops, IEEE, Salt Lake City, pp 308\u2013317","DOI":"10.1109\/CVPRW.2018.00057"},{"key":"19326_CR32","first-page":"5094","volume":"2019","author":"W Liu","year":"2019","unstructured":"Liu W, Salzmann M, Fua P (2019) Context-aware crowd counting. In: Proceedings of IEEE Conference on Computer Vision and Pattern Recognition,\u00a0vol 2019. IEEE, Long Beach, pp 5094\u20135103","journal-title":"Proceedings of IEEE Conference on Computer Vision and Pattern Recognition"},{"key":"19326_CR33","doi-asserted-by":"publisher","unstructured":"Zhai WZ, Gao ML, Li QL, Jeon G, Anisetti M (2023) FPANet: Feature pyramid attention network for crowd counting. Applied Intelligence, pp 1203\u20131215 https:\/\/doi.org\/10.1007\/s10489-023-04499-3","DOI":"10.1007\/s10489-023-04499-3"},{"key":"19326_CR34","doi-asserted-by":"publisher","first-page":"1045","DOI":"10.1016\/j.engappai.2021.104563","volume":"108","author":"SH Zhang","year":"2022","unstructured":"Zhang SH, Zhang XX, Li H, He H, Song DD, Wang L (2022) Hierarchical pyramid attentive network with spatial separable convolution for crowd counting. Eng Appl Artif Intell 108:1045\u20131060","journal-title":"Eng Appl Artif Intell"},{"issue":"8","key":"19326_CR35","doi-asserted-by":"publisher","first-page":"9285","DOI":"10.1007\/s10489-022-03954-x","volume":"53","author":"S Aldhaheri","year":"2023","unstructured":"Aldhaheri S, Alotaibi R, Alzahrani B, Hadi A, Mahmood A, Alhothali A, Barnawi A (2023) MACC Net: Multi-task attention crowd counting network. Appl Intell 53(8):9285\u20139297","journal-title":"Appl Intell"},{"issue":"13","key":"19326_CR36","doi-asserted-by":"publisher","first-page":"15436","DOI":"10.1007\/s10489-022-03263-3","volume":"52","author":"Y Shi","year":"2022","unstructured":"Shi Y, Sang J, Wu ZY, Wang FS, Liu XY, Xia XF, Sang N (2022) MGSNet: A multi-scale and gated spatial attention network for crowd counting. Appl Intell 52(13):15436\u201315446","journal-title":"Appl Intell"},{"key":"19326_CR37","doi-asserted-by":"crossref","unstructured":"Hossain MA, Hosseinzadeh M, Chanda O, Wang Y (2019) Crowd counting using scale-aware attention networks. In: Proceedings of IEEE Winter Conference on Applications of Computer Vision, IEEE, Waikoloa, pp 1280\u20131288","DOI":"10.1109\/WACV.2019.00141"},{"issue":"9","key":"19326_CR38","doi-asserted-by":"publisher","first-page":"15233","DOI":"10.1109\/TITS.2021.3138896","volume":"23","author":"Q Wang","year":"2022","unstructured":"Wang Q, Breckon TP (2022) Crowd counting via segmentation guided attention networks and curriculum loss. IEEE Trans Intell Transp Syst 23(9):15233\u201315243","journal-title":"IEEE Trans Intell Transp Syst"},{"key":"19326_CR39","unstructured":"Zhang H, Goodfellow IJ, Metaxas DN, Odena A (2018) Self-attention generative adversarial networks.  In: Proceedings of International Conference on Machine Learning, ACM, Los Angeles, pp 1900\u20131912"},{"issue":"11","key":"19326_CR40","doi-asserted-by":"publisher","first-page":"13097","DOI":"10.1007\/s10489-022-03187-y","volume":"52","author":"YC Li","year":"2022","unstructured":"Li YC, Jia RS, Hu YX, Han DN, Sun HM (2022) Crowd density estimation based on multi scale features fusion network with reverse attention mechanism. Appl Intell 52(11):13097\u201313113","journal-title":"Appl Intell"},{"key":"19326_CR41","unstructured":"Cheng JH, Chen ZJ, Zhang XY, Li YZ, Jing XY (2020) Exploit the potential of multi-column architecture for crowd counting. arXiv preprint, arXiv: 2007.05779. https:\/\/arxiv.org\/abs\/2007.05779"},{"key":"19326_CR42","doi-asserted-by":"crossref","unstructured":"Idrees H, Saleemi I, Seibert C, Shah M (2013) Multi-source multi-scale counting in extremely dense crowd images. In: Proceedings of IEEE Conference on Computer Vision and Pattern Recognition, IEEE, Portland, pp 2547\u20132554","DOI":"10.1109\/CVPR.2013.329"},{"key":"19326_CR43","doi-asserted-by":"publisher","unstructured":"Idrees H, Saleemi I, Seibert C, Shah M (2018) Composition loss for counting, density map estimation and localization in dense crowds. In: Proceedings of European Conference on Computer Vision, pp 544\u2013559 https:\/\/doi.org\/10.1007\/978-3-030-01216-8_33","DOI":"10.1007\/978-3-030-01216-8_33"},{"key":"19326_CR44","unstructured":"Chen K, Loy CV, Gong SG, Xiang T (2012) Feature mining for localized crowd counting. In: Proceedings of British Machine Vision Conference, British Machine Vision Association, Surrey, pp 120\u2013130"},{"issue":"3","key":"19326_CR45","doi-asserted-by":"publisher","first-page":"1045","DOI":"10.1007\/s00371-021-02383-0","volume":"39","author":"ZX Li","year":"2023","unstructured":"Li ZX, Lu SH, Dong YS, Guo JY (2023) MSFFA: a multi-scale feature fusion and attention mechanism network for crowd counting. Vis Comput 39(3):1045\u20131056","journal-title":"Vis Comput"},{"key":"19326_CR46","first-page":"2864","volume":"325","author":"ZK Zou","year":"2020","unstructured":"Zou ZK, Liu YF, Xu SJ, Wei W, Wen SP, Zhou P (2020) Crowd counting via hierarchical scale recalibration network. Frontiers Artificial Intelligence Appl 325:2864\u20132871","journal-title":"Frontiers Artificial Intelligence Appl"},{"issue":"5","key":"19326_CR47","first-page":"2594","volume":"44","author":"V Sindagi","year":"2020","unstructured":"Sindagi V, Yasarla R, Patel VM (2020) JHU-CROWD++: Large-scale crowd counting dataset and a benchmark method. IEEE Trans Pattern Anal Mach Intell 44(5):2594\u20132609","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"19326_CR48","unstructured":"Wang BY, Liu HD, Samaras D, Hoai M (2020) Distribution matching for crowd counting. In: Proceedings of Advances in Neural Information Processing Systems, MIT Press, Virtual, pp 2010\u20132022"},{"issue":"8","key":"19326_CR49","first-page":"2739","volume":"43","author":"DB Sam","year":"2021","unstructured":"Sam DB, Peri SV, Sundararaman MN, Kamath A, Babu RV (2021) Locate, size and count: accurately resolving people in dense crowds via detection. IEEE Trans Pattern Anal Mach Intell 43(8):2739\u20132751","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"issue":"8","key":"19326_CR50","doi-asserted-by":"publisher","first-page":"5399","DOI":"10.1109\/TCSVT.2022.3146459","volume":"32","author":"WD Zhao","year":"2022","unstructured":"Zhao WD, Wang MY, Liu Y, Lu HM, Xu CG, Yao L (2022) Generalizable crowd counting via diverse context style learning. IEEE Trans Circuits Syst Video Technol 32(8):5399\u20135410","journal-title":"IEEE Trans Circuits Syst Video Technol"}],"container-title":["Multimedia Tools and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11042-024-19326-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11042-024-19326-1\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11042-024-19326-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,5,2]],"date-time":"2025-05-02T03:28:44Z","timestamp":1746156524000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11042-024-19326-1"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,5,21]]},"references-count":50,"journal-issue":{"issue":"13","published-online":{"date-parts":[[2025,4]]}},"alternative-id":["19326"],"URL":"https:\/\/doi.org\/10.1007\/s11042-024-19326-1","relation":{},"ISSN":["1573-7721"],"issn-type":[{"type":"electronic","value":"1573-7721"}],"subject":[],"published":{"date-parts":[[2024,5,21]]},"assertion":[{"value":"19 August 2023","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"26 February 2024","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"30 April 2024","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"21 May 2024","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare that they have no known competing financial interests or personal relationships that could have appeared to influence the work reported in this paper.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Competing Interest"}}]}}