{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,3]],"date-time":"2025-12-03T00:57:25Z","timestamp":1764723445809,"version":"3.46.0"},"reference-count":30,"publisher":"Springer Science and Business Media LLC","issue":"16","license":[{"start":{"date-parts":[[2025,11,10]],"date-time":"2025-11-10T00:00:00Z","timestamp":1762732800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,11,10]],"date-time":"2025-11-10T00:00:00Z","timestamp":1762732800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"crossref","award":["62476235"],"award-info":[{"award-number":["62476235"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"crossref"}]},{"DOI":"10.13039\/501100003787","name":"Natural Science Foundation of Hebei Province","doi-asserted-by":"publisher","award":["F2023203012"],"award-info":[{"award-number":["F2023203012"]}],"id":[{"id":"10.13039\/501100003787","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Innovation Capability Improvement Plan Project of Hebei Province","award":["22567626H"],"award-info":[{"award-number":["22567626H"]}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["SIViP"],"published-print":{"date-parts":[[2025,12]]},"DOI":"10.1007\/s11760-025-04919-6","type":"journal-article","created":{"date-parts":[[2025,11,10]],"date-time":"2025-11-10T00:45:09Z","timestamp":1762735509000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["CMFNet: Cross-attention multi-scale fusion network for cross-modal crowd counting"],"prefix":"10.1007","volume":"19","author":[{"given":"Shihui","family":"Zhang","sequence":"first","affiliation":[]},{"given":"Ping","family":"Li","sequence":"additional","affiliation":[]},{"given":"Jiawei","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"Zhigang","family":"Huang","sequence":"additional","affiliation":[]},{"given":"Sheng","family":"Zhan","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,11,10]]},"reference":[{"key":"4919_CR1","doi-asserted-by":"crossref","unstructured":"Zhang, Y., Zhou, D., Chen, S., Gao, S., Ma, Y.: Single-image crowd counting via multi-column convolutional neural network. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 589\u2013597 (2016)","DOI":"10.1109\/CVPR.2016.70"},{"key":"4919_CR2","doi-asserted-by":"crossref","unstructured":"Li, Y., Zhang, X., Chen, D.: Csrnet: Dilated convolutional neural networks for understanding the highly congested scenes. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 1091\u20131100 (2018)","DOI":"10.1109\/CVPR.2018.00120"},{"key":"4919_CR3","doi-asserted-by":"crossref","unstructured":"Yue, G., Jiao, G., Xiang, J.: Semi-supervised iterative learning network for camouflaged object detection. In: ICASSP 2025-2025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 1\u20135 (2025). IEEE","DOI":"10.1109\/ICASSP49660.2025.10890224"},{"key":"4919_CR4","doi-asserted-by":"crossref","unstructured":"Wang, F., Jiao, G., Yue, G.: More observation leads to more clarity: Multi-view collaboration network for camouflaged object detection. Neurocomputing, 130433 (2025)","DOI":"10.1016\/j.neucom.2025.130433"},{"key":"4919_CR5","doi-asserted-by":"crossref","unstructured":"Bui, D.C., Le, T.V., Ngo, B.H.: C2t-net: Channel-aware cross-fused transformer-style networks for pedestrian attribute recognition. In: Proceedings of the IEEE\/CVF Winter Conference on Applications of Computer Vision, pp. 351\u2013358 (2024)","DOI":"10.1109\/WACVW60836.2024.00043"},{"key":"4919_CR6","doi-asserted-by":"crossref","unstructured":"Lian, D., Li, J., Zheng, J., Luo, W., Gao, S.: Density map regression guided detection network for rgb-d crowd counting and localization. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 1821\u20131830 (2019)","DOI":"10.1109\/CVPR.2019.00192"},{"key":"4919_CR7","doi-asserted-by":"crossref","unstructured":"Liu, L., Chen, J., Wu, H., Li, G., Li, C., Lin, L.: Cross-modal collaborative representation learning and a large-scale rgbt benchmark for crowd counting. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 4823\u20134833 (2021)","DOI":"10.1109\/CVPR46437.2021.00479"},{"key":"4919_CR8","doi-asserted-by":"crossref","unstructured":"Peng, T., Li, Q., Zhu, P.: Rgb-t crowd counting from drone: A benchmark and mmccn network. In: Proceedings of the Asian Conference on Computer Vision (2020)","DOI":"10.1007\/978-3-030-69544-6_30"},{"key":"4919_CR9","doi-asserted-by":"crossref","unstructured":"Zhang, B., Du, Y., Zhao, Y., Wan, J., Tong, Z.: I-mmccn: Improved mmccn for rgb-t crowd counting of drone images. In: 2021 7th IEEE International Conference on Network Intelligence and Digital Content (IC-NIDC), pp. 117\u2013121 (2021). IEEE","DOI":"10.1109\/IC-NIDC54101.2021.9660586"},{"key":"4919_CR10","doi-asserted-by":"crossref","unstructured":"Chan, A.B., Vasconcelos, N.: Bayesian poisson regression for crowd counting. In: 2009 IEEE 12th International Conference on Computer Vision, pp. 545\u2013551 (2009). IEEE","DOI":"10.1109\/ICCV.2009.5459191"},{"key":"4919_CR11","doi-asserted-by":"crossref","unstructured":"Chen, K., Loy, C.C., Gong, S., Xiang, T.: Feature mining for localised crowd counting. In: Bmvc, vol. 1, p. 3 (2012)","DOI":"10.5244\/C.26.21"},{"key":"4919_CR12","doi-asserted-by":"crossref","unstructured":"Sindagi, V.A., Patel, V.M.: Generating high-quality crowd density maps using contextual pyramid cnns. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 1861\u20131870 (2017)","DOI":"10.1109\/ICCV.2017.206"},{"key":"4919_CR13","doi-asserted-by":"crossref","unstructured":"Yang, S.-D., Su, H.-T., Hsu, W.H., Chen, W.-C.: Deccnet: Depth enhanced crowd counting. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision Workshops, pp. 0\u20130 (2019)","DOI":"10.1109\/ICCVW.2019.00553"},{"key":"4919_CR14","doi-asserted-by":"crossref","unstructured":"Zhang, Y., Choi, S., Hong, S.: Spatio-channel attention blocks for cross-modal crowd counting. In: Proceedings of the Asian Conference on Computer Vision, pp. 90\u2013107 (2022)","DOI":"10.1007\/978-3-031-26284-5_2"},{"key":"4919_CR15","unstructured":"Vaswani, A., Shazeer, N., Parmar, N., Uszkoreit, J., Jones, L., Gomez, A.N., Kaiser, \u0141., Polosukhin, I.: Attention is all you need. Advances in neural information processing systems 30 (2017)"},{"key":"4919_CR16","doi-asserted-by":"crossref","unstructured":"Ma, Z., Wei, X., Hong, X., Gong, Y.: Bayesian loss for crowd count estimation with point supervision. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 6142\u20136151 (2019)","DOI":"10.1109\/ICCV.2019.00624"},{"key":"4919_CR17","unstructured":"Kingma, D.P., Ba, J.: Adam: A method for stochastic optimization. arXiv preprint arXiv:1412.6980 (2014)"},{"key":"4919_CR18","doi-asserted-by":"crossref","unstructured":"Cao, X., Wang, Z., Zhao, Y., Su, F.: Scale aggregation network for accurate and efficient crowd counting. In: Proceedings of the European Conference on Computer Vision (ECCV), pp. 734\u2013750 (2018)","DOI":"10.1007\/978-3-030-01228-1_45"},{"key":"4919_CR19","doi-asserted-by":"crossref","unstructured":"Zhang, Q., Chan, A.B.: Wide-area crowd counting via ground-plane density maps and multi-view fusion cnns. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 8297\u20138306 (2019)","DOI":"10.1109\/CVPR.2019.00849"},{"key":"4919_CR20","doi-asserted-by":"crossref","unstructured":"Fan, D.-P., Zhai, Y., Borji, A., Yang, J., Shao, L.: Bbs-net: Rgb-d salient object detection with a bifurcated backbone strategy network. In: European Conference on Computer Vision, pp. 275\u2013292 (2020). Springer","DOI":"10.1007\/978-3-030-58610-2_17"},{"key":"4919_CR21","doi-asserted-by":"crossref","unstructured":"Pang, Y., Zhang, L., Zhao, X., Lu, H.: Hierarchical dynamic filtering network for rgb-d salient object detection. In: Computer Vision\u2013ECCV 2020: 16th European Conference, Glasgow, UK, August 23\u201328, 2020, Proceedings, Part XXV 16, pp. 235\u2013252 (2020). Springer","DOI":"10.1007\/978-3-030-58595-2_15"},{"issue":"9","key":"4919_CR22","first-page":"5761","volume":"44","author":"J Zhang","year":"2021","unstructured":"Zhang, J., Fan, D.-P., Dai, Y., Anwar, S., Saleh, F., Aliakbarian, S., Barnes, N.: Uncertainty inspired rgb-d saliency detection. IEEE Trans. Pattern Anal. Mach. Intell. 44(9), 5761\u20135779 (2021)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"issue":"3","key":"4919_CR23","doi-asserted-by":"publisher","first-page":"601","DOI":"10.1007\/s11760-022-02266-4","volume":"17","author":"S Li","year":"2023","unstructured":"Li, S., Hu, Z., Zhao, M., Bi, S., Sun, Z.: Cross-modal collaborative representation and multi-level supervision for crowd counting. SIViP 17(3), 601\u2013608 (2023)","journal-title":"SIViP"},{"key":"4919_CR24","doi-asserted-by":"publisher","DOI":"10.1016\/j.imavis.2022.104592","volume":"129","author":"S Zhang","year":"2023","unstructured":"Zhang, S., Wang, W., Zhao, W., Wang, L., Li, Q.: A cross-modal crowd counting method combining cnn and cross-modal transformer. Image Vis. Comput. 129, 104592 (2023)","journal-title":"Image Vis. Comput."},{"key":"4919_CR25","doi-asserted-by":"publisher","first-page":"154","DOI":"10.1109\/TMM.2023.3262978","volume":"26","author":"Y Liu","year":"2024","unstructured":"Liu, Y., Cao, G., Shi, B., Hu, Y.: Ccanet: A collaborative cross-modal attention network for rgb-d crowd counting. IEEE Trans. Multimedia 26, 154\u2013165 (2024)","journal-title":"IEEE Trans. Multimedia"},{"issue":"10","key":"4919_CR26","doi-asserted-by":"publisher","first-page":"7279","DOI":"10.1007\/s00371-024-03388-1","volume":"40","author":"L Huang","year":"2024","unstructured":"Huang, L., Kang, W., Chen, G., Zhang, Q., Zhang, J.: Light-sensitive and adaptive fusion network for rgb-t crowd counting. Vis. Comput. 40(10), 7279\u20137292 (2024)","journal-title":"Vis. Comput."},{"key":"4919_CR27","doi-asserted-by":"crossref","unstructured":"Liang, D., Xu, W., Zhu, Y., Zhou, Y.: Focal inverse distance transform maps for crowd localization. IEEE Transactions on Multimedia (2022)","DOI":"10.1109\/TMM.2022.3203870"},{"issue":"12","key":"4919_CR28","doi-asserted-by":"publisher","first-page":"24540","DOI":"10.1109\/TITS.2022.3203385","volume":"23","author":"W Zhou","year":"2022","unstructured":"Zhou, W., Pan, Y., Lei, J., Ye, L., Yu, L.: Defnet: Dual-branch enhanced feature fusion network for rgb-t crowd counting. IEEE Trans. Intell. Transp. Syst. 23(12), 24540\u201324549 (2022)","journal-title":"IEEE Trans. Intell. Transp. Syst."},{"key":"4919_CR29","first-page":"1","volume":"73","author":"Z Xie","year":"2024","unstructured":"Xie, Z., Shao, F., Mu, B., Chen, H., Jiang, Q., Lu, C., Ho, Y.-S.: Bgdfnet: bidirectional gated and dynamic fusion network for rgb-t crowd counting in smart city system. IEEE Trans. Instrum. Meas. 73, 1\u201316 (2024)","journal-title":"IEEE Trans. Instrum. Meas."},{"issue":"19","key":"4919_CR30","doi-asserted-by":"publisher","first-page":"31758","DOI":"10.1109\/JIOT.2024.3420449","volume":"11","author":"B Mu","year":"2024","unstructured":"Mu, B., Shao, F., Xie, Z., Chen, H., Jiang, Q., Ho, Y.-S.: Visual prompt multibranch fusion network for rgb-thermal crowd counting. IEEE Internet Things J. 11(19), 31758\u201331775 (2024)","journal-title":"IEEE Internet Things J."}],"container-title":["Signal, Image and Video Processing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11760-025-04919-6.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11760-025-04919-6\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11760-025-04919-6.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,3]],"date-time":"2025-12-03T00:55:25Z","timestamp":1764723325000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11760-025-04919-6"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,11,10]]},"references-count":30,"journal-issue":{"issue":"16","published-print":{"date-parts":[[2025,12]]}},"alternative-id":["4919"],"URL":"https:\/\/doi.org\/10.1007\/s11760-025-04919-6","relation":{},"ISSN":["1863-1703","1863-1711"],"issn-type":[{"type":"print","value":"1863-1703"},{"type":"electronic","value":"1863-1711"}],"subject":[],"published":{"date-parts":[[2025,11,10]]},"assertion":[{"value":"8 April 2024","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"7 October 2025","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"21 October 2025","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"10 November 2025","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare no competing interests.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Competing interests"}}],"article-number":"1352"}}