{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,8]],"date-time":"2026-06-08T11:50:40Z","timestamp":1780919440088,"version":"3.54.1"},"publisher-location":"Cham","reference-count":65,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031729034","type":"print"},{"value":"9783031729041","type":"electronic"}],"license":[{"start":{"date-parts":[[2024,11,21]],"date-time":"2024-11-21T00:00:00Z","timestamp":1732147200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,11,21]],"date-time":"2024-11-21T00:00:00Z","timestamp":1732147200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-72904-1_14","type":"book-chapter","created":{"date-parts":[[2024,11,20]],"date-time":"2024-11-20T13:28:08Z","timestamp":1732109288000},"page":"231-250","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":10,"title":["Multi-modal Crowd Counting via\u00a0a\u00a0Broker Modality"],"prefix":"10.1007","author":[{"given":"Haoliang","family":"Meng","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Xiaopeng","family":"Hong","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Chenhao","family":"Wang","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Miao","family":"Shang","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Wangmeng","family":"Zuo","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"297","published-online":{"date-parts":[[2024,11,21]]},"reference":[{"key":"14_CR1","unstructured":"Alehdaghi, M., Josi, A., Shamsolmoali, P., Cruz, R.M., Granger, E.: Adaptive generation of privileged intermediate information for visible-infrared person re-identification. arXiv preprint arXiv:2307.03240 (2023)"},{"key":"14_CR2","doi-asserted-by":"crossref","unstructured":"Chen, K., Chen, J.K., Chuang, J., V\u00e1zquez, M., Savarese, S.: Topological planning with transformers for vision-and-language navigation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 11276\u201311286 (2021)","DOI":"10.1109\/CVPR46437.2021.01112"},{"key":"14_CR3","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"275","DOI":"10.1007\/978-3-030-58610-2_17","volume-title":"Computer Vision \u2013 ECCV 2020","author":"D-P Fan","year":"2020","unstructured":"Fan, D.-P., Zhai, Y., Borji, A., Yang, J., Shao, L.: BBS-Net: RGB-D salient object detection with a bifurcated backbone strategy network. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.-M. (eds.) ECCV 2020. LNCS, vol. 12357, pp. 275\u2013292. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58610-2_17"},{"key":"14_CR4","doi-asserted-by":"crossref","unstructured":"Gao, J., Cai, X.F.: Image matching method based on multi-scale corner detection. In: 2017 13th International Conference on Computational Intelligence and Security (CIS), pp. 125\u2013129. IEEE (2017)","DOI":"10.1109\/CIS.2017.00035"},{"key":"14_CR5","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"423","DOI":"10.1007\/978-3-319-19390-8_48","volume-title":"Pattern Recognition and Image Analysis","author":"R Guerrero-G\u00f3mez-Olmedo","year":"2015","unstructured":"Guerrero-G\u00f3mez-Olmedo, R., Torre-Jim\u00e9nez, B., L\u00f3pez-Sastre, R., Maldonado-Basc\u00f3n, S., O\u00f1oro-Rubio, D.: Extremely overlapping vehicle counting. In: Paredes, R., Cardoso, J.S., Pardo, X.M. (eds.) IbPRIA 2015. LNCS, vol. 9117, pp. 423\u2013431. Springer, Cham (2015). https:\/\/doi.org\/10.1007\/978-3-319-19390-8_48"},{"key":"14_CR6","doi-asserted-by":"crossref","unstructured":"Guo, Q., Yuan, P., Huang, X., Ye, Y.: Consistency-constrained RGB-T crowd counting via mutual information maximization. Complex Intell. Syst. 1\u201322 (2024)","DOI":"10.1007\/s40747-024-01427-x"},{"key":"14_CR7","doi-asserted-by":"publisher","unstructured":"Huang, Z., Liu, J., Fan, X., Liu, R., Zhong, W., Luo, Z.: ReCoNet: recurrent correction network for fast and efficient multi-modality image fusion. In: Avidan, S., Brostow, G., Ciss\u00e9, M., Farinella, G.M., Hassner, T. (eds.) Computer Vision \u2013 ECCV 2022. ECCV 2022. LNCS, vol. 13678, pp. 539\u2013555. Springer, Cham (2022). https:\/\/doi.org\/10.1007\/978-3-031-19797-0_31","DOI":"10.1007\/978-3-031-19797-0_31"},{"key":"14_CR8","doi-asserted-by":"crossref","unstructured":"Idrees, H., et al.: Composition loss for counting, density map estimation and localization in dense crowds. In: Proceedings of the European Conference on Computer Vision (ECCV), pp. 532\u2013546 (2018)","DOI":"10.1007\/978-3-030-01216-8_33"},{"issue":"4","key":"14_CR9","doi-asserted-by":"publisher","first-page":"2559","DOI":"10.1109\/TPWRD.2020.3011962","volume":"36","author":"Q Jiang","year":"2020","unstructured":"Jiang, Q., et al.: A contour angle orientation for power equipment infrared and visible image registration. IEEE Trans. Power Deliv. 36(4), 2559\u20132569 (2020)","journal-title":"IEEE Trans. Power Deliv."},{"key":"14_CR10","doi-asserted-by":"crossref","unstructured":"Kong, W., Liu, J., Hong, Y., Li, H., Shen, J.: Cross-modal collaborative feature representation via transformer-based multimodal mixers for RGB-T crowd counting. Expert Syst. Appl. 124483 (2024)","DOI":"10.1016\/j.eswa.2024.124483"},{"key":"14_CR11","doi-asserted-by":"crossref","unstructured":"Li, D., Wei, X., Hong, X., Gong, Y.: Infrared-visible cross-modal person re-identification with an x modality. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol.\u00a034, pp. 4610\u20134617 (2020)","DOI":"10.1609\/aaai.v34i04.5891"},{"key":"14_CR12","doi-asserted-by":"publisher","DOI":"10.1016\/j.knosys.2022.109944","volume":"257","author":"H Li","year":"2022","unstructured":"Li, H., Zhang, S., Kong, W.: Learning the cross-modal discriminative feature representation for RGB-T crowd counting. Knowl.-Based Syst. 257, 109944 (2022)","journal-title":"Knowl.-Based Syst."},{"issue":"1","key":"14_CR13","doi-asserted-by":"publisher","first-page":"306","DOI":"10.1109\/TII.2022.3171352","volume":"19","author":"H Li","year":"2022","unstructured":"Li, H., Zhang, S., Kong, W.: RGB-D crowd counting with cross-modal cycle-attention fusion and fine-coarse supervision. IEEE Trans. Ind. Inf. 19(1), 306\u2013316 (2022)","journal-title":"IEEE Trans. Ind. Inf."},{"key":"14_CR14","doi-asserted-by":"crossref","unstructured":"Li, Y.C.: Dilated convolutional neural networks for understanding the highly congested scenes\/y. li, x. zhang, d. chen. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition.\u2013IEEE, pp. 1091\u20131100 (2018)","DOI":"10.1109\/CVPR.2018.00120"},{"key":"14_CR15","doi-asserted-by":"crossref","unstructured":"Li, Y., Wang, H., Luo, Y.: A comparison of pre-trained vision-and-language models for multimodal representation learning across medical images and reports. In: 2020 IEEE International Conference on Bioinformatics and Biomedicine (BIBM), pp. 1999\u20132004. IEEE (2020)","DOI":"10.1109\/BIBM49941.2020.9313289"},{"issue":"12","key":"14_CR16","doi-asserted-by":"publisher","first-page":"9056","DOI":"10.1109\/TPAMI.2021.3124956","volume":"44","author":"D Lian","year":"2021","unstructured":"Lian, D., Chen, X., Li, J., Luo, W., Gao, S.: Locating and counting heads in crowds with a depth prior. IEEE Trans. Pattern Anal. Mach. Intell. 44(12), 9056\u20139072 (2021)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"14_CR17","doi-asserted-by":"crossref","unstructured":"Lian, D., Li, J., Zheng, J., Luo, W., Gao, S.: Density map regression guided detection network for RGB-D crowd counting and localization. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 1821\u20131830 (2019)","DOI":"10.1109\/CVPR.2019.00192"},{"key":"14_CR18","doi-asserted-by":"crossref","unstructured":"Lin, H., et al.: Direct measure matching for crowd counting. In: The Thirtieth International Joint Conference on Artificial Intelligence (2021)","DOI":"10.24963\/ijcai.2021\/116"},{"key":"14_CR19","doi-asserted-by":"crossref","unstructured":"Lin, H., Ma, Z., Hong, X., Shangguan, Q., Meng, D.: Gramformer: learning crowd counting via graph-modulated transformer. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol.\u00a038, pp. 3395\u20133403 (2024)","DOI":"10.1609\/aaai.v38i4.28126"},{"key":"14_CR20","doi-asserted-by":"crossref","unstructured":"Lin, H., Ma, Z., Hong, X., Wang, Y., Su, Z.: Semi-supervised crowd counting via density agency. In: Proceedings of the 30th ACM International Conference on Multimedia, pp. 1416\u20131426 (2022)","DOI":"10.1145\/3503161.3547867"},{"key":"14_CR21","doi-asserted-by":"crossref","unstructured":"Lin, H., Ma, Z., Ji, R., Wang, Y., Hong, X.: Boosting crowd counting via multifaceted attention. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 19628\u201319637 (2022)","DOI":"10.1109\/CVPR52688.2022.01901"},{"key":"14_CR22","doi-asserted-by":"crossref","unstructured":"Liu, C., Lu, H., Cao, Z., Liu, T.: Point-query quadtree for crowd counting, localization, and more. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 1676\u20131685 (2023)","DOI":"10.1109\/ICCV51070.2023.00161"},{"key":"14_CR23","doi-asserted-by":"crossref","unstructured":"Liu, J., Gao, C., Meng, D., Hauptmann, A.G.: Decidenet: counting varying density crowds through attention guided detection and density estimation. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 5197\u20135206 (2018)","DOI":"10.1109\/CVPR.2018.00545"},{"key":"14_CR24","doi-asserted-by":"crossref","unstructured":"Liu, L., Chen, J., Wu, H., Li, G., Li, C., Lin, L.: Cross-modal collaborative representation learning and a large-scale rgbt benchmark for crowd counting. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 4823\u20134833 (2021)","DOI":"10.1109\/CVPR46437.2021.00479"},{"key":"14_CR25","doi-asserted-by":"crossref","unstructured":"Liu, L., Qiu, Z., Li, G., Liu, S., Ouyang, W., Lin, L.: Crowd counting with deep structured scale integration network. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV) (October 2019)","DOI":"10.1109\/ICCV.2019.00186"},{"key":"14_CR26","doi-asserted-by":"crossref","unstructured":"Liu, L., Wang, H., Li, G., Ouyang, W., Lin, L.: Crowd counting using deep recurrent spatial-aware network. arXiv preprint arXiv:1807.00601 (2018)","DOI":"10.24963\/ijcai.2018\/118"},{"key":"14_CR27","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"242","DOI":"10.1007\/978-3-030-58555-6_15","volume-title":"Computer Vision \u2013 ECCV 2020","author":"Y Liu","year":"2020","unstructured":"Liu, Y., Liu, L., Wang, P., Zhang, P., Lei, Y.: Semi-supervised crowd counting via\u00a0self-training on surrogate tasks. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.-M. (eds.) ECCV 2020. LNCS, vol. 12360, pp. 242\u2013259. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58555-6_15"},{"key":"14_CR28","doi-asserted-by":"crossref","unstructured":"Liu, Y., Cao, G., Shi, B., Hu, Y.: Ccanet: a collaborative cross-modal attention network for RGB-D crowd counting. IEEE Trans. Multimed. (2023)","DOI":"10.1109\/TMM.2023.3262978"},{"key":"14_CR29","unstructured":"Liu, Z., Wu, W., Tan, Y., Zhang, G.: RGB-T multi-modal crowd counting based on transformer. In: The 33rd British Machine Vision Conference 2022 (2022)"},{"issue":"12","key":"14_CR30","doi-asserted-by":"publisher","first-page":"6469","DOI":"10.1109\/TGRS.2015.2441954","volume":"53","author":"J Ma","year":"2015","unstructured":"Ma, J., Zhou, H., Zhao, J., Gao, Y., Jiang, J., Tian, J.: Robust feature matching for remote sensing image registration via locally linear transforming. IEEE Trans. Geosci. Remote Sens. 53(12), 6469\u20136481 (2015)","journal-title":"IEEE Trans. Geosci. Remote Sens."},{"key":"14_CR31","doi-asserted-by":"crossref","unstructured":"Ma, Z., Wei, X., Hong, X., Gong, Y.: Bayesian loss for crowd count estimation with point supervision. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 6142\u20136151 (2019)","DOI":"10.1109\/ICCV.2019.00624"},{"key":"14_CR32","doi-asserted-by":"crossref","unstructured":"Ma, Z., Wei, X., Hong, X., Gong, Y.: Learning scales from points: a scale-aware probabilistic model for crowd counting. In: Proceedings of the 28th ACM International Conference on Multimedia, pp. 220\u2013228 (2020)","DOI":"10.1145\/3394171.3413642"},{"key":"14_CR33","doi-asserted-by":"crossref","unstructured":"Ma, Z., Wei, X., Hong, X., Lin, H., Qiu, Y., Gong, Y.: Learning to count via unbalanced optimal transport. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol.\u00a035, pp. 2319\u20132327 (2021)","DOI":"10.1609\/aaai.v35i3.16332"},{"key":"14_CR34","doi-asserted-by":"publisher","first-page":"6306","DOI":"10.1109\/TIP.2022.3207584","volume":"31","author":"H Mo","year":"2022","unstructured":"Mo, H., et al.: Attention-guided collaborative counting. IEEE Trans. Image Process. 31, 6306\u20136319 (2022)","journal-title":"IEEE Trans. Image Process."},{"key":"14_CR35","doi-asserted-by":"crossref","unstructured":"Mu, B., Shao, F., Xie, Z., Chen, H., Jiang, Q., Ho, Y.S.: Visual prompt multi-branch fusion network for rgb-thermal crowd counting. IEEE Internet Things J. (2024)","DOI":"10.1109\/JIOT.2024.3420449"},{"key":"14_CR36","doi-asserted-by":"crossref","unstructured":"Pan, Y., Zhou, W., Fang, M., Qiang, F.: Graph enhancement and transformer aggregation network for rgb-thermal crowd counting. IEEE Geosci. Remote Sens. Lett. (2024)","DOI":"10.1109\/LGRS.2024.3362820"},{"key":"14_CR37","doi-asserted-by":"publisher","DOI":"10.1016\/j.engappai.2023.106885","volume":"126","author":"Y Pan","year":"2023","unstructured":"Pan, Y., Zhou, W., Qian, X., Mao, S., Yang, R., Yu, L.: CGINet: cross-modality grade interaction network for RGB-T crowd counting. Eng. Appl. Artif. Intell. 126, 106885 (2023)","journal-title":"Eng. Appl. Artif. Intell."},{"key":"14_CR38","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"235","DOI":"10.1007\/978-3-030-58595-2_15","volume-title":"Computer Vision \u2013 ECCV 2020","author":"Y Pang","year":"2020","unstructured":"Pang, Y., Zhang, L., Zhao, X., Lu, H.: Hierarchical dynamic filtering network for RGB-D salient object detection. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.-M. (eds.) ECCV 2020. LNCS, vol. 12370, pp. 235\u2013252. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58595-2_15"},{"key":"14_CR39","doi-asserted-by":"crossref","unstructured":"Peng, T., Li, Q., Zhu, P.: RGB-T crowd counting from drone: a benchmark and mmccn network. In: Proceedings of the Asian Conference on Computer Vision (2020)","DOI":"10.1007\/978-3-030-69544-6_30"},{"key":"14_CR40","doi-asserted-by":"crossref","unstructured":"Ren, S., Du, Y., Lv, J., Han, G., He, S.: Learning from the master: distilling cross-modal advanced knowledge for lip reading. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 13325\u201313333 (2021)","DOI":"10.1109\/CVPR46437.2021.01312"},{"key":"14_CR41","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"234","DOI":"10.1007\/978-3-319-24574-4_28","volume-title":"Medical Image Computing and Computer-Assisted Intervention \u2013 MICCAI 2015","author":"O Ronneberger","year":"2015","unstructured":"Ronneberger, O., Fischer, P., Brox, T.: U-Net: convolutional networks for biomedical image segmentation. In: Navab, N., Hornegger, J., Wells, W.M., Frangi, A.F. (eds.) MICCAI 2015. LNCS, vol. 9351, pp. 234\u2013241. Springer, Cham (2015). https:\/\/doi.org\/10.1007\/978-3-319-24574-4_28"},{"issue":"8","key":"14_CR42","first-page":"2739","volume":"43","author":"DB Sam","year":"2020","unstructured":"Sam, D.B., Peri, S.V., Sundararaman, M.N., Kamath, A., Babu, R.V.: Locate, size, and count: accurately resolving people in dense crowds via detection. IEEE Trans. Pattern Anal. Mach. Intell. 43(8), 2739\u20132751 (2020)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"14_CR43","doi-asserted-by":"crossref","unstructured":"Sindagi, V.A., Patel, V.M.: Multi-level bottom-top and top-bottom feature fusion for crowd counting. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 1002\u20131012 (2019)","DOI":"10.1109\/ICCV.2019.00109"},{"key":"14_CR44","doi-asserted-by":"crossref","unstructured":"Tang, H., Wang, Y., Chau, L.P.: Tafnet: a three-stream adaptive fusion network for RGB-T crowd counting. In: 2022 IEEE International Symposium on Circuits and Systems (ISCAS), pp. 3299\u20133303. IEEE (2022)","DOI":"10.1109\/ISCAS48785.2022.9937583"},{"key":"14_CR45","doi-asserted-by":"publisher","first-page":"2876","DOI":"10.1109\/TIP.2021.3055632","volume":"30","author":"Y Wang","year":"2021","unstructured":"Wang, Y., Hou, J., Hou, X., Chau, L.P.: A self-training approach for point-supervised object detection and counting in crowds. IEEE Trans. Image Process. 30, 2876\u20132887 (2021)","journal-title":"IEEE Trans. Image Process."},{"key":"14_CR46","doi-asserted-by":"crossref","unstructured":"Wang, Z., Wang, Z., Zheng, Y., Chuang, Y.Y., Satoh, S.: Learning to reduce dual-level discrepancy for infrared-visible person re-identification. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 618\u2013626 (2019)","DOI":"10.1109\/CVPR.2019.00071"},{"key":"14_CR47","doi-asserted-by":"crossref","unstructured":"Wei, X., Li, D., Hong, X., Ke, W., Gong, Y.: Co-attentive lifting for infrared-visible person re-identification. In: Proceedings of the 28th ACM International Conference on Multimedia, pp. 1028\u20131037 (2020)","DOI":"10.1145\/3394171.3413933"},{"key":"14_CR48","doi-asserted-by":"crossref","unstructured":"Wu, Z., Liu, L., Zhang, Y., Mao, M., Lin, L., Li, G.: Multimodal crowd counting with mutual attention transformers. In: 2022 IEEE International Conference on Multimedia and Expo (ICME), pp.\u00a01\u20136. IEEE (2022)","DOI":"10.1109\/ICME52920.2022.9859777"},{"issue":"8","key":"14_CR49","doi-asserted-by":"publisher","first-page":"4149","DOI":"10.1109\/TCSVT.2023.3241196","volume":"33","author":"Z Xie","year":"2023","unstructured":"Xie, Z., et al.: Cross-modality double bidirectional interaction and fusion network for RGB-T salient object detection. IEEE Trans. Circuits Syst. Video Technol. 33(8), 4149\u20134163 (2023)","journal-title":"IEEE Trans. Circuits Syst. Video Technol."},{"key":"14_CR50","doi-asserted-by":"crossref","unstructured":"Xie, Z., et al.: Bgdfnet: bidirectional gated and dynamic fusion network for rgb-t crowd counting in smart city system. IEEE Trans. Instrum. Meas. (2024)","DOI":"10.1109\/TIM.2024.3418111"},{"key":"14_CR51","doi-asserted-by":"crossref","unstructured":"Xu, H., Yuan, J., Ma, J.: MURF: mutually reinforcing multi-modal image registration and fusion. IEEE Trans. Pattern Anal. Mach. Intell. (2023)","DOI":"10.1109\/TPAMI.2023.3283682"},{"key":"14_CR52","doi-asserted-by":"publisher","DOI":"10.1016\/j.eswa.2023.122753","volume":"243","author":"X Yang","year":"2024","unstructured":"Yang, X., Zhou, W., Yan, W., Qian, X.: Cagnet: coordinated attention guidance network for rgb-t crowd counting. Expert Syst. Appl. 243, 122753 (2024)","journal-title":"Expert Syst. Appl."},{"key":"14_CR53","doi-asserted-by":"crossref","unstructured":"Yang, Y., Li, G., Wu, Z., Su, L., Huang, Q., Sebe, N.: Reverse perspective network for perspective-aware object counting. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 4374\u20134383 (2020)","DOI":"10.1109\/CVPR42600.2020.00443"},{"key":"14_CR54","doi-asserted-by":"crossref","unstructured":"Yu, L., et al.: Commercemm: large-scale commerce multimodal representation learning with omni retrieval. In: Proceedings of the 28th ACM SIGKDD Conference on Knowledge Discovery and Data Mining, pp. 4433\u20134442 (2022)","DOI":"10.1145\/3534678.3539151"},{"key":"14_CR55","doi-asserted-by":"crossref","unstructured":"Zhang, B., Du, Y., Zhao, Y., Wan, J., Tong, Z.: I-mmccn: improved mmccn for rgb-t crowd counting of drone images. In: 2021 7th IEEE International Conference on Network Intelligence and Digital Content (IC-NIDC), pp. 117\u2013121. IEEE (2021)","DOI":"10.1109\/IC-NIDC54101.2021.9660586"},{"key":"14_CR56","doi-asserted-by":"crossref","unstructured":"Zhang, J., et al.: UC-Net: uncertainty inspired RGB-D saliency detection via conditional variational autoencoders. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 8582\u20138591 (2020)","DOI":"10.1109\/CVPR42600.2020.00861"},{"key":"14_CR57","doi-asserted-by":"crossref","unstructured":"Zhang, Q., Chan, A.B.: Wide-area crowd counting via ground-plane density maps and multi-view fusion CNNs. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 8297\u20138306 (2019)","DOI":"10.1109\/CVPR.2019.00849"},{"key":"14_CR58","doi-asserted-by":"crossref","unstructured":"Zhang, Y., Choi, S., Hong, S.: Spatio-channel attention blocks for cross-modal crowd counting. In: Proceedings of the Asian Conference on Computer Vision, pp. 90\u2013107 (2022)","DOI":"10.1007\/978-3-031-26284-5_2"},{"key":"14_CR59","doi-asserted-by":"crossref","unstructured":"Zhang, Y., Yan, Y., Lu, Y., Wang, H.: Towards a unified middle modality learning for visible-infrared person re-identification. In: Proceedings of the 29th ACM International Conference on Multimedia, pp. 788\u2013796 (2021)","DOI":"10.1145\/3474085.3475250"},{"key":"14_CR60","doi-asserted-by":"crossref","unstructured":"Zhao, W., Xie, S., Zhao, F., He, Y., Lu, H.: Metafusion: infrared and visible image fusion via meta-feature embedding from object detection. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 13955\u201313965 (2023)","DOI":"10.1109\/CVPR52729.2023.01341"},{"key":"14_CR61","doi-asserted-by":"crossref","unstructured":"Zhao, Z., et al.: Cddfuse: correlation-driven dual-branch feature decomposition for multi-modality image fusion. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 5906\u20135916 (2023)","DOI":"10.1109\/CVPR52729.2023.00572"},{"key":"14_CR62","doi-asserted-by":"crossref","unstructured":"Zhao, Z., et al.: DDFM: denoising diffusion model for multi-modality image fusion. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 8082\u20138093 (2023)","DOI":"10.1109\/ICCV51070.2023.00742"},{"issue":"12","key":"14_CR63","doi-asserted-by":"publisher","first-page":"24540","DOI":"10.1109\/TITS.2022.3203385","volume":"23","author":"W Zhou","year":"2022","unstructured":"Zhou, W., Pan, Y., Lei, J., Ye, L., Yu, L.: Defnet: dual-branch enhanced feature fusion network for rgb-t crowd counting. IEEE Trans. Intell. Transp. Syst. 23(12), 24540\u201324549 (2022)","journal-title":"IEEE Trans. Intell. Transp. Syst."},{"key":"14_CR64","doi-asserted-by":"crossref","unstructured":"Zhou, W., Yang, X., Dong, X., Fang, M., Yan, W., Luo, T.: Mjpnet-s*: multistyle joint-perception network with knowledge distillation for drone rgb-thermal crowd density estimation in smart cities. IEEE Internet Things J. (2024)","DOI":"10.1109\/JIOT.2024.3369642"},{"key":"14_CR65","doi-asserted-by":"crossref","unstructured":"Zhou, W., Yang, X., Lei, J., Yan, W., Yu, L.: $${\\rm MC}^{3}{\\rm Net}$$: multimodality cross-guided compensation coordination network for RGB-T crowd counting. IEEE Trans. Intell. Transp. Syst. (2023)","DOI":"10.1109\/TITS.2023.3321328"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2024"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-72904-1_14","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,3,15]],"date-time":"2025-03-15T19:50:47Z","timestamp":1742068247000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-72904-1_14"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,11,21]]},"ISBN":["9783031729034","9783031729041"],"references-count":65,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-72904-1_14","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,11,21]]},"assertion":[{"value":"21 November 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Milan","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 September 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4 October 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2024.ecva.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}