{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,6]],"date-time":"2026-02-06T16:14:19Z","timestamp":1770394459397,"version":"3.49.0"},"publisher-location":"Singapore","reference-count":43,"publisher":"Springer Nature Singapore","isbn-type":[{"value":"9789819557578","type":"print"},{"value":"9789819557585","type":"electronic"}],"license":[{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2026]]},"DOI":"10.1007\/978-981-95-5758-5_2","type":"book-chapter","created":{"date-parts":[[2026,2,6]],"date-time":"2026-02-06T05:00:09Z","timestamp":1770354009000},"page":"15-27","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Sparse Self-attention-Guided Network for\u00a0Multispectral Object Detection"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0009-0006-9060-8547","authenticated-orcid":false,"given":"Haidong","family":"Xiao","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0913-0130","authenticated-orcid":false,"given":"Zhigang","family":"Ren","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0122-6864","authenticated-orcid":false,"given":"Shengze","family":"Cai","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2026,2,7]]},"reference":[{"key":"2_CR1","doi-asserted-by":"publisher","first-page":"93","DOI":"10.1016\/j.patrec.2023.03.004","volume":"168","author":"A Amziane","year":"2023","unstructured":"Amziane, A., Losson, O., Mathon, B., Macaire, L.: MSFA-Net: a convolutional neural network based on multispectral filter arrays for texture feature extraction. Pattern Recogn. Lett. 168, 93\u201399 (2023)","journal-title":"Pattern Recogn. Lett."},{"issue":"6","key":"2_CR2","doi-asserted-by":"publisher","first-page":"2934","DOI":"10.3390\/s23062934","volume":"23","author":"C Bao","year":"2023","unstructured":"Bao, C., Cao, J., Hao, Q., Cheng, Y., Ning, Y., Zhao, T.: Dual-YOLO architecture from infrared and visible images for object detection. Sensors 23(6), 2934 (2023)","journal-title":"Sensors"},{"key":"2_CR3","doi-asserted-by":"crossref","unstructured":"Bay\u00f3n-Guti\u00e9rrez, M., et al.: TEDNet: twin encoder decoder neural network for 2d camera and lidar road detection. Logic J. IGPL, jzae048 (2024)","DOI":"10.1093\/jigpal\/jzae048"},{"key":"2_CR4","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2023.126804","volume":"560","author":"N Bustos","year":"2023","unstructured":"Bustos, N., Mashhadi, M., Lai-Yuen, S.K., Sarkar, S., Das, T.K.: A systematic literature review on object detection using near infrared and thermal images. Neurocomputing 560, 126804 (2023)","journal-title":"Neurocomputing"},{"key":"2_CR5","doi-asserted-by":"crossref","unstructured":"Chen, Y., et al.: DeYOLO: dual-feature-enhancement yolo for cross-modality object detection. In: International Conference on Pattern Recognition, pp. 236\u2013252. Springer (2025)","DOI":"10.1007\/978-3-031-78447-7_16"},{"key":"2_CR6","doi-asserted-by":"crossref","unstructured":"Chen, Y., Gunraj, H., Zeng, E.Z., Meyer, R., Gilles, M., Wong, A.: MMRNet: improving reliability for multimodal object detection and segmentation for bin picking via multimodal redundancy. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 68\u201377 (2023)","DOI":"10.1109\/CVPRW59228.2023.00012"},{"key":"2_CR7","doi-asserted-by":"crossref","unstructured":"Fu, H., et al.: LRAF-Net: long-range attention fusion network for visible\u2013infrared object detection. IEEE Trans. Neural Netw. Learn. Syst. 35(10), 13232\u201313245 (2023)","DOI":"10.1109\/TNNLS.2023.3266452"},{"key":"2_CR8","doi-asserted-by":"crossref","unstructured":"Gallagher, J.E., Oughton, E.J.: Surveying you only look once (YOLO) multispectral object detection advancements, applications and challenges. IEEE Access 13 (2025)","DOI":"10.1109\/ACCESS.2025.3526458"},{"key":"2_CR9","doi-asserted-by":"crossref","unstructured":"Girshick, R.: Fast R-CNN. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 1440\u20131448 (2015)","DOI":"10.1109\/ICCV.2015.169"},{"key":"2_CR10","doi-asserted-by":"publisher","DOI":"10.1016\/j.displa.2023.102484","volume":"79","author":"M Hu","year":"2023","unstructured":"Hu, M., Wu, Y., Yang, Y., Fan, J., Jing, B.: DAGL-Faster: domain adaptive faster R-CNN for vehicle object detection in rainy and foggy weather conditions. Displays 79, 102484 (2023)","journal-title":"Displays"},{"issue":"13","key":"2_CR11","doi-asserted-by":"publisher","first-page":"3214","DOI":"10.3390\/rs15133214","volume":"15","author":"S Hu","year":"2023","unstructured":"Hu, S., Zhao, F., Lu, H., Deng, Y., Du, J., Shen, X.: Improving YOLOV7-tiny for infrared and visible light image object detection on drones. Remote Sensing 15(13), 3214 (2023)","journal-title":"Remote Sensing"},{"key":"2_CR12","volume":"130","author":"C Jiang","year":"2024","unstructured":"Jiang, C., et al.: M2FNet: multi-modal fusion network for object detection from visible and thermal infrared images. Int. J. Appl. Earth Obs. Geoinf. 130, 103918 (2024)","journal-title":"Int. J. Appl. Earth Obs. Geoinf."},{"key":"2_CR13","first-page":"1","volume":"1","author":"D Jiang","year":"2011","unstructured":"Jiang, D., Zhuang, D., Huang, Y., Fu, J.: Survey of multispectral image fusion techniques in remote sensing applications. Image Fus. Appl. 1, 1\u201322 (2011)","journal-title":"Image Fus. Appl."},{"key":"2_CR14","doi-asserted-by":"publisher","first-page":"8906","DOI":"10.1109\/TMM.2023.3243616","volume":"25","author":"J Jiao","year":"2023","unstructured":"Jiao, J., et al.: DilateFormer: multi-scale dilated transformer for visual recognition. IEEE Trans. Multimedia 25, 8906\u20138919 (2023)","journal-title":"IEEE Trans. Multimedia"},{"key":"2_CR15","doi-asserted-by":"publisher","DOI":"10.1016\/j.dsp.2022.103812","volume":"132","author":"R Kaur","year":"2023","unstructured":"Kaur, R., Singh, S.: A comprehensive review of object detection with deep learning. Digit. Signal Process. 132, 103812 (2023)","journal-title":"Digit. Signal Process."},{"key":"2_CR16","doi-asserted-by":"publisher","first-page":"72","DOI":"10.1016\/j.inffus.2021.02.023","volume":"73","author":"H Li","year":"2021","unstructured":"Li, H., Wu, X.J., Kittler, J.: RFN-Nest: an end-to-end residual fusion network for infrared and visible images. Info. Fusion 73, 72\u201386 (2021)","journal-title":"Info. Fusion"},{"key":"2_CR17","doi-asserted-by":"publisher","first-page":"8432","DOI":"10.1109\/TMM.2023.3237155","volume":"25","author":"T Liang","year":"2023","unstructured":"Liang, T., Jin, Y., Liu, W., Li, Y.: Cross-modality transformer with modality mining for visible-infrared person re-identification. IEEE Trans. Multimedia 25, 8432\u20138444 (2023)","journal-title":"IEEE Trans. Multimedia"},{"key":"2_CR18","doi-asserted-by":"crossref","unstructured":"Liu, J., et al.: Target-aware dual adversarial learning and a multi-scenario multi-modality benchmark to fuse infrared and visible for object detection. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 5802\u20135811 (2022)","DOI":"10.1109\/CVPR52688.2022.00571"},{"issue":"1","key":"2_CR19","doi-asserted-by":"publisher","first-page":"105","DOI":"10.1109\/TCSVT.2021.3056725","volume":"32","author":"J Liu","year":"2021","unstructured":"Liu, J., Fan, X., Jiang, J., Liu, R., Luo, Z.: Learning a deep multi-scale feature ensemble and an edge-attention guidance for image fusion. IEEE Trans. Circuits Syst. Video Technol. 32(1), 105\u2013119 (2021)","journal-title":"IEEE Trans. Circuits Syst. Video Technol."},{"key":"2_CR20","doi-asserted-by":"crossref","unstructured":"Liu, J., Shang, J., Liu, R., Fan, X.: Halder: hierarchical attention-guided learning with detail-refinement for multi-exposure image fusion. In: 2021 IEEE International Conference on Multimedia and Expo (ICME) (2021)","DOI":"10.1109\/ICME51207.2021.9428192"},{"key":"2_CR21","doi-asserted-by":"publisher","first-page":"11991","DOI":"10.1109\/ACCESS.2023.3242604","volume":"11","author":"A Naveed","year":"2023","unstructured":"Naveed, A., et al.: Saliency-based semantic weeds detection and classification using UAV multispectral imaging. IEEE Access 11, 11991\u201312003 (2023)","journal-title":"IEEE Access"},{"issue":"4","key":"2_CR22","doi-asserted-by":"publisher","first-page":"1123","DOI":"10.1007\/s11760-022-02319-8","volume":"17","author":"C Nie","year":"2023","unstructured":"Nie, C., et al.: Transnational image object detection datasets from nighttime driving. SIViP 17(4), 1123\u20131131 (2023)","journal-title":"SIViP"},{"key":"2_CR23","unstructured":"Paszke, A.: PYTorch: an imperative style, high-performance deep learning library. arXiv preprint arXiv:1912.01703 (2019)"},{"key":"2_CR24","unstructured":"Qingyun, F., Dapeng, H., Zhaokui, W.: Cross-modality fusion transformer for multispectral object detection. arXiv preprint arXiv:2111.00273 (2021)"},{"key":"2_CR25","doi-asserted-by":"crossref","unstructured":"Redmon, J., Divvala, S., Girshick, R., Farhadi, A.: You only look once: unified, real-time object detection. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 779\u2013788 (2016)","DOI":"10.1109\/CVPR.2016.91"},{"key":"2_CR26","unstructured":"Redmon, J., Farhadi, A.: YOLOv3: an incremental improvement. arXiv preprint arXiv:1804.02767 (2018)"},{"key":"2_CR27","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2023.109913","volume":"145","author":"J Shen","year":"2024","unstructured":"Shen, J., Chen, Y., Liu, Y., Zuo, X., Fan, H., Yang, W.: ICAFusion: iterative cross-attention guided feature fusion for multispectral object detection. Pattern Recogn. 145, 109913 (2024)","journal-title":"Pattern Recogn."},{"key":"2_CR28","unstructured":"Vaswani, A., et al.: Attention is all you need. Adv .Neural Info. Process. Syst. 30 (2017)"},{"key":"2_CR29","doi-asserted-by":"publisher","DOI":"10.1016\/j.inffus.2023.101828","volume":"98","author":"D Wang","year":"2023","unstructured":"Wang, D., Liu, J., Liu, R., Fan, X.: An interactively reinforced paradigm for joint infrared-visible image fusion and saliency object detection. Info. Fusion 98, 101828 (2023)","journal-title":"Info. Fusion"},{"key":"2_CR30","first-page":"1","volume":"62","author":"H Wang","year":"2024","unstructured":"Wang, H.: Cross-modal oriented object detection of UAV aerial images based on image feature. IEEE Trans. Geosci. Remote Sens. 62, 1\u201321 (2024)","journal-title":"IEEE Trans. Geosci. Remote Sens."},{"key":"2_CR31","doi-asserted-by":"publisher","DOI":"10.1016\/j.infrared.2024.105489","volume":"141","author":"P Wang","year":"2024","unstructured":"Wang, P., Wu, J., Fang, A., Zhu, Z., Wang, C.: Multi-spectral image fusion for moving object detection. Infrared Phys. Technol. 141, 105489 (2024)","journal-title":"Infrared Phys. Technol."},{"key":"2_CR32","doi-asserted-by":"crossref","unstructured":"Wu, X., Zhang, B., Wan, W.: Adaptive cross-modal fusion for robust multi-modal object detection in infrared\u2013visible imaging. Optical Review, pp. 1\u201316 (2025)","DOI":"10.1007\/s10043-025-00977-w"},{"key":"2_CR33","doi-asserted-by":"publisher","DOI":"10.1016\/j.eswa.2024.123233","volume":"247","author":"Z Wu","year":"2024","unstructured":"Wu, Z., Li, H., Wang, Y., Long, B.: MCCANet: a multispectral class-constraint attentional neural network for object detection in mining scenes. Expert Syst. Appl. 247, 123233 (2024)","journal-title":"Expert Syst. Appl."},{"key":"2_CR34","doi-asserted-by":"crossref","unstructured":"Xu, H., Ma, J., Jiang, J., Guo, X., Ling, H.: U2Fusion: a unified unsupervised image fusion network. IEEE Trans. Pattern Anal. Mach. Intell. 44(1) (2020)","DOI":"10.1109\/TPAMI.2020.3012548"},{"key":"2_CR35","doi-asserted-by":"publisher","DOI":"10.1016\/j.inffus.2024.102246","volume":"105","author":"M Yuan","year":"2024","unstructured":"Yuan, M., Shi, X., Wang, N., Wang, Y., Wei, X.: Improving RGB-infrared object detection with cascade alignment-guided transformer. Info. Fusion 105, 102246 (2024)","journal-title":"Info. Fusion"},{"issue":"21","key":"2_CR36","doi-asserted-by":"publisher","first-page":"3966","DOI":"10.3390\/math10213966","volume":"10","author":"JS Yun","year":"2022","unstructured":"Yun, J.S., Park, S.H., Yoo, S.B.: Infusion-Net: inter-and intra-weighted cross-fusion network for multispectral object detection. Mathematics 10(21), 3966 (2022)","journal-title":"Mathematics"},{"key":"2_CR37","doi-asserted-by":"crossref","unstructured":"Zhang, H., Fromont, E., Lefevre, S., Avignon, B.: Multispectral fusion for object detection with cyclic fuse-and-refine blocks. In: 2020 IEEE International Conference on Image Processing (ICIP), pp. 276\u2013280. IEEE (2020)","DOI":"10.1109\/ICIP40778.2020.9191080"},{"key":"2_CR38","first-page":"52296","volume":"37","author":"J Zhang","year":"2024","unstructured":"Zhang, J., et al.: E2E-MFD: towards end-to-end synchronous multimodal fusion detection. Adv. Neural. Inf. Process. Syst. 37, 52296\u201352322 (2024)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"2_CR39","doi-asserted-by":"publisher","first-page":"8988","DOI":"10.1109\/TMM.2023.3243659","volume":"25","author":"J Zhang","year":"2023","unstructured":"Zhang, J., et al.: Transformer based conditional GAN for multimodal image fusion. IEEE Trans. Multimedia 25, 8988\u20139001 (2023)","journal-title":"IEEE Trans. Multimedia"},{"key":"2_CR40","unstructured":"Zhao, G., Lin, J., Zhang, Z., Ren, X., Su, Q., Sun, X.: Explicit sparse transformer: Concentrated attention through explicit selection. arXiv preprint arXiv:1912.11637 (2019)"},{"key":"2_CR41","doi-asserted-by":"crossref","unstructured":"Zhao, W., Xie, S., Zhao, F., He, Y., Lu, H.: MetaFusion: infrared and visible image fusion via meta-feature embedding from object detection. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 13955\u201313965 (2023)","DOI":"10.1109\/CVPR52729.2023.01341"},{"issue":"4","key":"2_CR42","doi-asserted-by":"publisher","first-page":"99","DOI":"10.1007\/s10462-024-10721-6","volume":"57","author":"X Zhao","year":"2024","unstructured":"Zhao, X., Wang, L., Zhang, Y., Han, X., Deveci, M., Parmar, M.: A review of convolutional neural networks in computer vision. Artif. Intell. Rev. 57(4), 99 (2024)","journal-title":"Artif. Intell. Rev."},{"issue":"3","key":"2_CR43","doi-asserted-by":"publisher","first-page":"257","DOI":"10.1109\/JPROC.2023.3238524","volume":"111","author":"Z Zou","year":"2023","unstructured":"Zou, Z., Chen, K., Shi, Z., Guo, Y., Ye, J.: Object detection in 20 years: a survey. Proc. IEEE 111(3), 257\u2013276 (2023)","journal-title":"Proc. IEEE"}],"container-title":["Lecture Notes in Computer Science","Pattern Recognition and Computer Vision"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-95-5758-5_2","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,2,6]],"date-time":"2026-02-06T05:00:15Z","timestamp":1770354015000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-95-5758-5_2"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026]]},"ISBN":["9789819557578","9789819557585"],"references-count":43,"URL":"https:\/\/doi.org\/10.1007\/978-981-95-5758-5_2","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026]]},"assertion":[{"value":"7 February 2026","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"PRCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Chinese Conference on Pattern Recognition and Computer Vision  (PRCV)","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Shanghai","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"15 October 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18 October 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"8","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ccprcv2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/2025.prcv.cn\/index.asp","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}