{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,8]],"date-time":"2026-06-08T15:06:27Z","timestamp":1780931187742,"version":"3.54.1"},"reference-count":39,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2026,12,1]],"date-time":"2026-12-01T00:00:00Z","timestamp":1796083200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2026,12,1]],"date-time":"2026-12-01T00:00:00Z","timestamp":1796083200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/legal\/tdmrep-license"},{"start":{"date-parts":[[2026,12,1]],"date-time":"2026-12-01T00:00:00Z","timestamp":1796083200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-017"},{"start":{"date-parts":[[2026,12,1]],"date-time":"2026-12-01T00:00:00Z","timestamp":1796083200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"},{"start":{"date-parts":[[2026,12,1]],"date-time":"2026-12-01T00:00:00Z","timestamp":1796083200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-012"},{"start":{"date-parts":[[2026,12,1]],"date-time":"2026-12-01T00:00:00Z","timestamp":1796083200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2026,12,1]],"date-time":"2026-12-01T00:00:00Z","timestamp":1796083200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-004"}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Pattern Recognition"],"published-print":{"date-parts":[[2026,12]]},"DOI":"10.1016\/j.patcog.2026.114024","type":"journal-article","created":{"date-parts":[[2026,5,23]],"date-time":"2026-05-23T15:07:22Z","timestamp":1779548842000},"page":"114024","update-policy":"https:\/\/doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":0,"special_numbering":"PA","title":["A Guided Fusion Network based on Cross-Scale Semantic Alignment for multi-spectral object detection"],"prefix":"10.1016","volume":"180","author":[{"given":"Hongtao","family":"Wu","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Jing","family":"Rong","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9486-7144","authenticated-orcid":false,"given":"Qiuzhan","family":"Zhou","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Di","family":"Yang","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Weiqi","family":"Wang","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"78","reference":[{"issue":"3","key":"10.1016\/j.patcog.2026.114024_b1","doi-asserted-by":"crossref","first-page":"257","DOI":"10.1109\/JPROC.2023.3238524","article-title":"Object detection in 20 years: A survey","volume":"111","author":"Zou","year":"2023","journal-title":"Proc. IEEE"},{"key":"10.1016\/j.patcog.2026.114024_b2","doi-asserted-by":"crossref","unstructured":"P. Cong, X. Zhu, F. Qiao, Y. Ren, X. Peng, Y. Hou, L. Xu, R. Yang, D. Manocha, Y. Ma, STCrowd: A Multimodal Dataset for Pedestrian Perception in Crowded Scenes, in: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 2022, pp. 19608\u201319617.","DOI":"10.1109\/CVPR52688.2022.01899"},{"key":"10.1016\/j.patcog.2026.114024_b3","doi-asserted-by":"crossref","first-page":"477","DOI":"10.1016\/j.inffus.2022.10.034","article-title":"DIVFusion: Darkness-Free infrared and visible image fusion","volume":"91","author":"Tang","year":"2023","journal-title":"Inf. Fusion"},{"issue":"2","key":"10.1016\/j.patcog.2026.114024_b4","doi-asserted-by":"crossref","first-page":"1523","DOI":"10.1109\/TIV.2023.3240287","article-title":"Bridging the view disparity between radar and camera features for Multi-Modal fusion 3D object detection","volume":"8","author":"Zhou","year":"2023","journal-title":"IEEE Trans. Intell. Veh."},{"issue":"12","key":"10.1016\/j.patcog.2026.114024_b5","doi-asserted-by":"crossref","first-page":"10812","DOI":"10.1109\/TNNLS.2022.3171553","article-title":"3D-DFM: Anchor-Free multimodal 3-D object detection with dynamic fusion module for autonomous driving","volume":"34","author":"Lin","year":"2022","journal-title":"IEEE Trans. Neural Netw. Learn. Syst."},{"key":"10.1016\/j.patcog.2026.114024_b6","doi-asserted-by":"crossref","DOI":"10.1016\/j.patcog.2021.108350","article-title":"Multi-Frame based adversarial learning approach for video surveillance","volume":"122","author":"Patil","year":"2022","journal-title":"Pattern Recognit."},{"key":"10.1016\/j.patcog.2026.114024_b7","doi-asserted-by":"crossref","unstructured":"W. Shu, J. Wan, K.C. Tan, S. Kwong, A.B. Chan, Crowd Counting in the Frequency Domain, in: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 2022, pp. 19618\u201319627.","DOI":"10.1109\/CVPR52688.2022.01900"},{"issue":"9","key":"10.1016\/j.patcog.2026.114024_b8","doi-asserted-by":"crossref","first-page":"15940","DOI":"10.1109\/TITS.2022.3146575","article-title":"Spatio-Contextual deep network-based multimodal Pedestrian detection for autonomous driving","volume":"23","author":"Dasgupta","year":"2022","journal-title":"IEEE Trans. Intell. Transp. Syst."},{"key":"10.1016\/j.patcog.2026.114024_b9","doi-asserted-by":"crossref","unstructured":"J.U. Kim, S. Park, Y.M. Ro, Towards Versatile Pedestrian Detector with Multisensory-Matching and Multispectral Recalling Memory, in: Proceedings of the AAAI Conference on Artificial Intelligence, Vol. 36, 2022, pp. 1157\u20131165.","DOI":"10.1609\/aaai.v36i1.20001"},{"key":"10.1016\/j.patcog.2026.114024_b10","doi-asserted-by":"crossref","first-page":"3420","DOI":"10.1109\/TMM.2022.3160589","article-title":"Confidence-Aware fusion using Dempster-Shafer theory for multispectral pedestrian detection","volume":"25","author":"Li","year":"2022","journal-title":"IEEE Trans. Multimed."},{"key":"10.1016\/j.patcog.2026.114024_b11","series-title":"European Conference on Computer Vision","first-page":"787","article-title":"Improving multispectral pedestrian detection by addressing modality imbalance problems","author":"Zhou","year":"2020"},{"key":"10.1016\/j.patcog.2026.114024_b12","doi-asserted-by":"crossref","unstructured":"Y. Cao, J. Bin, J. Hamari, E. Blasch, Z. Liu, Multimodal Object Detection by Channel Switching and Spatial Attention, in: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 2023, pp. 403\u2013411.","DOI":"10.1109\/CVPRW59228.2023.00046"},{"key":"10.1016\/j.patcog.2026.114024_b13","doi-asserted-by":"crossref","DOI":"10.1016\/j.patcog.2023.109913","article-title":"ICAFusion: Iterative cross-attention guided feature fusion for multispectral object detection","volume":"145","author":"Shen","year":"2024","journal-title":"Pattern Recognit."},{"key":"10.1016\/j.patcog.2026.114024_b14","doi-asserted-by":"crossref","first-page":"144","DOI":"10.1016\/j.patrec.2024.02.012","article-title":"CrossFormer: Cross-guided attention for Multi-Modal object detection","volume":"179","author":"Lee","year":"2024","journal-title":"Pattern Recognit. Lett."},{"issue":"3","key":"10.1016\/j.patcog.2026.114024_b15","doi-asserted-by":"crossref","first-page":"4145","DOI":"10.1109\/TNNLS.2021.3105143","article-title":"Weakly aligned feature fusion for multimodal object detection","volume":"36","author":"Zhang","year":"2025","journal-title":"IEEE Trans. Neural Netw. Learn. Syst."},{"key":"10.1016\/j.patcog.2026.114024_b16","doi-asserted-by":"crossref","unstructured":"H. Zhang, E. Fromont, S. Lef\u00e8vre, B. Avignon, Guided Attentive Feature Fusion for Multispectral Pedestrian Detection, in: Proceedings of the IEEE\/CVF Winter Conference on Applications of Computer Vision, 2021, pp. 72\u201380.","DOI":"10.1109\/WACV48630.2021.00012"},{"key":"10.1016\/j.patcog.2026.114024_b17","series-title":"European Conference on Computer Vision","first-page":"509","article-title":"Translation, scale and rotation: Cross-Modal alignment meets RGB-Infrared vehicle detection","author":"Yuan","year":"2022"},{"key":"10.1016\/j.patcog.2026.114024_b18","series-title":"An image is worth 16x16 words: Transformers for image recognition at scale","author":"Dosovitskiy","year":"2020"},{"key":"10.1016\/j.patcog.2026.114024_b19","series-title":"Cross-Modality fusion transformer for multispectral object detection","author":"Qingyun","year":"2021"},{"issue":"9","key":"10.1016\/j.patcog.2026.114024_b20","doi-asserted-by":"crossref","first-page":"9984","DOI":"10.1109\/TITS.2023.3266487","article-title":"Multi-Modal feature pyramid transformer for RGB-Infrared object detection","volume":"24","author":"Zhu","year":"2023","journal-title":"IEEE Trans. Intell. Transp. Syst."},{"key":"10.1016\/j.patcog.2026.114024_b21","doi-asserted-by":"crossref","first-page":"6449","DOI":"10.1109\/TMM.2024.3350926","article-title":"Cross-Modality Proposal-Guided feature mining for unregistered RGB-Thermal pedestrian detection","volume":"26","author":"Tian","year":"2024","journal-title":"IEEE Trans. Multimed."},{"key":"10.1016\/j.patcog.2026.114024_b22","series-title":"YOLOv4: Optimal speed and accuracy of object detection","author":"Bochkovskiy","year":"2020"},{"key":"10.1016\/j.patcog.2026.114024_b23","doi-asserted-by":"crossref","first-page":"161","DOI":"10.1016\/j.patcog.2018.08.005","article-title":"Illumination-Aware faster R-CNN for robust multispectral pedestrian detection","volume":"85","author":"Li","year":"2019","journal-title":"Pattern Recognit."},{"key":"10.1016\/j.patcog.2026.114024_b24","series-title":"Multispectral deep neural networks for pedestrian detection","author":"Liu","year":"2016"},{"key":"10.1016\/j.patcog.2026.114024_b25","series-title":"Deformable DETR: Deformable transformers for End-to-End object detection","author":"Zhu","year":"2020"},{"issue":"6","key":"10.1016\/j.patcog.2026.114024_b26","doi-asserted-by":"crossref","first-page":"820","DOI":"10.3390\/s16060820","article-title":"Pedestrian detection at Day\/Night time with visible and FIR cameras: A comparison","volume":"16","author":"Gonz\u00e1lez","year":"2016","journal-title":"Sensors"},{"key":"10.1016\/j.patcog.2026.114024_b27","doi-asserted-by":"crossref","unstructured":"X. Jia, C. Zhu, M. Li, W. Tang, W. Zhou, LLVIP: A Visible-Infrared Paired Dataset for Low-Light Vision, in: Proceedings of the IEEE\/CVF International Conference on Computer Vision, 2021, pp. 3496\u20133504.","DOI":"10.1109\/ICCVW54120.2021.00389"},{"key":"10.1016\/j.patcog.2026.114024_b28","series-title":"2020 IEEE International Conference on Image Processing","first-page":"276","article-title":"Multispectral fusion for object detection with cyclic Fuse-and-Refine blocks","author":"Zhang","year":"2020"},{"issue":"4","key":"10.1016\/j.patcog.2026.114024_b29","doi-asserted-by":"crossref","first-page":"743","DOI":"10.1109\/TPAMI.2011.155","article-title":"Pedestrian detection: An evaluation of the state of the art","volume":"34","author":"Dollar","year":"2011","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"10.1016\/j.patcog.2026.114024_b30","doi-asserted-by":"crossref","unstructured":"L. Zhang, X. Zhu, X. Chen, X. Yang, Z. Lei, Z. Liu, Weakly Aligned Cross-Modal Learning for Multispectral Pedestrian Detection, in: Proceedings of the IEEE\/CVF International Conference on Computer Vision, 2019, pp. 5127\u20135137.","DOI":"10.1109\/ICCV.2019.00523"},{"issue":"4","key":"10.1016\/j.patcog.2026.114024_b31","doi-asserted-by":"crossref","first-page":"7846","DOI":"10.1109\/LRA.2021.3099870","article-title":"MLPD: Multi-label pedestrian detector in multispectral domain","volume":"6","author":"Kim","year":"2021","journal-title":"IEEE Robot. Autom. Lett."},{"key":"10.1016\/j.patcog.2026.114024_b32","doi-asserted-by":"crossref","first-page":"852","DOI":"10.1109\/TMM.2023.3272471","article-title":"Multiscale cross-modal homogeneity enhancement and confidence-aware fusion for multispectral pedestrian detection","volume":"26","author":"Li","year":"2023","journal-title":"IEEE Trans. Multimed."},{"key":"10.1016\/j.patcog.2026.114024_b33","article-title":"Deformle cross-attention transformer for weakly aligned RGB-T Pedestrian detection","author":"Hu","year":"2025","journal-title":"IEEE Trans. Multimed."},{"key":"10.1016\/j.patcog.2026.114024_b34","doi-asserted-by":"crossref","DOI":"10.1016\/j.patcog.2022.108786","article-title":"Cross-Modality attentive feature fusion for object detection in multispectral remote sensing imagery","volume":"130","author":"Qingyun","year":"2022","journal-title":"Pattern Recognit."},{"issue":"10","key":"10.1016\/j.patcog.2026.114024_b35","doi-asserted-by":"crossref","first-page":"6700","DOI":"10.1109\/TCSVT.2022.3168279","article-title":"Drone-Based RGB-infrared cross-modality vehicle detection via Uncertainty-Aware learning","volume":"32","author":"Sun","year":"2022","journal-title":"IEEE Trans. Circuits Syst. Video Technol."},{"key":"10.1016\/j.patcog.2026.114024_b36","article-title":"Multidimensional fusion network for multispectral object detection","author":"Yang","year":"2024","journal-title":"IEEE Trans. Circuits Syst. Video Technol."},{"key":"10.1016\/j.patcog.2026.114024_b37","doi-asserted-by":"crossref","unstructured":"M. Yuan, B. Cui, T. Zhao, J. Wang, S. Fu, X. Yang, X. Wei, UniRGB-IR: A Unified Framework for Visible-Infrared Semantic Tasks via Adapter Tuning, in: Proceedings of the ACM International Conference on Multimedia, 2025, pp. 2409\u20132418.","DOI":"10.1145\/3746027.3754806"},{"key":"10.1016\/j.patcog.2026.114024_b38","series-title":"Removal and selection: Improving rgb-infrared object detection via coarse-to-fine fusion","author":"Zhao","year":"2024"},{"key":"10.1016\/j.patcog.2026.114024_b39","article-title":"M2FNet: Multi-Modal fusion network for object detection from visible and thermal infrared images","volume":"130","author":"Jiang","year":"2024","journal-title":"Int. J. Appl. Earth Obs. Geoinf."}],"container-title":["Pattern Recognition"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0031320326009891?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0031320326009891?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2026,6,8]],"date-time":"2026-06-08T14:51:58Z","timestamp":1780930318000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S0031320326009891"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,12]]},"references-count":39,"alternative-id":["S0031320326009891"],"URL":"https:\/\/doi.org\/10.1016\/j.patcog.2026.114024","relation":{},"ISSN":["0031-3203"],"issn-type":[{"value":"0031-3203","type":"print"}],"subject":[],"published":{"date-parts":[[2026,12]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"A Guided Fusion Network based on Cross-Scale Semantic Alignment for multi-spectral object detection","name":"articletitle","label":"Article Title"},{"value":"Pattern Recognition","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.patcog.2026.114024","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2026 Elsevier Ltd. All rights are reserved, including those for text and data mining, AI training, and similar technologies.","name":"copyright","label":"Copyright"}],"article-number":"114024"}}