{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,20]],"date-time":"2026-03-20T20:56:36Z","timestamp":1774040196185,"version":"3.50.1"},"reference-count":48,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2026,9,1]],"date-time":"2026-09-01T00:00:00Z","timestamp":1788220800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2026,9,1]],"date-time":"2026-09-01T00:00:00Z","timestamp":1788220800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/legal\/tdmrep-license"},{"start":{"date-parts":[[2026,9,1]],"date-time":"2026-09-01T00:00:00Z","timestamp":1788220800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-017"},{"start":{"date-parts":[[2026,9,1]],"date-time":"2026-09-01T00:00:00Z","timestamp":1788220800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"},{"start":{"date-parts":[[2026,9,1]],"date-time":"2026-09-01T00:00:00Z","timestamp":1788220800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-012"},{"start":{"date-parts":[[2026,9,1]],"date-time":"2026-09-01T00:00:00Z","timestamp":1788220800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2026,9,1]],"date-time":"2026-09-01T00:00:00Z","timestamp":1788220800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-004"}],"funder":[{"DOI":"10.13039\/501100017596","name":"Natural Science Basic Research Program of Shaanxi Province","doi-asserted-by":"publisher","award":["2024JCYBQN-0719"],"award-info":[{"award-number":["2024JCYBQN-0719"]}],"id":[{"id":"10.13039\/501100017596","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100017596","name":"Natural Science Basic Research Program of Shaanxi Province","doi-asserted-by":"publisher","award":["2024JC-DXWT-07"],"award-info":[{"award-number":["2024JC-DXWT-07"]}],"id":[{"id":"10.13039\/501100017596","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100012166","name":"National Key Research and Development Program of China","doi-asserted-by":"publisher","award":["2023YFC3209305"],"award-info":[{"award-number":["2023YFC3209305"]}],"id":[{"id":"10.13039\/501100012166","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100012166","name":"National Key Research and Development Program of China","doi-asserted-by":"publisher","award":["2023YFC3209304"],"award-info":[{"award-number":["2023YFC3209304"]}],"id":[{"id":"10.13039\/501100012166","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100012166","name":"National Key Research and Development Program of China","doi-asserted-by":"publisher","award":["2024YFE0213600"],"award-info":[{"award-number":["2024YFE0213600"]}],"id":[{"id":"10.13039\/501100012166","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62476220"],"award-info":[{"award-number":["62476220"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100021171","name":"Basic and Applied Basic Research Foundation of Guangdong Province","doi-asserted-by":"publisher","award":["2024A1515030186"],"award-info":[{"award-number":["2024A1515030186"]}],"id":[{"id":"10.13039\/501100021171","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Pattern Recognition"],"published-print":{"date-parts":[[2026,9]]},"DOI":"10.1016\/j.patcog.2026.113350","type":"journal-article","created":{"date-parts":[[2026,2,22]],"date-time":"2026-02-22T15:20:01Z","timestamp":1771773601000},"page":"113350","update-policy":"https:\/\/doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":0,"special_numbering":"C","title":["Lightweight modal-guided cross-attention fusion network for visible-infrared object detection"],"prefix":"10.1016","volume":"177","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-9067-9667","authenticated-orcid":false,"given":"Wencong","family":"Wu","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0009-0004-3327-1138","authenticated-orcid":false,"given":"Hongxi","family":"Zhang","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7230-1476","authenticated-orcid":false,"given":"Xiuwei","family":"Zhang","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1086-2873","authenticated-orcid":false,"given":"Hanlin","family":"Yin","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2977-8057","authenticated-orcid":false,"given":"Yanning","family":"Zhang","sequence":"additional","affiliation":[]}],"member":"78","reference":[{"issue":"9","key":"10.1016\/j.patcog.2026.113350_bib0001","doi-asserted-by":"crossref","first-page":"15940","DOI":"10.1109\/TITS.2022.3146575","article-title":"Spatio-contextual deep network-based multimodal pedestrian detection for autonomous driving","volume":"23","author":"Dasgupta","year":"2022","journal-title":"IEEE Trans. Intell. Transp. Syst."},{"key":"10.1016\/j.patcog.2026.113350_bib0002","doi-asserted-by":"crossref","DOI":"10.1016\/j.patcog.2025.111383","article-title":"MultiSpectral transformer fusion via exploiting similarity and complementarity for robust pedestrian detection","volume":"162","author":"Hou","year":"2025","journal-title":"Pattern Recognit."},{"issue":"6","key":"10.1016\/j.patcog.2026.113350_bib0003","doi-asserted-by":"crossref","first-page":"1137","DOI":"10.1109\/TPAMI.2016.2577031","article-title":"Faster R-CNN: towards real-time object detection with region proposal networks","volume":"39","author":"Ren","year":"2017","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"10.1016\/j.patcog.2026.113350_bib0004","series-title":"IEEE\/CVF Conference on Computer Vision and Pattern Recognition","first-page":"2849","article-title":"Learning RoI transformer for oriented object detection in aerial images","author":"Ding","year":"2019"},{"key":"10.1016\/j.patcog.2026.113350_bib0005","unstructured":"G. Jocher, YOLOv5 by Ultralytics, 2020, url https:\/\/github.com\/ultralytics\/yolov5."},{"issue":"10","key":"10.1016\/j.patcog.2026.113350_bib0006","doi-asserted-by":"crossref","first-page":"6700","DOI":"10.1109\/TCSVT.2022.3168279","article-title":"Drone-based RGB-infrared cross-modality vehicle detection via uncertainty-aware learning","volume":"32","author":"Sun","year":"2022","journal-title":"IEEE Trans. Circuits Syst. Video Technol."},{"key":"10.1016\/j.patcog.2026.113350_bib0007","series-title":"European Conference on Computer Vision","first-page":"509","article-title":"Translation, scale and rotation: cross-modal alignment meets RGB-infrared vehicle detection","volume":"13669","author":"Yuan","year":"2022"},{"key":"10.1016\/j.patcog.2026.113350_bib0008","series-title":"International Joint Conference on Artificial Intelligence","first-page":"758","article-title":"CF-deformable DETR: an end-to-end alignment-free model for weakly aligned visible-infrared object detection","author":"Fu","year":"2024"},{"key":"10.1016\/j.patcog.2026.113350_bib0009","series-title":"British Machine Vision Conference","first-page":"73.1","article-title":"Multispectral deep neural networks for pedestrian detection","author":"Liu","year":"2016"},{"key":"10.1016\/j.patcog.2026.113350_bib0010","doi-asserted-by":"crossref","first-page":"20","DOI":"10.1016\/j.inffus.2018.09.015","article-title":"Cross-modality interactive attention network for multispectral pedestrian detection","volume":"50","author":"Zhang","year":"2019","journal-title":"Inf. Fusion"},{"issue":"3","key":"10.1016\/j.patcog.2026.113350_bib0011","doi-asserted-by":"crossref","first-page":"4145","DOI":"10.1109\/TNNLS.2021.3105143","article-title":"Weakly aligned feature fusion for multimodal object detection","volume":"36","author":"Zhang","year":"2025","journal-title":"IEEE Trans. Neural Netw. Learn. Syst."},{"key":"10.1016\/j.patcog.2026.113350_bib0012","doi-asserted-by":"crossref","unstructured":"Q. Fang, D. Han, Z. Wang, Cross-modality fusion transformer for multispectral object detection, (2021). arXiv: 2111.00273.","DOI":"10.2139\/ssrn.4227745"},{"key":"10.1016\/j.patcog.2026.113350_bib0013","first-page":"1","article-title":"C2Former: calibrated and complementary transformer for RGB-infrared object detection","volume":"62","author":"Yuan","year":"2024","journal-title":"IEEE Trans. Geosci. Remote. Sens."},{"issue":"7","key":"10.1016\/j.patcog.2026.113350_bib0014","doi-asserted-by":"crossref","first-page":"7101","DOI":"10.1109\/TCSVT.2025.3539625","article-title":"EI2Det: edge-guided illumination-aware interactive learning for visible-infrared object detection","volume":"35","author":"Hu","year":"2025","journal-title":"IEEE Trans. Circuits Syst. Video Technol."},{"issue":"1","key":"10.1016\/j.patcog.2026.113350_bib0015","doi-asserted-by":"crossref","first-page":"547","DOI":"10.1109\/TCSVT.2024.3454631","article-title":"Multidimensional fusion network for multispectral object detection","volume":"35","author":"Yang","year":"2025","journal-title":"IEEE Trans. Circuits Syst. Video Technol."},{"key":"10.1016\/j.patcog.2026.113350_bib0016","series-title":"ACM International Conference on Multimedia","first-page":"1465","article-title":"Multispectral object detection via cross-modal conflict-aware learning","author":"He","year":"2023"},{"key":"10.1016\/j.patcog.2026.113350_bib0017","doi-asserted-by":"crossref","first-page":"4400","DOI":"10.1109\/TMM.2025.3543056","article-title":"Deformable cross-attention transformer for weakly aligned RGB-T pedestrian detection","volume":"27","author":"Hu","year":"2025","journal-title":"IEEE Trans. Multim."},{"key":"10.1016\/j.patcog.2026.113350_bib0018","series-title":"European Conference on Computer Vision","first-page":"787","article-title":"Improving multispectral pedestrian detection by addressing modality imbalance problems","volume":"12363","author":"Zhou","year":"2020"},{"key":"10.1016\/j.patcog.2026.113350_bib0019","first-page":"1","article-title":"Illumination-guided RGBT object detection with inter- and intra-modality fusion","volume":"72","author":"Zhang","year":"2023","journal-title":"IEEE Trans. Instrum. Meas."},{"key":"10.1016\/j.patcog.2026.113350_bib0020","first-page":"1","article-title":"Cross-modal oriented object detection of UAV aerial images based on image feature","volume":"62","author":"Wang","year":"2024","journal-title":"IEEE Trans. Geosci. Remote. Sens."},{"issue":"9","key":"10.1016\/j.patcog.2026.113350_bib0021","doi-asserted-by":"crossref","first-page":"242","DOI":"10.1007\/s10462-024-10877-1","article-title":"A comprehensive survey of deep learning-based lightweight object detection models for edge devices","volume":"57","author":"Mittal","year":"2024","journal-title":"Artif. Intell. Rev."},{"key":"10.1016\/j.patcog.2026.113350_bib0022","doi-asserted-by":"crossref","DOI":"10.1016\/j.eswa.2024.124848","article-title":"EL-YOLO: an efficient and lightweight low-altitude aerial objects detector for onboard applications","volume":"256","author":"Xue","year":"2024","journal-title":"Expert Syst. Appl."},{"key":"10.1016\/j.patcog.2026.113350_bib0023","doi-asserted-by":"crossref","DOI":"10.1016\/j.eswa.2024.124661","article-title":"EL-Net: an efficient and lightweight optimized network for object detection in remote sensing images","volume":"255","author":"Dong","year":"2024","journal-title":"Expert Syst. Appl."},{"key":"10.1016\/j.patcog.2026.113350_bib0024","doi-asserted-by":"crossref","DOI":"10.1016\/j.eswa.2025.126440","article-title":"Precision and speed: LSOD-YOLO for lightweight small object detection","volume":"269","author":"Wang","year":"2025","journal-title":"Expert Syst. Appl."},{"key":"10.1016\/j.patcog.2026.113350_bib0025","series-title":"AAAI Conference on Artificial Intelligence","first-page":"16172","article-title":"Lightweight transformer for multi-modal object detection (Student abstract)","author":"Cao","year":"2023"},{"key":"10.1016\/j.patcog.2026.113350_bib0026","series-title":"IEEE\/CVF Conference on Computer Vision and Pattern Recognition","first-page":"10809","article-title":"MetaFormer is actually what you need for vision","author":"Yu","year":"2022"},{"key":"10.1016\/j.patcog.2026.113350_bib0027","doi-asserted-by":"crossref","unstructured":"L. Hao, L. Xu, C. Liu, Y. Dong, LASFNet: a lightweight attention-guided self-modulation feature fusion network for multimodal object detection, (2025). arXiv: 2506.21018.","DOI":"10.1109\/TCYB.2025.3650459"},{"key":"10.1016\/j.patcog.2026.113350_bib0028","doi-asserted-by":"crossref","DOI":"10.1016\/j.inffus.2024.102246","article-title":"Improving RGB-infrared object detection with cascade alignment-guided transformer","volume":"105","author":"Yuan","year":"2024","journal-title":"Inf. Fusion"},{"key":"10.1016\/j.patcog.2026.113350_bib0029","doi-asserted-by":"crossref","DOI":"10.1016\/j.patcog.2023.109913","article-title":"ICAFusion: iterative cross-attention guided feature fusion for multispectral object detection","volume":"145","author":"Shen","year":"2024","journal-title":"Pattern Recognit."},{"key":"10.1016\/j.patcog.2026.113350_bib0030","series-title":"IEEE\/CVF Conference on Computer Vision and Pattern Recognition","article-title":"Generalized intersection over union: a metric and a loss for bounding box regression","author":"Rezatofighi","year":"2019"},{"issue":"1","key":"10.1016\/j.patcog.2026.113350_bib0031","doi-asserted-by":"crossref","first-page":"19","DOI":"10.1007\/s10479-005-5724-z","article-title":"A tutorial on the cross-entropy method","volume":"134","author":"de Boer","year":"2005","journal-title":"Ann. Oper. Res."},{"key":"10.1016\/j.patcog.2026.113350_bib0032","series-title":"IEEE International Conference on Image Processing","first-page":"276","article-title":"Multispectral fusion for object detection with cyclic fuse-and-refine blocks","author":"Zhang","year":"2020"},{"key":"10.1016\/j.patcog.2026.113350_bib0033","series-title":"IEEE\/CVF Conference on Computer Vision and Pattern Recognition","first-page":"5792","article-title":"Target-aware dual adversarial learning and a multi-scenario multi-modality benchmark to fuse infrared and visible for object detection","author":"Liu","year":"2022"},{"key":"10.1016\/j.patcog.2026.113350_bib0034","series-title":"IEEE International Conference on Computer Vision Workshops","first-page":"3489","article-title":"LLVIP: a visible-infrared paired dataset for low-light vision","author":"Jia","year":"2021"},{"key":"10.1016\/j.patcog.2026.113350_bib0035","series-title":"Advances in Neural Information Processing Systems","first-page":"107984","article-title":"YOLOv10: real-time end-to-end object detection","volume":"37","author":"Wang","year":"2024"},{"issue":"2","key":"10.1016\/j.patcog.2026.113350_bib0036","doi-asserted-by":"crossref","first-page":"133","DOI":"10.1080\/2150704X.2024.2305177","article-title":"Adaptive multimodal feature fusion with frequency domain gate for remote sensing object detection","volume":"15","author":"Sun","year":"2024","journal-title":"Remote Sens. Lett."},{"issue":"9","key":"10.1016\/j.patcog.2026.113350_bib0037","doi-asserted-by":"crossref","first-page":"9984","DOI":"10.1109\/TITS.2023.3266487","article-title":"Multi-modal feature pyramid transformer for RGB-infrared object detection","volume":"24","author":"Zhu","year":"2023","journal-title":"IEEE Trans. Intell. Transp. Syst."},{"issue":"3","key":"10.1016\/j.patcog.2026.113350_bib0038","doi-asserted-by":"crossref","first-page":"3284","DOI":"10.1109\/TITS.2024.3522086","article-title":"CCLDet: a cross-modality and cross-domain low-Light detector","volume":"26","author":"Shang","year":"2025","journal-title":"IEEE Trans. Intell. Transp. Syst."},{"key":"10.1016\/j.patcog.2026.113350_bib0039","series-title":"ACM International Conference on Multimedia","first-page":"4003","article-title":"DetFusion: a detection-driven infrared and visible image fusion network","author":"Sun","year":"2022"},{"issue":"12","key":"10.1016\/j.patcog.2026.113350_bib0040","doi-asserted-by":"crossref","first-page":"2121","DOI":"10.1109\/JAS.2022.106082","article-title":"SuperFusion: a versatile image registration and fusion network with semantic awareness","volume":"9","author":"Tang","year":"2022","journal-title":"IEEE CAA J. Autom. Sin."},{"key":"10.1016\/j.patcog.2026.113350_bib0041","series-title":"IEEE\/CVF Conference on Computer Vision and Pattern Recognition","first-page":"19647","article-title":"RFNet: unsupervised network for mutually reinforcing multi-modal image registration and fusion","author":"Xu","year":"2022"},{"key":"10.1016\/j.patcog.2026.113350_bib0042","series-title":"ACM International Conference on Multimedia","first-page":"4471","article-title":"Learning a graph neural network with cross modality interaction for image fusion","author":"Li","year":"2023"},{"key":"10.1016\/j.patcog.2026.113350_bib0043","series-title":"IEEE\/CVF Conference on Computer Vision and Pattern Recognition","first-page":"5906","article-title":"CDDFuse: correlation-driven dual-branch feature decomposition for multi-modality image fusion","author":"Zhao","year":"2023"},{"issue":"11","key":"10.1016\/j.patcog.2026.113350_bib0044","doi-asserted-by":"crossref","first-page":"11198","DOI":"10.1109\/TCSVT.2024.3418965","article-title":"MMI-Det: exploring multi-modal integration for visible and infrared object detection","volume":"34","author":"Zeng","year":"2024","journal-title":"IEEE Trans. Circuits Syst. Video Technol."},{"key":"10.1016\/j.patcog.2026.113350_bib0045","series-title":"IEEE Conference on Computer Vision and Pattern Recognition Workshops","first-page":"403","article-title":"Multimodal object detection by channel switching and spatial attention","author":"Cao","year":"2023"},{"key":"10.1016\/j.patcog.2026.113350_bib0046","series-title":"ACM International Conference on Multimedia","first-page":"2409-2418","article-title":"UniRGB-IR: a unified framework for visible-infrared semantic tasks via adapter tuning","author":"Yuan","year":"2025"},{"issue":"2","key":"10.1016\/j.patcog.2026.113350_bib0047","doi-asserted-by":"crossref","first-page":"2504","DOI":"10.1109\/TITS.2025.3638627","article-title":"Removal then selection: a coarse-to-fine fusion perspective for RGB-infrared object detection","volume":"27","author":"Zhao","year":"2026","journal-title":"IEEE Trans. Intell. Transp. Syst."},{"issue":"11","key":"10.1016\/j.patcog.2026.113350_bib0048","doi-asserted-by":"crossref","first-page":"7449","DOI":"10.1109\/TIV.2024.3398429","article-title":"Misaligned visible-thermal object detection: a drone-based benchmark and baseline","volume":"9","author":"Song","year":"2024","journal-title":"IEEE Trans. Intell. Veh."}],"container-title":["Pattern Recognition"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0031320326003158?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0031320326003158?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2026,3,20]],"date-time":"2026-03-20T18:59:38Z","timestamp":1774033178000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S0031320326003158"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,9]]},"references-count":48,"alternative-id":["S0031320326003158"],"URL":"https:\/\/doi.org\/10.1016\/j.patcog.2026.113350","relation":{},"ISSN":["0031-3203"],"issn-type":[{"value":"0031-3203","type":"print"}],"subject":[],"published":{"date-parts":[[2026,9]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"Lightweight modal-guided cross-attention fusion network for visible-infrared object detection","name":"articletitle","label":"Article Title"},{"value":"Pattern Recognition","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.patcog.2026.113350","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2026 Elsevier Ltd. All rights are reserved, including those for text and data mining, AI training, and similar technologies.","name":"copyright","label":"Copyright"}],"article-number":"113350"}}