{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,14]],"date-time":"2026-04-14T21:02:53Z","timestamp":1776200573781,"version":"3.50.1"},"reference-count":91,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","license":[{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"name":"National Key Research and Development Program of China","award":["2023YFC3306401"],"award-info":[{"award-number":["2023YFC3306401"]}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62102151"],"award-info":[{"award-number":["62102151"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Open Research Fund of Key Laboratory of Advanced Theory and Application in Statistics and Data Science"},{"DOI":"10.13039\/100010449","name":"Ministry of Education","doi-asserted-by":"publisher","award":["KLATASDS2305"],"award-info":[{"award-number":["KLATASDS2305"]}],"id":[{"id":"10.13039\/100010449","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Fundamental Research Funds for the Central Universities, in part by the Zhejiang Provincial Natural Science Foundation of China","award":["LD24F020007"],"award-info":[{"award-number":["LD24F020007"]}]},{"DOI":"10.13039\/501100004826","name":"Beijing Natural Science Foundation","doi-asserted-by":"publisher","award":["L223024"],"award-info":[{"award-number":["L223024"]}],"id":[{"id":"10.13039\/501100004826","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100004826","name":"Beijing Natural Science Foundation","doi-asserted-by":"publisher","award":["L244043"],"award-info":[{"award-number":["L244043"]}],"id":[{"id":"10.13039\/501100004826","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100004826","name":"Beijing Natural Science Foundation","doi-asserted-by":"publisher","award":["Z241100001324017"],"award-info":[{"award-number":["Z241100001324017"]}],"id":[{"id":"10.13039\/501100004826","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Thousand Plan&#x201D; Projects in Jiangxi Province","award":["Jxsq2023102268"],"award-info":[{"award-number":["Jxsq2023102268"]}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Multimedia"],"published-print":{"date-parts":[[2025]]},"DOI":"10.1109\/tmm.2025.3599020","type":"journal-article","created":{"date-parts":[[2025,8,14]],"date-time":"2025-08-14T18:48:19Z","timestamp":1755197299000},"page":"7392-7406","source":"Crossref","is-referenced-by-count":41,"title":["Fusion-Mamba for Cross-Modality Object Detection"],"prefix":"10.1109","volume":"27","author":[{"ORCID":"https:\/\/orcid.org\/0009-0004-4921-1834","authenticated-orcid":false,"given":"Wenhao","family":"Dong","sequence":"first","affiliation":[{"name":"School of Astronautics, Beihang University, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-2569-4965","authenticated-orcid":false,"given":"Haodong","family":"Zhu","sequence":"additional","affiliation":[{"name":"School of Artificial Intelligence, Beihang University, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0284-9940","authenticated-orcid":false,"given":"Shaohui","family":"Lin","sequence":"additional","affiliation":[{"name":"School of Computer Science and Technology, East China Normal University, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7256-4329","authenticated-orcid":false,"given":"Xiaoyan","family":"Luo","sequence":"additional","affiliation":[{"name":"School of Astronautics, Beihang University, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3970-7519","authenticated-orcid":false,"given":"Yunhang","family":"Shen","sequence":"additional","affiliation":[{"name":"Tencent Youtu Lab, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9583-0055","authenticated-orcid":false,"given":"Guodong","family":"Guo","sequence":"additional","affiliation":[{"name":"Ningbo Institute of Digital Twin, Eastern Institute of Technology, Ningbo, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7396-6218","authenticated-orcid":false,"given":"Baochang","family":"Zhang","sequence":"additional","affiliation":[{"name":"School of Artificial Intelligence, Beihang University, Beijing, China"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2021.3076466"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2020.3046868"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2023.3241196"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2024.3375505"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2024.3400675"},{"key":"ref6","first-page":"1","article-title":"Very deep convolutional networks for large-scale image recognition","volume-title":"Proc. 3 rd Int. Conf. Learn. Representations","author":"Simonyan","year":"2015"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.4324\/9781410605337-29"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.1706.03762"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.2010.11929"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.5244\/C.30.73"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/WACV48630.2021.00012"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/TCDS.2023.3238181"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.2139\/ssrn.4227745"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2023.109913"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.3390\/s20010281"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/ITSC.2019.8917332"},{"key":"ref18","first-page":"1","article-title":"Mamba: Linear-time sequence modeling with selective state spaces","volume-title":"Proc. Conf. Lang. Model.","author":"Gu","year":"2024"},{"key":"ref19","first-page":"103031","article-title":"Vmamba: Visual state space model","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Liu","year":"2024"},{"key":"ref20","first-page":"62429","article-title":"Vision mamba: Efficient visual representation learning with bidirectional state space model","volume-title":"Proc. 41st Int. Conf. Mach. Learn.","author":"Zhu","year":"2024"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.91"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58452-8_13"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00986"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01186"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2018.2887342"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1016\/j.inffus.2018.09.004"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/MSP.2017.2765202"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2024.3433555"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2024.3410113"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/JAS.2022.106082"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00572"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.3390\/rs14092020"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW59228.2023.00046"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2022.3160589"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2023.3272471"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2024.3350926"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2018.08.005"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-20077-9_9"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2023.3306870"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2024.3381377"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1007\/s11042-023-15333-w"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1109\/LGRS.2024.3375634"},{"key":"ref43","article-title":"Removal and selection: Improving rgb-infrared object detection via coarse-to-fine fusion","author":"Zhao","year":"2024"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1109\/TITS.2023.3266487"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1016\/j.patrec.2024.02.012"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1109\/TGRS.2024.3376819"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1145\/3767748"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1007\/978-981-97-5128-0_27"},{"key":"ref49","article-title":"U-mamba: Enhancing long-range dependency for biomedical image segmentation","author":"Ma","year":"2024"},{"key":"ref50","article-title":"Mamba-unet: Unet-like pure visual mamba for medical image segmentation","author":"Wang","year":"2024"},{"key":"ref51","article-title":"Video mamba suite: State space model as a versatile alternative for video understanding","author":"Chen","year":"2024"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2022.3199107"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00095"},{"key":"ref54","first-page":"1","article-title":"ultralytics\/yolov5: v6.2-YOLOv5 classification models, apple M1, reproducibility, ClearML and DECI.AI integrations","volume":"10","author":"Jocher","year":"2022","journal-title":"Zenodo"},{"key":"ref55","article-title":"Ultralytics YOLO","author":"Jocher","year":"2023"},{"key":"ref56","doi-asserted-by":"publisher","DOI":"10.1109\/ICCVW54120.2021.00389"},{"key":"ref57","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00571"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2022.3168279"},{"key":"ref59","article-title":"Free teledyne flir thermal dataset for algorithm training","author":"FLIR","year":"2024"},{"key":"ref60","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2023.3272269"},{"key":"ref61","doi-asserted-by":"publisher","DOI":"10.1109\/ICIP40778.2020.9191080"},{"key":"ref62","first-page":"91","article-title":"Faster R-CNN: Towards real-time object detection with region proposal networks","volume-title":"Proc. Adv. Neural Inf. Process. Syst. 28: Annu. Conf. Neural Inf. Process. Syst.","author":"Ren","year":"2015"},{"key":"ref63","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00644"},{"key":"ref64","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00708"},{"key":"ref65","doi-asserted-by":"publisher","DOI":"10.1145\/3503161.3547895"},{"key":"ref66","doi-asserted-by":"publisher","DOI":"10.1145\/3581783.3612651"},{"key":"ref67","doi-asserted-by":"publisher","DOI":"10.1016\/j.inffus.2022.10.034"},{"key":"ref68","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2023.3266452"},{"key":"ref69","doi-asserted-by":"publisher","DOI":"10.1109\/ICHMS59971.2024.10555757"},{"key":"ref70","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2020\/135"},{"key":"ref71","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-021-01501-8"},{"key":"ref72","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01906"},{"key":"ref73","doi-asserted-by":"publisher","DOI":"10.1145\/3503161.3547902"},{"key":"ref74","doi-asserted-by":"publisher","DOI":"10.1145\/3581783.3612135"},{"key":"ref75","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00523"},{"key":"ref76","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-20077-9_30"},{"key":"ref77","doi-asserted-by":"publisher","DOI":"10.1109\/JSTARS.2024.3373816"},{"key":"ref78","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.02534"},{"key":"ref79","doi-asserted-by":"publisher","DOI":"10.1080\/2150704X.2024.2305177"},{"key":"ref80","doi-asserted-by":"publisher","DOI":"10.1109\/TGRS.2024.3367934"},{"key":"ref81","doi-asserted-by":"publisher","DOI":"10.1016\/j.jag.2024.103918"},{"key":"ref82","doi-asserted-by":"publisher","DOI":"10.1109\/ICIP.2019.8803104"},{"key":"ref83","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW.2019.00130"},{"key":"ref84","doi-asserted-by":"publisher","DOI":"10.l007\/978-3-319-46448-0_2"},{"key":"ref85","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.324"},{"key":"ref86","doi-asserted-by":"publisher","DOI":"10.3390\/rs16203904"},{"key":"ref87","doi-asserted-by":"publisher","DOI":"10.1016\/j.asoc.2024.111971"},{"key":"ref88","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2020.3012548"},{"key":"ref89","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19797-0_41"},{"key":"ref90","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2023.3268209"},{"key":"ref91","doi-asserted-by":"publisher","DOI":"10.1016\/j.inffus.2022.03.007"}],"container-title":["IEEE Transactions on Multimedia"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/6046\/10844992\/11124513.pdf?arnumber=11124513","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,10,22]],"date-time":"2025-10-22T17:25:20Z","timestamp":1761153920000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11124513\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025]]},"references-count":91,"URL":"https:\/\/doi.org\/10.1109\/tmm.2025.3599020","relation":{},"ISSN":["1520-9210","1941-0077"],"issn-type":[{"value":"1520-9210","type":"print"},{"value":"1941-0077","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025]]}}}