{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,15]],"date-time":"2026-06-15T14:56:21Z","timestamp":1781535381133,"version":"3.54.5"},"publisher-location":"New York, NY, USA","reference-count":30,"publisher":"ACM","license":[{"start":{"date-parts":[[2026,6,15]],"date-time":"2026-06-15T00:00:00Z","timestamp":1781481600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/legalcode"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62462049"],"award-info":[{"award-number":["62462049"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2026,6,16]]},"DOI":"10.1145\/3805622.3810666","type":"proceedings-article","created":{"date-parts":[[2026,6,15]],"date-time":"2026-06-15T14:42:57Z","timestamp":1781534577000},"page":"1442-1449","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["ICMF-Net: Interactive Cross-Modal Fusion with Attention and Selection Network for Remote Sensing Object Detection"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0009-1634-7539","authenticated-orcid":false,"given":"Kun","family":"Yao","sequence":"first","affiliation":[{"name":"School of Computer Science, Inner Mongolia University, Hohhot, Inner Mongolia, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0000-3232-3742","authenticated-orcid":false,"given":"Hao","family":"Zeng","sequence":"additional","affiliation":[{"name":"School of Computer Science, Inner Mongolia University, Hohhot, Inner Mongolia, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-8785-6470","authenticated-orcid":false,"given":"Yida","family":"Wang","sequence":"additional","affiliation":[{"name":"School of Computer Science, Inner Mongolia University, Hohhot, Inner Mongolia, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2060-2266","authenticated-orcid":false,"given":"Ming","family":"Ma","sequence":"additional","affiliation":[{"name":"School of Computer Science, Inner Mongolia University, Hohhot, Inner Mongolia, China"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2026,6,15]]},"reference":[{"key":"e_1_3_3_1_2_2","doi-asserted-by":"crossref","unstructured":"Nicolas Audebert Bertrand Le Saux and S\u00e9bastien Lef\u00e8vre. 2017. Segment-before-detect: Vehicle detection and classification through semantic segmentation of aerial images. Remote Sens. 9 4 (2017) 368.","DOI":"10.3390\/rs9040368"},{"key":"e_1_3_3_1_3_2","doi-asserted-by":"publisher","DOI":"10.1109\/IGARSS52108.2023.10281486"},{"key":"e_1_3_3_1_4_2","unstructured":"Yue Cao Jiarui Xu Stephen Lin Fangyun Wei and Han Hu. 2019. GCNet: Non-local Networks Meet Squeeze-Excitation Networks and Beyond. arxiv:https:\/\/arXiv.org\/abs\/1904.11492\u00a0[cs.CV] https:\/\/arxiv.org\/abs\/1904.11492"},{"key":"e_1_3_3_1_5_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58621-8_33"},{"key":"e_1_3_3_1_6_2","first-page":"236","volume-title":"Proc. Int. Conf. Pattern Recognit. (ICPR)","author":"Chen Yishuo","year":"2024","unstructured":"Yishuo Chen, Boran Wang, Xinyu Guo, Wenbin Zhu, Jiasheng He, Xiaobin Liu, and Jing Yuan. 2024. DEYOLO: Dual-feature-enhancement YOLO for cross-modality object detection. In Proc. Int. Conf. Pattern Recognit. (ICPR). 236\u2013252."},{"key":"e_1_3_3_1_7_2","doi-asserted-by":"publisher","unstructured":"Xiaomin Fan Wujie Zhou Xiaohong Qian and Weiqing Yan. 2024. Progressive Adjacent-Layer coordination symmetric cascade network for semantic segmentation of Multimodal remote sensing images. Expert Syst. Appl. 238 (2024) 121999. 10.1016\/j.eswa.2023.121999","DOI":"10.1016\/j.eswa.2023.121999"},{"key":"e_1_3_3_1_8_2","volume-title":"Proc. IEEE Int. Conf. Comput. Vis. (ICCV) Workshops","author":"Fang Qingyun","year":"2021","unstructured":"Qingyun Fang, Dapeng Han, and Zhaokui Wang. 2021. Cross-modality fusion transformer for multispectral object detection. In Proc. IEEE Int. Conf. Comput. Vis. (ICCV) Workshops."},{"key":"e_1_3_3_1_9_2","volume-title":"Proc. IEEE Int. Conf. Pattern Recognit. (ICPR)","author":"Fang Qingyun","year":"2022","unstructured":"Qingyun Fang and Zhaokui Wang. 2022. Cross-modality attentive feature fusion for object detection in multispectral remote sensing imagery. In Proc. IEEE Int. Conf. Pattern Recognit. (ICPR)."},{"key":"e_1_3_3_1_10_2","unstructured":"Junjie Guo Chenqiang Gao Fangcen Liu Deyu Meng and Xinbo Gao. 2024. DAMSDet: Dynamic Adaptive Multispectral Detection Transformer with Competitive Query Selection and Adaptive Feature Fusion. arxiv:https:\/\/arXiv.org\/abs\/2403.00326\u00a0[cs.CV] https:\/\/arxiv.org\/abs\/2403.00326"},{"key":"e_1_3_3_1_11_2","doi-asserted-by":"crossref","unstructured":"Shumeng He Houqun Yang Xiaoying Zhang and Xuanyu Li. 2023. MFTransNet: A Multi-Modal Fusion with CNN-Transformer Network for Semantic Segmentation of HSR Remote Sensing Images. Mathematics 11 3 (2023).","DOI":"10.3390\/math11030722"},{"key":"e_1_3_3_1_12_2","doi-asserted-by":"publisher","DOI":"10.1016\/j.isprsjprs.2021.12.007"},{"key":"e_1_3_3_1_13_2","unstructured":"Qibin Hou Daquan Zhou and Jiashi Feng. 2021. Coordinate Attention for Efficient Mobile Network Design. arxiv:https:\/\/arXiv.org\/abs\/2103.02907\u00a0[cs.CV] https:\/\/arxiv.org\/abs\/2103.02907"},{"key":"e_1_3_3_1_14_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00745"},{"key":"e_1_3_3_1_15_2","unstructured":"Bingyu Li Da Zhang Zhiyuan Zhao Junyu Gao and Xuelong Li. 2025. StitchFusion: Weaving Any Visual Modalities to Enhance Multimodal Semantic Segmentation. arxiv:https:\/\/arXiv.org\/abs\/2408.01343\u00a0[cs.CV] https:\/\/arxiv.org\/abs\/2408.01343"},{"key":"e_1_3_3_1_16_2","doi-asserted-by":"crossref","unstructured":"Jiafeng Li Ying Wen and Lianghua He. 2023. SCConv: Spatial and Channel Reconstruction Convolution for Feature Redundancy. IEEE Trans. Pattern Anal. Mach. Intell. 45 8 (2023) 6153\u20136162.","DOI":"10.1109\/CVPR52729.2023.00596"},{"key":"e_1_3_3_1_17_2","doi-asserted-by":"publisher","unstructured":"Xiangyang Li Shiguo Chen Chunna Tian Heng Zhou and Zhenxi Zhang. 2024. M2FNet: Mask-Guided Multi-Level Fusion for RGB-T Pedestrian Detection. IEEE Trans. Multim. 26 (2024) 8678\u20138690. 10.1109\/TMM.2024.3381377","DOI":"10.1109\/TMM.2024.3381377"},{"key":"e_1_3_3_1_18_2","unstructured":"Xia Li Zeming Li Kunyu Li Yanhui Zhang Gaofeng Cheng Jianping Shi Jian Sun and Jiaya Jia. 2020. Efficient Multi-Scale Attention for Semantic Segmentation. Int. J. Comput. Vis. 128 4 (2020) 27\u201342."},{"key":"e_1_3_3_1_19_2","volume-title":"Proc. Eur. Conf. Comput. Vis. (ECCV)","author":"Liu Huayao","year":"2022","unstructured":"Huayao Liu, Jiaming Zhang, Kailun Yang, Xinxin Hu, Ruiping Liu, and Rainer Stiefelhagen. 2022. CMX: Cross-Modal Fusion for RGB-X Semantic Segmentation with Transformers. In Proc. Eur. Conf. Comput. Vis. (ECCV)."},{"key":"e_1_3_3_1_20_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-46448-0_2"},{"key":"e_1_3_3_1_21_2","doi-asserted-by":"publisher","unstructured":"Jiabao Ma Wujie Zhou Jingsheng Lei and Lu Yu. 2023. Adjacent Bi-Hierarchical Network for Scene Parsing of Remote Sensing Images. IEEE Geosci. Remote Sens. Lett. 20 (2023) 1\u20135. 10.1109\/LGRS.2023.3241648","DOI":"10.1109\/LGRS.2023.3241648"},{"key":"e_1_3_3_1_22_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.91"},{"key":"e_1_3_3_1_23_2","volume-title":"Adv. Neural Inf. Process. Syst. (NeurIPS)","author":"Ren Shaoqing","year":"2015","unstructured":"Shaoqing Ren, Kaiming He, Ross Girshick, and Jian Sun. 2015. Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks. In Adv. Neural Inf. Process. Syst. (NeurIPS)."},{"key":"e_1_3_3_1_24_2","doi-asserted-by":"crossref","unstructured":"Jifeng Shen Yifei Chen Yue Liu Xin Zuo Heng Fan and Wankou Yang. 2024. ICAFusion: Iterative cross-attention guided feature fusion for multispectral object detection. Pattern Recognit. 145 (2024) 109913.","DOI":"10.1016\/j.patcog.2023.109913"},{"key":"e_1_3_3_1_25_2","unstructured":"Raja Sunkara and Tie Luo. 2022. No More Strided Convolutions or Pooling: A New CNN Building Block for Low-Resolution Images and Small Objects. arxiv:https:\/\/arXiv.org\/abs\/2208.03641\u00a0[cs.CV] https:\/\/arxiv.org\/abs\/2208.03641"},{"key":"e_1_3_3_1_26_2","volume-title":"Proc. IEEE Int. Conf. Multim. Expo (ICME)","author":"Wan Dahang","year":"2025","unstructured":"Dahang Wan, Rongsheng Lu, Yang Fang, Xianli Lang, Shuangbao Shu, Jingjing Chen, Siyuan Shen, Ting Xu, and Zecong Ye. 2025. YOLOv11-RGBT: Towards a Comprehensive Single-Stage Multispectral Object Detection Framework. In Proc. IEEE Int. Conf. Multim. Expo (ICME)."},{"key":"e_1_3_3_1_27_2","doi-asserted-by":"publisher","unstructured":"Qilong Wang Banggu Wu Pengfei Zhu Peihua Li Wangmeng Zuo and Qinghua Hu. 2020. ECA-Net: Efficient Channel Attention for Deep Convolutional Neural Networks. Proc. IEEE Conf. Comput. Vis. Pattern Recog. (CVPR) (2020) 11531\u201311539. 10.1109\/CVPR42600.2020.01155","DOI":"10.1109\/CVPR42600.2020.01155"},{"key":"e_1_3_3_1_28_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01234-2_1"},{"key":"e_1_3_3_1_29_2","doi-asserted-by":"publisher","DOI":"10.1145\/3746027.3754806"},{"key":"e_1_3_3_1_30_2","doi-asserted-by":"crossref","unstructured":"Jiaqing Zhang Jie Lei Weiying Xie Zhenman Fang Yunsong Li and Qian Du. 2023. SuperYOLO: Super resolution assisted object detection in multimodal remote sensing imagery. IEEE Trans. Geosci. Remote Sens. 61 (2023) 1\u201315.","DOI":"10.1109\/TGRS.2023.3258666"},{"key":"e_1_3_3_1_31_2","doi-asserted-by":"publisher","unstructured":"Xue Zhang Si-Yuan Cao Fang Wang Runmin Zhang Zhe Wu Xiaohan Zhang Xiaokai Bai and Hui-Liang Shen. 2025. Rethinking Early-Fusion Strategies for Improved Multispectral Object Detection. IEEE Trans. Intell. Veh. 10 6 (2025) 3728\u20133742. 10.1109\/TIV.2024.3462488","DOI":"10.1109\/TIV.2024.3462488"}],"event":{"name":"ICMR '26: International Conference on Multimedia Retrieval","location":"Amsterdam The Netherlands","acronym":"ICMR '26","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["Proceedings of the 2026 International Conference on Multimedia Retrieval"],"original-title":[],"deposited":{"date-parts":[[2026,6,15]],"date-time":"2026-06-15T14:51:08Z","timestamp":1781535068000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3805622.3810666"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,6,15]]},"references-count":30,"alternative-id":["10.1145\/3805622.3810666","10.1145\/3805622"],"URL":"https:\/\/doi.org\/10.1145\/3805622.3810666","relation":{},"subject":[],"published":{"date-parts":[[2026,6,15]]},"assertion":[{"value":"2026-06-15","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}