{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T05:05:01Z","timestamp":1750309501372,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":27,"publisher":"ACM","license":[{"start":{"date-parts":[[2025,1,20]],"date-time":"2025-01-20T00:00:00Z","timestamp":1737331200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,1,20]]},"DOI":"10.1145\/3658617.3697632","type":"proceedings-article","created":{"date-parts":[[2025,3,4]],"date-time":"2025-03-04T14:32:21Z","timestamp":1741098741000},"page":"1126-1132","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Dual-branch cross-modal fusion with local-to-global learning for UAV object detection"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-8517-4194","authenticated-orcid":false,"given":"Binyi","family":"Fang","sequence":"first","affiliation":[{"name":"Shanghai Jiao Tong Univ., Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-1121-007X","authenticated-orcid":false,"given":"Yixin","family":"Yang","sequence":"additional","affiliation":[{"name":"Shanghai Jiao Tong Univ., Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8397-0042","authenticated-orcid":false,"given":"Jingjing","family":"Chang","sequence":"additional","affiliation":[{"name":"Shanghai Jiao Tong Univ., Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-6357-145X","authenticated-orcid":false,"given":"Ziyang","family":"Gao","sequence":"additional","affiliation":[{"name":"Shanghai Jiao Tong Univ., Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7046-3455","authenticated-orcid":false,"given":"Hai-Bao","family":"Chen","sequence":"additional","affiliation":[{"name":"Shanghai Jiao Tong Univ., Shanghai, China"}]}],"member":"320","published-online":{"date-parts":[[2025,3,4]]},"reference":[{"key":"e_1_3_2_1_1_1","first-page":"7369","volume-title":"Dynamic head: Unifying object detection heads with attentions,\" in 2021 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR)","author":"Dai X.","year":"2021","unstructured":"X. Dai, Y. Chen, B. Xiao, D. Chen, M. Liu, L. Yuan, and L. Zhang, \"Dynamic head: Unifying object detection heads with attentions,\" in 2021 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), 2021, pp. 7369--7378."},{"key":"e_1_3_2_1_2_1","first-page":"51","volume-title":"Vision and Computing (ICIVC)","author":"Zhang S.","year":"2023","unstructured":"S. Zhang, T. Xie, Y. Wang, Y. Liu, Y. Dou, and S. Wang, \"Sf-yolo: Rgb-t fusion object detection in uav scenes,\" in 2023 8th International Conference on Image, Vision and Computing (ICIVC), 2023, pp. 51--59."},{"key":"e_1_3_2_1_3_1","first-page":"1","volume-title":"Multi-modal pedestrian detection with large misalignment based on modal-wise regression and multi-modal iou,\" in 2021 17th International Conference on Machine Vision and Applications (MVA)","author":"Wanchaitanawong N.","year":"2021","unstructured":"N. Wanchaitanawong, M. Tanaka, T. Shibata, and M. Okutomi, \"Multi-modal pedestrian detection with large misalignment based on modal-wise regression and multi-modal iou,\" in 2021 17th International Conference on Machine Vision and Applications (MVA), 2021, pp. 1--6."},{"key":"e_1_3_2_1_4_1","first-page":"6274","volume-title":"Deep modular co-attention networks for visual question answering,\" in 2019 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR)","author":"Yu Z.","year":"2019","unstructured":"Z. Yu, J. Yu, Y. Cui, D. Tao, and Q. Tian, \"Deep modular co-attention networks for visual question answering,\" in 2019 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), 2019, pp. 6274--6283."},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2022.3172183"},{"key":"e_1_3_2_1_6_1","first-page":"72","volume-title":"Guided attentive feature fusion for multispectral pedestrian detection,\" in 2021 IEEE Winter Conference on Applications of Computer Vision (WACV)","author":"Zhang H.","year":"2021","unstructured":"H. Zhang, E. Fromont, S. Lefever, and B. Avignon, \"Guided attentive feature fusion for multispectral pedestrian detection,\" in 2021 IEEE Winter Conference on Applications of Computer Vision (WACV), 2021, pp. 72--80."},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2018.08.005"},{"key":"e_1_3_2_1_8_1","first-page":"1","volume-title":"Pedestrian detection by fusion of rgb and infrared images in low-light environment,\" in 2021 IEEE 24th International Conference on Information Fusion (FUSION)","author":"Deng Q.","year":"2021","unstructured":"Q. Deng, W. Tian, Y. Huang, L. Xiong, and X. Bi, \"Pedestrian detection by fusion of rgb and infrared images in low-light environment,\" in 2021 IEEE 24th International Conference on Information Fusion (FUSION), 2021, pp. 1--8."},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.3390\/electronics11010001"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11063-022-10991-7"},{"key":"e_1_3_2_1_11_1","first-page":"2920","volume-title":"Baanet: Learning bi-directional adaptive attention gates for multispectral pedestrian detection,\" in 2022 International Conference on Robotics and Automation (ICRA)","author":"Yang X.","year":"2022","unstructured":"X. Yang, Y. Qian, H. Zhu, C. Wang, and M. Yang, \"Baanet: Learning bi-directional adaptive attention gates for multispectral pedestrian detection,\" in 2022 International Conference on Robotics and Automation (ICRA), 2022, pp. 2920--2926."},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2022.108786"},{"key":"e_1_3_2_1_13_1","first-page":"1222","article-title":"Perceptual generative adversarial networks for small object detection","author":"Li J.","year":"2017","unstructured":"J. Li, X. Liang, Y. Wei, T. Xu, J. Feng, and S. Yan, \"Perceptual generative adversarial networks for small object detection,\" in Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, 2017, pp. 1222--1230.","journal-title":"Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition"},{"key":"e_1_3_2_1_14_1","first-page":"1","article-title":"Fanet: An arbitrary direction remote sensing object detection network based on feature fusion and angle classification","volume":"61","author":"Zhang Y.","year":"2023","unstructured":"Y. Zhang, W. Guo, C. Wu, W. Li, and R. Tao, \"Fanet: An arbitrary direction remote sensing object detection network based on feature fusion and angle classification,\" IEEE Transactions on Geoscience and Remote Sensing, vol. 61, pp. 1--11, 2023.","journal-title":"IEEE Transactions on Geoscience and Remote Sensing"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1109\/LSP.2022.3229571"},{"key":"e_1_3_2_1_16_1","first-page":"936","volume-title":"Feature pyramid networks for object detection,\" in 2017 IEEE Conference on Computer Vision and Pattern Recognition (CVPR)","author":"Lin T.-Y.","year":"2017","unstructured":"T.-Y. Lin, P. Doll\u00e1r, R. Girshick, K. He, B. Hariharan, and S. Belongie, \"Feature pyramid networks for object detection,\" in 2017 IEEE Conference on Computer Vision and Pattern Recognition (CVPR), 2017, pp. 936--944."},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"crossref","unstructured":"J.-S. Lim M. Astrid H.-J. Yoon and S.-I. Lee \"Small object detection using context and attention \" in 2021 International Conference on Artificial Intelligence in Information and Communication (ICAIIC) 2021 pp. 181--186.","DOI":"10.1109\/ICAIIC51459.2021.9415217"},{"key":"e_1_3_2_1_18_1","first-page":"2184","volume-title":"IEEE","author":"Yang G.","year":"2023","unstructured":"G. Yang, J. Lei, Z. Zhu, S. Cheng, Z. Feng, and R. Liang, \"Afpn: Asymptotic feature pyramid network for object detection,\" in 2023 IEEE International Conference on Systems, Man, and Cybernetics (SMC). IEEE, 2023, pp. 2184--2189."},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2016.2577031"},{"key":"e_1_3_2_1_20_1","first-page":"2849","article-title":"Learning roi transformer for oriented object detection in aerial images","author":"Ding J.","year":"2019","unstructured":"J. Ding, N. Xue, Y. Long, G.-S. Xia, and Q. Lu, \"Learning roi transformer for oriented object detection in aerial images,\" in Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 2019, pp. 2849--2858.","journal-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition"},{"key":"e_1_3_2_1_21_1","first-page":"2785","volume-title":"A rotation-equivariant detector for aerial object detection,\" in 2021 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR)","author":"Han J.","year":"2021","unstructured":"J. Han, J. Ding, N. Xue, and G.-S. Xia, \"Redet: A rotation-equivariant detector for aerial object detection,\" in 2021 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), 2021, pp. 2785--2794."},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2020.2974745"},{"key":"e_1_3_2_1_23_1","first-page":"3489","volume-title":"Llvip: A visible-infrared paired dataset for low-light vision,\" 2021 IEEE\/CVF International Conference on Computer Vision Workshops (ICCVW)","author":"Jia X.","year":"2021","unstructured":"X. Jia, C. Zhu, M. Li, W. Tang, and W. Zhou, \"Llvip: A visible-infrared paired dataset for low-light vision,\" 2021 IEEE\/CVF International Conference on Computer Vision Workshops (ICCVW), pp. 3489--3497, 2021."},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2022.3168279"},{"key":"e_1_3_2_1_25_1","first-page":"509","volume-title":"Translation, scale and rotation: cross-modal alignment meets rgb-infrared vehicle detection,\" in European Conference on Computer Vision","author":"Yuan M.","year":"2022","unstructured":"M. Yuan, Y. Wang, and X. Wei, \"Translation, scale and rotation: cross-modal alignment meets rgb-infrared vehicle detection,\" in European Conference on Computer Vision. Springer, 2022, pp. 509--525."},{"key":"e_1_3_2_1_26_1","volume-title":"Multispectral object detection via cross-modal conflict-aware learning,\" Proceedings of the 31st ACM International Conference on Multimedia","author":"He X.","year":"2023","unstructured":"X. He, C. Tang, X. Zou, and W. Zhang, \"Multispectral object detection via cross-modal conflict-aware learning,\" Proceedings of the 31st ACM International Conference on Multimedia, 2023."},{"key":"e_1_3_2_1_27_1","first-page":"1","volume-title":"A novel object detection algorithm with enhanced performance and robustness,\" in 2024 International Conference on Advances in Data Engineering and Intelligent Computing Systems (ADICS)","author":"Varghese R.","year":"2024","unstructured":"R. Varghese and S. M., \"Yolov8: A novel object detection algorithm with enhanced performance and robustness,\" in 2024 International Conference on Advances in Data Engineering and Intelligent Computing Systems (ADICS), 2024, pp. 1--6."}],"event":{"name":"ASPDAC '25: 30th Asia and South Pacific Design Automation Conference","sponsor":["SIGDA ACM Special Interest Group on Design Automation","IEICE","IPSJ","IEEE CAS","IEEE CEDA"],"location":"Tokyo Japan","acronym":"ASPDAC '25"},"container-title":["Proceedings of the 30th Asia and South Pacific Design Automation Conference"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3658617.3697632","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3658617.3697632","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T01:17:49Z","timestamp":1750295869000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3658617.3697632"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,1,20]]},"references-count":27,"alternative-id":["10.1145\/3658617.3697632","10.1145\/3658617"],"URL":"https:\/\/doi.org\/10.1145\/3658617.3697632","relation":{},"subject":[],"published":{"date-parts":[[2025,1,20]]},"assertion":[{"value":"2025-03-04","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}