{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,4]],"date-time":"2026-05-04T15:04:06Z","timestamp":1777907046781,"version":"3.51.4"},"publisher-location":"New York, NY, USA","reference-count":58,"publisher":"ACM","funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62431020"],"award-info":[{"award-number":["62431020"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Foundation for Innovative Research Groups of Hubei Province","award":["2024AFA017"],"award-info":[{"award-number":["2024AFA017"]}]},{"DOI":"10.13039\/501100012226","name":"Fundamental Research Funds for the Central Universities","doi-asserted-by":"publisher","award":["2042025kf0030"],"award-info":[{"award-number":["2042025kf0030"]}],"id":[{"id":"10.13039\/501100012226","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,10,27]]},"DOI":"10.1145\/3746027.3755841","type":"proceedings-article","created":{"date-parts":[[2025,10,25]],"date-time":"2025-10-25T07:38:54Z","timestamp":1761377934000},"page":"2294-2303","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":1,"title":["Multimodal Decomposed Distillation with Instance Alignment and Uncertainty Compensation for Thermal Object Detection"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-0122-4463","authenticated-orcid":false,"given":"Yanfeng","family":"Liu","sequence":"first","affiliation":[{"name":"National Engineering Research Center for Multimedia Software, School of Computer Science, Wuhan University, Wuhan, Hubei, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0542-2280","authenticated-orcid":false,"given":"Lefei","family":"Zhang","sequence":"additional","affiliation":[{"name":"National Engineering Research Center for Multimedia Software, School of Computer Science, Wuhan University, Wuhan, Hubei, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2025,10,27]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01467"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1145\/3474085.3475679"},{"key":"e_1_3_2_1_3_1","first-page":"15394","article-title":"PKD: General Distillation Framework for Object Detectors via Pearson Correlation Coefficient","author":"Cao Weihan","year":"2022","unstructured":"Weihan Cao, Yifan Zhang, Jianfei Gao, Anda Cheng, Ke Cheng, and Jian Cheng. 2022. PKD: General Distillation Framework for Object Detectors via Pearson Correlation Coefficient. In Advances in Neural Information Processing Systems (NeurIPS). 15394-15406.","journal-title":"Advances in Neural Information Processing Systems (NeurIPS)."},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.02534"},{"key":"e_1_3_2_1_5_1","unstructured":"Guobin Chen Wongun Choi Xiang Yu Tony Han and Manmohan Chandraker. 2017. Learning Efficient Object Detection Models with Knowledge Distillation. In Advances in Neural Information Processing Systems (NeurIPS)."},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1145\/3664647.3680656"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01281"},{"key":"e_1_3_2_1_8_1","first-page":"5213","article-title":"Distilling Object Detectors with Feature Richness","author":"Du Zhixing","year":"2021","unstructured":"Zhixing Du, Rui Zhang, Ming Chang, Xishan Zhang, Shaoli Liu, Tianshi Chen, and Yunji Chen. 2021. Distilling Object Detectors with Feature Richness. In Advances in Neural Information Processing Systems (NeurIPS). 5213-5224.","journal-title":"Advances in Neural Information Processing Systems (NeurIPS)."},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW59228.2023.00042"},{"key":"e_1_3_2_1_10_1","volume-title":"Proceedings of the International Joint Conference on Artificial Intelligence (IJCAI). 758-766","author":"Fu Haolong","year":"2024","unstructured":"Haolong Fu, Jin Yuan, Guojin Zhong, Xuan He, Jiacheng Lin, and Zhiyong Li. 2024. CF-Deformable DETR: An End-to-End Alignment-Free Model for Weakly Aligned Visible-Infrared Object Detection. In Proceedings of the International Joint Conference on Artificial Intelligence (IJCAI). 758-766."},{"key":"e_1_3_2_1_11_1","volume-title":"Proceedings of the European Conference on Computer Vision (ECCV). 464-481","author":"Guo Junjie","year":"2024","unstructured":"Junjie Guo, Chenqiang Gao, Fangcen Liu, Deyu Meng, and Xinbo Gao. 2024. DAMSDet: Dynamic Adaptive Multispectral Detection Transformer with\u00a0Competitive Query Selection and\u00a0Adaptive Feature Fusion. In Proceedings of the European Conference on Computer Vision (ECCV). 464-481."},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.309"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2018.2844175"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1145\/3581783.3612651"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP49357.2023.10095353"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1145\/3664647.3681579"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i3.28016"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP49357.2023.10097119"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01515"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00235"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1109\/TVT.2023.3262129"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.776"},{"key":"e_1_3_2_1_23_1","volume-title":"Proceedings of the International Conference on Learning Representations (ICLR). 1-11","author":"Li Yunsheng","year":"2021","unstructured":"Yunsheng Li, Yinpeng Chen, Xiyang Dai, Mengchen Liu, Dongdong Chen, Ye Yu, Lu Yuan, Zicheng Liu, Mei Chen, and Nuno Vasconcelos. 2021. Revisiting Dynamic Convolution via Matrix Decomposition. In Proceedings of the International Conference on Learning Representations (ICLR). 1-11."},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1145\/3503161.3548416"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2018.2858826"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00571"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00571"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1109\/TGRS.2025.3577046"},{"key":"e_1_3_2_1_29_1","first-page":"1","article-title":"d. ABNet: Adaptive Balanced Network for Multiscale Object Detection in Remote Sensing Imagery","volume":"60","author":"Liu Yanfeng","year":"2022","unstructured":"Yanfeng Liu, Qiang Li, Yuan Yuan, Qian Du, and Qi Wang. 2022 d. ABNet: Adaptive Balanced Network for Multiscale Object Detection in Remote Sensing Imagery. IEEE Transactions on Geoscience and Remote Sensing, Vol. 60 (2022), 1-14.","journal-title":"IEEE Transactions on Geoscience and Remote Sensing"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP43922.2022.9746853"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1145\/3664647.3680878"},{"key":"e_1_3_2_1_32_1","volume-title":"Proceedings of the International Conference on Machine Learning (ICML). 1-13","author":"Ma Wenxuan","year":"2024","unstructured":"Wenxuan Ma, Shuang Li, Lincan Cai, and Jingxuan Kang. 2024. Learning Modality Knowledge Alignment for Cross-Modality Transfer. In Proceedings of the International Conference on Machine Learning (ICML). 1-13."},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i5.28219"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2023\/142"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.jvcir.2015.11.002"},{"key":"e_1_3_2_1_36_1","volume-title":"Proceedings of the International Conference on Learning Representations (ICLR). 1-9.","author":"Romero Adriana","year":"2015","unstructured":"Adriana Romero, Nicolas Ballas, Samira Ebrahimi Kahou, Antoine Chassang, Carlo Gatta, and Yoshua Bengio. 2015. Fitnets: Hints for Thin Deep Nets. In Proceedings of the International Conference on Learning Representations (ICLR). 1-9."},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01489"},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1145\/3503161.3547902"},{"key":"e_1_3_2_1_39_1","first-page":"1922","article-title":"FCOS: A Simple and Strong Anchor-Free Object Detector","volume":"44","author":"Tian Zhi","year":"2022","unstructured":"Zhi Tian, Chunhua Shen, Hao Chen, and Tong He. 2022. FCOS: A Simple and Strong Anchor-Free Object Detector. IEEE Transactions on Pattern Analysis and Machine Intelligence, Vol. 44, 4 (2022), 1922-1933.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1109\/JSTARS.2024.3479717"},{"key":"e_1_3_2_1_41_1","volume-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR). 3520-3530","author":"Poojan Oza Vibashan VS","unstructured":"Vibashan VS, Poojan Oza, and Vishal M. Patel. 2023. Instance Relation Graph Guided Source-Free Domain Adaptive Object Detection. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR). 3520-3530."},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01563"},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00507"},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.02503"},{"key":"e_1_3_2_1_45_1","volume-title":"Proceedings of the International Conference on Learning Representations (ICLR).","author":"Xue Zihui","year":"2023","unstructured":"Zihui Xue, Zhengqi Gao, Sucheng Ren, and Hang Zhao. 2023. The Modality Focusing Hypothesis: Towards Understanding Crossmodal Knowledge Distillation. In Proceedings of the International Conference on Learning Representations (ICLR)."},{"key":"e_1_3_2_1_46_1","volume-title":"Proceedings of the European Conference on Computer Vision (ECCV). 123-138","author":"Yang Chenhongyi","unstructured":"Chenhongyi Yang, Mateusz Ochal, Amos Storkey, and Elliot J. Crowley. 2022b. Prediction-Guided Distillation for\u00a0Dense Object Detection. In Proceedings of the European Conference on Computer Vision (ECCV). 123-138."},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00460"},{"key":"e_1_3_2_1_48_1","volume-title":"Proceedings of the European Conference on Computer Vision (ECCV). 509-525","author":"Yuan Maoxun","year":"2022","unstructured":"Maoxun Yuan, Yinyan Wang, and Xingxing Wei. 2022. Translation, Scale and\u00a0Rotation: Cross-Modal Alignment Meets RGB-Infrared Vehicle Detection. In Proceedings of the European Conference on Computer Vision (ECCV). 509-525."},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICIP40778.2020.9191080"},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i7.28533"},{"key":"e_1_3_2_1_51_1","first-page":"52296","article-title":"E2E-MFD","author":"Zhang Jiaqing","year":"2024","unstructured":"Jiaqing Zhang, Mingxiang Cao, Weiying Xie, Jie Lei, Daixun Li, Wenbo Huang, Yunsong Li, and Xue Yang. 2024a. E2E-MFD: Towards End-to-End Synchronous Multimodal Fusion Detection. In Advances in Neural Information Processing Systems (NeurIPS). 52296-52322.","journal-title":"Towards End-to-End Synchronous Multimodal Fusion Detection. In Advances in Neural Information Processing Systems (NeurIPS)."},{"key":"e_1_3_2_1_52_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2023.3300470"},{"key":"e_1_3_2_1_53_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v36i3.20240"},{"key":"e_1_3_2_1_54_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00523"},{"key":"e_1_3_2_1_55_1","volume-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR). 15470-15480","author":"Zhao Lingjun","unstructured":"Lingjun Zhao, Jingyu Song, and Katherine A. Skinner. 2024. CRKD: Enhanced Camera-Radar Object Detection with Cross-modality Knowledge Distillation. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR). 15470-15480."},{"key":"e_1_3_2_1_56_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01341"},{"key":"e_1_3_2_1_57_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01652"},{"key":"e_1_3_2_1_58_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2023.3248583"}],"event":{"name":"MM '25: The 33rd ACM International Conference on Multimedia","location":"Dublin Ireland","acronym":"MM '25","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["Proceedings of the 33rd ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3746027.3755841","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,10]],"date-time":"2025-12-10T04:14:20Z","timestamp":1765340060000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3746027.3755841"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,27]]},"references-count":58,"alternative-id":["10.1145\/3746027.3755841","10.1145\/3746027"],"URL":"https:\/\/doi.org\/10.1145\/3746027.3755841","relation":{},"subject":[],"published":{"date-parts":[[2025,10,27]]},"assertion":[{"value":"2025-10-27","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}