{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,13]],"date-time":"2026-05-13T21:20:57Z","timestamp":1778707257825,"version":"3.51.4"},"reference-count":68,"publisher":"Springer Science and Business Media LLC","issue":"8","license":[{"start":{"date-parts":[[2025,4,15]],"date-time":"2025-04-15T00:00:00Z","timestamp":1744675200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,4,15]],"date-time":"2025-04-15T00:00:00Z","timestamp":1744675200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62276192"],"award-info":[{"award-number":["62276192"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int J Comput Vis"],"published-print":{"date-parts":[[2025,8]]},"DOI":"10.1007\/s11263-025-02427-1","type":"journal-article","created":{"date-parts":[[2025,4,15]],"date-time":"2025-04-15T02:55:25Z","timestamp":1744685725000},"page":"5262-5280","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":32,"title":["C2RF: Bridging Multi-modal Image Registration and Fusion via Commonality Mining and Contrastive 
Learning"],"prefix":"10.1007","volume":"133","author":[{"given":"Linfeng","family":"Tang","sequence":"first","affiliation":[]},{"given":"Qinglong","family":"Yan","sequence":"additional","affiliation":[]},{"given":"Xinyu","family":"Xiang","sequence":"additional","affiliation":[]},{"given":"Leyuan","family":"Fang","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3264-3265","authenticated-orcid":false,"given":"Jiayi","family":"Ma","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,4,15]]},"reference":[{"key":"2427_CR1","doi-asserted-by":"crossref","unstructured":"Arar, M., Ginger, Y., Danon, D., Bermano, A.H., & Cohen-Or, D. (2020). Unsupervised multi-modal image registration via geometry preserving image-to-image translation. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 13410\u201313419","DOI":"10.1109\/CVPR42600.2020.01342"},{"issue":"1","key":"2427_CR2","doi-asserted-by":"publisher","first-page":"26","DOI":"10.1016\/j.media.2007.06.004","volume":"12","author":"BB Avants","year":"2008","unstructured":"Avants, B. B., Epstein, C. L., Grossman, M., & Gee, J. C. (2008). Symmetric diffeomorphic image registration with cross-correlation: Evaluating automated labeling of elderly and neurodegenerative brain. Medical Image Analysis, 12(1), 26\u201341.","journal-title":"Medical Image Analysis"},{"issue":"7971","key":"2427_CR3","doi-asserted-by":"publisher","first-page":"743","DOI":"10.1038\/s41586-023-06174-6","volume":"619","author":"F Bao","year":"2023","unstructured":"Bao, F., Wang, X., Sureshbabu, S. H., Sreekumar, G., Yang, L., Aggarwal, V., Boddeti, V. N., & Jacob, Z. (2023). Heat-assisted detection and ranging. Nature, 619(7971), 743\u2013748.","journal-title":"Nature"},{"key":"2427_CR4","doi-asserted-by":"publisher","first-page":"5147","DOI":"10.1109\/TIP.2020.2980972","volume":"29","author":"SY Cao","year":"2020","unstructured":"Cao, S. 
Y., Shen, H. L., Chen, S. J., & Li, C. (2020). Boosting structure consistency for multispectral and multimodal image registration. IEEE Transactions on Image Processing, 29, 5147\u20135162.","journal-title":"IEEE Transactions on Image Processing"},{"issue":"3","key":"2427_CR5","doi-asserted-by":"publisher","first-page":"1297","DOI":"10.1109\/TIP.2017.2776753","volume":"27","author":"SJ Chen","year":"2017","unstructured":"Chen, S. J., Shen, H. L., Li, C., & Xin, J. H. (2017). Normalized total gradient: A new measure for multispectral image registration. IEEE Transactions on Image Processing, 27(3), 1297\u20131310.","journal-title":"IEEE Transactions on Image Processing"},{"key":"2427_CR6","unstructured":"Chen, T., Kornblith, S., Norouzi, M., & Hinton, G. (2020). A simple framework for contrastive learning of visual representations. In: International Conference on Machine Learning, pp. 1597\u20131607"},{"key":"2427_CR7","doi-asserted-by":"publisher","first-page":"114","DOI":"10.1016\/j.infrared.2016.05.012","volume":"77","author":"Z Fu","year":"2016","unstructured":"Fu, Z., Wang, X., Xu, J., Zhou, N., & Zhao, Y. (2016). Infrared and visible images fusion based on rpca and nsct. Infrared Physics & Technology, 77, 114\u2013123.","journal-title":"Infrared Physics & Technology"},{"key":"2427_CR8","doi-asserted-by":"crossref","unstructured":"He, K., Fan, H., Wu, Y., Xie, S., & Girshick, R. (2020). Momentum contrast for unsupervised visual representation learning. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 9729\u20139738","DOI":"10.1109\/CVPR42600.2020.00975"},{"key":"2427_CR9","unstructured":"Henaff, O. (2020). Data-efficient image recognition with contrastive predictive coding. In: International Conference on Machine Learning, pp. 4182\u20134192"},{"key":"2427_CR10","doi-asserted-by":"crossref","unstructured":"Huang, Z., Liu, J., Fan, X., Liu, R., Zhong, W., & Luo, Z. (2022). 
Reconet: Recurrent correction network for fast and efficient multi-modality image fusion. In: Proceedings of the European Conference on Computer Vision, pp. 539\u2013555","DOI":"10.1007\/978-3-031-19797-0_31"},{"key":"2427_CR11","doi-asserted-by":"publisher","first-page":"4","DOI":"10.1016\/j.inffus.2013.12.002","volume":"19","author":"AP James","year":"2014","unstructured":"James, A. P., & Dasarathy, B. V. (2014). Medical image fusion: A survey of the state of the art. Information Fusion, 19, 4\u201319.","journal-title":"Information Fusion"},{"key":"2427_CR12","doi-asserted-by":"crossref","unstructured":"Jia, X., Bartlett, J., Chen, W., Song, S., Zhang, T., Cheng, X., Lu, W., Qiu, Z., & Duan, J. (2023). Fourier-net: Fast image registration with band-limited deformation. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol.\u00a037, pp. 1015\u20131023","DOI":"10.1609\/aaai.v37i1.25182"},{"issue":"9","key":"2427_CR13","doi-asserted-by":"publisher","first-page":"1712","DOI":"10.1109\/TPAMI.2016.2615619","volume":"39","author":"S Kim","year":"2016","unstructured":"Kim, S., Min, D., Ham, B., Do, M. N., & Sohn, K. (2016). Dasc: Robust dense descriptor for multi-modal and multi-spectral correspondence estimation. IEEE Transactions on Pattern Analysis and Machine Intelligence, 39(9), 1712\u20131729.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"2427_CR14","doi-asserted-by":"crossref","unstructured":"Lee, H.Y., Tseng, H.Y., Huang, J.B., Singh, M., & Yang, M.H. (2018). Diverse image-to-image translation via disentangled representations. In: Proceedings of the European Conference on Computer Vision, pp. 35\u201351","DOI":"10.1007\/978-3-030-01246-5_3"},{"issue":"5","key":"2427_CR15","doi-asserted-by":"publisher","first-page":"1625","DOI":"10.1007\/s11263-023-01948-x","volume":"132","author":"H Li","year":"2023","unstructured":"Li, H., Liu, J., Zhang, Y., & Liu, Y. (2023). 
A deep learning framework for infrared and visible image fusion without strict registration. International Journal of Computer Vision, 132(5), 1625\u20131644.","journal-title":"International Journal of Computer Vision"},{"issue":"5","key":"2427_CR16","doi-asserted-by":"publisher","first-page":"2614","DOI":"10.1109\/TIP.2018.2887342","volume":"28","author":"H Li","year":"2019","unstructured":"Li, H., & Wu, X. J. (2019). Densefuse: A fusion approach to infrared and visible images. IEEE Transactions on Image Processing, 28(5), 2614\u20132623.","journal-title":"IEEE Transactions on Image Processing"},{"key":"2427_CR17","doi-asserted-by":"publisher","first-page":"4733","DOI":"10.1109\/TIP.2020.2975984","volume":"29","author":"H Li","year":"2020","unstructured":"Li, H., Wu, X. J., & Kittler, J. (2020). Mdlatlrr: A novel decomposition method for infrared and visible image fusion. IEEE Transactions on Image Processing, 29, 4733\u20134746.","journal-title":"IEEE Transactions on Image Processing"},{"issue":"9","key":"2427_CR18","doi-asserted-by":"publisher","first-page":"11040","DOI":"10.1109\/TPAMI.2023.3268209","volume":"45","author":"H Li","year":"2023","unstructured":"Li, H., Xu, T., Wu, X. J., Lu, J., & Kittler, J. (2023). Lrrnet: A novel representation learning guided fusion network for infrared and visible images. IEEE Transactions on Pattern Analysis and Machine Intelligence, 45(9), 11040\u201311052.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"2427_CR19","doi-asserted-by":"publisher","first-page":"3296","DOI":"10.1109\/TIP.2019.2959244","volume":"29","author":"J Li","year":"2020","unstructured":"Li, J., Hu, Q., & Ai, M. (2020). Rift: Multi-modal image matching based on radiation-variation insensitive feature transform. 
IEEE Transactions on Image Processing, 29, 3296\u20133310.","journal-title":"IEEE Transactions on Image Processing"},{"key":"2427_CR20","doi-asserted-by":"crossref","unstructured":"Liang, P., Jiang, J., Liu, X., & Ma, J. (2022). Fusion from decomposition: A self-supervised decomposition approach for image fusion. In: Proceedings of the European Conference on Computer Vision, pp. 719\u2013735","DOI":"10.1007\/978-3-031-19797-0_41"},{"issue":"11","key":"2427_CR21","doi-asserted-by":"publisher","first-page":"7646","DOI":"10.1109\/TCSVT.2022.3184840","volume":"32","author":"G Liao","year":"2022","unstructured":"Liao, G., Gao, W., Li, G., Wang, J., & Kwong, S. (2022). Cross-collaborative fusion-encoder network for robust rgb-thermal salient object detection. IEEE Transactions on Circuits and Systems for Video Technology, 32(11), 7646\u20137661.","journal-title":"IEEE Transactions on Circuits and Systems for Video Technology"},{"key":"2427_CR22","doi-asserted-by":"crossref","unstructured":"Liu, J., Fan, X., Huang, Z., Wu, G., Liu, R., Zhong, W., & Luo, Z. (2022). Target-aware dual adversarial learning and a multi-scenario multi-modality benchmark to fuse infrared and visible for object detection. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 5802\u20135811","DOI":"10.1109\/CVPR52688.2022.00571"},{"issue":"1","key":"2427_CR23","doi-asserted-by":"publisher","first-page":"105","DOI":"10.1109\/TCSVT.2021.3056725","volume":"32","author":"J Liu","year":"2022","unstructured":"Liu, J., Fan, X., Jiang, J., Liu, R., & Luo, Z. (2022). Learning a deep multi-scale feature ensemble and an edge-attention guidance for image fusion. 
IEEE Transactions on Circuits and Systems for Video Technology, 32(1), 105\u2013119.","journal-title":"IEEE Transactions on Circuits and Systems for Video Technology"},{"issue":"5","key":"2427_CR24","doi-asserted-by":"publisher","first-page":"1748","DOI":"10.1007\/s11263-023-01952-1","volume":"132","author":"J Liu","year":"2023","unstructured":"Liu, J., Lin, R., Wu, G., Liu, R., Luo, Z., & Fan, X. (2023). Coconet: Coupled contrastive learning network with multi-level feature ensemble for multi-modality image fusion. International Journal of Computer Vision, 132(5), 1748\u20131775.","journal-title":"International Journal of Computer Vision"},{"key":"2427_CR25","doi-asserted-by":"crossref","unstructured":"Liu, J., Liu, Z., Wu, G., Ma, L., Liu, R., Zhong, W., Luo, Z., & Fan, X. (2023). Multi-interactive feature learning and a full-time multi-modality benchmark for image fusion and segmentation. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 8115\u20138124","DOI":"10.1109\/ICCV51070.2023.00745"},{"issue":"12","key":"2427_CR26","doi-asserted-by":"publisher","first-page":"1882","DOI":"10.1109\/LSP.2016.2618776","volume":"23","author":"Y Liu","year":"2016","unstructured":"Liu, Y., Chen, X., Ward, R. K., & Wang, Z. J. (2016). Image fusion with convolutional sparse representation. IEEE Signal Processing Letters, 23(12), 1882\u20131886.","journal-title":"IEEE Signal Processing Letters"},{"key":"2427_CR27","doi-asserted-by":"publisher","first-page":"147","DOI":"10.1016\/j.inffus.2014.09.004","volume":"24","author":"Y Liu","year":"2015","unstructured":"Liu, Y., Liu, S., & Wang, Z. (2015). A general framework for image fusion based on multi-scale transform and sparse representation. 
Information Fusion, 24, 147\u2013164.","journal-title":"Information Fusion"},{"issue":"8","key":"2427_CR28","doi-asserted-by":"publisher","first-page":"1528","DOI":"10.1109\/JAS.2022.105770","volume":"9","author":"Y Liu","year":"2022","unstructured":"Liu, Y., Shi, Y., Mu, F., Cheng, J., & Chen, X. (2022). Glioma segmentation-oriented multi-modal mr image fusion with adversarial learning. IEEE\/CAA Journal of Automatica Sinica, 9(8), 1528\u20131531.","journal-title":"IEEE\/CAA Journal of Automatica Sinica"},{"key":"2427_CR29","doi-asserted-by":"publisher","first-page":"91","DOI":"10.1023\/B:VISI.0000029664.99615.94","volume":"60","author":"DG Lowe","year":"2004","unstructured":"Lowe, D. G. (2004). Distinctive image features from scale-invariant keypoints. International Journal of Computer Vision, 60, 91\u2013110.","journal-title":"International Journal of Computer Vision"},{"issue":"6","key":"2427_CR30","doi-asserted-by":"publisher","first-page":"1347","DOI":"10.1109\/TUFFC.2010.1554","volume":"57","author":"J Luo","year":"2010","unstructured":"Luo, J., & Konofagou, E. E. (2010). A fast normalized cross-correlation calculation method for motion estimation. IEEE Transactions on Ultrasonics, Ferroelectrics, and Frequency Control, 57(6), 1347\u20131357.","journal-title":"IEEE Transactions on Ultrasonics, Ferroelectrics, and Frequency Control"},{"issue":"7","key":"2427_CR31","doi-asserted-by":"publisher","first-page":"1200","DOI":"10.1109\/JAS.2022.105686","volume":"9","author":"J Ma","year":"2022","unstructured":"Ma, J., Tang, L., Fan, F., Huang, J., Mei, X., & Ma, Y. (2022). Swinfusion: Cross-domain long-range learning for general image fusion via swin transformer. 
IEEE\/CAA Journal of Automatica Sinica, 9(7), 1200\u20131217.","journal-title":"IEEE\/CAA Journal of Automatica Sinica"},{"key":"2427_CR32","doi-asserted-by":"publisher","first-page":"11","DOI":"10.1016\/j.inffus.2018.09.004","volume":"48","author":"J Ma","year":"2019","unstructured":"Ma, J., Yu, W., Liang, P., Li, C., & Jiang, J. (2019). Fusiongan: A generative adversarial network for infrared and visible image fusion. Information Fusion, 48, 11\u201326.","journal-title":"Information Fusion"},{"key":"2427_CR33","doi-asserted-by":"publisher","first-page":"8","DOI":"10.1016\/j.infrared.2017.02.005","volume":"82","author":"J Ma","year":"2017","unstructured":"Ma, J., Zhou, Z., Wang, B., & Zong, H. (2017). Infrared and visible image fusion based on visual saliency map and weighted least square optimization. Infrared Physics & Technology, 82, 8\u201317.","journal-title":"Infrared Physics & Technology"},{"key":"2427_CR34","doi-asserted-by":"crossref","unstructured":"Mou, J., Gao, W., & Song, Z. (2013). Image fusion based on non-negative matrix factorization and infrared feature extraction. In: Proceedings of the International Congress on Image and Signal Processing, pp. 1046\u20131050","DOI":"10.1109\/CISP.2013.6745210"},{"key":"2427_CR35","doi-asserted-by":"publisher","first-page":"1763","DOI":"10.1109\/TMM.2021.3071243","volume":"24","author":"H Ning","year":"2022","unstructured":"Ning, H., Zheng, X., Lu, X., & Yuan, Y. (2022). Disentangled representation learning for cross-modal biometric matching. IEEE Transactions on Multimedia, 24, 1763\u20131774.","journal-title":"IEEE Transactions on Multimedia"},{"key":"2427_CR36","unstructured":"Qiu, H., Qin, C., Schuh, A., Hammernik, K., & Rueckert, D. (2021). Learning diffeomorphic and modality-invariant registration using b-splines. In: Medical Imaging with Deep Learning"},{"key":"2427_CR37","doi-asserted-by":"crossref","unstructured":"Redmon, J., Divvala, S., Girshick, R., & Farhadi, A. (2016). 
You only look once: Unified, real-time object detection. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 779\u2013788","DOI":"10.1109\/CVPR.2016.91"},{"key":"2427_CR38","doi-asserted-by":"crossref","unstructured":"Sun, D., Yang, X., Liu, M.Y., & Kautz, J. (2018). Pwc-net: Cnns for optical flow using pyramid, warping, and cost volume. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 8934\u20138943","DOI":"10.1109\/CVPR.2018.00931"},{"issue":"12","key":"2427_CR39","doi-asserted-by":"publisher","first-page":"2121","DOI":"10.1109\/JAS.2022.106082","volume":"9","author":"L Tang","year":"2022","unstructured":"Tang, L., Deng, Y., Ma, Y., Huang, J., & Ma, J. (2022). Superfusion: A versatile image registration and fusion network with semantic awareness. IEEE\/CAA Journal of Automatica Sinica, 9(12), 2121\u20132137.","journal-title":"IEEE\/CAA Journal of Automatica Sinica"},{"key":"2427_CR40","doi-asserted-by":"publisher","first-page":"28","DOI":"10.1016\/j.inffus.2021.12.004","volume":"82","author":"L Tang","year":"2022","unstructured":"Tang, L., Yuan, J., & Ma, J. (2022). Image fusion in the loop of high-level vision tasks: A semantic-aware real-time infrared and visible image fusion network. Information Fusion, 82, 28\u201342.","journal-title":"Information Fusion"},{"key":"2427_CR41","doi-asserted-by":"publisher","first-page":"79","DOI":"10.1016\/j.inffus.2022.03.007","volume":"83","author":"L Tang","year":"2022","unstructured":"Tang, L., Yuan, J., Zhang, H., Jiang, X., & Ma, J. (2022). Piafusion: A progressive infrared and visible image fusion network based on illumination aware. Information Fusion, 83, 79\u201392.","journal-title":"Information Fusion"},{"key":"2427_CR42","doi-asserted-by":"publisher","first-page":"101870","DOI":"10.1016\/j.inffus.2023.101870","volume":"99","author":"L Tang","year":"2023","unstructured":"Tang, L., Zhang, H., Xu, H., & Ma, J. (2023). 
Rethinking the necessity of image fusion in high-level vision tasks: A practical infrared and visible image fusion network based on progressive semantic injection and scene fidelity. Information Fusion, 99, 101870.","journal-title":"Information Fusion"},{"key":"2427_CR43","doi-asserted-by":"publisher","first-page":"5134","DOI":"10.1109\/TIP.2022.3193288","volume":"31","author":"W Tang","year":"2022","unstructured":"Tang, W., He, F., Liu, Y., & Duan, Y. (2022). Matr: Multimodal medical image fusion via multiscale adaptive transformer. IEEE Transactions on Image Processing, 31, 5134\u20135149.","journal-title":"IEEE Transactions on Image Processing"},{"key":"2427_CR44","doi-asserted-by":"crossref","unstructured":"Teed, Z., & Deng, J. (2020). Raft: Recurrent all-pairs field transforms for optical flow. In: Proceedings of the European Conference on Computer Vision, pp. 402\u2013419","DOI":"10.1007\/978-3-030-58536-5_24"},{"key":"2427_CR45","doi-asserted-by":"publisher","first-page":"249","DOI":"10.1016\/j.dib.2017.09.038","volume":"15","author":"A Toet","year":"2017","unstructured":"Toet, A. (2017). The tno multiband image data collection. Data in Brief, 15, 249\u2013251.","journal-title":"Data in Brief"},{"key":"2427_CR46","doi-asserted-by":"crossref","unstructured":"Truong, P., Danelljan, M., & Timofte, R. (2020). Glu-net: Global-local universal network for dense flow and correspondences. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 6258\u20136268","DOI":"10.1109\/CVPR42600.2020.00629"},{"key":"2427_CR47","doi-asserted-by":"crossref","unstructured":"Vidoni, E.D. (2012). The whole brain atlas: www.med.harvard.edu\/aanlib. Journal of Neurologic Physical Therapy 36(2), 108","DOI":"10.1097\/NPT.0b013e3182563795"},{"key":"2427_CR48","doi-asserted-by":"crossref","unstructured":"Wachinger, C., & Navab, N. (2010). Structural image representation for image registration. 
In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition Workshops, pp. 23\u201330","DOI":"10.1109\/CVPRW.2010.5543432"},{"key":"2427_CR49","doi-asserted-by":"crossref","unstructured":"Wang, D., Liu, J., Fan, X., & Liu, R. (2022). Unsupervised misaligned infrared and visible image fusion via cross-modality image generation and registration. In: Proceedings of the International Joint Conference on Artificial Intelligence, pp. 3508\u20133515","DOI":"10.24963\/ijcai.2022\/487"},{"issue":"11","key":"2427_CR50","doi-asserted-by":"publisher","first-page":"15834","DOI":"10.1109\/TNNLS.2023.3290038","volume":"35","author":"G Wu","year":"2024","unstructured":"Wu, G., Jiang, J., & Liu, X. (2024). A practical contrastive learning framework for single-image super-resolution. IEEE Transactions on Neural Networks and Learning Systems, 35(11), 15834\u201315845.","journal-title":"IEEE Transactions on Neural Networks and Learning Systems"},{"key":"2427_CR51","doi-asserted-by":"crossref","unstructured":"Wu, H., Qu, Y., Lin, S., Zhou, J., Qiao, R., Zhang, Z., Xie, Y., & Ma, L. (2021). Contrastive learning for compact single image dehazing. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 10551\u201310560","DOI":"10.1109\/CVPR46437.2021.01041"},{"key":"2427_CR52","doi-asserted-by":"publisher","first-page":"177","DOI":"10.1016\/j.inffus.2021.06.001","volume":"76","author":"H Xu","year":"2021","unstructured":"Xu, H., & Ma, J. (2021). Emfusion: An unsupervised enhanced medical image fusion network. Information Fusion, 76, 177\u2013186.","journal-title":"Information Fusion"},{"issue":"1","key":"2427_CR53","doi-asserted-by":"publisher","first-page":"502","DOI":"10.1109\/TPAMI.2020.3012548","volume":"44","author":"H Xu","year":"2022","unstructured":"Xu, H., Ma, J., Jiang, J., Guo, X., & Ling, H. (2022). U2fusion: A unified unsupervised image fusion network. 
IEEE Transactions on Pattern Analysis and Machine Intelligence, 44(1), 502\u2013518.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"2427_CR54","doi-asserted-by":"crossref","unstructured":"Xu, H., Ma, J., Yuan, J., Le, Z., & Liu, W. (2022). Rfnet: Unsupervised network for mutually reinforcing multi-modal image registration and fusion. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 19679\u201319688","DOI":"10.1109\/CVPR52688.2022.01906"},{"key":"2427_CR55","first-page":"1","volume":"70","author":"H Xu","year":"2021","unstructured":"Xu, H., Wang, X., & Ma, J. (2021). Drf: Disentangled representation for visible and infrared image fusion. IEEE Transactions on Instrumentation and Measurement, 70, 1\u201313.","journal-title":"IEEE Transactions on Instrumentation and Measurement"},{"issue":"10","key":"2427_CR56","doi-asserted-by":"publisher","first-page":"12148","DOI":"10.1109\/TPAMI.2023.3283682","volume":"45","author":"H Xu","year":"2023","unstructured":"Xu, H., Yuan, J., & Ma, J. (2023). Murf: Mutually reinforcing multi-modal image registration and fusion. IEEE Transactions on Pattern Analysis and Machine Intelligence, 45(10), 12148\u201312166.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"2427_CR57","doi-asserted-by":"crossref","unstructured":"Yi, X., Tang, L., Zhang, H., Xu, H., & Ma, J. (2024). Diff-if: Multi-modality image fusion via diffusion model with fusion knowledge prior. Information Fusion, 110, 102450.","DOI":"10.1016\/j.inffus.2024.102450"},{"key":"2427_CR58","doi-asserted-by":"crossref","unstructured":"Yi, X., Xu, H., Zhang, H., Tang, L., & Ma, J. (2024). Text-if: Leveraging semantic text guidance for degradation-aware and interactive image fusion. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 
27026\u201327035","DOI":"10.1109\/CVPR52733.2024.02552"},{"issue":"3","key":"2427_CR59","doi-asserted-by":"publisher","first-page":"229","DOI":"10.1016\/j.inffus.2012.01.008","volume":"14","author":"H Yin","year":"2013","unstructured":"Yin, H., Li, S., & Fang, L. (2013). Simultaneous image fusion and super-resolution using sparse representation. Information Fusion, 14(3), 229\u2013240.","journal-title":"Information Fusion"},{"key":"2427_CR60","doi-asserted-by":"publisher","first-page":"323","DOI":"10.1016\/j.inffus.2021.06.008","volume":"76","author":"H Zhang","year":"2021","unstructured":"Zhang, H., Xu, H., Tian, X., Jiang, J., & Ma, J. (2021). Image fusion meets deep learning: A survey and perspective. Information Fusion, 76, 323\u2013336.","journal-title":"Information Fusion"},{"issue":"8","key":"2427_CR61","doi-asserted-by":"publisher","first-page":"10535","DOI":"10.1109\/TPAMI.2023.3261282","volume":"45","author":"X Zhang","year":"2023","unstructured":"Zhang, X., & Demiris, Y. (2023). Visible and infrared image fusion using deep learning. IEEE Transactions on Pattern Analysis and Machine Intelligence, 45(8), 10535\u201310554.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"2427_CR62","unstructured":"Zhao, F., Huang, Q., & Gao, W. (2006). Image matching by normalized cross-correlation. In: IEEE International Conference on Acoustics Speech and Signal Processing Proceedings, vol.\u00a02, pp. II\u2013II"},{"key":"2427_CR63","doi-asserted-by":"crossref","unstructured":"Zhao, W., Xie, S., Zhao, F., He, Y., & Lu, H. (2023). Metafusion: Infrared and visible image fusion via meta-feature embedding from object detection. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 13955\u201313965","DOI":"10.1109\/CVPR52729.2023.01341"},{"key":"2427_CR64","doi-asserted-by":"crossref","unstructured":"Zhao, Z., Bai, H., Zhang, J., Zhang, Y., Xu, S., Lin, Z., Timofte, R., & Van\u00a0Gool, L. (2023). 
Cddfuse: Correlation-driven dual-branch feature decomposition for multi-modality image fusion. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 5906\u20135916","DOI":"10.1109\/CVPR52729.2023.00572"},{"key":"2427_CR65","doi-asserted-by":"crossref","unstructured":"Zhao, Z., Bai, H., Zhu, Y., Zhang, J., Xu, S., Zhang, Y., Zhang, K., Meng, D., Timofte, R., & Van\u00a0Gool, L. (2023). Ddfm: Denoising diffusion model for multi-modality image fusion. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 8082\u20138093","DOI":"10.1109\/ICCV51070.2023.00742"},{"key":"2427_CR66","doi-asserted-by":"crossref","unstructured":"Zhao, Z., Xu, S., Zhang, C., Liu, J., Zhang, J., & Li, P. (2020). Didfuse: deep image decomposition for infrared and visible image fusion. In: Proceedings of the International Joint Conference on Artificial Intelligence, pp. 970\u2013976","DOI":"10.24963\/ijcai.2020\/135"},{"key":"2427_CR67","doi-asserted-by":"crossref","unstructured":"Zhou, S., Tan, W., & Yan, B. (2022). Promoting single-modal optical flow network for diverse cross-modal flow estimation. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol.\u00a036, pp. 3562\u20133570","DOI":"10.1609\/aaai.v36i3.20268"},{"issue":"1","key":"2427_CR68","doi-asserted-by":"publisher","first-page":"48","DOI":"10.1109\/TIV.2022.3164899","volume":"8","author":"W Zhou","year":"2022","unstructured":"Zhou, W., Dong, S., Lei, J., & Yu, L. (2022). Mtanet: Multitask-aware network with hierarchical multimodal fusion for rgb-t urban scene understanding. 
IEEE Transactions on Intelligent Vehicles, 8(1), 48\u201358.","journal-title":"IEEE Transactions on Intelligent Vehicles"}],"container-title":["International Journal of Computer Vision"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11263-025-02427-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11263-025-02427-1\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11263-025-02427-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,6]],"date-time":"2025-09-06T10:55:19Z","timestamp":1757156119000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11263-025-02427-1"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,4,15]]},"references-count":68,"journal-issue":{"issue":"8","published-print":{"date-parts":[[2025,8]]}},"alternative-id":["2427"],"URL":"https:\/\/doi.org\/10.1007\/s11263-025-02427-1","relation":{},"ISSN":["0920-5691","1573-1405"],"issn-type":[{"value":"0920-5691","type":"print"},{"value":"1573-1405","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,4,15]]},"assertion":[{"value":"23 July 2024","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"14 March 2025","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"15 April 2025","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare that they have no Conflict of 
interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}]}}