{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,9]],"date-time":"2026-03-09T08:07:03Z","timestamp":1773043623263,"version":"3.50.1"},"reference-count":85,"publisher":"Springer Science and Business Media LLC","issue":"4","license":[{"start":{"date-parts":[[2026,3,9]],"date-time":"2026-03-09T00:00:00Z","timestamp":1773014400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2026,3,9]],"date-time":"2026-03-09T00:00:00Z","timestamp":1773014400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["J Supercomput"],"DOI":"10.1007\/s11227-026-08359-y","type":"journal-article","created":{"date-parts":[[2026,3,9]],"date-time":"2026-03-09T07:11:26Z","timestamp":1773040286000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["DDSEFuse: dual-branch feature decomposition and single-scale iterative enhancement network for infrared\u2013visible image 
fusion"],"prefix":"10.1007","volume":"82","author":[{"given":"Junyan","family":"Qin","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Lei","family":"Yu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Huiqi","family":"Wang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zhi","family":"Zeng","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2026,3,9]]},"reference":[{"key":"8359_CR1","doi-asserted-by":"publisher","first-page":"153","DOI":"10.1016\/j.inffus.2018.02.004","volume":"45","author":"J Ma","year":"2019","unstructured":"Ma J, Ma Y, Li C (2019) Infrared and visible image fusion methods and applications: a survey. Inf Fusion 45:153\u2013178. https:\/\/doi.org\/10.1016\/j.inffus.2018.02.004","journal-title":"Inf Fusion"},{"issue":"5","key":"8359_CR2","doi-asserted-by":"publisher","first-page":"1808","DOI":"10.1109\/JSTARS.2015.2489838","volume":"9","author":"M Eslami","year":"2016","unstructured":"Eslami M, Mohammadzadeh A (2016) Developing a spectral-based strategy for urban object detection from airborne hyperspectral TIR and visible data. IEEE J Sel Top Appl Earth Observ Remote Sens 9(5):1808\u20131816. https:\/\/doi.org\/10.1109\/JSTARS.2015.2489838","journal-title":"IEEE J Sel Top Appl Earth Observ Remote Sens"},{"key":"8359_CR3","doi-asserted-by":"publisher","first-page":"100","DOI":"10.1016\/j.inffus.2016.05.004","volume":"33","author":"S Li","year":"2017","unstructured":"Li S, Kang X, Fang L, Hu J, Yin H (2017) Pixel-level image fusion: a survey of the state of the art. Inf Fusion 33:100\u2013112. 
https:\/\/doi.org\/10.1016\/j.inffus.2016.05.004","journal-title":"Inf Fusion"},{"key":"8359_CR4","doi-asserted-by":"publisher","unstructured":"Liu A, Liu X, Fan J, Ma Y, Zhang A, Xie H, Tao D (2019) Perceptual-sensitive GAN for generating adversarial patches. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol 33, pp 1028\u20131035. https:\/\/doi.org\/10.1609\/aaai.v33i01.33011028","DOI":"10.1609\/aaai.v33i01.33011028"},{"key":"8359_CR5","doi-asserted-by":"publisher","unstructured":"Qin X, Zhang Z, Huang C, Gao C, Dehghan M, Jagersand M (2019) Basnet: boundary-aware salient object detection. In: 2019 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp 7471\u20137481. https:\/\/doi.org\/10.1109\/CVPR.2019.00766","DOI":"10.1109\/CVPR.2019.00766"},{"key":"8359_CR6","doi-asserted-by":"publisher","unstructured":"Wang J, Liu A, Yin Z, Liu S, Tang S, Liu X (2021) Dual attention suppression attack: generate adversarial camouflage in physical world. In: 2021 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp 8561\u20138570. https:\/\/doi.org\/10.1109\/CVPR46437.2021.00846","DOI":"10.1109\/CVPR46437.2021.00846"},{"issue":"5","key":"8359_CR7","doi-asserted-by":"publisher","first-page":"1804","DOI":"10.1109\/TCSVT.2020.3014663","volume":"31","author":"Q Zhang","year":"2021","unstructured":"Zhang Q, Xiao T, Huang N, Zhang D, Han J (2021) Revisiting feature fusion for rgb-t salient object detection. IEEE Trans Circuits Syst Video Technol 31(5):1804\u20131818. https:\/\/doi.org\/10.1109\/TCSVT.2020.3014663","journal-title":"IEEE Trans Circuits Syst Video Technol"},{"issue":"3","key":"8359_CR8","doi-asserted-by":"publisher","first-page":"1224","DOI":"10.1109\/TCSVT.2021.3077058","volume":"32","author":"W Zhou","year":"2022","unstructured":"Zhou W, Guo Q, Lei J, Yu L, Hwang J-N (2022) Ecffnet: effective and consistent feature fusion network for rgb-t salient object detection. 
IEEE Trans Circuits Syst Video Technol 32(3):1224\u20131235. https:\/\/doi.org\/10.1109\/TCSVT.2021.3077058","journal-title":"IEEE Trans Circuits Syst Video Technol"},{"issue":"10","key":"8359_CR9","doi-asserted-by":"publisher","first-page":"2913","DOI":"10.1109\/TCSVT.2018.2874312","volume":"29","author":"C Li","year":"2019","unstructured":"Li C, Zhu C, Zhang J, Luo B, Wu X, Tang J (2019) Learning local-global multi-graph descriptors for rgb-t object tracking. IEEE Trans Circuits Syst Video Technol 29(10):2913\u20132926. https:\/\/doi.org\/10.1109\/TCSVT.2018.2874312","journal-title":"IEEE Trans Circuits Syst Video Technol"},{"key":"8359_CR10","doi-asserted-by":"publisher","DOI":"10.3390\/e22010118","author":"Y Liu","year":"2020","unstructured":"Liu Y, Yang X, Zhang R, Albertini MK, Celik T, Jeon G (2020) Entropy-based image fusion with joint sparse representation and rolling guidance filter. Entropy. https:\/\/doi.org\/10.3390\/e22010118","journal-title":"Entropy"},{"key":"8359_CR11","doi-asserted-by":"publisher","first-page":"64","DOI":"10.1016\/j.ins.2019.08.066","volume":"508","author":"J Chen","year":"2020","unstructured":"Chen J, Li X, Luo L, Mei X, Ma J (2020) Infrared and visible image fusion based on target-enhanced multiscale transform decomposition. Inf Sci 508:64\u201378. https:\/\/doi.org\/10.1016\/j.ins.2019.08.066","journal-title":"Inf Sci"},{"key":"8359_CR12","doi-asserted-by":"publisher","first-page":"1063","DOI":"10.1007\/s12046-017-0673-1","volume":"42","author":"AV Vanmali","year":"2017","unstructured":"Vanmali AV, Gadre VM (2017) Visible and NIR image fusion using weight-map-guided Laplacian\u2013Gaussian pyramid for improving scene visibility. S\u0101dhan\u0101 42:1063\u20131082. 
https:\/\/doi.org\/10.1007\/s12046-017-0673-1","journal-title":"S\u0101dhan\u0101"},{"key":"8359_CR13","doi-asserted-by":"publisher","first-page":"94","DOI":"10.1016\/j.infrared.2017.04.018","volume":"83","author":"C Liu","year":"2017","unstructured":"Liu C, Qi Y, Ding W (2017) Infrared and visible image fusion method based on saliency detection in sparse domain. Infrared Phys Technol 83:94\u2013102. https:\/\/doi.org\/10.1016\/j.infrared.2017.04.018","journal-title":"Infrared Phys Technol"},{"key":"8359_CR14","doi-asserted-by":"publisher","unstructured":"Bavirisetti DP, Xiao G, Liu G (2017) Multi-sensor image fusion based on fourth order partial differential equations. In: 2017 20th International Conference on Information Fusion (Fusion). IEEE, pp 1\u20139. https:\/\/doi.org\/10.23919\/ICIF.2017.8009719","DOI":"10.23919\/ICIF.2017.8009719"},{"issue":"12","key":"8359_CR15","doi-asserted-by":"publisher","first-page":"9645","DOI":"10.1109\/TIM.2020.3005230","volume":"69","author":"H Li","year":"2020","unstructured":"Li H, Wu X-J, Durrani T (2020) Nestfuse: an infrared and visible image fusion architecture based on nest connection and spatial\/channel attention models. IEEE Trans Instrum Meas 69(12):9645\u20139656. https:\/\/doi.org\/10.1109\/TIM.2020.3005230","journal-title":"IEEE Trans Instrum Meas"},{"key":"8359_CR16","doi-asserted-by":"publisher","unstructured":"Zhang H, Xu H, Xiao Y, Guo X, Ma J (2020) Rethinking the image fusion: a fast unified image fusion network based on proportional maintenance of gradient and intensity. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol 34, pp 12797\u201312804. 
https:\/\/doi.org\/10.1609\/aaai.v34i07.6975","DOI":"10.1609\/aaai.v34i07.6975"},{"key":"8359_CR17","doi-asserted-by":"publisher","first-page":"4980","DOI":"10.1109\/TIP.2020.2977573","volume":"29","author":"J Ma","year":"2020","unstructured":"Ma J, Xu H, Jiang J, Mei X, Zhang X-P (2020) Ddcgan: a dual-discriminator conditional generative adversarial network for multi-resolution image fusion. IEEE Trans Image Process 29:4980\u20134995. https:\/\/doi.org\/10.1109\/TIP.2020.2977573","journal-title":"IEEE Trans Image Process"},{"key":"8359_CR18","doi-asserted-by":"publisher","unstructured":"Su D, Zhang Y, Li H, Li J, Liu Y (2025) Unifuse: a unified all-in-one framework for multi-modal medical image fusion under diverse degradations and misalignments. arXiv preprint arXiv:2506.22736, https:\/\/doi.org\/10.48550\/arXiv.2506.22736","DOI":"10.48550\/arXiv.2506.22736"},{"key":"8359_CR19","doi-asserted-by":"publisher","unstructured":"Zhao Z, Bai H, Zhang J, Zhang Y, Xu S, Lin Z, Timofte R, Van Gool L (2023) Cddfuse: correlation-driven dual-branch feature decomposition for multi-modality image fusion. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp 5906\u20135916 (2023). https:\/\/doi.org\/10.1109\/CVPR52729.2023.00572","DOI":"10.1109\/CVPR52729.2023.00572"},{"key":"8359_CR20","doi-asserted-by":"publisher","unstructured":"Liu J, Fan X, Huang Z, Wu G, Liu R, Zhong W, Luo Z (2022) Target-aware dual adversarial learning and a multi-scenario multi-modality benchmark to fuse infrared and visible for object detection. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp 5802\u20135811. 
https:\/\/doi.org\/10.1109\/CVPR52688.2022.00571","DOI":"10.1109\/CVPR52688.2022.00571"},{"key":"8359_CR21","doi-asserted-by":"publisher","DOI":"10.1016\/j.inffus.2025.102931","volume":"118","author":"X Wang","year":"2025","unstructured":"Wang X, Guan Z, Qian W, Cao J, Ma R, Bi C (2025) A degradation-aware guided fusion network for infrared and visible image. Inf Fusion 118:102931. https:\/\/doi.org\/10.1016\/j.inffus.2025.102931","journal-title":"Inf Fusion"},{"key":"8359_CR22","doi-asserted-by":"publisher","DOI":"10.1016\/j.infrared.2024.105618","volume":"144","author":"Q Shi","year":"2025","unstructured":"Shi Q, Xi Z, Li H (2025) Nighttime visible and infrared image fusion based on adversarial learning. Infrared Phys Technol 144:105618. https:\/\/doi.org\/10.1016\/j.infrared.2024.105618","journal-title":"Infrared Phys Technol"},{"key":"8359_CR23","doi-asserted-by":"publisher","first-page":"79","DOI":"10.1016\/j.inffus.2022.03.007","volume":"83","author":"L Tang","year":"2022","unstructured":"Tang L, Yuan J, Zhang H, Jiang X, Ma J (2022) Piafusion: a progressive infrared and visible image fusion network based on illumination aware. Inf Fusion 83:79\u201392. https:\/\/doi.org\/10.1016\/j.inffus.2022.03.007","journal-title":"Inf Fusion"},{"key":"8359_CR24","doi-asserted-by":"publisher","first-page":"249","DOI":"10.1016\/j.dib.2017.09.038","volume":"15","author":"A Toet","year":"2017","unstructured":"Toet A (2017) The tno multiband image data collection. Data Brief 15:249. https:\/\/doi.org\/10.1016\/j.dib.2017.09.038","journal-title":"Data Brief"},{"key":"8359_CR25","doi-asserted-by":"publisher","unstructured":"Xu H, Ma J, Le Z, Jiang J, Guo X (2020) Fusiondn: a unified densely connected network for image fusion. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol 34, pp 12484\u201312491. 
https:\/\/doi.org\/10.1609\/aaai.v34i07.6936","DOI":"10.1609\/aaai.v34i07.6936"},{"issue":"5","key":"8359_CR26","doi-asserted-by":"publisher","first-page":"2614","DOI":"10.1109\/TIP.2018.2887342","volume":"28","author":"H Li","year":"2018","unstructured":"Li H, Wu X-J (2018) Densefuse: a fusion approach to infrared and visible images. IEEE Trans Image Process 28(5):2614\u20132623. https:\/\/doi.org\/10.1109\/TIP.2018.2887342","journal-title":"IEEE Trans Image Process"},{"key":"8359_CR27","doi-asserted-by":"publisher","first-page":"72","DOI":"10.1016\/j.inffus.2021.02.023","volume":"73","author":"H Li","year":"2021","unstructured":"Li H, Wu X-J, Kittler J (2021) Rfn-nest: an end-to-end residual fusion network for infrared and visible images. Inf Fusion 73:72\u201386. https:\/\/doi.org\/10.1016\/j.inffus.2021.02.023","journal-title":"Inf Fusion"},{"key":"8359_CR28","doi-asserted-by":"publisher","unstructured":"Zhao Z, Xu S, Zhang C, Liu J, Li P, Zhang J (2020) Didfuse: deep image decomposition for infrared and visible image fusion. arXiv preprint arXiv:2003.09210, https:\/\/doi.org\/10.24963\/ijcai.2020\/135","DOI":"10.24963\/ijcai.2020\/135"},{"key":"8359_CR29","doi-asserted-by":"publisher","DOI":"10.1016\/j.inffus.2023.102147","volume":"103","author":"H Li","year":"2024","unstructured":"Li H, Wu X-J (2024) Crossfuse: a novel cross attention mechanism based infrared and visible image fusion approach. Inf Fusion 103:102147. https:\/\/doi.org\/10.1016\/j.inffus.2023.102147","journal-title":"Inf Fusion"},{"key":"8359_CR30","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2023.110192","volume":"148","author":"X Luo","year":"2024","unstructured":"Luo X, Wang J, Zhang Z, Wu X-J (2024) A full-scale hierarchical encoder-decoder network with cascading edge-prior for infrared and visible image fusion. Pattern Recogn 148:110192. 
https:\/\/doi.org\/10.1016\/j.patcog.2023.110192","journal-title":"Pattern Recogn"},{"key":"8359_CR31","doi-asserted-by":"publisher","first-page":"22190","DOI":"10.1109\/ACCESS.2024.3364050","volume":"12","author":"H Wang","year":"2024","unstructured":"Wang H, Shu C, Li X, Fu Y, Fu Z, Yin X (2024) Two-stream edge-aware network for infrared and visible image fusion with multi-level wavelet decomposition. IEEE Access 12:22190\u201322204. https:\/\/doi.org\/10.1109\/ACCESS.2024.3364050","journal-title":"IEEE Access"},{"key":"8359_CR32","doi-asserted-by":"publisher","unstructured":"Bai H, Zhang J, Zhao Z, Wu Y, Deng L, Cui Y, Feng T, Xu S (2025) Task-driven image fusion with learnable fusion loss. In: Proceedings of the Computer Vision and Pattern Recognition Conference, pp 7457\u20137468. https:\/\/doi.org\/10.1109\/CVPR61190.2025.00169","DOI":"10.1109\/CVPR61190.2025.00169"},{"issue":"03","key":"8359_CR33","doi-asserted-by":"publisher","first-page":"1850018","DOI":"10.1142\/S0219691318500182","volume":"16","author":"Y Liu","year":"2018","unstructured":"Liu Y, Chen X, Cheng J, Peng H, Wang Z (2018) Infrared and visible image fusion with convolutional neural networks. Int J Wavelets Multiresolut Inf Process 16(03):1850018. https:\/\/doi.org\/10.1142\/S0219691318500182","journal-title":"Int J Wavelets Multiresolut Inf Process"},{"key":"8359_CR34","doi-asserted-by":"publisher","first-page":"640","DOI":"10.1109\/TCI.2020.2965304","volume":"6","author":"R Hou","year":"2020","unstructured":"Hou R, Zhou D, Nie R, Liu D, Xiong L, Guo Y, Yu C (2020) Vif-net: an unsupervised framework for infrared and visible image fusion. IEEE Trans Comput Imaging 6:640\u2013651. https:\/\/doi.org\/10.1109\/TCI.2020.2965304","journal-title":"IEEE Trans Comput Imaging"},{"key":"8359_CR35","doi-asserted-by":"publisher","unstructured":"Zhao Z, Xu S, Zhang J, Liang C, Zhang C, Liu J (2020) Efficient and model-based infrared and visible image fusion via algorithm unrolling. 
arXiv preprint arXiv:2005.05896, https:\/\/doi.org\/10.1109\/TCSVT.2021.3075745","DOI":"10.1109\/TCSVT.2021.3075745"},{"key":"8359_CR36","doi-asserted-by":"publisher","first-page":"86413","DOI":"10.1109\/ACCESS.2023.3302702","volume":"11","author":"Z Pan","year":"2023","unstructured":"Pan Z, Ouyang W (2023) An efficient network model for visible and infrared image fusion. IEEE Access 11:86413\u201386430. https:\/\/doi.org\/10.1109\/ACCESS.2023.3302702","journal-title":"IEEE Access"},{"key":"8359_CR37","doi-asserted-by":"publisher","DOI":"10.1016\/j.jvcir.2023.104015","volume":"98","author":"C Yang","year":"2024","unstructured":"Yang C, He Y, Sun C, Chen B, Cao J, Wang Y, Hao Q (2024) Multi-scale convolutional neural networks and saliency weight maps for infrared and visible image fusion. J Vis Commun Image Represent 98:104015. https:\/\/doi.org\/10.1016\/j.jvcir.2023.104015","journal-title":"J Vis Commun Image Represent"},{"key":"8359_CR38","doi-asserted-by":"publisher","DOI":"10.1109\/TCI.2024.3369398","author":"H Tang","year":"2024","unstructured":"Tang H, Liu G, Qian Y, Wang J, Xiong J (2024) Egefusion: towards edge gradient enhancement in infrared and visible image fusion with multi-scale transform. IEEE Trans Comput Imaging. https:\/\/doi.org\/10.1109\/TCI.2024.3369398","journal-title":"IEEE Trans Comput Imaging"},{"key":"8359_CR39","doi-asserted-by":"publisher","first-page":"11","DOI":"10.1016\/j.inffus.2018.09.004","volume":"48","author":"J Ma","year":"2019","unstructured":"Ma J, Yu W, Liang P, Li C, Jiang J (2019) Fusiongan: a generative adversarial network for infrared and visible image fusion. Inf Fusion 48:11\u201326. 
https:\/\/doi.org\/10.1016\/j.inffus.2018.09.004","journal-title":"Inf Fusion"},{"key":"8359_CR40","doi-asserted-by":"publisher","first-page":"85","DOI":"10.1016\/j.inffus.2019.07.005","volume":"54","author":"J Ma","year":"2020","unstructured":"Ma J, Liang P, Yu W, Chen C, Guo X, Wu J, Jiang J (2020) Infrared and visible image fusion via detail preserving adversarial learning. Inf Fusion 54:85\u201398. https:\/\/doi.org\/10.1016\/j.inffus.2019.07.005","journal-title":"Inf Fusion"},{"key":"8359_CR41","doi-asserted-by":"publisher","first-page":"484","DOI":"10.1016\/j.ins.2021.06.083","volume":"576","author":"J Fu","year":"2021","unstructured":"Fu J, Li W, Du J, Xu L (2021) Dsagan: a generative adversarial network based on dual-stream attention mechanism for anatomical and functional image fusion. Inf Sci 576:484\u2013506. https:\/\/doi.org\/10.1016\/j.ins.2021.06.083","journal-title":"Inf Sci"},{"key":"8359_CR42","doi-asserted-by":"publisher","first-page":"336","DOI":"10.1016\/j.inffus.2022.12.007","volume":"92","author":"Y Rao","year":"2023","unstructured":"Rao Y, Wu D, Han M, Wang T, Yang Y, Lei T, Zhou C, Bai H, Xing L (2023) At-gan: a generative adversarial network with attention and transition for infrared and visible image fusion. Inf Fusion 92:336\u2013349. https:\/\/doi.org\/10.1016\/j.inffus.2022.12.007","journal-title":"Inf Fusion"},{"key":"8359_CR43","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1109\/TIM.2023.3282300","volume":"72","author":"S Huang","year":"2023","unstructured":"Huang S, Song Z, Yang Y, Wan W, Kong X (2023) Magan: multiattention generative adversarial network for infrared and visible image fusion. IEEE Trans Instrum Meas 72:1\u201314. 
https:\/\/doi.org\/10.1109\/TIM.2023.3282300","journal-title":"IEEE Trans Instrum Meas"},{"key":"8359_CR44","doi-asserted-by":"publisher","DOI":"10.1016\/j.dsp.2023.104316","volume":"145","author":"K Li","year":"2024","unstructured":"Li K, Liu G, Gu X, Tang H, Xiong J, Qian Y (2024) Dant-gan: a dual attention-based of nested training network for infrared and visible image fusion. Digit Signal Process 145:104316. https:\/\/doi.org\/10.1016\/j.dsp.2023.104316","journal-title":"Digit Signal Process"},{"key":"8359_CR45","doi-asserted-by":"publisher","DOI":"10.1016\/j.compeleceng.2025.110238","volume":"123","author":"X Xie","year":"2025","unstructured":"Xie X, Guo B, Li P, He S, Zhou S (2025) Multi-focus image fusion with visual state space model and dual adversarial learning. Comput Electr Eng 123:110238. https:\/\/doi.org\/10.1016\/j.compeleceng.2025.110238","journal-title":"Comput Electr Eng"},{"key":"8359_CR46","doi-asserted-by":"publisher","first-page":"15","DOI":"10.48550\/arXiv.1706.03762","volume":"30","author":"A Vaswani","year":"2017","unstructured":"Vaswani A, Shazeer N, Parmar N, Uszkoreit J, Jones L, Gomez AN, Kaiser \u0141, Polosukhin I (2017) Attention is all you need. Adv Neural Inf Process Syst 30:15. https:\/\/doi.org\/10.48550\/arXiv.1706.03762","journal-title":"Adv Neural Inf Process Syst"},{"key":"8359_CR47","doi-asserted-by":"publisher","unstructured":"Dosovitskiy A, Beyer L, Kolesnikov A, Weissenborn D, Zhai X, Unterthiner T, Dehghani M, Minderer M, Heigold G, Gelly S et al (2020) An image is worth 16x16 words: Transformers for image recognition at scale. arXiv preprint arXiv:2010.11929, https:\/\/doi.org\/10.48550\/arXiv.2010.11929","DOI":"10.48550\/arXiv.2010.11929"},{"key":"8359_CR48","doi-asserted-by":"crossref","unstructured":"Liu, Z., Lin, Y., Cao, Y., Hu, H., Wei, Y., Zhang, Z., Lin, S., Guo, B.: Swin transformer: Hierarchical vision transformer using shifted windows. 
In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 10012\u201310022 (2021). doi:10.1109\/ICCV48922.2021.00986","DOI":"10.1109\/ICCV48922.2021.00986"},{"key":"8359_CR49","doi-asserted-by":"publisher","unstructured":"Touvron H, Cord M, Douze M, Massa F, Sablayrolles A, J\u00e9gou H (2021) Training data-efficient image transformers & distillation through attention. In: International Conference on Machine Learning, pp 10347\u201310357. PMLR. https:\/\/doi.org\/10.48550\/arXiv.2012.12877","DOI":"10.48550\/arXiv.2012.12877"},{"key":"8359_CR50","doi-asserted-by":"publisher","unstructured":"Carion N, Massa F, Synnaeve G, Usunier N, Kirillov A, Zagoruyko S (2020) End-to-end object detection with transformers. In: European Conference on Computer Vision, pp 213\u2013229. Springer https:\/\/doi.org\/10.1007\/978-3-030-58452-8_13","DOI":"10.1007\/978-3-030-58452-8_13"},{"key":"8359_CR51","doi-asserted-by":"publisher","unstructured":"Wang W, Xie E, Li X, Fan D-P, Song K, Liang D, Lu T, Luo P, Shao L (2021) Pyramid vision transformer: a versatile backbone for dense prediction without convolutions. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp 568\u2013578. https:\/\/doi.org\/10.1109\/ICCV48922.2021.00061","DOI":"10.1109\/ICCV48922.2021.00061"},{"key":"8359_CR52","doi-asserted-by":"publisher","unstructured":"Zheng S, Lu J, Zhao H, Zhu X, Luo Z, Wang Y, Fu Y, Feng J, Xiang T, Torr PH et al (2021) Rethinking semantic segmentation from a sequence-to-sequence perspective with transformers. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp 6881\u20136890. https:\/\/doi.org\/10.1109\/CVPR46437.2021.00681","DOI":"10.1109\/CVPR46437.2021.00681"},{"key":"8359_CR53","doi-asserted-by":"publisher","unstructured":"Ju X, Zhang D, Li J, Zhou G (2020) Transformer-based label set generation for multi-modal multi-label emotion detection. 
In: Proceedings of the 28th ACM International Conference on Multimedia, pp 512\u2013520. https:\/\/doi.org\/10.1145\/3394171.3413577","DOI":"10.1145\/3394171.3413577"},{"key":"8359_CR54","doi-asserted-by":"publisher","unstructured":"Zhao J, Zhao Y, Li J (2021) M3tr: multi-modal multi-label recognition with transformer. In: Proceedings of the 29th ACM International Conference on Multimedia, pp. 469\u2013477. https:\/\/doi.org\/10.1145\/3474085.3475191","DOI":"10.1145\/3474085.3475191"},{"key":"8359_CR55","doi-asserted-by":"publisher","unstructured":"Chen H, Wang Y, Guo T, Xu C, Deng Y, Liu Z, Ma S, Xu C, Xu C, Gao W (2021) Pre-trained image processing transformer. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 12299\u201312310. https:\/\/doi.org\/10.1109\/CVPR46437.2021.01212","DOI":"10.1109\/CVPR46437.2021.01212"},{"key":"8359_CR56","doi-asserted-by":"publisher","unstructured":"Liang J, Cao J, Sun G, Zhang K, Van Gool L, Timofte R (2021) Swinir: Image restoration using swin transformer. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp 1833\u20131844. https:\/\/doi.org\/10.1109\/ICCVW54120.2021.00210","DOI":"10.1109\/ICCVW54120.2021.00210"},{"key":"8359_CR57","doi-asserted-by":"publisher","unstructured":"Wu Z, Liu Z, Lin J, Lin Y, Han S (202) Lite transformer with long-short range attention. arXiv preprint arXiv:2004.11886, https:\/\/doi.org\/10.48550\/arXiv.2004.11886","DOI":"10.48550\/arXiv.2004.11886"},{"key":"8359_CR58","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1109\/TIM.2022.3191664","volume":"71","author":"Z Wang","year":"2022","unstructured":"Wang Z, Chen Y, Shao W, Li H, Zhang L (2022) Swinfuse: a residual swin transformer fusion network for infrared and visible images. IEEE Trans Instrum Meas 71:1\u201312. 
https:\/\/doi.org\/10.1109\/TIM.2022.3191664","journal-title":"IEEE Trans Instrum Meas"},{"key":"8359_CR59","doi-asserted-by":"publisher","unstructured":"Zamir SW, Arora A, Khan S, Hayat M, Khan FS, Yang M-H (2022) Restormer: efficient transformer for high-resolution image restoration. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp 5728\u20135739. https:\/\/doi.org\/10.1109\/CVPR52688.2022.00564","DOI":"10.1109\/CVPR52688.2022.00564"},{"key":"8359_CR60","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2025.3541877","author":"B Cao","year":"2025","unstructured":"Cao B, Qi G, Zhao J, Zhu P, Hu Q, Gao X (2025) Rtf: recursive transfusion for multi-modal image synthesis. IEEE Trans Image Process. https:\/\/doi.org\/10.1109\/TIP.2025.3541877","journal-title":"IEEE Trans Image Process"},{"key":"8359_CR61","doi-asserted-by":"publisher","DOI":"10.5555\/2969033.2969073","author":"V Mnih","year":"2014","unstructured":"Mnih V, Heess N, Graves A, Kavukcuoglu K (2014) Recurrent models of visual attention. Adv Neural Inf Process Syst. https:\/\/doi.org\/10.5555\/2969033.2969073","journal-title":"Adv Neural Inf Process Syst"},{"key":"8359_CR62","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.1506.02025","author":"M Jaderberg","year":"2015","unstructured":"Jaderberg M, Simonyan K, Zisserman A et al (2015) Spatial transformer networks. Adv Neural Inf Process Syst. https:\/\/doi.org\/10.48550\/arXiv.1506.02025","journal-title":"Adv Neural Inf Process Syst"},{"key":"8359_CR63","doi-asserted-by":"publisher","unstructured":"Hu J, Shen L, Sun, G (2018) Squeeze-and-excitation networks. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp 7132\u20137141 (2018). https:\/\/doi.org\/10.1109\/CVPR.2018.00745","DOI":"10.1109\/CVPR.2018.00745"},{"key":"8359_CR64","doi-asserted-by":"publisher","unstructured":"Gao Z, Xie J, Wang Q, Li P (2019) Global second-order pooling convolutional networks. 
In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp 3024\u20133033. https:\/\/doi.org\/10.1109\/CVPR.2019.00314","DOI":"10.1109\/CVPR.2019.00314"},{"key":"8359_CR65","doi-asserted-by":"publisher","unstructured":"Wang Q, Wu B, Zhu P, Li P, Zuo W, Hu Q (2020) Eca-net: efficient channel attention for deep convolutional neural networks. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 11534\u201311542. https:\/\/doi.org\/10.1109\/CVPR42600.2020.01155","DOI":"10.1109\/CVPR42600.2020.01155"},{"key":"8359_CR66","doi-asserted-by":"publisher","unstructured":"Woo S, Park J, Lee J-Y, Kweon IS (2018) Cbam: convolutional block attention module. In: Proceedings of the European Conference on Computer Vision (ECCV), pp 3\u201319. https:\/\/doi.org\/10.1007\/978-3-030-01234-2_1","DOI":"10.1007\/978-3-030-01234-2_1"},{"key":"8359_CR67","doi-asserted-by":"publisher","unstructured":"Zhang H, Wu C, Zhang Z, Zhu Y, Lin H, Zhang Z, Sun Y, He T, Mueller J, Manmatha R et al (2022) Resnest: split-attention networks. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp 2736\u20132746. https:\/\/doi.org\/10.1109\/CVPRW56347.2022.00309","DOI":"10.1109\/CVPRW56347.2022.00309"},{"key":"8359_CR68","doi-asserted-by":"publisher","unstructured":"Dinh L, Krueger D, Bengio Y (2014) Nice: non-linear independent components estimation. arXiv preprint arXiv:1410.8516, https:\/\/doi.org\/10.48550\/arXiv.1410.8516","DOI":"10.48550\/arXiv.1410.8516"},{"key":"8359_CR69","doi-asserted-by":"publisher","unstructured":"Ardizzone L, Kruse J, Wirkert S, Rahner D, Pellegrini EW, Klessen RS, Maier-Hein L, Rother C, K\u00f6the U (2018) Analyzing inverse problems with invertible neural networks. 
arXiv preprint arXiv:1808.04730, https:\/\/doi.org\/10.48550\/arXiv.1808.04730","DOI":"10.48550\/arXiv.1808.04730"},{"key":"8359_CR70","doi-asserted-by":"publisher","unstructured":"Dinh L, Sohl-Dickstein J, Bengio S (2016) Density estimation using real nvp. arXiv preprint arXiv:1605.08803, https:\/\/doi.org\/10.48550\/arXiv.1605.08803","DOI":"10.48550\/arXiv.1605.08803"},{"key":"8359_CR71","doi-asserted-by":"publisher","unstructured":"Gomez AN, Ren M, Urtasun R, Grosse RB (2017) The reversible residual network: backpropagation without storing activations. In: Advances in neural information processing systems, vol 3. https:\/\/doi.org\/10.48550\/arXiv.1707.04585","DOI":"10.48550\/arXiv.1707.04585"},{"key":"8359_CR72","doi-asserted-by":"publisher","unstructured":"Kingma DP, Dhariwal P (2018) Glow: generative flow with invertible 1 x 1 convolutions. In: Advances in neural information processing systems, vol 31. https:\/\/doi.org\/10.48550\/arXiv.1807.03039","DOI":"10.48550\/arXiv.1807.03039"},{"key":"8359_CR73","doi-asserted-by":"publisher","unstructured":"Ardizzone L, L\u00fcth C, Kruse J, Rother C, K\u00f6the U (2019) Guided image generation with conditional invertible neural networks. arXiv preprint arXiv:1907.02392, https:\/\/doi.org\/10.48550\/arXiv.1907.02392","DOI":"10.48550\/arXiv.1907.02392"},{"key":"8359_CR74","doi-asserted-by":"publisher","unstructured":"Xiao M, Zheng S, Liu C, Wang Y, He D, Ke G, Bian J, Lin Z, Liu T-Y (2020) Invertible image rescaling. In: Computer Vision-ECCV 2020: 16th European Conference, Glasgow, UK, August 23\u201328, 2020, Proceedings, Part I 16, pp 126\u2013144. Springer. https:\/\/doi.org\/10.1007\/978-3-030-58452-8_8","DOI":"10.1007\/978-3-030-58452-8_8"},{"key":"8359_CR75","doi-asserted-by":"publisher","unstructured":"Zhou M, Yan K, Huang J, Yang Z, Fu X, Zhao F (2022) Mutual information-driven pan-sharpening. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp 1798\u20131808. 
https:\/\/doi.org\/10.1109\/CVPR52688.2022.00184","DOI":"10.1109\/CVPR52688.2022.00184"},{"key":"8359_CR76","doi-asserted-by":"publisher","unstructured":"Zhu X, Li Z, Zhang X-Y, Li C, Liu Y, Xue Z (2019) Residual invertible spatio-temporal network for video super-resolution. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol 33, pp 5981\u20135988. https:\/\/doi.org\/10.1609\/aaai.v33i01.33015981","DOI":"10.1609\/aaai.v33i01.33015981"},{"issue":"4","key":"8359_CR77","doi-asserted-by":"publisher","first-page":"604","DOI":"10.1109\/TIP.2003.819861","volume":"13","author":"Z Wang","year":"2004","unstructured":"Wang Z (2004) Image quality assessment: form error visibility to structural similarity. IEEE Trans Image Process 13(4):604\u2013606. https:\/\/doi.org\/10.1109\/TIP.2003.819861","journal-title":"IEEE Trans Image Process"},{"issue":"1","key":"8359_CR78","doi-asserted-by":"publisher","first-page":"41","DOI":"10.1080\/10408340500526766","volume":"36","author":"AG Asuero","year":"2006","unstructured":"Asuero AG, Sayago A, Gonz\u00e1lez A (2006) The correlation coefficient: an overview. Crit Rev Anal Chem 36(1):41\u201359. https:\/\/doi.org\/10.1080\/10408340500526766","journal-title":"Crit Rev Anal Chem"},{"key":"8359_CR79","doi-asserted-by":"publisher","unstructured":"Ardizzone L, Kruse J, Wirkert S, Rahner D, Pellegrini EW, Klessen RS, Maier-Hein L, Rother C, K\u00f6the U (2018) Analyzing inverse problems with invertible neural networks. arXiv preprint arXiv:1808.04730, https:\/\/doi.org\/10.48550\/arXiv.1808.04730","DOI":"10.48550\/arXiv.1808.04730"},{"key":"8359_CR80","doi-asserted-by":"publisher","unstructured":"Xu H, Ma J, Yuan J, Le Z, Liu W (2022) Rfnet: unsupervised network for mutually reinforcing multi-modal image registration and fusion. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp 19679\u201319688. 
https:\/\/doi.org\/10.1109\/CVPR52688.2022.01906","DOI":"10.1109\/CVPR52688.2022.01906"},{"key":"8359_CR81","doi-asserted-by":"publisher","unstructured":"Huang Z, Liu J, Fan X, Liu R, Zhong W, Luo Z (2022) Reconet: recurrent correction network for fast and efficient multi-modality image fusion. In: European Conference on Computer Vision, pp 539\u2013555. Springer. https:\/\/doi.org\/10.1007\/978-3-031-19797-0_31","DOI":"10.1007\/978-3-031-19797-0_31"},{"key":"8359_CR82","doi-asserted-by":"publisher","unstructured":"Zhao W, Xie S, Zhao F, He Y, Lu H (2023) Metafusion: infrared and visible image fusion via meta-feature embedding from object detection. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp 13955\u201313965. https:\/\/doi.org\/10.1109\/CVPR52729.2023.01341","DOI":"10.1109\/CVPR52729.2023.01341"},{"issue":"7","key":"8359_CR83","doi-asserted-by":"publisher","first-page":"3159","DOI":"10.1109\/TCSVT.2023.3234340","volume":"33","author":"W Tang","year":"2023","unstructured":"Tang W, He F, Liu Y, Duan Y, Si T (2023) Datfuse: infrared and visible image fusion via dual attention transformer. IEEE Trans Circuits Syst Video Technol 33(7):3159\u20133172. https:\/\/doi.org\/10.1109\/TCSVT.2023.3234340","journal-title":"IEEE Trans Circuits Syst Video Technol"},{"key":"8359_CR84","doi-asserted-by":"publisher","unstructured":"Zhao Z, Bai H, Zhang J, Zhang Y, Xu S, Lin Z, Timofte R, Van Gool L (2023) Cddfuse: correlation-driven dual-branch feature decomposition for multi-modality image fusion. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp 5906\u20135916. https:\/\/doi.org\/10.1109\/CVPR52729.2023.00572","DOI":"10.1109\/CVPR52729.2023.00572"},{"key":"8359_CR85","doi-asserted-by":"publisher","unstructured":"Zhao Z, Bai H, Zhang J, Zhang Y, Zhang K, Xu S, Chen D, Timofte R, Van Gool L (2024) Equivariant multi-modality image fusion. 
In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp 25912\u201325921. https:\/\/doi.org\/10.1109\/CVPR52733.2024.02448","DOI":"10.1109\/CVPR52733.2024.02448"}],"container-title":["The Journal of Supercomputing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11227-026-08359-y.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11227-026-08359-y","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11227-026-08359-y.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,3,9]],"date-time":"2026-03-09T07:11:32Z","timestamp":1773040292000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11227-026-08359-y"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,3,9]]},"references-count":85,"journal-issue":{"issue":"4","published-online":{"date-parts":[[2026,3]]}},"alternative-id":["8359"],"URL":"https:\/\/doi.org\/10.1007\/s11227-026-08359-y","relation":{},"ISSN":["1573-0484"],"issn-type":[{"value":"1573-0484","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026,3,9]]},"assertion":[{"value":"6 September 2025","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"13 February 2026","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"9 March 2026","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare no Conflict of 
interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}],"article-number":"233"}}