{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,28]],"date-time":"2026-03-28T18:15:21Z","timestamp":1774721721229,"version":"3.50.1"},"reference-count":125,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","license":[{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0\/"}],"funder":[{"DOI":"10.13039\/501100020963","name":"European Union\u2019s Horizon Europe Research and Development Program","doi-asserted-by":"publisher","award":["101073924"],"award-info":[{"award-number":["101073924"]}],"id":[{"id":"10.13039\/501100020963","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Access"],"published-print":{"date-parts":[[2024]]},"DOI":"10.1109\/access.2024.3467062","type":"journal-article","created":{"date-parts":[[2024,9,24]],"date-time":"2024-09-24T17:26:55Z","timestamp":1727198815000},"page":"159794-159820","source":"Crossref","is-referenced-by-count":46,"title":["Multimodal Explainable Artificial Intelligence: A Comprehensive Review of Methodological Advances and Future Research Directions"],"prefix":"10.1109","volume":"12","author":[{"given":"Nikolaos","family":"Rodis","sequence":"first","affiliation":[{"name":"Department of Informatics and Telematics, Harokopio University of Athens, Attica, Athens, Greece"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7262-7310","authenticated-orcid":false,"given":"Christos","family":"Sardianos","sequence":"additional","affiliation":[{"name":"Department of Informatics and Telematics, Harokopio University of Athens, Attica, Athens, Greece"}]},{"given":"Panagiotis","family":"Radoglou-Grammatikis","sequence":"additional","affiliation":[{"name":"Department of Electrical and Computer Engineering, University of Western Macedonia, Kozani, Greece"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6042-0355","authenticated-orcid":false,"given":"Panagiotis","family":"Sarigiannidis","sequence":"additional","affiliation":[{"name":"Department of Electrical and Computer Engineering, University of Western Macedonia, Kozani, Greece"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0876-8167","authenticated-orcid":false,"given":"Iraklis","family":"Varlamis","sequence":"additional","affiliation":[{"name":"Department of Informatics and Telematics, Harokopio University of Athens, Attica, Athens, Greece"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1686-421X","authenticated-orcid":false,"given":"Georgios Th.","family":"Papadopoulos","sequence":"additional","affiliation":[{"name":"Department of Informatics and Telematics, Harokopio University of Athens, Attica, Athens, Greece"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1038\/nature14539"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1145\/3434398"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1145\/3507902"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.13"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1145\/3441691"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2021.3080517"},{"key":"ref7","article-title":"Grad-CAM: Why did you say that?","author":"R Selvaraju","year":"2016","journal-title":"arXiv:1611.07450"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2020.2975980"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1016\/j.inffus.2019.12.012"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2024.3409843"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2024.3422416"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1145\/3236009"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2024.3395444"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2024.3398203"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2023.3347028"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2018.2870052"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.74"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2018.2798607"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00879"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-46493-0_1"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.emnlp-main.143"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00915"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.670"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01234-2_34"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.maiworkshop-1.4"},{"key":"ref26","first-page":"1","article-title":"Learn to explain: Multimodal reasoning via thought chains for science question answering","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Lu"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.215"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.12"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-016-0981-7"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1016\/j.cviu.2017.10.001"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.279"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2021.3070212"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-10602-1_48"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-016-0965-7"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1162\/tacl_a_00166"},{"key":"ref36","article-title":"The caltech-UCSD birds-200\u20132011 dataset","author":"Wah","year":"2011"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-15552-9_29"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.1212.0402"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01216-8_35"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00688"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1007\/978-94-011-5318-8_75"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1109\/TKDE.2019.2906190"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1109\/WACV45572.2020.9093295"},{"key":"ref44","first-page":"1","article-title":"Towards transparent AI systems: Interpreting visual question answering models","volume":"1","author":"Goyal","year":"2016","journal-title":"Comput. Vis. Pattern Recognit."},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/W19-4812"},{"key":"ref46","article-title":"A study on multimodal and interactive explanations for visual question answering","author":"Alipour","year":"2020","journal-title":"arXiv:2003.00431"},{"key":"ref47","first-page":"289","article-title":"Hierarchical question-image co-attention for visual question answering","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"29","author":"Lu"},{"key":"ref48","article-title":"Improving VQA and its explanations by comparing competing explanations","author":"Wu","year":"2020","journal-title":"arXiv:2006.15631"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00636"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00268"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1109\/HCCAI49649.2020.00010"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.540"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.10"},{"key":"ref54","article-title":"Show, ask, attend, and answer: A strong baseline for visual question answering","author":"Kazemi","year":"2017","journal-title":"arXiv:1704.03162"},{"key":"ref55","article-title":"Generating natural language explanations for visual question answering using scene graphs and visual attention","author":"Ghosh","year":"2019","journal-title":"arXiv:1902.05715"},{"key":"ref56","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00801"},{"key":"ref57","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00754"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00137"},{"key":"ref59","article-title":"Visual explanations from Hadamard product in multimodal deep networks","author":"Kim","year":"2017","journal-title":"ArXiv:1712.06228"},{"key":"ref60","first-page":"1","article-title":"Interpretable counting for visual question answering","volume-title":"Proc. Int. Conf. Learn. Represent.","author":"Trott"},{"key":"ref61","first-page":"6428","article-title":"Probabilistic neural symbolic models for interpretable visual question answering","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Vedantam"},{"key":"ref62","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00519"},{"key":"ref63","doi-asserted-by":"publisher","DOI":"10.1109\/WACV.2019.00043"},{"key":"ref64","doi-asserted-by":"publisher","DOI":"10.1109\/CSCI46756.2018.00098"},{"key":"ref65","doi-asserted-by":"publisher","DOI":"10.1016\/j.inffus.2021.07.008"},{"key":"ref66","first-page":"2048","article-title":"Show, attend and tell: Neural image caption generation with visual attention","volume":"2015","author":"Xu","year":"2015","journal-title":"Comput. Sci."},{"key":"ref67","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00654"},{"key":"ref68","first-page":"1","article-title":"Rise: Randomized input sampling for explanation of black-box models","volume-title":"Proc. Brit. Mach. Vis. Conf.","author":"Petsiuk"},{"key":"ref69","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.334"},{"key":"ref70","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.110"},{"key":"ref71","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.findings-emnlp.253"},{"key":"ref72","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2018.2828437"},{"key":"ref73","doi-asserted-by":"publisher","DOI":"10.1145\/3331184.3331254"},{"key":"ref74","doi-asserted-by":"publisher","DOI":"10.1109\/CBMI.2019.8877393"},{"key":"ref75","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01216-8_17"},{"key":"ref76","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01261-8_32"},{"key":"ref77","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.33012539"},{"key":"ref78","article-title":"Generating counterfactual explanations with natural language","author":"Anne Hendricks","year":"2018","journal-title":"arXiv:1806.09809"},{"key":"ref79","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00880"},{"key":"ref80","article-title":"InterpNET: Neural introspection for interpretable deep learning","author":"Barratt","year":"2017","journal-title":"arXiv:1710.09511"},{"key":"ref81","doi-asserted-by":"publisher","DOI":"10.1109\/JSTSP.2020.2987729"},{"key":"ref82","doi-asserted-by":"publisher","DOI":"10.1145\/3372278.3390672"},{"key":"ref83","first-page":"1","article-title":"An inherently explainable model for video activity interpretation","volume-title":"Proc. Workshops 32nd AAAI Conf. Artif. Intell.","author":"Aakur"},{"key":"ref84","doi-asserted-by":"publisher","DOI":"10.1145\/3343031.3351040"},{"key":"ref85","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P18-1208"},{"key":"ref86","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2021-1718"},{"key":"ref87","doi-asserted-by":"publisher","DOI":"10.1109\/IJCNN55064.2022.9892315"},{"key":"ref88","article-title":"Group gated fusion on attention-based bidirectional alignment for multimodal emotion recognition","author":"Liu","year":"2022","journal-title":"arXiv:2201.06309"},{"key":"ref89","article-title":"Interpretable multi-modal hate speech detection","author":"Vijayaraghavan","year":"2021","journal-title":"arXiv:2103.01616"},{"key":"ref90","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2022.3178236"},{"key":"ref91","doi-asserted-by":"publisher","DOI":"10.1145\/3357384.3358160"},{"key":"ref92","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW.2017.210"},{"key":"ref93","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00968"},{"key":"ref94","doi-asserted-by":"publisher","DOI":"10.1109\/TCYB.2021.3069920"},{"key":"ref95","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-33850-3_3"},{"key":"ref96","doi-asserted-by":"publisher","DOI":"10.1016\/j.cmpb.2022.106620"},{"key":"ref97","doi-asserted-by":"publisher","DOI":"10.1109\/BIBE52308.2021.9635541"},{"key":"ref98","doi-asserted-by":"publisher","DOI":"10.1109\/TVCG.2018.2843369"},{"key":"ref99","doi-asserted-by":"publisher","DOI":"10.3390\/e23010018"},{"key":"ref100","doi-asserted-by":"publisher","DOI":"10.1145\/3514094.3534148"},{"key":"ref101","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-46448-0_51"},{"key":"ref102","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/d16-1044"},{"key":"ref103","article-title":"Visual entailment: A novel task for fine-grained image understanding","author":"Xie","year":"2019","journal-title":"arXiv:1901.06706"},{"key":"ref104","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D18-1164"},{"key":"ref105","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58539-6_34"},{"key":"ref106","doi-asserted-by":"publisher","DOI":"10.1109\/WACV.2018.00097"},{"key":"ref107","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/N16-3020"},{"key":"ref108","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.319"},{"key":"ref109","first-page":"4768","article-title":"A unified approach to interpreting model predictions","volume-title":"Proc. 31st Int. Conf. Neural Inf. Process. Syst.","author":"Lundberg"},{"key":"ref110","first-page":"3145","article-title":"Learning important features through propagating activation differences","volume-title":"Proc. 34th Int. Conf. Mach. Learn.","author":"Shrikumar"},{"key":"ref111","first-page":"3319","article-title":"Axiomatic attribution for deep networks","volume-title":"Proc. Int. Conf. Mach. Learn.","volume":"70","author":"Sundararajan"},{"key":"ref112","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P18-1176"},{"key":"ref113","doi-asserted-by":"publisher","DOI":"10.1371\/journal.pone.0130140"},{"key":"ref114","doi-asserted-by":"publisher","DOI":"10.1016\/j.neuroimage.2022.119504"},{"key":"ref115","article-title":"Striving for simplicity: The all convolutional net","volume-title":"ICLR Workshop Track","author":"Springenberg"},{"key":"ref116","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.232"},{"key":"ref117","first-page":"2668","article-title":"Interpretability beyond feature attribution: Quantitative testing with concept activation vectors","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Kim"},{"key":"ref118","article-title":"Towards a rigorous science of interpretable machine learning","author":"Doshi-Velez","year":"2017","journal-title":"arXiv:1702.08608"},{"key":"ref119","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.780"},{"key":"ref120","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D19-1410"},{"key":"ref121","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-017-1059-x"},{"key":"ref122","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v31i1.11197"},{"key":"ref123","doi-asserted-by":"publisher","DOI":"10.1016\/j.knosys.2023.110273"},{"key":"ref124","article-title":"Attention is not explanation","author":"Jain","year":"2019","journal-title":"North American Chapter of the Association for Computational Linguistics"},{"key":"ref125","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D19-1002"}],"container-title":["IEEE Access"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/6287639\/10380310\/10689601.pdf?arnumber=10689601","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,11,27]],"date-time":"2024-11-27T00:00:30Z","timestamp":1732665630000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10689601\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024]]},"references-count":125,"URL":"https:\/\/doi.org\/10.1109\/access.2024.3467062","relation":{},"ISSN":["2169-3536"],"issn-type":[{"value":"2169-3536","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024]]}}}