{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,23]],"date-time":"2026-01-23T07:40:17Z","timestamp":1769154017746,"version":"3.49.0"},"reference-count":36,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,10,22]],"date-time":"2025-10-22T00:00:00Z","timestamp":1761091200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,10,22]],"date-time":"2025-10-22T00:00:00Z","timestamp":1761091200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,10,22]]},"DOI":"10.1109\/cbmi66578.2025.11339337","type":"proceedings-article","created":{"date-parts":[[2026,1,20]],"date-time":"2026-01-20T20:38:56Z","timestamp":1768941536000},"page":"1-8","source":"Crossref","is-referenced-by-count":0,"title":["An Experimental Study on Generating Plausible Textual Explanations for Video Summarization*"],"prefix":"10.1109","author":[{"given":"Thomas","family":"Eleftheriadis","sequence":"first","affiliation":[{"name":"ITI, CERTH,Thessaloniki,Greece"}]},{"given":"Evlampios","family":"Apostolidis","sequence":"additional","affiliation":[{"name":"ITI, CERTH,Thessaloniki,Greece"}]},{"given":"Vasileios","family":"Mezaris","sequence":"additional","affiliation":[{"name":"ITI, CERTH,Thessaloniki,Greece"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/JPROC.2021.3117472"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.4018\/978-1-6684-7366-5.ch065"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/ISM55400.2022.00029"},{"key":"ref4","first-page":"41","article-title":"A study on the use of attention for explaining video summarization","volume-title":"Proc. 
of the 2nd Workshop on User-Centric Narrative Summarization of Long Videos, ser. NarSUM \u201923. New York","author":"Apostolidis"},{"key":"ref5","first-page":"477","article-title":"Improving the faithfulness of attention-based explanations with task-specific information for text classification","volume-title":"Proc. of the 59th Annual Meeting of the Association for Computational Linguistics and the 11th Int. Joint Conf. on Natural Language Processing (Volume 1: Long Papers)","author":"Chrysostomou","year":"2021"},{"key":"ref6","first-page":"13807","article-title":"Rethinking attention-model explainability through faithfulness violation test","volume-title":"Proc. of the 39th Int. Conf. on Machine Learning, ser. Proc. of Machine Learning Research","volume":"162","author":"Liu"},{"key":"ref7","first-page":"2630","article-title":"Causalainer: Causal explainer for automatic video summarization","volume-title":"Proc. of the 2023 IEEE\/CVF Conference on Computer Vision and Pattern Recognition Workshops (CVPRW). Los Alamitos","author":"Huang"},{"key":"ref8","first-page":"4","article-title":"An integrated framework for multi-granular explanation of video summarization","volume":"V","author":"Tsigos","year":"2024","journal-title":"Frontiers in Signal Processing"},{"issue":"9","key":"ref9","doi-asserted-by":"crossref","first-page":"240","DOI":"10.1007\/s10462-024-10852-w","article-title":"A review of evaluation approaches for explainable ai with applications in cardiology","volume":"57","author":"Salih","year":"2024","journal-title":"Artificial Intelligence Review"},{"key":"ref10","doi-asserted-by":"crossref","first-page":"4198","DOI":"10.18653\/v1\/2020.acl-main.386","article-title":"Towards faithfully interpretable NLP systems: How should we define and evaluate faithfulness?","volume-title":"Proc. 
of the 58th Annual Meeting of the Association for Computational Linguistics","author":"Jacovi","year":"2020"},{"key":"ref11","author":"Jin","year":"2024","journal-title":"Why is plausibility surprisingly problematic as an xai criterion?"},{"issue":"1","key":"ref12","doi-asserted-by":"crossref","first-page":"1096","DOI":"10.1038\/s41467-019-08987-4","article-title":"Unmasking clever hans predictors and assessing what machines really learn","volume":"10","author":"Lapuschkin","year":"2019","journal-title":"Nature Communications"},{"issue":"2","key":"ref13","doi-asserted-by":"crossref","first-page":"303","DOI":"10.1007\/s11263-009-0275-4","article-title":"The pascal visual object classes (voc) challenge","volume":"88","author":"Everingham","year":"2010","journal-title":"International Journal of Computer Vision"},{"key":"ref14","first-page":"3982","article-title":"Sentence-BERT: Sentence embeddings using Siamese BERT-networks","volume-title":"Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing (EMNLP-IJCNLP)","author":"Reimers"},{"key":"ref15","first-page":"6894","article-title":"SimCSE: Simple contrastive learning of sentence embeddings","volume-title":"Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing","author":"Gao"},{"key":"ref16","author":"Li","year":"2024","journal-title":"Llava-onevision: Easy visual task transfer"},{"key":"ref17","first-page":"411","article-title":"Interpreting video features: A comparison of 3D convolutional networks and convolutional LSTM networks","volume-title":"Asian Conference on Computer Vision (ACCV) 2020, H. Ishikawa, C.-L. Liu, T. Pajdla, and J. Shi, Eds. 
Cham: Springer International Publishing","author":"Manttari"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/WACV48630.2021.00116"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2024.3405788"},{"key":"ref20","first-page":"521","article-title":"Explainable video action reasoning via prior knowledge and state transitions","volume-title":"Proc. of the 27th ACM International Conference on Multimedia, ser. MM \u201919","author":"Zhuo"},{"key":"ref21","doi-asserted-by":"crossref","first-page":"212","DOI":"10.1016\/j.neucom.2022.02.069","article-title":"One-shot video graph generation for explainable action reasoning","volume":"488","author":"Han","year":"2022","journal-title":"Neurocomputing"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1002\/ail2.59"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1007\/s10586-021-03439-5"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/cvpr52729.2023.01795"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/TVCG.2021.3093585"},{"key":"ref26","first-page":"1506","article-title":"Discrete neural representations for explainable anomaly detection","volume-title":"Proc. of the 2022 IEEE\/CVF Winter Conference on Applications of Computer Vision (WACV). Los Alamitos","author":"Szymanowicz"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/ICCVW60793.2023.00053"},{"key":"ref28","first-page":"4324","article-title":"Can ChatGPT Detect DeepFakes? A Study of Using Multimodal Large Language Models for Media Forensics","volume-title":"2024 IEEE\/CVF Conference on Computer Vision and Pattern Recognition Workshops (CVPRW). Los Alamitos","author":"Jia"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/WACVW65960.2025.00080"},{"key":"ref30","first-page":"407","article-title":"Summarizing videos using concentrated attention and considering the uniqueness and diversity of the video frames","volume-title":"Proc. of the 2022 Int. Conf. 
on Multimedia Retrieval, ser. ICMR \u201922","author":"Apostolidis","year":"2022"},{"key":"ref31","first-page":"505","article-title":"Creating Summaries from User Videos","volume-title":"Proc. of the 2014 European Conference on Computer Vision (ECCV)","author":"Gygli"},{"key":"ref32","doi-asserted-by":"crossref","first-page":"5179","DOI":"10.1109\/CVPR.2015.7299154","article-title":"TVSum: Summarizing web videos using titles","author":"Song","year":"2015","journal-title":"Proc. of the 2015 IEEE\/CVF Conf. on Computer Vision and Pattern Recognition (CVPR)"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/N16-3020"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.2307\/2332303"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298594"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2009.5206848"}],"event":{"name":"2025 International Conference on Content-Based Multimedia Indexing (CBMI)","location":"Dublin, Ireland","start":{"date-parts":[[2025,10,22]]},"end":{"date-parts":[[2025,10,24]]}},"container-title":["2025 International Conference on Content-Based Multimedia Indexing (CBMI)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11339229\/11339242\/11339337.pdf?arnumber=11339337","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,1,21]],"date-time":"2026-01-21T07:11:39Z","timestamp":1768979499000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11339337\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,22]]},"references-count":36,"URL":"https:\/\/doi.org\/10.1109\/cbmi66578.2025.11339337","relation":{},"subject":[],"published":{"date-parts":[[2025,10,22]]}}}