{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,11]],"date-time":"2026-02-11T07:59:33Z","timestamp":1770796773982,"version":"3.50.0"},"reference-count":41,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2025,12,6]],"date-time":"2025-12-06T00:00:00Z","timestamp":1764979200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,12,6]],"date-time":"2025-12-06T00:00:00Z","timestamp":1764979200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100001809","name":"Natural Science Foundation of China","doi-asserted-by":"crossref","award":["62577049"],"award-info":[{"award-number":["62577049"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"crossref"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Multimedia Systems"],"published-print":{"date-parts":[[2026,2]]},"DOI":"10.1007\/s00530-025-02084-x","type":"journal-article","created":{"date-parts":[[2025,12,6]],"date-time":"2025-12-06T09:15:04Z","timestamp":1765012504000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["GraphVSum:graph guided multimodal video summarization"],"prefix":"10.1007","volume":"32","author":[{"given":"Zhengqi","family":"Zhao","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Cong","family":"Bai","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Pengyi","family":"Hao","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2025,12,6]]},"reference":[{"key":"2084_CR1","doi-asserted-by":"publisher","unstructured":"Li, H., Zhu, J., Liu, T., etal.: Multi-modal sentence summarization with modality attention and image filtering. In: IJCAI, pp. 4152\u20134158 (2018). https:\/\/doi.org\/10.24963\/ijcai.2018\/577","DOI":"10.24963\/ijcai.2018\/577"},{"issue":"8","key":"2084_CR2","doi-asserted-by":"publisher","first-page":"5181","DOI":"10.1109\/TNNLS.2021.3119969","volume":"34","author":"B Zhao","year":"2021","unstructured":"Zhao, B., Gong, M., Li, X.: Audiovisual video summarization. IEEE Trans. Neural Netw. Learn. Syst. 34(8), 5181\u20135188 (2021). https:\/\/doi.org\/10.1109\/TNNLS.2021.3119969","journal-title":"IEEE Trans. Neural Netw. Learn. Syst."},{"key":"2084_CR3","unstructured":"Narasimhan, M., Rohrbach, A., Darrell, T.: Clip-it! language-guided video summarization. In: NeurIPS, vol. 34, pp. 13988\u201314000 (2021). https:\/\/proceedings.neurips.cc\/paper\/2021\/hash\/7503cfacd12053d309b6bed5c89de212-Abstract.html"},{"issue":"4","key":"2084_CR4","doi-asserted-by":"publisher","first-page":"279","DOI":"10.1007\/s00530-025-01860-z","volume":"31","author":"P Kadam","year":"2025","unstructured":"Kadam, P., Vora, D.: Systematic frame selection and quality assessment for efficient video summarization. Multimed. Syst. 31(4), 279 (2025). https:\/\/doi.org\/10.1007\/s00530-025-01860-z","journal-title":"Multimed. Syst."},{"key":"2084_CR5","doi-asserted-by":"publisher","unstructured":"Zhu, J., Li, H., Liu, T., etal.: Msmo: Multimodal summarization with multimodal output. In: EMNLP, pp. 4154\u20134164 (2018). https:\/\/doi.org\/10.18653\/v1\/D18-1448","DOI":"10.18653\/v1\/D18-1448"},{"key":"2084_CR6","doi-asserted-by":"publisher","unstructured":"Fu, X., Wang, J., Yang, Z.: Mm-avs: A full-scale dataset for multi-modal summarization. In: NAACL, pp. 5922\u20135926 (2021). https:\/\/doi.org\/10.18653\/v1\/2021.naacl-main.473","DOI":"10.18653\/v1\/2021.naacl-main.473"},{"key":"2084_CR7","doi-asserted-by":"publisher","first-page":"5548","DOI":"10.1109\/TMM.2023.3335875","volume":"26","author":"J Lin","year":"2023","unstructured":"Lin, J., Hua, H., Chen, M., et al.: Videoxum: Cross-modal visual and textural summarization of videos. IEEE Trans. Multimed. 26, 5548\u20135560 (2023). https:\/\/doi.org\/10.1109\/TMM.2023.3335875","journal-title":"IEEE Trans. Multimed."},{"key":"2084_CR8","doi-asserted-by":"publisher","unstructured":"Singh, A.K., Srivastava, D., Tapaswi, M.: Previously on... from recaps to story summarization. In: CVPR, pp. 13635\u201313646 (2024). https:\/\/doi.org\/10.1016\/j.imavis.2024.105168","DOI":"10.1016\/j.imavis.2024.105168"},{"key":"2084_CR9","doi-asserted-by":"publisher","unstructured":"Qiu, J., Zhu, J., Han, W., etal.: Mmsum: A dataset for multimodal summarization and thumbnail generation of videos. In: CVPR, pp. 21909\u201321921 (2024). https:\/\/doi.org\/10.1109\/CVPR52733.2024.02069","DOI":"10.1109\/CVPR52733.2024.02069"},{"issue":"4","key":"2084_CR10","doi-asserted-by":"publisher","first-page":"282","DOI":"10.1007\/s00530-025-01865-8","volume":"31","author":"D Wang","year":"2025","unstructured":"Wang, D., Tohti, T., Han, D., Zuo, Z., Liang, Y., Liao, Y., Yang, Q.: Vef-bart: an effective method to mitigate hallucinations through vision enhancement and fusion in bart-based multimodal abstractive summarization. Multimed. Syst. 31(4), 282 (2025). https:\/\/doi.org\/10.1007\/s00530-025-01865-8","journal-title":"Multimed. Syst."},{"key":"2084_CR11","doi-asserted-by":"publisher","unstructured":"He, B., Wang, J., Qiu, J., Bui, T., Shrivastava, A., Wang, Z.: Align and attend: Multimodal summarization with dual contrastive losses. In: CVPR, pp. 14867\u201314878 (2023). https:\/\/doi.org\/10.1109\/CVPR52729.2023.01428","DOI":"10.1109\/CVPR52729.2023.01428"},{"key":"2084_CR12","doi-asserted-by":"publisher","unstructured":"Huang, J.-H., Murn, L., Mrak, M., Worring, M.: Gpt2mvs: Generative pre-trained transformer-2 for multi-modal video summarization. In: ICMR, pp. 580\u2013589 (2021). https:\/\/doi.org\/10.1145\/3460426.3463662","DOI":"10.1145\/3460426.3463662"},{"key":"2084_CR13","doi-asserted-by":"publisher","first-page":"3296","DOI":"10.1109\/TMM.2022.3157993","volume":"25","author":"N Liu","year":"2022","unstructured":"Liu, N., Sun, X., Yu, H., et al.: Abstractive summarization for video: a revisit in multistage fusion network with forget gate. IEEE Trans. Multimed. 25, 3296\u20133310 (2022). https:\/\/doi.org\/10.1109\/TMM.2022.3157993","journal-title":"IEEE Trans. Multimed."},{"key":"2084_CR14","doi-asserted-by":"publisher","unstructured":"Shang, X., Yuan, Z., Wang, A., Wang, C.: Multimodal video summarization via time-aware transformers. In: ACM MM, pp. 1756\u20131765 (2021). https:\/\/doi.org\/10.1145\/3474085.3475321","DOI":"10.1145\/3474085.3475321"},{"key":"2084_CR15","doi-asserted-by":"publisher","DOI":"10.1016\/j.imavis.2024.105168","volume":"149","author":"D Cheng","year":"2024","unstructured":"Cheng, D., Kong, S., Jiang, B., Guo, Q.: Transferable dual multi-granularity semantic excavating for partially relevant video retrieval. Image Vis. Comput. 149, 105168 (2024). https:\/\/doi.org\/10.1016\/j.imavis.2024.105168","journal-title":"Image Vis. Comput."},{"key":"2084_CR16","doi-asserted-by":"publisher","unstructured":"Zhang, Z., Elfardy, H., Dreyer, M., etal.: Enhancing multi-document summarization with cross-document graph-based information extraction. In: EACL, pp. 1696\u20131707 (2023). https:\/\/doi.org\/10.18653\/v1\/2023.eacl-main.124","DOI":"10.18653\/v1\/2023.eacl-main.124"},{"key":"2084_CR17","doi-asserted-by":"publisher","DOI":"10.1016\/j.eswa.2022.119308","volume":"215","author":"A Ghadimi","year":"2023","unstructured":"Ghadimi, A., Beigy, H.: Sgcsumm: an extractive multi-document summarization method based on pre-trained language model, submodularity, and graph convolutional neural networks. Expert Syst. Appl. 215, 119308 (2023). https:\/\/doi.org\/10.1016\/j.eswa.2022.119308","journal-title":"Expert Syst. Appl."},{"key":"2084_CR18","doi-asserted-by":"publisher","unstructured":"Chaves, J.M.R., Tripathi, S.: Videosage: Video summarization with graph representation learning. In: CVPR, pp. 2527\u20132534 (2024). https:\/\/doi.org\/10.1109\/CVPRW63382.2024.00259","DOI":"10.1109\/CVPRW63382.2024.00259"},{"key":"2084_CR19","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2024.128945","volume":"617","author":"C-H Yeh","year":"2025","unstructured":"Yeh, C.-H., Lien, C.-M., Zhan, Z.-X., et al.: Graph convolutional network for fast video summarization in compressed domain. Neurocomput. 617, 128945 (2025). https:\/\/doi.org\/10.1016\/j.neucom.2024.128945","journal-title":"Neurocomput."},{"key":"2084_CR20","doi-asserted-by":"publisher","first-page":"185","DOI":"10.1016\/j.patrec.2021.03.013","volume":"146","author":"A Sahu","year":"2021","unstructured":"Sahu, A., Chowdhury, A.S.: First person video summarization using different graph representations. Pattern Recognit. Lett. 146, 185\u2013192 (2021). https:\/\/doi.org\/10.1016\/j.patrec.2021.03.013","journal-title":"Pattern Recognit. Lett."},{"key":"2084_CR21","doi-asserted-by":"publisher","DOI":"10.1016\/j.eswa.2024.123860","volume":"250","author":"G Wu","year":"2024","unstructured":"Wu, G., Song, S., Wang, X., Zhang, J.: Reconstructive network under contrastive graph rewards for video summarization. Expert Syst. Appl. 250, 123860 (2024). https:\/\/doi.org\/10.1016\/j.eswa.2024.123860","journal-title":"Expert Syst. Appl."},{"key":"2084_CR22","doi-asserted-by":"publisher","first-page":"3017","DOI":"10.1109\/TIP.2022.3163855","volume":"31","author":"W Zhu","year":"2022","unstructured":"Zhu, W., Han, Y., Lu, J., Zhou, J.: Relational reasoning over spatial-temporal graphs for video summarization. IEEE Trans. Image Process. 31, 3017\u20133031 (2022). https:\/\/doi.org\/10.1109\/TIP.2022.3163855","journal-title":"IEEE Trans. Image Process."},{"key":"2084_CR23","doi-asserted-by":"publisher","first-page":"13631","DOI":"10.1609\/aaai.v35i15.17607","volume":"35","author":"P Papalampidi","year":"2021","unstructured":"Papalampidi, P., Keller, F., Lapata, M.: Movie summarization via sparse graph construction. AAAI 35, 13631\u201313639 (2021)","journal-title":"AAAI"},{"key":"2084_CR24","unstructured":"Chen, Y., Wu, L., Zaki, M.: Iterative deep graph learning for graph neural networks: Better and robust node embeddings. In: NeurIPS, 33:19314\u201319326 (2020)"},{"key":"2084_CR25","unstructured":"Mihalcea, R., Tarau, P.: Textrank: Bringing order into text. In: EMNLP, pp. 404\u2013411 (2004). https:\/\/aclanthology.org\/W04-3252\/"},{"key":"2084_CR26","doi-asserted-by":"publisher","unstructured":"Tong, H., Faloutsos, C., Pan, J.-Y.: Fast random walk with restart and its applications. In: ICDM, pp. 613\u2013622 (2006). https:\/\/doi.org\/10.1109\/ICDM.2006.70","DOI":"10.1109\/ICDM.2006.70"},{"key":"2084_CR27","doi-asserted-by":"publisher","unstructured":"Lin, T.-Y., Goyal, P., Girshick, R., He, K., Doll\u00e1r, P.: Focal loss for dense object detection. In: ICCV, pp. 2980\u20132988 (2017). https:\/\/doi.org\/10.1109\/ICCV.2017.324","DOI":"10.1109\/ICCV.2017.324"},{"key":"2084_CR28","doi-asserted-by":"publisher","unstructured":"Song, Y., Vallmitjana, J., Stent, A., Jaimes, A.: Tvsum: Summarizing web videos using titles. In: CVPR, pp. 5179\u20135187 (2015). https:\/\/doi.org\/10.1109\/CVPR.2015.7299154","DOI":"10.1109\/CVPR.2015.7299154"},{"key":"2084_CR29","doi-asserted-by":"publisher","unstructured":"Gygli, M., Grabner, H., Riemenschneider, H., Van\u00a0Gool, L.: Creating summaries from user videos. In: ECCV, pp. 505\u2013520 (2014). https:\/\/doi.org\/10.1007\/978-3-319-10584-0_33","DOI":"10.1007\/978-3-319-10584-0_33"},{"key":"2084_CR30","doi-asserted-by":"publisher","unstructured":"Otani, M., Nakashima, Y., Rahtu, E., Heikkila, J.: Rethinking the evaluation of video summaries. In: CVPR, pp. 7596\u20137604 (2019).https:\/\/doi.org\/10.1109\/CVPR.2019.00778","DOI":"10.1109\/CVPR.2019.00778"},{"key":"2084_CR31","unstructured":"Lin, C.-Y.: Rouge: A package for automatic evaluation of summaries. In: ACL, pp. 74\u201381 (2004). https:\/\/aclanthology.org\/W04-1013\/"},{"key":"2084_CR32","doi-asserted-by":"publisher","unstructured":"Xie, J., Zhao, Z., Lin, Z., Shen, Y.: Multimodal graph learning for cross-modal retrieval. In: SDM, pp. 145\u2013153 (2023). https:\/\/doi.org\/10.1137\/1.9781611977653.ch17","DOI":"10.1137\/1.9781611977653.ch17"},{"issue":"5","key":"2084_CR33","doi-asserted-by":"publisher","first-page":"2793","DOI":"10.1109\/TPAMI.2021.3072117","volume":"44","author":"B Zhao","year":"2022","unstructured":"Zhao, B., Li, H., Lu, X., Li, X.: Reconstructive sequence-graph network for video summarization. IEEE Trans. Pattern Anal. Mach. Intell. 44(5), 2793\u20132801 (2022). https:\/\/doi.org\/10.1109\/TPAMI.2021.3072117","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"issue":"3","key":"2084_CR34","doi-asserted-by":"publisher","first-page":"3904","DOI":"10.1109\/TPAMI.2022.3186506","volume":"45","author":"H Li","year":"2023","unstructured":"Li, H., Ke, Q., Gong, M., Zhang, R.: Video joint modelling based on hierarchical transformer for co-summarization. IEEE Trans. Pattern Anal. Mach. Intell. 45(3), 3904\u20133917 (2023). https:\/\/doi.org\/10.1109\/TPAMI.2022.3186506","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"2084_CR35","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1016\/j.neucom.2021.09.015","volume":"467","author":"G Liang","year":"2022","unstructured":"Liang, G., Lv, Y., Li, S., et al.: Video summarization with a dual-path attentive network. Neurocomput. 467, 1\u20139 (2022). https:\/\/doi.org\/10.1016\/j.neucom.2021.09.015","journal-title":"Neurocomput."},{"key":"2084_CR36","doi-asserted-by":"publisher","first-page":"1282","DOI":"10.1109\/LSP.2023.3313091","volume":"30","author":"Y Zhang","year":"2023","unstructured":"Zhang, Y., Liu, Y., Kang, W., Zheng, Y.: Mar-net: motion-assisted reconstruction network for unsupervised video summarization. IEEE Signal Process. Lett. 30, 1282\u20131286 (2023). https:\/\/doi.org\/10.1109\/LSP.2023.3313091","journal-title":"IEEE Signal Process. Lett."},{"issue":"8","key":"2084_CR37","doi-asserted-by":"publisher","first-page":"4122","DOI":"10.1109\/TCSVT.2023.3240464","volume":"33","author":"Y Xu","year":"2023","unstructured":"Xu, Y., Li, X., Pan, L., et al.: Self-supervised adversarial video summarizer with context latent sequence learning. IEEE Trans. Circuits Syst. Video Technol. 33(8), 4122\u20134136 (2023). https:\/\/doi.org\/10.1109\/TCSVT.2023.3240464","journal-title":"IEEE Trans. Circuits Syst. Video Technol."},{"issue":"7","key":"2084_CR38","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3654670","volume":"20","author":"T Han","year":"2024","unstructured":"Han, T., Zhou, Q., Yu, J., et al.: Effective video summarization by extracting parameter-free motion attention. ACM Trans. Multimed. Comput. Commun. Appl. 20(7), 1\u201320 (2024). https:\/\/doi.org\/10.1145\/3654670","journal-title":"ACM Trans. Multimed. Comput. Commun. Appl."},{"key":"2084_CR39","doi-asserted-by":"publisher","DOI":"10.1016\/j.eswa.2024.123568","volume":"249","author":"Y Zhang","year":"2024","unstructured":"Zhang, Y., Liu, Y., Wu, C.: Attention-guided multi-granularity fusion model for video summarization. Expert Syst. Appl. 249, 123568 (2024). https:\/\/doi.org\/10.1016\/j.eswa.2024.123568","journal-title":"Expert Syst. Appl."},{"key":"2084_CR40","doi-asserted-by":"publisher","DOI":"10.1016\/j.eswa.2025.127360","volume":"279","author":"J Gupta","year":"2025","unstructured":"Gupta, J., Rai, D., Singh, P.: Gensumnet: a genre-specific and context-aware video summarization framework with adaptive thresholding and hybrid temporal modelling. Expert Syst. Appl. 279, 127360 (2025). https:\/\/doi.org\/10.1016\/j.eswa.2025.127360","journal-title":"Expert Syst. Appl."},{"key":"2084_CR41","doi-asserted-by":"publisher","unstructured":"Wang, Z., Di, S., Chen, L.: A message passing neural network space for better capturing data-dependent receptive fields. In: KDD, pp. 2489\u20132501 (2023). https:\/\/doi.org\/10.1007\/s12144-024-06894-6","DOI":"10.1007\/s12144-024-06894-6"}],"container-title":["Multimedia Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00530-025-02084-x.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00530-025-02084-x","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00530-025-02084-x.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,2,11]],"date-time":"2026-02-11T04:20:06Z","timestamp":1770783606000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00530-025-02084-x"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,12,6]]},"references-count":41,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2026,2]]}},"alternative-id":["2084"],"URL":"https:\/\/doi.org\/10.1007\/s00530-025-02084-x","relation":{},"ISSN":["0942-4962","1432-1882"],"issn-type":[{"value":"0942-4962","type":"print"},{"value":"1432-1882","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,12,6]]},"assertion":[{"value":"9 August 2025","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"2 November 2025","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"6 December 2025","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare no Conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}},{"value":"Not applicable","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethical approval and consent to participate"}},{"value":"Informed consent for publication was obtained from all participants.","order":4,"name":"Ethics","group":{"name":"EthicsHeading","label":"Consent for publication"}}],"article-number":"38"}}