{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,24]],"date-time":"2026-03-24T16:27:01Z","timestamp":1774369621602,"version":"3.50.1"},"reference-count":72,"publisher":"Springer Science and Business Media LLC","issue":"21","license":[{"start":{"date-parts":[[2025,6,2]],"date-time":"2025-06-02T00:00:00Z","timestamp":1748822400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,6,2]],"date-time":"2025-06-02T00:00:00Z","timestamp":1748822400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"name":"Ministry of Electronics and Information Technology (MeitY), Government of India","award":["Visvesvaraya Research Fellowship"],"award-info":[{"award-number":["Visvesvaraya Research Fellowship"]}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Neural Comput &amp; Applic"],"published-print":{"date-parts":[[2025,7]]},"DOI":"10.1007\/s00521-025-11309-z","type":"journal-article","created":{"date-parts":[[2025,6,2]],"date-time":"2025-06-02T04:02:00Z","timestamp":1748836920000},"page":"16349-16380","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["Supervised regularized attention-aware clock-triggered recurrent neural network for video summarization"],"prefix":"10.1007","volume":"37","author":[{"given":"Deeksha","family":"Gupta","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6125-6550","authenticated-orcid":false,"given":"Akashdeep","family":"Sharma","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,6,2]]},"reference":[{"key":"11309_CR1","unstructured":"Cisco (2020) \u201cCisco Annual Internet Report (2018\u20132023). Cisco, pp. 1\u201341. [Online]. Available: http:\/\/grs.cisco.com\/grsx\/cust\/grsCustomerSurvey.html?SurveyCode=4153&ad_id=US-BN-SEC-M-CISCOASECURITYRPT-ENT&KeyCode=000112137"},{"issue":"1","key":"11309_CR2","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/1198302.1198305","volume":"3","author":"BT Truong","year":"2007","unstructured":"Truong BT, Venkatesh S (2007) Video abstraction: a systematic review and classification. ACM Trans Multimed Comput Commun Appl 3(1):1\u201337. https:\/\/doi.org\/10.1145\/1198302.1198305","journal-title":"ACM Trans Multimed Comput Commun Appl"},{"key":"11309_CR3","doi-asserted-by":"publisher","unstructured":"Zhang K, Chao WL, Sha F, and Grauman K, (2016) Video summarization with long short-term memory. In Lecture notes in computer science (including subseries lecture notes in artificial intelligence and lecture notes in bioinformatics). vol 9911 LNCS. https:\/\/doi.org\/10.1007\/978-3-319-46478-7_47.","DOI":"10.1007\/978-3-319-46478-7_47"},{"key":"11309_CR4","doi-asserted-by":"crossref","unstructured":"Zhou K, Qiao Y, and Xiang T, (2018) Deep reinforcement learning for unsupervised video summarization with diversity-representativeness reward. In : 32nd AAAI conference on artificial intelligence, AAAI 2018, pp 7582\u20137589","DOI":"10.1609\/aaai.v32i1.12255"},{"key":"11309_CR5","doi-asserted-by":"publisher","unstructured":"Zhang K, Grauman K, and Sha F (2018) Retrospective encoders for video summarization. In: Lecture notes in computer science (including subseries lecture notes in artificial intelligence and lecture notes in bioinformatics), vol. 11212 LNCS, pp. 391\u2013408. https:\/\/doi.org\/10.1007\/978-3-030-01237-3_24.","DOI":"10.1007\/978-3-030-01237-3_24"},{"key":"11309_CR6","doi-asserted-by":"publisher","first-page":"663","DOI":"10.1109\/LSP.2021.3066349","volume":"28","author":"R Zhong","year":"2021","unstructured":"Zhong R, Wang R, Zou Y, Hong Z, Hu M (2021) Graph attention networks adjusted Bi-LSTM for video summarization. IEEE Signal Process Lett 28:663\u2013667. https:\/\/doi.org\/10.1109\/LSP.2021.3066349","journal-title":"IEEE Signal Process Lett"},{"key":"11309_CR7","doi-asserted-by":"publisher","unstructured":"Ng JYH, Hausknecht M, Vijayanarasimhan S, Vinyals O, Monga R, and Toderici G (2015) Beyond short snippets: deep networks for video classification. In: Proceedings of the IEEE computer society conference on computer vision and pattern recognition, vol 07\u201312, pp 4694\u20134702. https:\/\/doi.org\/10.1109\/CVPR.2015.7299101.","DOI":"10.1109\/CVPR.2015.7299101"},{"issue":"1","key":"11309_CR8","doi-asserted-by":"publisher","first-page":"221","DOI":"10.1109\/TPAMI.2012.59","volume":"35","author":"S Ji","year":"2013","unstructured":"Ji S, Xu W, Yang M, Yu K (2013) 3D Convolutional neural networks for human action recognition. IEEE Trans Pattern Anal Mach Intell 35(1):221\u2013231. https:\/\/doi.org\/10.1109\/TPAMI.2012.59","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"11309_CR9","doi-asserted-by":"publisher","unstructured":"Wang J, Wang W, Huang Y, Wang L, and Tan T (2018) Multimodal memory modelling for video captioning. In: Proceedings of the IEEE computer society conference on computer vision and pattern recognition, pp 7512\u20137520. https:\/\/doi.org\/10.1109\/CVPR.2018.00784.","DOI":"10.1109\/CVPR.2018.00784"},{"issue":"22","key":"11309_CR10","doi-asserted-by":"publisher","first-page":"8917","DOI":"10.3390\/s22228917","volume":"22","author":"B Martinez-Vega","year":"2022","unstructured":"Martinez-Vega B et al (2022) Evaluation of preprocessing methods on independent medical hyperspectral databases to improve analysis. Sensors 22(22):8917. https:\/\/doi.org\/10.3390\/s22228917","journal-title":"Sensors"},{"key":"11309_CR11","doi-asserted-by":"publisher","unstructured":"Hara K, Kataoka H, and Satoh Y (2018) Can spatiotemporal 3D CNNs retrace the history of 2D CNNs and ImageNet?. In: Proceedings of the IEEE computer society conference on computer vision and pattern recognition, pp 6546\u20136555. https:\/\/doi.org\/10.1109\/CVPR.2018.00685.","DOI":"10.1109\/CVPR.2018.00685"},{"key":"11309_CR12","unstructured":"Koutn\u00edk J, Greff K, Gomez F, and Schmidhuber J (2014) A clockwork RNN. IN: 31st international conference on machine learning, ICML 2014, vol 5, pp 3881\u20133889"},{"issue":"5","key":"11309_CR13","first-page":"1","volume":"52","author":"VK Vivekraj","year":"2019","unstructured":"Vivekraj VK, Sen D, Raman B (2019) Video skimming : taxonomy and comprehensive survey. ACM Comput Surv 52(5):1\u201338","journal-title":"ACM Comput Surv"},{"key":"11309_CR14","doi-asserted-by":"publisher","DOI":"10.1007\/s11042-016-3569-x","author":"J Wu","year":"2016","unstructured":"Wu J, Zhong S, Jiang J (2016) A novel clustering method for static video summarization. Multimed Tools Appl. https:\/\/doi.org\/10.1007\/s11042-016-3569-x","journal-title":"Multimed Tools Appl"},{"key":"11309_CR15","doi-asserted-by":"publisher","first-page":"7383","DOI":"10.1007\/s11042-017-4642-9","volume":"77","author":"K Kumar","year":"2018","unstructured":"Kumar K, Shrimankar DD, Singh N (2018) Eratosthenes sieve based key-frame extraction technique for event summarization in videos. Multimed Tools Appl 77:7383\u20137404. https:\/\/doi.org\/10.1007\/s11042-017-4642-9","journal-title":"Multimed Tools Appl"},{"key":"11309_CR16","doi-asserted-by":"publisher","unstructured":"Elhamifar E and De Paolis Kaluza MC (2017) Online summarization via submodular and convex optimization. In: Proceedings - 30th IEEE conference on computer vision and pattern recognition, CVPR 2017, vol 2017-Janua, pp 1818\u20131826. https:\/\/doi.org\/10.1109\/CVPR.2017.197.","DOI":"10.1109\/CVPR.2017.197"},{"key":"11309_CR17","doi-asserted-by":"publisher","first-page":"1789","DOI":"10.1109\/TIP.2022.3146012","volume":"31","author":"M Ma","year":"2022","unstructured":"Ma M, Mei S, Wan S, Wang Z, Hua XS, Feng DD (2022) Graph convolutional dictionary selection with L2, p norm for video summarization. IEEE Trans Image Process 31:1789\u20131804. https:\/\/doi.org\/10.1109\/TIP.2022.3146012","journal-title":"IEEE Trans Image Process"},{"key":"11309_CR18","doi-asserted-by":"publisher","first-page":"812","DOI":"10.1016\/j.procs.2016.06.065","volume":"89","author":"M Srinivas","year":"2016","unstructured":"Srinivas M, Pai MMM, Pai RM (2016) An improved algorithm for video summarization\u2014a rank based approach. Proc\u2014Procedia Comput Sci 89:812\u2013819. https:\/\/doi.org\/10.1016\/j.procs.2016.06.065","journal-title":"Proc\u2014Procedia Comput Sci"},{"key":"11309_CR19","doi-asserted-by":"publisher","unstructured":"Ai X, Song Y, and Li Z (2018) Unsupervised video summarization based on consistent clip generation. In: 2018 IEEE 4th international conference on multimedia big data, BigMM pp 1\u20137. https:\/\/doi.org\/10.1109\/BigMM.2018.8499188.","DOI":"10.1109\/BigMM.2018.8499188"},{"key":"11309_CR20","doi-asserted-by":"publisher","unstructured":"Mahasseni B, Lam M, and Todorovic S (2017) Unsupervised video summarization with adversarial LSTM networks. In: Proceedings of the IEEE conference on computer vision and pattern recognition, vol 2017-Janua, pp 202\u2013211. https:\/\/doi.org\/10.1109\/CVPR.2017.318.","DOI":"10.1109\/CVPR.2017.318"},{"key":"11309_CR21","doi-asserted-by":"publisher","unstructured":"Apostolidis E, Metsai AI, Adamantidou E, Mezaris V, and Patras I (2019) A stepwise, label-based approach for improving the adversarial training in unsupervised video summarization. In: AI4TV 2019 - proceedings of the 1st international workshop on AI for smart TV content production, access and delivery, co-located with MM 2019, pp 17\u201325. https:\/\/doi.org\/10.1145\/3347449.3357482.","DOI":"10.1145\/3347449.3357482"},{"key":"11309_CR22","first-page":"1","volume":"31","author":"E Apostolidis","year":"2020","unstructured":"Apostolidis E, Adamantidou E, Metsai AI, Mezaris V, Patras I (2020) AC-SUM-GAN: connecting actor-critic and generative adversarial networks for unsupervised video summarization. IEEE Trans Circ Syst Video Technol 31:1\u201315","journal-title":"IEEE Trans Circ Syst Video Technol"},{"key":"11309_CR23","doi-asserted-by":"publisher","first-page":"40","DOI":"10.1145\/3317640.3317658","volume":"F1477","author":"L Wang","year":"2019","unstructured":"Wang L, Zhu Y, Pan H (2019) Unsupervised reinforcement learning for video summarization reward function. ACM Int Conf Proc Series F1477:40\u201344. https:\/\/doi.org\/10.1145\/3317640.3317658","journal-title":"ACM Int Conf Proc Series"},{"key":"11309_CR24","doi-asserted-by":"crossref","unstructured":"Yal\u0131n\u0131z G and Ikizler-Cinbis N (2019) Unsupervised video summarization with independently recurrent neural networks. In: 27th signal processing and communications applications conference (SIU), pp 1\u20134","DOI":"10.1109\/SIU.2019.8806603"},{"key":"11309_CR25","doi-asserted-by":"publisher","first-page":"1573","DOI":"10.1109\/TIP.2022.3143699","volume":"31","author":"T Liu","year":"2022","unstructured":"Liu T, Meng Q, Huang JJ, Vlontzos A, Rueckert D, Kainz B (2022) Video summarization through reinforcement learning with a 3D spatio-temporal U-Net. IEEE Trans Image Process 31:1573\u20131586. https:\/\/doi.org\/10.1109\/TIP.2022.3143699","journal-title":"IEEE Trans Image Process"},{"issue":"January","key":"11309_CR26","first-page":"2069","volume":"3","author":"B Gong","year":"2014","unstructured":"Gong B, Chao WL, Grauman K, Sha F (2014) Diverse sequential subset selection for supervised video summarization. Adv Neural Inf Process Syst 3(January):2069\u20132077","journal-title":"Adv Neural Inf Process Syst"},{"key":"11309_CR27","doi-asserted-by":"publisher","unstructured":"Gygli M, Grabner H, Riemenschneider H, and Van Gool L (2014) Creating summaries from user videos. In: Lecture notes in computer science (including subseries lecture notes in artificial intelligence and lecture notes in bioinformatics), vol 8695 LNCS, no. PART 7, pp 505\u2013520. https:\/\/doi.org\/10.1007\/978-3-319-10584-0_33.","DOI":"10.1007\/978-3-319-10584-0_33"},{"key":"11309_CR28","doi-asserted-by":"crossref","unstructured":"Potapov D et al. (2014) Category-specific video summarization. In: European conference on computer vision, Springer, Cham., pp 540\u2013555","DOI":"10.1007\/978-3-319-10599-4_35"},{"key":"11309_CR29","doi-asserted-by":"publisher","unstructured":"Zhao B, Li X, and Lu X (2017) Hierarchical recurrent neural network for video summarization. In: MM 2017 - proceedings of the 2017 ACM multimedia conference, pp 863\u2013871. https:\/\/doi.org\/10.1145\/3123266.3123328.","DOI":"10.1145\/3123266.3123328"},{"key":"11309_CR30","doi-asserted-by":"publisher","unstructured":"Zhao B, Li X, and Lu X (2018) HSA-RNN: hierarchical structure-adaptive RNN for video summarization. In: Proceedings of the IEEE computer society conference on computer vision and pattern recognition, pp 7405\u20137414. https:\/\/doi.org\/10.1109\/CVPR.2018.00773.","DOI":"10.1109\/CVPR.2018.00773"},{"key":"11309_CR31","doi-asserted-by":"publisher","DOI":"10.1145\/3321408.3322622","author":"Y Zhang","year":"2019","unstructured":"Zhang Y, Zhao X, Kampffmeyer M, Tan M (2019) DTR-GAN: dilated temporal relational adversarial network for video summarization. ACM Int Conf Proc Ser. https:\/\/doi.org\/10.1145\/3321408.3322622","journal-title":"ACM Int Conf Proc Ser"},{"key":"11309_CR32","doi-asserted-by":"publisher","unstructured":"Rochan M, Ye L, and Wang Y (2018) Video summarization using fully convolutional sequence networks. In: Proceedings of the European conference on computer vision (ECCV), pp 347\u2013363. https:\/\/doi.org\/10.1007\/978-3-030-01258-8_22.","DOI":"10.1007\/978-3-030-01258-8_22"},{"key":"11309_CR33","doi-asserted-by":"publisher","unstructured":"Feng L, Kuang Z, Li Z, and Zhang W (2018) Extractive video summarizer with memory augmented neural networks. In: MM 2018 - proceedings of the 2018 ACM multimedia conference, pp 976\u2013983. https:\/\/doi.org\/10.1145\/3240508.3240651.","DOI":"10.1145\/3240508.3240651"},{"key":"11309_CR34","doi-asserted-by":"publisher","unstructured":"Sahrawat D et al. (2019) Video summarization using global attention with memory network and LSTM. In: Proceedings - 2019 IEEE 5th international conference on multimedia big data. BigMM 2019, pp 231\u2013236. https:\/\/doi.org\/10.1109\/BigMM.2019.00-20.","DOI":"10.1109\/BigMM.2019.00-20"},{"key":"11309_CR35","doi-asserted-by":"publisher","unstructured":"Wang J, Wang W, Wang Z, Wang L, Feng D, and Tan T (2019) Stacked memory network for video summarization. In: MM 2019 - proceedings of the 27th ACM international conference on multimedia, pp 836\u2013844, 2019, https:\/\/doi.org\/10.1145\/3343031.3350992.","DOI":"10.1145\/3343031.3350992"},{"key":"11309_CR36","doi-asserted-by":"publisher","unstructured":"Fajtl J, Sokeh HS, Argyriou V, Monekosso D, and Remagnino P (2019) Summarizing videos with attention. In: Lecture notes in computer science (including subseries lecture notes in artificial intelligence and lecture notes in bioinformatics), vol 11367 LNCS, pp 39\u201354. https:\/\/doi.org\/10.1007\/978-3-030-21074-8_4.","DOI":"10.1007\/978-3-030-21074-8_4"},{"issue":"6","key":"11309_CR37","doi-asserted-by":"publisher","first-page":"1709","DOI":"10.1109\/TCSVT.2019.2904996","volume":"30","author":"Z Ji","year":"2020","unstructured":"Ji Z, Xiong K, Pang Y, Li X (2020) Video summarization with attention-based encoder-decoder networks. IEEE Trans Circ Syst Video Technol 30(6):1709\u20131717. https:\/\/doi.org\/10.1109\/TCSVT.2019.2904996","journal-title":"IEEE Trans Circ Syst Video Technol"},{"key":"11309_CR38","doi-asserted-by":"publisher","unstructured":"Liu YT, Li YJ, Yang FE, Chen SF, and Wang YCF (2019) Learning hierarchical self-attention for video summarization. In: Proceedings\u2014international conference on image processing, ICIP, vol 2019, pp 3377\u20133381. https:\/\/doi.org\/10.1109\/ICIP.2019.8803639.","DOI":"10.1109\/ICIP.2019.8803639"},{"issue":"4","key":"11309_CR39","doi-asserted-by":"publisher","first-page":"1765","DOI":"10.1109\/TNNLS.2020.2991083","volume":"32","author":"Z Ji","year":"2021","unstructured":"Ji Z, Zhao Y, Pang Y, Li X, Han J (2021) Deep attentive video summarization with distribution consistency learning. IEEE Trans Neural Netw Learn Syst 32(4):1765\u20131775. https:\/\/doi.org\/10.1109\/TNNLS.2020.2991083","journal-title":"IEEE Trans Neural Netw Learn Syst"},{"key":"11309_CR40","doi-asserted-by":"crossref","unstructured":"Feng S, Xie Y, Wei Y, Yan J, and Wang Q (2022) Transformer-based video summarization with spatial-temporal representation. no. BigDIA, pp 428\u2013433","DOI":"10.1109\/BigDIA56350.2022.9874248"},{"key":"11309_CR41","doi-asserted-by":"publisher","unstructured":"Xie Y, Zhang Z, Sapkota M, and LYB (2016) Spatial clockwork recurrent neural network for muscle perimysium segmentation. In: Proceedings of the international conference on medical image computing and computer-assisted intervention, Athens, Greece, 11\u201321 October, 2016, vol. 1, pp. 185\u2013193. https:\/\/doi.org\/10.1007\/978-3-319-46723-8.","DOI":"10.1007\/978-3-319-46723-8"},{"key":"11309_CR42","doi-asserted-by":"publisher","unstructured":"Lin C, Wang H, Yuan J, Yu D, and Li C (2019) Research on UUV obstacle avoiding method based on recurrent neural networks. Complexity 2019. https:\/\/doi.org\/10.1155\/2019\/6320186.","DOI":"10.1155\/2019\/6320186"},{"key":"11309_CR43","doi-asserted-by":"publisher","unstructured":"Liu W, Gu Y, Ding Y, Lu W, Rui X and Tao L (2020) A spatial and temporal combination model for traffic flow: a case study of beijing expressway. In: 2020 IEEE 5th international conference on intelligent transportation engineering, ICITE 2020, pp 76\u201381. https:\/\/doi.org\/10.1109\/ICITE50838.2020.9231430.","DOI":"10.1109\/ICITE50838.2020.9231430"},{"key":"11309_CR44","unstructured":"Kay W et al. (2017) The kinetics human action video dataset. [Online]. Available: http:\/\/arxiv.org\/abs\/1705.06950"},{"key":"11309_CR45","doi-asserted-by":"publisher","first-page":"5998","DOI":"10.1109\/2943.974352","volume":"8","author":"A Vaswani","year":"2017","unstructured":"Vaswani A et al (2017) Attention is all you need. Adv Neural Inf Process Syst 8:5998\u20136008. https:\/\/doi.org\/10.1109\/2943.974352","journal-title":"Adv Neural Inf Process Syst"},{"key":"11309_CR46","doi-asserted-by":"publisher","first-page":"200","DOI":"10.1016\/j.neucom.2020.04.132","volume":"405","author":"Z Ji","year":"2020","unstructured":"Ji Z, Jiao F, Pang Y, Shao L (2020) Deep attentive and semantic preserving video summarization. Neurocomputing 405:200\u2013207. https:\/\/doi.org\/10.1016\/j.neucom.2020.04.132","journal-title":"Neurocomputing"},{"key":"11309_CR47","doi-asserted-by":"publisher","unstructured":"Song Y, Vallmitjana J, Stent A, and Jaimes A (2015) TVSum: summarizing web videos using titles. In: Proceedings of the IEEE computer society conference on computer vision and pattern recognition, vol 07\u201312, pp 5179\u20135187. https:\/\/doi.org\/10.1109\/CVPR.2015.7299154.","DOI":"10.1109\/CVPR.2015.7299154"},{"key":"11309_CR48","doi-asserted-by":"publisher","unstructured":"Zeng KH, Chen TH, Niebles JC, and Sun M (2016) Title generation for user generated videos. In: Lecture Notes in Computer Science (including subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics), vol. 9906 LNCS, no. September, pp. 609\u2013625. https:\/\/doi.org\/10.1007\/978-3-319-46475-6_38.","DOI":"10.1007\/978-3-319-46475-6_38"},{"key":"11309_CR49","doi-asserted-by":"publisher","unstructured":"Fu CY, Lee J, Bansal M, and Berg AC (2017) Video highlight prediction using audience chat reactions. In: EMNLP 2017 - conference on empirical methods in natural language processing, Proceedings, pp 972\u2013978. https:\/\/doi.org\/10.18653\/v1\/d17-1102.","DOI":"10.18653\/v1\/d17-1102"},{"issue":"1","key":"11309_CR50","doi-asserted-by":"publisher","first-page":"56","DOI":"10.1016\/j.patrec.2010.08.004","volume":"32","author":"S Avila","year":"2011","unstructured":"Avila S et al (2011) VSUMM : a mechanism designed to produce static video summaries and a novel evaluation method. Pattern Recogn Lett 32(1):56\u201368. https:\/\/doi.org\/10.1016\/j.patrec.2010.08.004","journal-title":"Pattern Recogn Lett"},{"issue":"1","key":"11309_CR51","doi-asserted-by":"publisher","first-page":"56","DOI":"10.1016\/j.patrec.2010.08.004","volume":"32","author":"SEF De Avila","year":"2011","unstructured":"De Avila SEF, Lopes APB, Da Luz A, De Albuquerque Ara\u00fajo A (2011) VSUMM: a mechanism designed to produce static video summaries and a novel evaluation method. Pattern Recognit Lett 32(1):56\u201368. https:\/\/doi.org\/10.1016\/j.patrec.2010.08.004","journal-title":"Pattern Recognit Lett"},{"key":"11309_CR52","first-page":"56","volume":"32","author":"A Paszke","year":"2019","unstructured":"Paszke A et al (2019) PyTorch: an imperative style, high-performance deep learning library. Adv Neural Inf Process Syst 32:56\u201368","journal-title":"Adv Neural Inf Process Syst"},{"key":"11309_CR53","doi-asserted-by":"publisher","unstructured":"Datta R, Joshi D, Li J, and Wang JZ (2006) Studying aesthetics in photographic images using a computational approach. Lecture notes in computer science (including subseries lecture notes in artificial intelligence and lecture notes in bioinformatics), vol 3953 LNCS, pp 288\u2013301. https:\/\/doi.org\/10.1007\/11744078_23.","DOI":"10.1007\/11744078_23"},{"key":"11309_CR54","doi-asserted-by":"publisher","unstructured":"Hasler D and Suesstrunk SE (2003) Measuring colorfulness in natural images. Human vision and electronic imaging VIII, vol 5007, no. May, p 87. https:\/\/doi.org\/10.1117\/12.477378.","DOI":"10.1117\/12.477378"},{"issue":"3","key":"11309_CR55","doi-asserted-by":"publisher","first-page":"391","DOI":"10.1016\/j.imavis.2009.06.006","volume":"28","author":"S Montabone","year":"2010","unstructured":"Montabone S, Soto A (2010) Human detection using a mobile platform and novel features derived from a visual saliency mechanism. Image Vis Comput 28(3):391\u2013402. https:\/\/doi.org\/10.1016\/j.imavis.2009.06.006","journal-title":"Image Vis Comput"},{"key":"11309_CR56","doi-asserted-by":"publisher","unstructured":"Wang B and Dudek P (2014) A fast self-tuning background subtraction algorithm. In: IEEE computer society conference on computer vision and pattern recognition workshops, pp 401\u2013404. https:\/\/doi.org\/10.1109\/CVPRW.2014.64.","DOI":"10.1109\/CVPRW.2014.64"},{"key":"11309_CR57","doi-asserted-by":"publisher","unstructured":"He X et al. (2019) Unsupervised video summarization with attentive conditional generative adversarial networks. In: MM 2019 - proceedings of the 27th ACM international conference on multimedia, pp. 2296\u20132304. https:\/\/doi.org\/10.1145\/3343031.3351056.","DOI":"10.1145\/3343031.3351056"},{"issue":"10","key":"11309_CR58","doi-asserted-by":"publisher","first-page":"3989","DOI":"10.1109\/TNNLS.2019.2951680","volume":"31","author":"B Zhao","year":"2020","unstructured":"Zhao B, Li X, Lu X (2020) Property-constrained dual learning for video summarization. IEEE Trans Neural Netw Learn Syst 31(10):3989\u20134000. https:\/\/doi.org\/10.1109\/TNNLS.2019.2951680","journal-title":"IEEE Trans Neural Netw Learn Syst"},{"issue":"February2021","key":"11309_CR59","doi-asserted-by":"publisher","first-page":"107618","DOI":"10.1016\/j.compeleceng.2021.107618","volume":"97","author":"J Lin","year":"2022","unstructured":"Lin J, Zhong S, Fares A (2022) Deep hierarchical LSTM networks with attention for video. Comput Electr Eng 97(February2021):107618. https:\/\/doi.org\/10.1016\/j.compeleceng.2021.107618","journal-title":"Comput Electr Eng"},{"key":"11309_CR60","doi-asserted-by":"publisher","unstructured":"Rochan M and Wang Y (2019) Video summarization by learning from unpaired data. In: Proceedings of the IEEE computer society conference on computer vision and pattern recognition, vol. 2019, pp. 7894\u20137903. https:\/\/doi.org\/10.1109\/CVPR.2019.00809.","DOI":"10.1109\/CVPR.2019.00809"},{"key":"11309_CR61","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1109\/TPAMI.2021.3072117","volume":"8828","author":"B Zhao","year":"2021","unstructured":"Zhao B, Li H, Lu X, Li X (2021) Reconstructive sequence-graph network for video summarization. IEEE Trans Pattern Anal Mach Intell 8828:1\u201310. https:\/\/doi.org\/10.1109\/TPAMI.2021.3072117","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"11309_CR62","doi-asserted-by":"publisher","first-page":"948","DOI":"10.1109\/TIP.2020.3039886","volume":"30","author":"W Zhu","year":"2021","unstructured":"Zhu W, Lu J, Li J, Zhou J (2021) DSNet: a flexible detect-to-summarize network for video summarization. IEEE Trans Image Process 30:948\u2013962. https:\/\/doi.org\/10.1109\/TIP.2020.3039886","journal-title":"IEEE Trans Image Process"},{"issue":"3","key":"11309_CR63","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/2822907","volume":"7","author":"Z Li","year":"2016","unstructured":"Li Z, Tang J, Wang X, Liu J, Lu H (2016) Multimedia news summarization in search. ACM Trans Intell Syst Technol 7(3):1\u201320. https:\/\/doi.org\/10.1145\/2822907","journal-title":"ACM Trans Intell Syst Technol"},{"key":"11309_CR64","doi-asserted-by":"publisher","DOI":"10.1007\/s10462-023-10429-z","volume-title":"A comprehensive study of automatic video summarization techniques","author":"D Gupta","year":"2023","unstructured":"Gupta D, Sharma A (2023) A comprehensive study of automatic video summarization techniques, vol 56. Springer, Netherlands. https:\/\/doi.org\/10.1007\/s10462-023-10429-z"},{"key":"11309_CR65","doi-asserted-by":"publisher","unstructured":"Yao T, Mei T, and Rui Y (2016) Highlight detection with pairwise deep ranking for first-person video summarization. In: Proceedings of the IEEE computer society conference on computer vision and pattern recognition, vol. 2016, pp. 982\u2013990. https:\/\/doi.org\/10.1109\/CVPR.2016.112.","DOI":"10.1109\/CVPR.2016.112"},{"key":"11309_CR66","doi-asserted-by":"publisher","unstructured":"Fu TJ, Tai SH, and Chen HT (2019) Attentive and adversarial learning for video summarization. In: Proceedings - 2019 IEEE Winter conference on applications of computer vision, WACV 2019, pp. 1579\u20131587. https:\/\/doi.org\/10.1109\/WACV.2019.00173.","DOI":"10.1109\/WACV.2019.00173"},{"key":"11309_CR67","doi-asserted-by":"publisher","unstructured":"Cho K et al. (2014) Learning phrase representations using RNN encoder-decoder for statistical machine translation. In: EMNLP 2014 - 2014 conference on empirical methods in natural language processing, proceedings of the conference, pp. 1724\u20131734. https:\/\/doi.org\/10.3115\/v1\/d14-1179.","DOI":"10.3115\/v1\/d14-1179"},{"key":"11309_CR68","unstructured":"Simonyan K and Zisserman A (2015) Very deep convolutional networks for large-scale image recognition. In: 3rd international conference on learning representations, ICLR 2015 - Conference Track Proceedings, pp. 1\u201314"},{"issue":"6","key":"11309_CR69","doi-asserted-by":"publisher","first-page":"84","DOI":"10.1145\/3065386","volume":"60","author":"BA Krizhevsky","year":"2012","unstructured":"Krizhevsky BA, Sutskever I, Hinton GE (2012) ImageNet classification with deep convolutional neural networks. Commun ACM 60(6):84\u201390","journal-title":"Commun ACM"},{"key":"11309_CR70","doi-asserted-by":"publisher","unstructured":"Szegedy C et al. (2015) Going deeper with convolutions. In: Proceedings of the IEEE computer society conference on computer vision and pattern recognition, vol. 07\u201312, pp. 1\u20139. https:\/\/doi.org\/10.1109\/CVPR.2015.7298594.","DOI":"10.1109\/CVPR.2015.7298594"},{"key":"11309_CR71","doi-asserted-by":"publisher","unstructured":"He K, Zhang X, Ren S, and Sun J (2016) Deep residual learning for image recognition. In: Proceedings of the IEEE computer society conference on computer vision and pattern recognition, vol. 2016, pp. 770\u2013778. https:\/\/doi.org\/10.1109\/CVPR.2016.90.","DOI":"10.1109\/CVPR.2016.90"},{"key":"11309_CR72","doi-asserted-by":"publisher","unstructured":"Carreira J and Zisserman A (2017) Quo Vadis, action recognition? A new model and the kinetics dataset. In: Proceedings\u201430th IEEE conference on computer vision and pattern recognition, CVPR. 2017, pp. 4724\u20134733. https:\/\/doi.org\/10.1109\/CVPR.2017.502.","DOI":"10.1109\/CVPR.2017.502"}],"container-title":["Neural Computing and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00521-025-11309-z.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00521-025-11309-z\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00521-025-11309-z.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,6]],"date-time":"2025-09-06T17:20:51Z","timestamp":1757179251000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00521-025-11309-z"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,6,2]]},"references-count":72,"journal-issue":{"issue":"21","published-print":{"date-parts":[[2025,7]]}},"alternative-id":["11309"],"URL":"https:\/\/doi.org\/10.1007\/s00521-025-11309-z","relation":{},"ISSN":["0941-0643","1433-3058"],"issn-type":[{"value":"0941-0643","type":"print"},{"value":"1433-3058","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,6,2]]},"assertion":[{"value":"1 May 2023","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"2 May 2025","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"2 June 2025","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare that they have no conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}]}}