{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,23]],"date-time":"2026-02-23T23:30:34Z","timestamp":1771889434121,"version":"3.50.1"},"reference-count":66,"publisher":"Springer Science and Business Media LLC","issue":"4","license":[{"start":{"date-parts":[[2025,10,12]],"date-time":"2025-10-12T00:00:00Z","timestamp":1760227200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,10,12]],"date-time":"2025-10-12T00:00:00Z","timestamp":1760227200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["New Gener. Comput."],"published-print":{"date-parts":[[2025,11]]},"DOI":"10.1007\/s00354-025-00303-7","type":"journal-article","created":{"date-parts":[[2025,10,12]],"date-time":"2025-10-12T15:59:02Z","timestamp":1760284742000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Reinforcement Learning Based Video Summarization Using Attention Aware Dilated RNN"],"prefix":"10.1007","volume":"43","author":[{"given":"Deeksha","family":"Gupta","sequence":"first","affiliation":[]},{"given":"Manisha","family":"Kaushal","sequence":"additional","affiliation":[]},{"given":"Akashdeep","family":"Sharma","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,10,12]]},"reference":[{"key":"303_CR1","doi-asserted-by":"publisher","DOI":"10.1007\/s10462-023-10429-z","author":"D Gupta","year":"2023","unstructured":"Gupta, D., Sharma, A.: A comprehensive study of automatic video summarization techniques. Artif. Intell. Rev. (2023). https:\/\/doi.org\/10.1007\/s10462-023-10429-z","journal-title":"Artif. Intell. Rev."},{"issue":"1","key":"303_CR2","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/1198302.1198305","volume":"3","author":"BATU Truong","year":"2007","unstructured":"Truong, B.A.T.U., Venkatesh, S.: Video abstraction\u202f: a systematic review and classification. ACM Trans. Multimed. Comput. Commun. Appl. (TOMM) 3(1), 1\u201337 (2007). https:\/\/doi.org\/10.1145\/1198302.1198305","journal-title":"ACM Trans. Multimed. Comput. Commun. Appl. (TOMM)"},{"issue":"5","key":"303_CR3","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3347712","volume":"52","author":"VV K.","year":"2019","unstructured":"K., V.V., Sen, D., Raman, B.: Video skimming. ACM Comput. Surv. 52(5), 1\u201338 (2019). https:\/\/doi.org\/10.1145\/3347712","journal-title":"ACM Comput. Surv."},{"key":"303_CR4","doi-asserted-by":"publisher","unstructured":"Zhang, K., Chao, W.L., Sha, F., Grauman, K.: Video summarization with long short-term memory. In: Lecture Notes in Computer Science (including subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics), vol. 9911 LNCS. (2016). https:\/\/doi.org\/10.1007\/978-3-319-46478-7_47","DOI":"10.1007\/978-3-319-46478-7_47"},{"key":"303_CR5","doi-asserted-by":"publisher","unstructured":"Wang, L., Zhu, Y., Pan, H.: Unsupervised reinforcement learning for video summarization reward function. In: ACM International Conference Proceeding Series, vol. Part F1477, pp. 40\u201344. (2019). https:\/\/doi.org\/10.1145\/3317640.3317658","DOI":"10.1145\/3317640.3317658"},{"key":"303_CR6","doi-asserted-by":"publisher","unstructured":"Zhao, B., Li, X., Lu, X.: Hierarchical recurrent neural network for video summarization. In: MM 2017 - Proceedings of the 2017 ACM Multimedia Conference, pp. 863\u2013871. (2017). https:\/\/doi.org\/10.1145\/3123266.3123328","DOI":"10.1145\/3123266.3123328"},{"key":"303_CR7","doi-asserted-by":"publisher","unstructured":"Zhao, B., Li, X., Lu, X.: HSA-RNN: hierarchical structure-adaptive RNN for video summarization. In: Proceedings of the IEEE Computer Society Conference on Computer Vision and Pattern Recognition, pp. 7405\u20137414. (2018). https:\/\/doi.org\/10.1109\/CVPR.2018.00773","DOI":"10.1109\/CVPR.2018.00773"},{"key":"303_CR8","first-page":"77","volume":"2017","author":"S Chang","year":"2017","unstructured":"Chang, S., et al.: Dilated recurrent neural networks. Adv. Neural Inform. Process. Syst. 2017, 77\u201387 (2017)","journal-title":"Adv. Neural Inform. Process. Syst."},{"key":"303_CR9","doi-asserted-by":"publisher","DOI":"10.1109\/2943.974352","author":"A Vaswani","year":"2017","unstructured":"Vaswani, A., et al.: Attention is all you need. Adv. Neural Inform. Process. Syst. (2017). https:\/\/doi.org\/10.1109\/2943.974352","journal-title":"Adv. Neural Inform. Process. Syst."},{"key":"303_CR10","doi-asserted-by":"publisher","first-page":"200","DOI":"10.1109\/TPAMI.2021.3132068","volume":"10","author":"Z Li","year":"2021","unstructured":"Li, Z., Sun, Y., Zhang, L., Tang, J.: CTNet: context-based tandem network for semantic segmentation. IEEE Trans. Pattern Anal. Mach. Intell. 10, 200 (2021). https:\/\/doi.org\/10.1109\/TPAMI.2021.3132068","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"303_CR11","doi-asserted-by":"crossref","unstructured":"Vishwakarma, S., Li, W., Tang, C., Woodbridge, K., Adve, R.R., Chetty, K.: Attention\u2010enhanced Alexnet for improved radar micro\u2010Doppler signature classification. IET Radar, Sonar & Navigation (2022)","DOI":"10.1049\/rsn2.12369"},{"issue":"1","key":"303_CR12","first-page":"7582","volume":"32","author":"K Zhou","year":"2018","unstructured":"Zhou, K., Qiao, Y., Xiang, T.: Deep reinforcement learning for unsupervised video summarization with diversity-representativeness reward. Proc. AAAI Conf. Artif. Intell. 32(1), 7582\u20137589 (2018)","journal-title":"Proc. AAAI Conf. Artif. Intell."},{"key":"303_CR13","doi-asserted-by":"publisher","unstructured":"Zhang, K., Grauman, K., Sha, F.: Retrospective encoders for video summarization. In: Lecture Notes in Computer Science (including subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics), vol. 11212 LNCS, pp. 391\u2013408, (2018). https:\/\/doi.org\/10.1007\/978-3-030-01237-3_24","DOI":"10.1007\/978-3-030-01237-3_24"},{"key":"303_CR14","doi-asserted-by":"publisher","DOI":"10.1145\/3321408.3322622","author":"Y Zhang","year":"2019","unstructured":"Zhang, Y., Zhao, X., Kampffmeyer, M., Tan, M.: DTR-GAN: dilated temporal relational adversarial network for video summarization. ACM Int. Conf. Proc. Ser. (2019). https:\/\/doi.org\/10.1145\/3321408.3322622","journal-title":"ACM Int. Conf. Proc. Ser."},{"key":"303_CR15","doi-asserted-by":"publisher","unstructured":"Fajtl, J., Sokeh, H.S., Argyriou, V., Monekosso, D., Remagnino, P.: Summarizing videos with attention. In: Lecture Notes in Computer Science (including subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics), vol. 11367 LNCS, pp. 39\u201354. (2019). https:\/\/doi.org\/10.1007\/978-3-030-21074-8_4","DOI":"10.1007\/978-3-030-21074-8_4"},{"issue":"6","key":"303_CR16","doi-asserted-by":"publisher","first-page":"1709","DOI":"10.1109\/TCSVT.2019.2904996","volume":"30","author":"Z Ji","year":"2020","unstructured":"Ji, Z., Xiong, K., Pang, Y., Li, X.: Video summarization with attention-based encoder-decoder networks. IEEE Trans. Circuits Syst. Video Technol. 30(6), 1709\u20131717 (2020). https:\/\/doi.org\/10.1109\/TCSVT.2019.2904996","journal-title":"IEEE Trans. Circuits Syst. Video Technol."},{"key":"303_CR17","doi-asserted-by":"publisher","first-page":"3377","DOI":"10.1109\/ICIP.2019.8803639","volume":"2019","author":"YT Liu","year":"2019","unstructured":"Liu, Y.T., Li, Y.J., Yang, F.E., Chen, S.F., Wang, Y.C.F.: Learning hierarchical self-attention for video summarization. Proc. Int. Conf. Image Process ICIP 2019, 3377\u20133381 (2019). https:\/\/doi.org\/10.1109\/ICIP.2019.8803639","journal-title":"Proc. Int. Conf. Image Process ICIP"},{"key":"303_CR18","doi-asserted-by":"crossref","unstructured":"Apostolidis, E.: Combining global and local attention with positional encoding for video summarization. IEEE International Symposium on Multimedia (ISM) (2021)","DOI":"10.1109\/ISM52913.2021.00045"},{"issue":"4","key":"303_CR19","doi-asserted-by":"publisher","first-page":"1765","DOI":"10.1109\/TNNLS.2020.2991083","volume":"32","author":"Z Ji","year":"2021","unstructured":"Ji, Z., Zhao, Y., Pang, Y., Li, X., Han, J.: Deep attentive video summarization with distribution consistency learning. IEEE Trans. Neural Netw. Learn. Syst. 32(4), 1765\u20131775 (2021). https:\/\/doi.org\/10.1109\/TNNLS.2020.2991083","journal-title":"IEEE Trans. Neural Netw. Learn. Syst."},{"key":"303_CR20","doi-asserted-by":"publisher","DOI":"10.1007\/s00530-023-01154-2","author":"D Gupta","year":"2023","unstructured":"Gupta, D., Sharma, A.: A two-stage attention augmented fully convolutional network-based dynamic video summarization. Multimedia Syst. (2023). https:\/\/doi.org\/10.1007\/s00530-023-01154-2","journal-title":"Multimedia Syst."},{"key":"303_CR21","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2020.107677","volume":"111","author":"P Li","year":"2021","unstructured":"Li, P., Ye, Q., Zhang, L., Yuan, L., Xu, X., Shao, L.: Exploring global diverse attention via pairwise temporal relation for video summarization. Pattern Recogn. 111, 107677 (2021). https:\/\/doi.org\/10.1016\/j.patcog.2020.107677","journal-title":"Pattern Recogn."},{"issue":"no. 1","key":"303_CR22","doi-asserted-by":"publisher","first-page":"56","DOI":"10.1016\/j.patrec.2010.08.004","volume":"32","author":"SEF De Avila","year":"2011","unstructured":"De Avila, S.E.F., Lopes, A.P.B., Da Luz, A., De Albuquerque Ara\u00fajo, A.: Vsumm: a mechanism designed to produce static video summaries and a novel evaluation method. Pattern Recogn. Lett. 32(1), 56\u201368 (2011). https:\/\/doi.org\/10.1016\/j.patrec.2010.08.004","journal-title":"Pattern Recogn. Lett."},{"key":"303_CR23","doi-asserted-by":"publisher","DOI":"10.1007\/s11042-016-3569-x","author":"J Wu","year":"2016","unstructured":"Wu, J., Zhong, S., Jiang, J.: A novel clustering method for static video summarization. Multimedia Tools Appl. (2016). https:\/\/doi.org\/10.1007\/s11042-016-3569-x","journal-title":"Multimedia Tools Appl."},{"key":"303_CR24","doi-asserted-by":"publisher","unstructured":"Mei, S., Guan, G., Wang, Z., He, M., Hua, X.S., Dagan Feng, D.: L2,0 constrained sparse dictionary selection for video summarization. In: Proceedings - IEEE International Conference on Multimedia and Expo, vol. 2014. https:\/\/doi.org\/10.1109\/ICME.2014.6890179","DOI":"10.1109\/ICME.2014.6890179"},{"key":"303_CR25","doi-asserted-by":"publisher","first-page":"11763","DOI":"10.1109\/ACCESS.2019.2891834","volume":"7","author":"M Ma","year":"2019","unstructured":"Ma, M., Mei, S., Wan, S., Wang, Z., Feng, D.: Video summarization via nonlinear sparse dictionary selection. IEEE Access 7, 11763\u201311774 (2019). https:\/\/doi.org\/10.1109\/ACCESS.2019.2891834","journal-title":"IEEE Access"},{"issue":"2","key":"303_CR26","doi-asserted-by":"publisher","first-page":"193","DOI":"10.1007\/s10844-016-0441-4","volume":"49","author":"H Jacob","year":"2017","unstructured":"Jacob, H., P\u00e1dua, F.L.C., Lacerda, A., Pereira, A.C.M.: A video summarization approach based on the emulation of bottom-up mechanisms of visual attention. J. Intell. Inf. Syst. 49(2), 193\u2013211 (2017). https:\/\/doi.org\/10.1007\/s10844-016-0441-4","journal-title":"J. Intell. Inf. Syst."},{"key":"303_CR27","doi-asserted-by":"publisher","first-page":"393","DOI":"10.1016\/j.neucom.2015.05.126","volume":"174","author":"I Mehmood","year":"2016","unstructured":"Mehmood, I., Sajjad, M., Rho, S., Baik, S.W.: Divide-and-conquer based summarization framework for extracting affective video content. Neurocomputing 174, 393\u2013403 (2016). https:\/\/doi.org\/10.1016\/j.neucom.2015.05.126","journal-title":"Neurocomputing"},{"issue":"17","key":"303_CR28","doi-asserted-by":"publisher","first-page":"22083","DOI":"10.1007\/s11042-017-5479-y","volume":"77","author":"T Hu","year":"2018","unstructured":"Hu, T., Li, Z.: Video summarization via exploring the global and local importance. Multimedia Tools Appl. 77(17), 22083\u201322098 (2018). https:\/\/doi.org\/10.1007\/s11042-017-5479-y","journal-title":"Multimedia Tools Appl."},{"key":"303_CR29","doi-asserted-by":"publisher","unstructured":"Ai, X., Song, Y., Li, Z. (2018) Unsupervised video summarization based on consistent clip generation. In: 2018 IEEE 4th International Conference on Multimedia Big Data, BigMM 2018, pp. 1\u20137. (2018). https:\/\/doi.org\/10.1109\/BigMM.2018.8499188","DOI":"10.1109\/BigMM.2018.8499188"},{"key":"303_CR30","doi-asserted-by":"publisher","unstructured":"Mahasseni, B., Lam, M., Todorovic, S.: Unsupervised video summarization with adversarial LSTM networks. In In: Proceedings of the IEEE conference on Computer Vision and Pattern Recognition, vol. 2017, pp. 202\u2013211. (2017). https:\/\/doi.org\/10.1109\/CVPR.2017.318","DOI":"10.1109\/CVPR.2017.318"},{"key":"303_CR31","doi-asserted-by":"publisher","unstructured":"Apostolidis, E., Metsai, A.I., Adamantidou, E., Mezaris, V., Patras, I.: A stepwise, label-based approach for improving the adversarial training in unsupervised video summarization. In: AI4TV 2019 - Proceedings of the 1st International Workshop on AI for Smart TV Content Production, Access and Delivery, co-located with MM 2019, pp. 17\u201325. (2019). https:\/\/doi.org\/10.1145\/3347449.3357482","DOI":"10.1145\/3347449.3357482"},{"key":"303_CR32","doi-asserted-by":"publisher","unstructured":"Apostolidis, E., Adamantidou, E., Metsai, A.I., Mezaris, V., Patras, I.: Unsupervised video summarization via attention-driven adversarial learning. In: Lecture Notes in Computer Science (including subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics), vol. 11961 LNCS, no. Mmm, pp. 492\u2013504. (2020). https:\/\/doi.org\/10.1007\/978-3-030-37731-1_40","DOI":"10.1007\/978-3-030-37731-1_40"},{"issue":"10","key":"303_CR33","doi-asserted-by":"publisher","first-page":"2711","DOI":"10.1109\/TMM.2019.2959451","volume":"22","author":"L Yuan","year":"2020","unstructured":"Yuan, L., Tay, F.E.H., Li, P., Feng, J.: Unsupervised video summarization with cycle-consistent adversarial LSTM networks. IEEE Trans. Multimedia 22(10), 2711\u20132722 (2020). https:\/\/doi.org\/10.1109\/TMM.2019.2959451","journal-title":"IEEE Trans. Multimedia"},{"key":"303_CR34","doi-asserted-by":"publisher","DOI":"10.1145\/3485472","author":"W Li","year":"2022","unstructured":"Li, W., Pan, G., Wang, C., Xing, Z., Han, Z.: From coarse to fine: hierarchical structure-aware video summarization. ACM Trans. Multimed. Comput. Commun. Appl. (2022). https:\/\/doi.org\/10.1145\/3485472","journal-title":"ACM Trans. Multimed. Comput. Commun. Appl."},{"key":"303_CR35","doi-asserted-by":"publisher","first-page":"1573","DOI":"10.1109\/TIP.2022.3143699","volume":"31","author":"T Liu","year":"2022","unstructured":"Liu, T., Meng, Q., Huang, J.J., Vlontzos, A., Rueckert, D., Kainz, B.: Video summarization through reinforcement learning with a 3D spatio-temporal U-net. IEEE Trans. Image Process. 31, 1573\u20131586 (2022). https:\/\/doi.org\/10.1109\/TIP.2022.3143699","journal-title":"IEEE Trans. Image Process."},{"key":"303_CR36","doi-asserted-by":"crossref","unstructured":"Yal\u0131n\u0131z, G., Ikizler-Cinbis, N.: Unsupervised video summarization with independently recurrent neural networks. In: 27th Signal Processing and Communications Applications Conference (SIU), pp. 1\u20134. (2019)","DOI":"10.1109\/SIU.2019.8806603"},{"key":"303_CR37","doi-asserted-by":"publisher","unstructured":"Ng, J.Y.H., Hausknecht, M., Vijayanarasimhan, S., Vinyals, O., Monga, R., Toderici, G.: Beyond short snippets: Deep networks for video classification. In: Proceedings of the IEEE Computer Society Conference on Computer Vision and Pattern Recognition, vol. 07\u201312-June, pp. 4694\u20134702. (2015). https:\/\/doi.org\/10.1109\/CVPR.2015.7299101","DOI":"10.1109\/CVPR.2015.7299101"},{"key":"303_CR38","doi-asserted-by":"publisher","first-page":"4718","DOI":"10.24963\/ijcai.2019\/655","volume":"2019","author":"JY Liu","year":"2019","unstructured":"Liu, J.Y., Yang, Y.H.: Dilated convolution with dilated GRU for music source separation. IJCAI Int. Joint Conf. Artif. Intell. 2019, 4718\u20134724 (2019). https:\/\/doi.org\/10.24963\/ijcai.2019\/655","journal-title":"IJCAI Int. Joint Conf. Artif. Intell."},{"issue":"3","key":"303_CR39","doi-asserted-by":"publisher","first-page":"308","DOI":"10.1007\/s41666-020-00068-2","volume":"4","author":"T Zhu","year":"2020","unstructured":"Zhu, T., Li, K., Chen, J., Herrero, P., Georgiou, P.: Dilated recurrent neural networks for glucose forecasting in type 1 diabetes. J. Healthc. Inform. Res. 4(3), 308\u2013324 (2020). https:\/\/doi.org\/10.1007\/s41666-020-00068-2","journal-title":"J. Healthc. Inform. Res."},{"key":"303_CR40","doi-asserted-by":"publisher","unstructured":"Schoene, A.M., Turner, A., De Mel, G.R., Dethlefs, N.: Hierarchical multiscale recurrent neural networks for detecting suicide notes. In: IEEE Transactions on Affective Computing, vol. XX, no. X, pp. 1\u201312. (2021). https:\/\/doi.org\/10.1109\/TAFFC.2021.3057105","DOI":"10.1109\/TAFFC.2021.3057105"},{"issue":"no. 4","key":"303_CR41","doi-asserted-by":"publisher","first-page":"6212","DOI":"10.1109\/LRA.2021.3091698","volume":"6","author":"T Sun","year":"2021","unstructured":"Sun, T., Hu, Q., Gulati, P., Farokh Atashzar, S.: Temporal dilation of deep LSTM for agile decoding of sEMG: application in prediction of upper-limb motor intention in NeuroRobotics. IEEE Robot. Autom. Lett. 6(4), 6212\u20136219 (2021). https:\/\/doi.org\/10.1109\/LRA.2021.3091698","journal-title":"IEEE Robot. Autom. Lett."},{"key":"303_CR42","doi-asserted-by":"publisher","unstructured":"Rochan, M., Ye, L., Wang, Y. Video summarization using fully convolutional sequence networks. In: Proceedings of the European Conference on Computer Vision (ECCV), pp. 347\u2013363. (2018). https:\/\/doi.org\/10.1007\/978-3-030-01258-8_22","DOI":"10.1007\/978-3-030-01258-8_22"},{"key":"303_CR43","doi-asserted-by":"publisher","unstructured":"Apostolidis, E., Balaouras, G., Mezaris, V., Patras, I.: Summarizing videos using concentrated attention and considering the uniqueness and diversity of the video frames. In: Proceedings of the 2022 International Conference on Multimedia Retrieval, pp. 407\u2013415. (2022) https:\/\/doi.org\/10.1145\/3512527.3531404","DOI":"10.1145\/3512527.3531404"},{"key":"303_CR44","doi-asserted-by":"publisher","unstructured":"Szegedy, C. et al.: Going deeper with convolutions. In: Proceedings of the IEEE Computer Society Conference on Computer Vision and Pattern Recognition, vol. 07\u201312-June, pp. 1\u20139. (2015). https:\/\/doi.org\/10.1109\/CVPR.2015.7298594","DOI":"10.1109\/CVPR.2015.7298594"},{"key":"303_CR45","doi-asserted-by":"publisher","unstructured":"Datta, R., Joshi, D., Li, J., Wang, J.Z.: Studying aesthetics in photographic images using a computational approach. In: Lecture Notes in Computer Science (including subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics), vol. 3953 LNCS, pp. 288\u2013301. (2006). https:\/\/doi.org\/10.1007\/11744078_23","DOI":"10.1007\/11744078_23"},{"key":"303_CR46","doi-asserted-by":"publisher","first-page":"87","DOI":"10.1117\/12.477378","volume":"5007","author":"D Hasler","year":"2003","unstructured":"Hasler, D., Suesstrunk, S.E.: Measuring colorfulness in natural images. Human Vis. Electronic Imaging VIII 5007, 87 (2003). https:\/\/doi.org\/10.1117\/12.477378","journal-title":"Human Vis. Electronic Imaging VIII"},{"issue":"3","key":"303_CR47","doi-asserted-by":"publisher","first-page":"391","DOI":"10.1016\/j.imavis.2009.06.006","volume":"28","author":"S Montabone","year":"2010","unstructured":"Montabone, S., Soto, A.: Human detection using a mobile platform and novel features derived from a visual saliency mechanism. Image Vis. Comput. 28(3), 391\u2013402 (2010). https:\/\/doi.org\/10.1016\/j.imavis.2009.06.006","journal-title":"Image Vis. Comput."},{"issue":"3","key":"303_CR48","doi-asserted-by":"publisher","first-page":"229","DOI":"10.1023\/A:1022672621406","volume":"8","author":"RJ Willia","year":"1992","unstructured":"Willia, R.J.: Simple statistical gradient-following algorithms for connectionist reinforcement learning. Mach. Learn. 8(3), 229\u2013256 (1992). https:\/\/doi.org\/10.1023\/A:1022672621406","journal-title":"Mach. Learn."},{"key":"303_CR49","doi-asserted-by":"publisher","unstructured":"Gygli, M., Grabner, H., Riemenschneider, H., Van Gool, L.: Creating summaries from user videos. In: Lecture Notes in Computer Science (including subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics), vol. 8695 LNCS, no. PART 7, pp. 505\u2013520. (2014). https:\/\/doi.org\/10.1007\/978-3-319-10584-0_33","DOI":"10.1007\/978-3-319-10584-0_33"},{"key":"303_CR50","doi-asserted-by":"publisher","unstructured":"Song, Y., Vallmitjana, J., Stent, A., Jaimes, A.: TVSum: summarizing web videos using titles. In: Proceedings of the IEEE Computer Society Conference on Computer Vision and Pattern Recognition, vol. 07\u201312-June, pp. 5179\u20135187. (2015). https:\/\/doi.org\/10.1109\/CVPR.2015.7299154","DOI":"10.1109\/CVPR.2015.7299154"},{"issue":"1","key":"303_CR51","doi-asserted-by":"publisher","first-page":"56","DOI":"10.1016\/j.patrec.2010.08.004","volume":"32","author":"S Avila","year":"2011","unstructured":"Avila, S., et al.: VSUMM: a mechanism designed to produce static video summaries and a novel evaluation method. Pattern Recogn. Lett. 32(1), 56\u201368 (2011). https:\/\/doi.org\/10.1016\/j.patrec.2010.08.004","journal-title":"Pattern Recogn. Lett."},{"key":"303_CR52","doi-asserted-by":"crossref","unstructured":"Potapov, D. et al. : Category-specific video summarization. In: European conference on computer vision, Springer, Cham, pp. 540\u2013555 (2014)","DOI":"10.1007\/978-3-319-10599-4_35"},{"key":"303_CR53","unstructured":"Paszke, A. et al.: PyTorch: an imperative style, high-performance deep learning library. Adv. Neural Inform. Process. Syst. 32 (2019)"},{"key":"303_CR54","doi-asserted-by":"publisher","unstructured":"Rochan, M. Wang, Y.: Video summarization by learning from unpaired data. In: Proceedings of the IEEE Computer Society Conference on Computer Vision and Pattern Recognition, vol. 2019-June, pp. 7894\u20137903. (2019). https:\/\/doi.org\/10.1109\/CVPR.2019.00809","DOI":"10.1109\/CVPR.2019.00809"},{"key":"303_CR55","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1109\/TPAMI.2021.3072117","volume":"8828","author":"B Zhao","year":"2021","unstructured":"Zhao, B., Li, H., Lu, X., Li, X.: Reconstructive sequence-graph network for video summarization. IEEE Trans. Pattern Anal. Mach. Intell. 8828, 1\u201310 (2021). https:\/\/doi.org\/10.1109\/TPAMI.2021.3072117","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"303_CR56","doi-asserted-by":"publisher","first-page":"1282","DOI":"10.1109\/LSP.2023.3313091","volume":"30","author":"Y Zhang","year":"2023","unstructured":"Zhang, Y., Liu, Y., Kang, W., Zheng, Y.: Mar-net: motion-assisted reconstruction network for unsupervised video summarization. IEEE Signal Process. Lett. 30, 1282\u20131286 (2023). https:\/\/doi.org\/10.1109\/LSP.2023.3313091","journal-title":"IEEE Signal Process. Lett."},{"key":"303_CR57","doi-asserted-by":"publisher","unstructured":"Wang, Y., Xu, H., Luo, D.: Self-supervised video summarization guided by semantic inverse optimal transport. In: Proceedings of the 31st ACM International Conference on Multimedia, pp. 6611\u20136622, (2023). https:\/\/doi.org\/10.1145\/3581783.3612087","DOI":"10.1145\/3581783.3612087"},{"issue":"8","key":"303_CR58","doi-asserted-by":"publisher","first-page":"3278","DOI":"10.1109\/TCSVT.2020.3037883","volume":"31","author":"E Apostolidis","year":"2021","unstructured":"Apostolidis, E., Adamantidou, E., Metsai, A.I., Mezaris, V., Patras, I.: AC-SUM-GAN: connecting actor-critic and generative adversarial networks for unsupervised video summarization. IEEE Trans. Circuits Syst. Video Technol. 31(8), 3278\u20133292 (2021). https:\/\/doi.org\/10.1109\/TCSVT.2020.3037883","journal-title":"IEEE Trans. Circuits Syst. Video Technol."},{"issue":"10","key":"303_CR59","doi-asserted-by":"publisher","first-page":"3989","DOI":"10.1109\/TNNLS.2019.2951680","volume":"31","author":"B Zhao","year":"2020","unstructured":"Zhao, B., Li, X., Lu, X.: Property-constrained dual learning for video summarization. IEEE Trans. Neural Netw. Learn. Syst. 31(10), 3989\u20134000 (2020). https:\/\/doi.org\/10.1109\/TNNLS.2019.2951680","journal-title":"IEEE Trans. Neural Netw. Learn. Syst."},{"key":"303_CR60","doi-asserted-by":"publisher","DOI":"10.1007\/s11042-023-16700-3","author":"K Yashwanth","year":"2023","unstructured":"Yashwanth, K., Soni, B.: Encoder-decoder architectures based video summarization using key-shot selection model. Multimedia Tools Appl. (2023). https:\/\/doi.org\/10.1007\/s11042-023-16700-3","journal-title":"Multimedia Tools Appl."},{"key":"303_CR61","doi-asserted-by":"publisher","DOI":"10.1007\/s12559-023-10243-3","author":"J Qin","year":"2024","unstructured":"Qin, J., Yu, H., Liang, W., Ding, D.: Video summarization using knowledge distillation-based attentive network. Cogn. Comput. (2024). https:\/\/doi.org\/10.1007\/s12559-023-10243-3","journal-title":"Cogn. Comput."},{"key":"303_CR62","doi-asserted-by":"publisher","DOI":"10.1145\/2822907","author":"Z Li","year":"2016","unstructured":"Li, Z., Tang, J., Wang, X., Liu, J., Lu, H.: Multimedia news summarization in search. ACM Trans. Intell. Syst. Technol. (2016). https:\/\/doi.org\/10.1145\/2822907","journal-title":"ACM Trans. Intell. Syst. Technol."},{"key":"303_CR63","doi-asserted-by":"publisher","unstructured":"Cho, K. et al.: Learning phrase representations using RNN encoder-decoder for statistical machine translation. In: EMNLP 2014 - 2014 Conference on Empirical Methods in Natural Language Processing, Proceedings of the Conference, pp. 1724\u20131734 (2014). https:\/\/doi.org\/10.3115\/v1\/d14-1179","DOI":"10.3115\/v1\/d14-1179"},{"issue":"3","key":"303_CR64","doi-asserted-by":"publisher","first-page":"3904","DOI":"10.1109\/TPAMI.2022.3186506","volume":"45","author":"H Li","year":"2023","unstructured":"Li, H., Ke, Q., Gong, M., Zhang, R.: Video joint modelling based on hierarchical transformer for co-summarization. IEEE Trans. Pattern Anal. Mach. Intell. 45(3), 3904\u20133917 (2023). https:\/\/doi.org\/10.1109\/TPAMI.2022.3186506","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"303_CR65","doi-asserted-by":"crossref","unstructured":"Otani, M., Nakashima, Y., Rahtu, E., Heikkila, J.: Rethinking the evaluation of video summaries. In: Proceedings of the IEEE conference on computer vision and pattern Recognition (CVPR) (2019)","DOI":"10.1109\/CVPR.2019.00778"},{"key":"303_CR66","doi-asserted-by":"publisher","first-page":"239","DOI":"10.1093\/biomet\/33.3.239","volume":"33","author":"MG Kendall","year":"1945","unstructured":"Kendall, M.G.: The treatment of ties in ranking problems. Biom etrika 33, 239\u2013251 (1945). https:\/\/doi.org\/10.1093\/biomet\/33.3.239","journal-title":"Biom etrika"}],"container-title":["New Generation Computing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00354-025-00303-7.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00354-025-00303-7\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00354-025-00303-7.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,11,12]],"date-time":"2025-11-12T20:03:05Z","timestamp":1762977785000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00354-025-00303-7"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,12]]},"references-count":66,"journal-issue":{"issue":"4","published-print":{"date-parts":[[2025,11]]}},"alternative-id":["303"],"URL":"https:\/\/doi.org\/10.1007\/s00354-025-00303-7","relation":{},"ISSN":["0288-3635","1882-7055"],"issn-type":[{"value":"0288-3635","type":"print"},{"value":"1882-7055","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,10,12]]},"assertion":[{"value":"23 April 2024","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"21 September 2025","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"12 October 2025","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors have no competing interests to declare that are relevant to the content of this article.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of Interest"}}],"article-number":"19"}}