{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,21]],"date-time":"2026-02-21T18:26:12Z","timestamp":1771698372826,"version":"3.50.1"},"publisher-location":"Cham","reference-count":31,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031887048","type":"print"},{"value":"9783031887055","type":"electronic"}],"license":[{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-88705-5_23","type":"book-chapter","created":{"date-parts":[[2025,5,28]],"date-time":"2025-05-28T12:33:54Z","timestamp":1748435634000},"page":"269-280","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Keyframe Extraction Based on Large Vision Transformer Model and kNN-DBSCAN Clustering"],"prefix":"10.1007","author":[{"given":"Huynh Anh","family":"Khoa","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Huynh Anh","family":"Duy","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6033-6484","authenticated-orcid":false,"given":"Phan Duy","family":"Hung","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2025,5,29]]},"reference":[{"key":"23_CR1","unstructured":"Sadiq, B.O., Muhammad, B., Abdullahi, M.N., Onuh, G., Muhammed, A.A., Babatunde, A.E.: Keyframe extraction techniques: a review. ELEKTRIKA- J. Elect. Eng. 19(3),\u00a0 54\u201360 (2020)"},{"key":"23_CR2","doi-asserted-by":"crossref","unstructured":"Paul, M.K.A., Kavitha, J., Rani, P.A.J.: Key-frame extraction techniques: a review.\u00a0 Recent Patents Comput. Sci. 11(1),\u00a0 3\u201316 (2018)","DOI":"10.2174\/2213275911666180719111118"},{"key":"23_CR3","doi-asserted-by":"crossref","unstructured":"Meena, P., Kumar, H., Yadav, S.K.: A review on video summarization techniques.\u00a0 Eng. Appli. Artifi. Intell. 118,\u00a0 105667 (2023)","DOI":"10.1016\/j.engappai.2022.105667"},{"key":"23_CR4","doi-asserted-by":"crossref","unstructured":"Karpathy, A., Toderici, G., Shetty, S., Leung, T., Sukthankar, R., Fei-fei, L.: Large-scale video classification with convolutional neural networks. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, CVPR 2014, Columbus, OH, USA (2014)","DOI":"10.1109\/CVPR.2014.223"},{"key":"23_CR5","unstructured":"Abu-El-Haija, S., Kothari, N., Lee, J., Natsev, P., Toderici, G., Varadarajan, B., Vijayanarasimhan, S.: Youtube-8m: a large scale video classification benchmark. arXiv: 1609.08675 (2016)"},{"key":"23_CR6","unstructured":"Song, Y., Vallmitjana, J., Stent, A., Jaimes, A.: Tvsum: summarizing web videos using titles. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, CVPR 2015, Boston, MA, USA (2015)"},{"key":"23_CR7","unstructured":"Ester, M., Kriegel, H.S., Sander, J., Xu, X.: A density-based algorithm for discovering clusters in large spatial databases with noise. In: KDD 1996 Proceedings (1996)"},{"key":"23_CR8","doi-asserted-by":"publisher","unstructured":"Zhou, H.: K-nearest neighbors. In: Learn Data Mining Through Excel, pp. 93\u2013108. Apress, Berkeley, CA (2020). https:\/\/doi.org\/10.1007\/978-1-4842-5982-5_7","DOI":"10.1007\/978-1-4842-5982-5_7"},{"key":"23_CR9","doi-asserted-by":"crossref","unstructured":"Truong, B.T., Venkatesh, S.: Video abstraction: a systematic review and classification. ACM Trans. Multimedia Comput. Commun. Appli. 3(3) (2007)","DOI":"10.1145\/1198302.1198305"},{"key":"23_CR10","doi-asserted-by":"publisher","first-page":"122","DOI":"10.1117\/12.238675","volume":"5","author":"JS Boreczky","year":"1996","unstructured":"Boreczky, J.S., Rowe, L.A.: Comparison of video shot boundary detection techniques. J. Electron. Imaging 5, 122\u2013129 (1996)","journal-title":"J. Electron. Imaging"},{"key":"23_CR11","first-page":"113","volume":"15","author":"A Nagasaka","year":"1992","unstructured":"Nagasaka, A., Tanaka, Y.: Automatic video indexing and full-video search for object appearances. J. Inform. Process. 15, 113\u2013127 (1992)","journal-title":"J. Inform. Process."},{"key":"23_CR12","doi-asserted-by":"publisher","unstructured":"Pal, G., Rudrapaul, D., Acharjee, S., Ray, R., Chakraborty, S., Dey, N.: Video shot boundary detection: a review. AISC,\u00a0 pp 119\u2013127. Springer (2015).\u00a0https:\/\/doi.org\/10.1007\/978-3-319-13731-5_14","DOI":"10.1007\/978-3-319-13731-5_14"},{"key":"23_CR13","unstructured":"Zhuang, Y., Rui, Y., Huang, T.S., Mehrotra, S.: Adaptive key frame extraction using unsupervised clustering. In: International Conference on Image Processing, Chicago, IL, USA (1998)"},{"key":"23_CR14","doi-asserted-by":"crossref","unstructured":"de Avila, S.E.F., Lopes, A.P.B., da Luz, A., de A. Araujo, A.: VSUMM: a mechanism designed to produce static video summaries and a novel evaluation method. Pattern Recognition Lett.\u00a0 32, 56\u201368 (2011)","DOI":"10.1016\/j.patrec.2010.08.004"},{"key":"23_CR15","doi-asserted-by":"crossref","unstructured":"Sima, M.: Key frame extraction for human action videos in dynamic spatio-temporal slice clustering. In: Proceedings of the International Conference on Computer Information Science and Application Technology, Lanzhou, China (2021)","DOI":"10.1088\/1742-6596\/2010\/1\/012076"},{"key":"23_CR16","doi-asserted-by":"crossref","unstructured":"Gharbi, H., Bahroun, S., Massaoudi, M., Zagrouba, E.: Key frames extraction using graph modularity clustering for efficient video summarization. In: Proceedings of the International Conference on Acoustics, Speech and Signal Processing, New Orleans, LA, USA (2017)","DOI":"10.1109\/ICASSP.2017.7952407"},{"key":"23_CR17","doi-asserted-by":"crossref","unstructured":"Ding, Y., Hou, S., Yang, X.: Key frame extraction based on frame difference and cluster for person re-identification. In: Symposia and Workshops on Ubiquitous, Autonomic and Trusted Computing, Atlanta, GA, USA (2021)","DOI":"10.1109\/SWC50871.2021.00085"},{"key":"23_CR18","doi-asserted-by":"crossref","unstructured":"Rodriguez, J.M.D., Yao, P., Wan, W.: Selection of key frames through the analysis and calculation of the absolute difference of histograms. In: International Conference on Audio, Language and Image Processing, Shanghai, China (2018)","DOI":"10.1109\/ICALIP.2018.8455591"},{"key":"23_CR19","doi-asserted-by":"publisher","unstructured":"Tra, N.D., Tri, N.C., Hung, P.D.: Improving warped planar object detection network for automatic license plate recognition. In: Dang, T.K., K\u00fcng, J., Chung, T.M. (eds.) Future Data and Security Engineering. Big Data, Security and Privacy, Smart City and Industry 4.0 Applications. FDSE 2023. CCIS, vol 1925. Springer, Singapore (2023).\u00a0https:\/\/doi.org\/10.1007\/978-3-319-13731-5_14","DOI":"10.1007\/978-3-319-13731-5_14"},{"key":"23_CR20","doi-asserted-by":"publisher","unstructured":"Manh, D.Q., Khoi, T.M., Hieu, D.M., Hung, P.D.: TextFocus: efficient Multi-scale Detection for Arbitrary Scene Text. In: H\u00e0, M.H., Zhu, X., Thai, M.T. (eds.) Computational Data and Social Networks. CSoNet 2023. LNCS, vol 14479. Springer, Singapore (2024).\u00a0https:\/\/doi.org\/10.1007\/978-981-97-0669-3_4","DOI":"10.1007\/978-981-97-0669-3_4"},{"key":"23_CR21","unstructured":"Krizhevsky, A., Sutskever, I., Hinton, G.E.: Imagenet classification with deep convolutional neural networks. In: Advances in Neural Information Processing Systems, pp 1097\u20131105 (2012)"},{"key":"23_CR22","doi-asserted-by":"publisher","unstructured":"Mai, C.T.H., Dung, D.T.P., Le Anh Duc, P., Hung, P.D.: Skeleton-Based fall detection using computer vision. In: Luo, Y. (eds.) Cooperative Design, Visualization, and Engineering. CDVE 2023. LNCS, vol 14166. Springer, Cham (2023).\u00a0https:\/\/doi.org\/10.1007\/978-3-031-43815-8_15","DOI":"10.1007\/978-3-031-43815-8_15"},{"key":"23_CR23","doi-asserted-by":"publisher","unstructured":"Hieu, P.N.D., Hung, P.D.: A convolutional autoencoder approach for weakly supervised anomaly video detection. In: Nguyen, N.T., et al. Computational Collective Intelligence. ICCCI 2023. LNCS, vol 14162. Springer, Cham (2023).\u00a0https:\/\/doi.org\/10.1007\/978-3-031-41456-5_11","DOI":"10.1007\/978-3-031-41456-5_11"},{"key":"23_CR24","doi-asserted-by":"publisher","unstructured":"Savran K\u0131z\u0131ltepe, R., Gan, J.Q., Escobar, J.J.: A novel keyframe extraction method for video classification using deep neural networks. Neural Comput. Appl., 1\u201312 (2021). https:\/\/doi.org\/10.1007\/s00521-021-06322-x","DOI":"10.1007\/s00521-021-06322-x"},{"key":"23_CR25","doi-asserted-by":"crossref","unstructured":"Lv, C., Li, J., Tian, J.: Key frame extraction for sports training based on improved deep learning. Sci. Program. 2021,\u00a0 1\u20138 (2021)","DOI":"10.1155\/2021\/1016574"},{"key":"23_CR26","doi-asserted-by":"crossref","unstructured":"Tan, K., Zhou, Y., Xia, Q., Liu, R., Chen, Y.: Large model based sequential keyframe extraction for video summarization. In: Proceedings of the International Conference on Computing, Machine Learning and Data Science (CMLDS 2024). Association for Computing Machinery, New York, USA, Article 52, pp. 1\u20135 (2024)","DOI":"10.1145\/3661725.3661781"},{"key":"23_CR27","doi-asserted-by":"crossref","unstructured":"Gharbi, H., Bahroun, S., Zagrouba, E.: A novel key frame extraction approach for video summarization. In: Proceedings of the 11th Joint Conference on Computer Vision, Imaging and Computer Graphics Theory and Applications (VISIGRAPP 2016), vol. 3, pp. 148\u2013155 (2016)","DOI":"10.5220\/0005725701460153"},{"key":"23_CR28","unstructured":"Soucek, T., Lokoc, J.: Transnet V2: an effective deep network architecture for fast shot transition detection. arXiv: 2008.04838 (2020)"},{"key":"23_CR29","unstructured":"Radford, A., et al.: Learning transferable visual models from natural language supervision. In: ICML 2021, pp. 8748\u20138763 (2021)"},{"key":"23_CR30","unstructured":"Vaswani, A., et al.: Attention Is All You Need. arXiv:1706.03762 (2017)"},{"issue":"7","key":"23_CR31","doi-asserted-by":"publisher","first-page":"4537","DOI":"10.1016\/j.jksuci.2020.10.031","volume":"34","author":"HM Nandini","year":"2022","unstructured":"Nandini, H.M., Chethan, H.K., Rashmi, B.S.: Shot based keyframe extraction using edge-lbp approach. J. King Saud Univ.-Comput. Inform. Sci. 34(7), 4537\u20134545 (2022)","journal-title":"J. King Saud Univ.-Comput. Inform. Sci."}],"container-title":["Lecture Notes in Computer Science","Intelligent Human Computer Interaction"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-88705-5_23","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,5,28]],"date-time":"2025-05-28T12:34:04Z","timestamp":1748435644000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-88705-5_23"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025]]},"ISBN":["9783031887048","9783031887055"],"references-count":31,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-88705-5_23","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025]]},"assertion":[{"value":"29 May 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"IHCI","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Intelligent Human Computer Interaction","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Twente","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"The Netherlands","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"13 November 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"16 November 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"16","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ihci2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/www.utwente.nl\/en\/bms\/ihci2024\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}