{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,15]],"date-time":"2026-03-15T23:03:02Z","timestamp":1773615782806,"version":"3.50.1"},"reference-count":30,"publisher":"Allerton Press","issue":"6","license":[{"start":{"date-parts":[[2025,12,1]],"date-time":"2025-12-01T00:00:00Z","timestamp":1764547200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,12,1]],"date-time":"2025-12-01T00:00:00Z","timestamp":1764547200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Aut. Control Comp. Sci."],"published-print":{"date-parts":[[2025,12]]},"DOI":"10.3103\/s0146411625701354","type":"journal-article","created":{"date-parts":[[2026,2,16]],"date-time":"2026-02-16T14:13:18Z","timestamp":1771251198000},"page":"832-845","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["A Scene-Based Multimodal Video Similarity Detection Algorithm"],"prefix":"10.3103","volume":"59","author":[{"family":"Xue\u00a0Li","sequence":"first","affiliation":[]}],"member":"1627","published-online":{"date-parts":[[2026,2,16]]},"reference":[{"key":"7881_CR1","doi-asserted-by":"publisher","first-page":"7834","DOI":"10.3390\/app10217834","volume":"10","author":"H. Zhu","year":"2020","unstructured":"Zhu, H., Wei, H., Li, B., Yuan, X., and Kehtarnavaz, N., A review of video object detection: datasets, metrics and methods, Appl. Sci., 2020, vol. 10, no. 21, p. 7834. https:\/\/doi.org\/10.3390\/app10217834","journal-title":"Appl. Sci."},{"key":"7881_CR2","doi-asserted-by":"publisher","first-page":"59","DOI":"10.1109\/TCSVT.2002.808080","volume":"13","author":"S.-S. Cheung","year":"2003","unstructured":"Cheung, S.-S. and Zakhor, A., Efficient video similarity measurement with video signature, IEEE Trans. Circuits Syst. Video Technol., 2003, vol. 13, no. 1, pp. 59\u201374. https:\/\/doi.org\/10.1109\/TCSVT.2002.808080","journal-title":"IEEE Trans. Circuits Syst. Video Technol."},{"key":"7881_CR3","doi-asserted-by":"publisher","first-page":"103997","DOI":"10.1016\/j.cviu.2024.103997","volume":"243","author":"E. Pizzi","year":"2024","unstructured":"Pizzi, E., Kordopatis-Zilos, G., Patel, H., Postelnicu, G., Nagavara Ravindra, S., Gupta, A., Papadopoulos, S., Tolias, G., and Douze, M., The 2023 video similarity dataset and challenge, Comput. Vision Image Understanding, 2024, vol. 243, p. 103997. https:\/\/doi.org\/10.1016\/j.cviu.2024.103997","journal-title":"Comput. Vision Image Understanding"},{"key":"7881_CR4","doi-asserted-by":"publisher","unstructured":"Pizzi, E., Roy, S.D., Ravindra, S.N., Goyal, P., and Douze, M., A self-supervised descriptor for image copy detection, 2022 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), New Orleans, 2022, IEEE, 2022, pp. 14532\u201314542. https:\/\/doi.org\/10.1109\/cvpr52688.2022.01413","DOI":"10.1109\/cvpr52688.2022.01413"},{"key":"7881_CR5","doi-asserted-by":"publisher","first-page":"84","DOI":"10.1145\/3065386","volume":"60","author":"A. Krizhevsky","year":"2017","unstructured":"Krizhevsky, A., Sutskever, I., and Hinton, G.E., ImageNet classification with deep convolutional neural networks, Commun. ACM, 2017, vol. 60, no. 6, pp. 84\u201390. https:\/\/doi.org\/10.1145\/3065386","journal-title":"Commun. ACM"},{"key":"7881_CR6","doi-asserted-by":"publisher","unstructured":"Simonyan, K. and Zisserman, A., Very deep convolutional networks for large-scale image recognition, arXiv Preprint, 2014. https:\/\/doi.org\/10.48550\/arXiv.1409.1556","DOI":"10.48550\/arXiv.1409.1556"},{"key":"7881_CR7","doi-asserted-by":"publisher","unstructured":"Szegedy, Ch., Liu, W., Jia, Ya., Sermanet, P., Reed, S., Anguelov, D., Erhan, D., Vanhoucke, V., and Rabinovich, A., Going deeper with convolutions, 2015 IEEE Conference on Computer Vision and Pattern Recognition (CVPR), Boston, 2015, IEEE, 2015, pp. 1\u20139. https:\/\/doi.org\/10.1109\/cvpr.2015.7298594","DOI":"10.1109\/cvpr.2015.7298594"},{"key":"7881_CR8","doi-asserted-by":"publisher","first-page":"221","DOI":"10.1109\/TPAMI.2012.59","volume":"35","author":"Sh. Ji","year":"2013","unstructured":"Ji, Sh., Xu, W., Yang, M., and Yu, K., 3D convolutional neural networks for human action recognition, IEEE Trans. Pattern Anal. Mach. Intell., 2013, vol. 35, no. 1, pp. 221\u2013231. https:\/\/doi.org\/10.1109\/TPAMI.2012.59","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"7881_CR9","doi-asserted-by":"publisher","unstructured":"Tran, D., Bourdev, L., Fergus, R., Torresani, L., and Paluri, M., Learning spatiotemporal features with 3D convolutional networks, 2015 IEEE International Conference on Computer Vision (ICCV), Santiago, 2015, IEEE, 2015, pp. 4489\u20134497. https:\/\/doi.org\/10.1109\/iccv.2015.510","DOI":"10.1109\/iccv.2015.510"},{"key":"7881_CR10","doi-asserted-by":"publisher","unstructured":"Poullot, S., Tsukatani, Sh., Phuong Nguyen, A., J\u00e9gou, H., and Satoh, Sh., Temporal matching kernel with explicit feature maps, Proceedings of the 23rd ACM international conference on Multimedia, Brisbane, Australia, 2015, New York: Association for Computing Machinery, 2015, pp. 381\u2013390. https:\/\/doi.org\/10.1145\/2733373.2806228","DOI":"10.1145\/2733373.2806228"},{"key":"7881_CR11","doi-asserted-by":"publisher","unstructured":"Baraldi, L., Douze, M., Cucchiara, R., and Jegou, H., LAMV: Learning to align and match videos with kernelized temporal layers, 2018 IEEE\/CVF Conference on Computer Vision and Pattern Recognition, Salt Lake City, UT, 2018, IEEE, 2018, pp. 7804\u20137813. https:\/\/doi.org\/10.1109\/cvpr.2018.00814","DOI":"10.1109\/cvpr.2018.00814"},{"key":"7881_CR12","doi-asserted-by":"publisher","first-page":"116335","DOI":"10.1016\/j.eswa.2021.116335","volume":"192","author":"A. Almeida","year":"2022","unstructured":"Almeida, A., De Villiers, J.P., De Freitas, A., and Velayudan, M., The complementarity of a diverse range of deep learning features extracted from video content for video recommendation, Expert Syst. Appl., 2022, vol.\u00a0192, p. 116335. https:\/\/doi.org\/10.1016\/j.eswa.2021.116335","journal-title":"Expert Syst. Appl."},{"key":"7881_CR13","doi-asserted-by":"publisher","unstructured":"Feng, Ya., Ma, L., Liu, W., Zhang, T., and Luo, J., Video re-localization, Computer Vision\u2013ECCV 2018, Ferrari, V., Hebert, M., Sminchisescu, C., and Weiss, Y., Eds., Lecture Notes in Computer Science, vol. 11218, Cham: Springer, 2018, pp. 55\u201370. https:\/\/doi.org\/10.1007\/978-3-030-01264-9_4","DOI":"10.1007\/978-3-030-01264-9_4"},{"key":"7881_CR14","doi-asserted-by":"publisher","first-page":"477","DOI":"10.1108\/intr-07-2020-0388","volume":"32","author":"R. Chen","year":"2022","unstructured":"Chen, R., Multimodal cooperative learning for micro-video advertising click prediction, Internet Research, 2022, vol. 32, no. 2, pp. 477\u2013495. https:\/\/doi.org\/10.1108\/intr-07-2020-0388","journal-title":"Internet Research"},{"key":"7881_CR15","doi-asserted-by":"publisher","unstructured":"Wang, L., Huang, B., Zhao, Zh., Tong, Zh., He, Yi., Wang, Yi., Wang, Ya., and Qiao, Yu., VideoMAE V2: Scaling video masked autoencoders with dual masking, 2023 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), Vancouver, 2023, IEEE, 2023, pp. 14549\u201314560. https:\/\/doi.org\/10.1109\/cvpr52729.2023.01398","DOI":"10.1109\/cvpr52729.2023.01398"},{"key":"7881_CR16","doi-asserted-by":"publisher","unstructured":"Yokoo, S., Contrastive learning with large memory bank and negative embedding subtraction for accurate copy detection, arXiv Preprint, 2021. https:\/\/doi.org\/10.48550\/arXiv.2112.04323","DOI":"10.48550\/arXiv.2112.04323"},{"key":"7881_CR17","doi-asserted-by":"publisher","unstructured":"Deng, R., Wu, Q., and Li, Yu., 3D-CSL: Self-supervised 3D context similarity learning for near-duplicate video retrieval, 2023 IEEE International Conference on Image Processing (ICIP), Kuala Lumpur, 2023, IEEE, 2023, pp. 2880\u20132884. https:\/\/doi.org\/10.1109\/icip49359.2023.10222915","DOI":"10.1109\/icip49359.2023.10222915"},{"key":"7881_CR18","doi-asserted-by":"publisher","unstructured":"Carreira, J. and Zisserman, A., Quo vadis, action recognition? A new model and the kinetics dataset, 2017 IEEE Conference on Computer Vision and Pattern Recognition (CVPR), Honolulu, HI, 2017: IEEE, 2017, pp. 6299\u20136308. https:\/\/doi.org\/10.1109\/cvpr.2017.502","DOI":"10.1109\/cvpr.2017.502"},{"key":"7881_CR19","doi-asserted-by":"publisher","unstructured":"Kordopatis-Zilos, G., Papadopoulos, S., Patras, I., and Kompatsiaris, Yi., ViSiL: Fine-grained spatio-temporal video similarity learning, 2019 IEEE\/CVF International Conference on Computer Vision (ICCV), Seoul, 2019, IEEE, 2019, pp. 6351\u20136360. https:\/\/doi.org\/10.1109\/iccv.2019.00645","DOI":"10.1109\/iccv.2019.00645"},{"key":"7881_CR20","doi-asserted-by":"publisher","first-page":"126938","DOI":"10.1016\/j.neucom.2023.126938","volume":"563","author":"K. Liu","year":"2024","unstructured":"Liu, K., Wang, J., and Zhang, X., Debiased momentum contrastive learning for multimodal video similarity measures, Neurocomputing, 2024, vol. 563, p. 126938. https:\/\/doi.org\/10.1016\/j.neucom.2023.126938","journal-title":"Neurocomputing"},{"key":"7881_CR21","doi-asserted-by":"publisher","unstructured":"Jiang, Ch., Huang, K., He, S., Yang, X., Zhang, W., Zhang, X., Cheng, Yu., Yang, L., Wang, Q., Xu, F., Pan, T., and Chu, W., Learning segment similarity and alignment in large-scale content based video retrieval, Proceedings of the 29th ACM International Conference on Multimedia, New York: Association for Computing Machinery, 2021, pp. 1618\u20131626. https:\/\/doi.org\/10.1145\/3474085.3475301","DOI":"10.1145\/3474085.3475301"},{"key":"7881_CR22","doi-asserted-by":"publisher","unstructured":"Ma, Z., Lou, M., and Ouyang, X., Top1 solution of QQ browser 2021 AI algorithm competition track 1: Multimodal video similarity, arXiv Preprint, 2021. https:\/\/doi.org\/10.48550\/arXiv.2111.01677","DOI":"10.48550\/arXiv.2111.01677"},{"key":"7881_CR23","doi-asserted-by":"publisher","unstructured":"Dzabraev, M., Kalashnikov, M., Komkov, S., and Petiushko, A., MDMMT: Multidomain multimodal transformer for video retrieval, 2021 IEEE\/CVF Conference on Computer Vision and Pattern Recognition Workshops (CVPRW), Nashville, TN, 2021, IEEE, 2021, pp. 3354\u20133363. https:\/\/doi.org\/10.1109\/cvprw53098.2021.00374","DOI":"10.1109\/cvprw53098.2021.00374"},{"key":"7881_CR24","volume-title":"Parametric correspondence and chamfer matching: Two new techniques for image matching, Proceedings: Image Understanding Workshop.","author":"H.G. Barrow","year":"1977","unstructured":"Barrow, H.G., Tenenbaum, J.M., Bolles, R.C., et al., Parametric correspondence and chamfer matching: Two new techniques for image matching, Proceedings: Image Understanding Workshop. \n               Science Applications, 1977, pp. 21\u201327."},{"key":"7881_CR25","doi-asserted-by":"publisher","unstructured":"Tsai, S.-H., Peng, Y.-L., and Lee, W.-P., Multi-turn query with similarity feedback facilitates multimodal video clip retrieval, 2022 IEEE Eighth International Conference on Multimedia Big Data (BigMM), Naples, Italy, 2022, IEEE, 2022, pp. 79\u201386. https:\/\/doi.org\/10.1109\/bigmm55396.2022.00019","DOI":"10.1109\/bigmm55396.2022.00019"},{"key":"7881_CR26","doi-asserted-by":"publisher","unstructured":"Wu, X., Hauptmann, A.G., and Ngo, C.-W., Practical elimination of near-duplicates from web video search, Proceedings of the 15th ACM International Conference on Multimedia, Augsburg, Germany, 2007, New York: Association for Computing Machinery, 2007, pp. 218\u2013227. https:\/\/doi.org\/10.1145\/1291233.1291280","DOI":"10.1145\/1291233.1291280"},{"key":"7881_CR27","doi-asserted-by":"publisher","unstructured":"Jiang, Y.-G., Jiang, Yu., and Wang, J., VCDB: A large-scale database for partial copy detection in videos, Computer Vision\u2013ECCV 2014, Fleet, D., Pajdla, T., Schiele, B., and Tuytelaars, T., Eds., Lecture Notes in Computer Science, vol. 8692, Cham: Springer, 2014, pp. 357\u2013371. https:\/\/doi.org\/10.1007\/978-3-319-10593-2_24","DOI":"10.1007\/978-3-319-10593-2_24"},{"key":"7881_CR28","doi-asserted-by":"publisher","unstructured":"Luo, Zh., Zhang, L., Lai, J., Wang, X., and Tang, Ch., An efficient partial video copy detection for a large-scale video database, 2023 9th International Conference on Big Data Computing and Communications (BigCom), Hainan, China, 2023, IEEE, 2023, pp. 117\u2013125. https:\/\/doi.org\/10.1109\/bigcom61073.2023.00024","DOI":"10.1109\/bigcom61073.2023.00024"},{"key":"7881_CR29","doi-asserted-by":"publisher","unstructured":"Wang, X., Hua, Ya., Kodirov, E., Hu, G., Garnier, R., and Robertson, N.M., Ranked list loss for deep metric learning, 2019 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), Long Beach, CA, 2019, IEEE, 2019, pp. 5207\u20135216. https:\/\/doi.org\/10.1109\/cvpr.2019.00535","DOI":"10.1109\/cvpr.2019.00535"},{"key":"7881_CR30","doi-asserted-by":"publisher","first-page":"115","DOI":"10.3390\/info14020115","volume":"14","author":"H. You","year":"2023","unstructured":"You, H., Lu, Yu., and Tang, H., Improved feature extraction and similarity algorithm for video object detection, Information, 2023, vol. 14, no. 2, p. 115. https:\/\/doi.org\/10.3390\/info14020115","journal-title":"Information"}],"container-title":["Automatic Control and Computer Sciences"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.3103\/S0146411625701354.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.3103\/S0146411625701354","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.3103\/S0146411625701354.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,3,15]],"date-time":"2026-03-15T22:04:30Z","timestamp":1773612270000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.3103\/S0146411625701354"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,12]]},"references-count":30,"journal-issue":{"issue":"6","published-print":{"date-parts":[[2025,12]]}},"alternative-id":["7881"],"URL":"https:\/\/doi.org\/10.3103\/s0146411625701354","relation":{},"ISSN":["0146-4116","1558-108X"],"issn-type":[{"value":"0146-4116","type":"print"},{"value":"1558-108X","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,12]]},"assertion":[{"value":"20 September 2024","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"26 March 2025","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"16 April 2025","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"16 February 2026","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"As author of this work, I declare that I have no conflicts of interest.","order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"CONFLICT OF INTEREST"}}]}}