{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,5]],"date-time":"2026-06-05T05:35:12Z","timestamp":1780637712062,"version":"3.54.1"},"reference-count":31,"publisher":"Springer Science and Business Media LLC","issue":"4","license":[{"start":{"date-parts":[[2020,10,19]],"date-time":"2020-10-19T00:00:00Z","timestamp":1603065600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2020,10,19]],"date-time":"2020-10-19T00:00:00Z","timestamp":1603065600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["SIViP"],"published-print":{"date-parts":[[2021,6]]},"DOI":"10.1007\/s11760-020-01796-z","type":"journal-article","created":{"date-parts":[[2020,10,19]],"date-time":"2020-10-19T07:03:06Z","timestamp":1603090986000},"page":"779-787","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":18,"title":["A new 3D convolutional neural network (3D-CNN) framework for multimedia event detection"],"prefix":"10.1007","volume":"15","author":[{"given":"Kaavya","family":"Kanagaraj","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"G. G. Lakshmi","family":"Priya","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"297","published-online":{"date-parts":[[2020,10,19]]},"reference":[{"issue":"2","key":"1796_CR1","doi-asserted-by":"publisher","first-page":"255","DOI":"10.1007\/s11760-017-1153-0","volume":"12","author":"Liu Kangwei","year":"2018","unstructured":"Kangwei, Liu, Jianhua, Wan, Zhongzhi, Han: Abnormal event detection and localization using level set based on hybrid features. Signal Image Video Process. 12(2), 255\u2013261 (2018)","journal-title":"Signal Image Video Process."},{"issue":"11","key":"1796_CR2","doi-asserted-by":"publisher","first-page":"117203","DOI":"10.1117\/1.3509270","volume":"49","author":"E Saykol","year":"2010","unstructured":"Saykol, E., et al.: Keyframe labeling technique for surveillance event classification. Opt. Eng. 49(11), 117203 (2010)","journal-title":"Opt. Eng."},{"key":"1796_CR3","doi-asserted-by":"publisher","first-page":"10587","DOI":"10.1007\/s10586-017-1135-6","volume":"22","author":"D Srikanth","year":"2019","unstructured":"Srikanth, D., Sakthivel, S.: Vantage Point Latent Semantic Indexing for multimedia web document search. Clust. Comput. 22, 10587\u201310594 (2019). https:\/\/doi.org\/10.1007\/s10586-017-1135-6","journal-title":"Clust. Comput."},{"issue":"3","key":"1796_CR4","doi-asserted-by":"publisher","first-page":"62","DOI":"10.1109\/MMUL.2010.5692184","volume":"17","author":"M Ba\u015ftan","year":"2010","unstructured":"Ba\u015ftan, M., et al.: Bilvideo-7: an MPEG-7-compatible video indexing and retrieval system. IEEE MultiMed. 17(3), 62\u201373 (2010)","journal-title":"IEEE MultiMed."},{"issue":"6","key":"1796_CR5","doi-asserted-by":"publisher","first-page":"345","DOI":"10.1007\/s00530-010-0182-0","volume":"16","author":"PK Atrey","year":"2010","unstructured":"Atrey, P.K., et al.: Multimodal fusion for multimedia analysis: a survey. Multimed. Syst. 16(6), 345\u2013379 (2010)","journal-title":"Multimed. Syst."},{"issue":"11","key":"1796_CR6","doi-asserted-by":"publisher","first-page":"2278","DOI":"10.1109\/5.726791","volume":"86","author":"Y LeCun","year":"1998","unstructured":"LeCun, Y., et al.: Gradient-based learning applied to document recognition. Proc. IEEE 86(11), 2278\u20132324 (1998)","journal-title":"Proc. IEEE"},{"key":"1796_CR7","doi-asserted-by":"crossref","unstructured":"Deng, J. et al.: Imagenet: A large-scale hierarchical image database. 2009 IEEE conference on computer vision and pattern recognition. IEEE, pp. 248\u2013255 (2009)","DOI":"10.1109\/CVPR.2009.5206848"},{"issue":"1","key":"1796_CR8","doi-asserted-by":"publisher","first-page":"221","DOI":"10.1109\/TPAMI.2012.59","volume":"35","author":"S Ji","year":"2012","unstructured":"Ji, S., et al.: 3D convolutional neural networks for human action recognition. IEEE Trans. Pattern Anal. Mach. Intell. 35(1), 221\u2013231 (2012)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"1796_CR9","unstructured":"Krizhevsky, A., Ilya S., Geoffrey E. H.: Imagenet classification with deep convolutional neural networks. Adv. Neural Inf. Process. Syst. 1097\u20131105 (2012)"},{"key":"1796_CR10","doi-asserted-by":"crossref","unstructured":"Girshick, R., et al.: Rich feature hierarchies for accurate object detection and semantic segmentation. In: Proceedings of the IEEE conference on computer vision and pattern recognition. pp. 580\u2013587 (2014)","DOI":"10.1109\/CVPR.2014.81"},{"issue":"8","key":"1796_CR11","doi-asserted-by":"publisher","first-page":"1915","DOI":"10.1109\/TPAMI.2012.231","volume":"35","author":"C Farabet","year":"2012","unstructured":"Farabet, C., et al.: Learning hierarchical features for scene labeling. IEEE Trans. Pattern Anal. Mach. Intell. 35(8), 1915\u20131929 (2012)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"1796_CR12","doi-asserted-by":"crossref","unstructured":"Deng, J., et al.: Imagenet: a large-scale hierarchical image database. In: 2009 IEEE Conference on Computer Vision and Pattern Recognition. IEEE, pp. 248\u2013255 (2009)","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"1796_CR13","doi-asserted-by":"crossref","unstructured":"Karpathy, A., et al.: Large-scale video classification with convolutional neural networks. In: Proceedings of the IEEE conference on Computer Vision and Pattern Recognition. pp. 1725\u20131732 (2014)","DOI":"10.1109\/CVPR.2014.223"},{"key":"1796_CR14","unstructured":"Yu, K., Wei X., Yihong G.: Deep learning with kernel regularization for visual recognition. In  Advances in Neural Information Processing Systems, pp.1889\u20131896 (2009)"},{"key":"1796_CR15","doi-asserted-by":"crossref","unstructured":"Mobahi, H., Ronan C., Jason W.: Deep learning from temporal coherence in video. In: Proceedings of the 26th Annual International Conference on Machine Learning. pp. 737-744. (2009)","DOI":"10.1145\/1553374.1553469"},{"key":"1796_CR16","doi-asserted-by":"crossref","unstructured":"Szegedy, C., et al.: Going deeper with convolutions. In: Proceedings of the IEEE conference on computer vision and pattern recognition. pp. 1\u20139 (2015)","DOI":"10.1109\/CVPR.2015.7298594"},{"key":"1796_CR17","unstructured":"Simonyan, K., Andrew Z.: Very deep convolutional networks for large-scale image recognition.\u00a0arXiv preprint arXiv:1409.1556(2014)"},{"issue":"9","key":"1796_CR18","doi-asserted-by":"publisher","first-page":"1904","DOI":"10.1109\/TPAMI.2015.2389824","volume":"37","author":"K He","year":"2015","unstructured":"He, K., et al.: Spatial pyramid pooling in deep convolutional networks for visual recognition. IEEE Trans. Pattern Anal. Mach. Intell. 37(9), 1904\u20131916 (2015)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"1796_CR19","unstructured":"Yue-Hei Ng, Joe, et al.: Beyond short snippets: Deep networks for video classification. In: Proceedings of the IEEE conference on computer vision and pattern recognition. pp. 4694\u20134702 (2015)"},{"issue":"3","key":"1796_CR20","doi-asserted-by":"publisher","first-page":"325","DOI":"10.1007\/s00530-015-0485-2","volume":"23","author":"W Nie","year":"2017","unstructured":"Nie, W., et al.: Convolutional deep learning for 3D object retrieval. Multimed. Syst. 23(3), 325\u2013332 (2017)","journal-title":"Multimed. Syst."},{"issue":"5","key":"1796_CR21","doi-asserted-by":"publisher","first-page":"1088","DOI":"10.1109\/TMM.2017.2763322","volume":"20","author":"H Song","year":"2018","unstructured":"Song, H., et al.: Extracting key segments of videos for event detection by learning from web sources. IEEE Trans. Multimed. 20(5), 1088\u20131100 (2018)","journal-title":"IEEE Trans. Multimed."},{"key":"1796_CR22","unstructured":"Socher, R., et al.: Convolutional-recursive deep learning for 3d object classification. Adv. Neural Inf. Process. Syst. 656\u2013664 (2012)"},{"key":"1796_CR23","doi-asserted-by":"crossref","unstructured":"Ye, H., et al.: Evaluating two-stream CNN for video classification. In: Proceedings of the 5th ACM on International Conference on Multimedia Retrieval. pp. 435\u201344 (2015)","DOI":"10.1145\/2671188.2749406"},{"key":"1796_CR24","unstructured":"Hinton, Geoffrey, Nitish Srivastava, and Kevin Swersky. \u201cLecture 6a overview of mini\u2013batch gradient descent.\u201d\u00a0Coursera Lecture slides https:\/\/class . coursera. org\/neuralnets-2012-001\/lecture,[Online\u00a0(2012)"},{"key":"1796_CR25","doi-asserted-by":"crossref","unstructured":"Karpathy, A., et al.: Large-scale video classification with convolutional neural networks. In: Proceedings of the IEEE conference on Computer Vision and Pattern Recognition (2014)","DOI":"10.1109\/CVPR.2014.223"},{"issue":"6","key":"1796_CR26","doi-asserted-by":"publisher","first-page":"1510","DOI":"10.1109\/TPAMI.2017.2712608","volume":"40","author":"G\u00fcl Varol","year":"2017","unstructured":"Varol, G\u00fcl, Laptev, Ivan, Schmid, Cordelia: Long-term temporal convolutions for action recognition. IEEE Trans. Pattern Anal. Mach. Intell. 40(6), 1510\u20131517 (2017)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"issue":"1","key":"1796_CR27","doi-asserted-by":"publisher","first-page":"157","DOI":"10.1109\/TMM.2018.2844101","volume":"21","author":"Mohammad Soltanian","year":"2018","unstructured":"Soltanian, Mohammad, Ghaemmaghami, Shahrokh: Hierarchical Concept Score Postprocessing and Concept-Wise Normalization in CNN-Based Video Event Recognition. IEEE Trans. Multimed. 21(1), 157\u2013172 (2018)","journal-title":"IEEE Trans. Multimed."},{"key":"1796_CR28","doi-asserted-by":"crossref","unstructured":"Wang, H., et al.: Evaluation of local spatio-temporal features for action recognition. In: British Machine Vision Conference, London, United Kingdom (2009)","DOI":"10.5244\/C.23.124"},{"key":"1796_CR29","doi-asserted-by":"publisher","first-page":"378","DOI":"10.1016\/j.neucom.2016.06.002","volume":"208","author":"Zhicheng Zhao","year":"2016","unstructured":"Zhao, Zhicheng, Song, Yifan, Fei, Su: Specific video identification via joint learning of latent semantic concept, scene and temporal structure. Neurocomputing 208, 378\u2013386 (2016)","journal-title":"Neurocomputing"},{"issue":"2","key":"1796_CR30","doi-asserted-by":"publisher","first-page":"352","DOI":"10.1109\/TPAMI.2017.2670560","volume":"40","author":"Y-G Jiang","year":"2017","unstructured":"Jiang, Y.-G., et al.: Exploiting feature and class relationships in video categorization with regularized deep neural networks. IEEE Trans. Pattern Anal. Mach. Intell. 40(2), 352\u2013364 (2017)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"issue":"3","key":"1796_CR31","doi-asserted-by":"publisher","first-page":"3209","DOI":"10.1007\/s11042-017-5058-2","volume":"77","author":"Zhicheng Zhao","year":"2018","unstructured":"Zhao, Zhicheng, Xiang, Rui, Fei, Su: Complex event detection via attention-based video representation and classification. Multimed. Tools Appl. 77(3), 3209\u20133227 (2018)","journal-title":"Multimed. Tools Appl."}],"container-title":["Signal, Image and Video Processing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11760-020-01796-z.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11760-020-01796-z\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11760-020-01796-z.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2021,10,25]],"date-time":"2021-10-25T15:37:42Z","timestamp":1635176262000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11760-020-01796-z"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020,10,19]]},"references-count":31,"journal-issue":{"issue":"4","published-print":{"date-parts":[[2021,6]]}},"alternative-id":["1796"],"URL":"https:\/\/doi.org\/10.1007\/s11760-020-01796-z","relation":{},"ISSN":["1863-1703","1863-1711"],"issn-type":[{"value":"1863-1703","type":"print"},{"value":"1863-1711","type":"electronic"}],"subject":[],"published":{"date-parts":[[2020,10,19]]},"assertion":[{"value":"2 August 2019","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"29 September 2020","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"1 October 2020","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"19 October 2020","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}