{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,21]],"date-time":"2025-12-21T06:26:51Z","timestamp":1766298411891,"version":"3.37.3"},"reference-count":53,"publisher":"Springer Science and Business Media LLC","issue":"12","license":[{"start":{"date-parts":[[2023,7,24]],"date-time":"2023-07-24T00:00:00Z","timestamp":1690156800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,7,24]],"date-time":"2023-07-24T00:00:00Z","timestamp":1690156800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"name":"National Key Research and Development Program of China","award":["2019YFB1405803"],"award-info":[{"award-number":["2019YFB1405803"]}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Multimed Tools Appl"],"DOI":"10.1007\/s11042-023-16269-x","type":"journal-article","created":{"date-parts":[[2023,7,24]],"date-time":"2023-07-24T05:01:46Z","timestamp":1690174906000},"page":"36899-36919","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["SCTF: an efficient neural network based on local spatial compression and full temporal fusion for video violence detection"],"prefix":"10.1007","volume":"83","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-9870-8925","authenticated-orcid":false,"given":"Tan","family":"Zhenhua","sequence":"first","affiliation":[]},{"given":"Xia","family":"Zhenche","sequence":"additional","affiliation":[]},{"given":"Wang","family":"Pengfei","sequence":"additional","affiliation":[]},{"given":"Wu","family":"Danke","sequence":"additional","affiliation":[]},{"given":"Li","family":"li","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2023,7,24]]},"reference":[{"key":"16269_CR1","doi-asserted-by":"publisher","DOI":"10.1016\/j.imavis.2020.104090","volume":"106","author":"F Afza","year":"2021","unstructured":"Afza F, Khan MA, Sharif M, Kadry S, Manogaran G, Saba T, Ashraf I, Dama\u0161evi\u010dius R (2021) A framework of human action recognition using length control features fusion and weighted entropy-variances based feature selection. Image and Vision Computing 106:104090","journal-title":"Image and Vision Computing"},{"key":"16269_CR2","doi-asserted-by":"publisher","first-page":"119391","DOI":"10.1016\/j.eswa.2022.119391","volume":"215","author":"A Amelio","year":"2023","unstructured":"Amelio A, Bonifazi G, Cauteruccio F, Corradini E, Marchetti M, Ursino D, Virgili L (2023) Representation and compression of residual neural networks through a multilayer network based approach. Expert Systems with Applications 215:119391","journal-title":"Expert Systems with Applications"},{"key":"16269_CR3","doi-asserted-by":"crossref","unstructured":"Bermejo\u00a0Nievas E, Deniz\u00a0Suarez O, Bueno\u00a0Garc\u00eda G, Sukthankar R (2011) Violence detection in video using computer vision techniques. In: International Conference on Computer Analysis of Images and Patterns, pp. 332\u2013339. Springer","DOI":"10.1007\/978-3-642-23678-5_39"},{"key":"16269_CR4","doi-asserted-by":"crossref","unstructured":"Bilen H, Fernando B, Gavves E, Vedaldi A, Gould S (2016) Dynamic image networks for action recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 3034\u20133042","DOI":"10.1109\/CVPR.2016.331"},{"key":"16269_CR5","doi-asserted-by":"crossref","unstructured":"Carreira J, Zisserman A (2017) Quo vadis, action recognition? a new model and the kinetics dataset. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 6299\u20136308","DOI":"10.1109\/CVPR.2017.502"},{"key":"16269_CR6","doi-asserted-by":"crossref","unstructured":"Chollet F (2017) Xception: Deep learning with depthwise separable convolutions. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 1251\u20131258","DOI":"10.1109\/CVPR.2017.195"},{"key":"16269_CR7","doi-asserted-by":"crossref","unstructured":"Das S, Sharma S, Dai R, Bremond F, Thonnat M (2020) Vpn: Learning video-pose embedding for activities of daily living. In: European Conference on Computer Vision, pp. 72\u201390. Springer","DOI":"10.1007\/978-3-030-58545-7_5"},{"key":"16269_CR8","doi-asserted-by":"crossref","unstructured":"Diba A, Sharma V, Van\u00a0Gool L (2017) Deep temporal linear encoding networks. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 2329\u20132338","DOI":"10.1109\/CVPR.2017.168"},{"key":"16269_CR9","doi-asserted-by":"crossref","unstructured":"Feichtenhofer C, Fan H, Malik J, He K (2019) Slowfast networks for video recognition. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 6202\u20136211","DOI":"10.1109\/ICCV.2019.00630"},{"key":"16269_CR10","doi-asserted-by":"crossref","unstructured":"Feichtenhofer C, Pinz A, Zisserman A (2016) Convolutional two-stream network fusion for video action recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 1933\u20131941","DOI":"10.1109\/CVPR.2016.213"},{"key":"16269_CR11","doi-asserted-by":"crossref","unstructured":"Girdhar R, Carreira J, Doersch C, Zisserman A (2019) Video action transformer network. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 244\u2013253","DOI":"10.1109\/CVPR.2019.00033"},{"key":"16269_CR12","doi-asserted-by":"publisher","first-page":"85958","DOI":"10.1109\/ACCESS.2020.2992617","volume":"8","author":"C Gu","year":"2020","unstructured":"Gu C, Wu X, Wang S (2020) Violent video detection based on semantic correspondence. IEEE Access 8:85958\u201385967","journal-title":"IEEE Access"},{"key":"16269_CR13","doi-asserted-by":"publisher","first-page":"14","DOI":"10.1016\/j.neucom.2020.11.074","volume":"458","author":"J Guo","year":"2021","unstructured":"Guo J, Shi M, Zhu X, Huang W, He Y, Zhang W, Tang Z (2021) Improving human action recognition by jointly exploiting video and wifi clues. Neurocomputing 458:14\u201323","journal-title":"Neurocomputing"},{"issue":"2","key":"16269_CR14","doi-asserted-by":"publisher","first-page":"129","DOI":"10.1016\/j.acha.2010.04.005","volume":"30","author":"DK Hammond","year":"2011","unstructured":"Hammond DK, Vandergheynst P, Gribonval R (2011) Wavelets on graphs via spectral graph theory. Applied and Computational Harmonic Analysis 30(2):129\u2013150","journal-title":"Applied and Computational Harmonic Analysis"},{"key":"16269_CR15","doi-asserted-by":"crossref","unstructured":"Hassner T, Itcher Y, Kliper-Gross O (2012) Violent flows: Real-time detection of violent crowd behavior. In: 2012 IEEE Computer Society Conference on Computer Vision and Pattern Recognition Workshops, pp. 1\u20136. IEEE","DOI":"10.1109\/CVPRW.2012.6239348"},{"key":"16269_CR16","doi-asserted-by":"publisher","first-page":"319","DOI":"10.1016\/j.neucom.2020.05.118","volume":"444","author":"J-Y He","year":"2021","unstructured":"He J-Y, Wu X, Cheng Z-Q, Yuan Z, Jiang Y-G (2021) Db-lstm: Densely-connected bi-directional lstm for human action recognition. Neurocomputing 444:319\u2013331","journal-title":"Neurocomputing"},{"issue":"7","key":"16269_CR17","doi-asserted-by":"publisher","first-page":"4584","DOI":"10.1109\/TII.2020.3018487","volume":"17","author":"S Jiang","year":"2020","unstructured":"Jiang S, Qi Y, Zhang H, Bai Z, Lu X, Wang P (2020) D3d: Dual 3-d convolutional network for real-time action recognition. IEEE Transactions on Industrial Informatics 17(7):4584\u20134593","journal-title":"IEEE Transactions on Industrial Informatics"},{"key":"16269_CR18","doi-asserted-by":"crossref","unstructured":"Jiang B, Xu F, Tu W, Yang C (2019) Channel-wise attention in 3d convolutional networks for violence detection. In: 2019 International Conference on Intelligent Computing and Its Emerging Applications (ICEA), pp. 59\u201364. IEEE","DOI":"10.1109\/ICEA.2019.8858306"},{"key":"16269_CR19","doi-asserted-by":"crossref","unstructured":"Jiang B, Xu F, Tu W, Yang C (2019) Channel-wise attention in 3d convolutional networks for violence detection. In: 2019 International Conference on Intelligent Computing and Its Emerging Applications (ICEA), pp. 59\u201364. IEEE","DOI":"10.1109\/ICEA.2019.8858306"},{"issue":"15","key":"16269_CR20","doi-asserted-by":"publisher","first-page":"1047","DOI":"10.1049\/el.2017.0970","volume":"53","author":"A Ke\u00e7eli","year":"2017","unstructured":"Ke\u00e7eli A, Kaya A (2017) Violent activity detection with transfer learning method. Electronics Letters 53(15):1047\u20131048","journal-title":"Electronics Letters"},{"key":"16269_CR21","doi-asserted-by":"crossref","unstructured":"Klaser A, Marsza\u0142ek M, Schmid C (2008) A spatio-temporal descriptor based on 3d-gradients. In: BMVC 2008-19th British Machine Vision Conference, pp. 275\u20131. British Machine Vision Association","DOI":"10.5244\/C.22.99"},{"key":"16269_CR22","unstructured":"Kolesnikov A, Dosovitskiy A, Weissenborn D, Heigold G, Uszkoreit J, Beyer L, Minderer M, Dehghani M, Houlsby N, Gelly S, et al (2021) An image is worth 16x16 words: Transformers for image recognition at scale"},{"key":"16269_CR23","doi-asserted-by":"crossref","unstructured":"Kuehne H, Jhuang H, Garrote E, Poggio T, Serre T (2011) Hmdb: a large video database for human motion recognition. In: 2011 International Conference on Computer Vision, pp. 2556\u20132563. IEEE","DOI":"10.1109\/ICCV.2011.6126543"},{"key":"16269_CR24","doi-asserted-by":"publisher","first-page":"383","DOI":"10.1016\/j.neucom.2020.07.148","volume":"453","author":"C Li","year":"2021","unstructured":"Li C, Zhang J, Yao J (2021) Streamer action recognition in live video with spatial-temporal attention and deep dictionary learning. Neurocomputing 453:383\u2013392","journal-title":"Neurocomputing"},{"issue":"5","key":"16269_CR25","doi-asserted-by":"publisher","first-page":"1596","DOI":"10.1007\/s11263-021-01436-0","volume":"129","author":"R Liu","year":"2021","unstructured":"Liu R, Shen J, Wang H, Chen C, Cheung S-C, Asari VK (2021) Enhanced 3d human pose estimation from videos by using attention-based neural network with dilated convolutions. International Journal of Computer Vision 129(5):1596\u20131615","journal-title":"International Journal of Computer Vision"},{"key":"16269_CR26","doi-asserted-by":"crossref","unstructured":"Li C, Xie C, Zhang B, Han J, Zhen X, Chen J (2021) Memory attention networks for skeleton-based action recognition. IEEE Transactions on Neural Networks and Learning Systems","DOI":"10.1109\/TNNLS.2021.3061115"},{"key":"16269_CR27","unstructured":"Misra D (2019) Mish: A self regularized non-monotonic activation function. arXiv preprint arXiv:1908.08681"},{"issue":"12","key":"16269_CR28","doi-asserted-by":"publisher","first-page":"18365","DOI":"10.1007\/s11042-021-10682-w","volume":"80","author":"AJ Naik","year":"2021","unstructured":"Naik AJ, Gopalakrishna M (2021) Deep-violence: individual person violent activity detection in video. Multimedia Tools and Applications 80(12):18365\u201318380","journal-title":"Multimedia Tools and Applications"},{"key":"16269_CR29","doi-asserted-by":"crossref","unstructured":"Nam J, Alghoniemy M, Tewfik AH (1998) Audio-visual content-based violent scene characterization. In: Proceedings 1998 International Conference on Image Processing. ICIP98 (Cat. No. 98CB36269), vol. 1, pp. 353\u2013357. IEEE","DOI":"10.1109\/ICIP.1998.723496"},{"key":"16269_CR30","doi-asserted-by":"crossref","unstructured":"Ranasinghe K, Naseer M, Khan S, Khan FS, Ryoo M (2021) Self-supervised video transformer. arXiv preprint arXiv:2112.01514","DOI":"10.1109\/CVPR52688.2022.00289"},{"key":"16269_CR31","doi-asserted-by":"crossref","unstructured":"Roman DGC, Ch\u00e1vez GC (2020) Violence detection and localization in surveillance video. In: 2020 33rd SIBGRAPI Conference on Graphics, Patterns and Images (SIBGRAPI), pp. 248\u2013255. IEEE","DOI":"10.1109\/SIBGRAPI51738.2020.00041"},{"key":"16269_CR32","doi-asserted-by":"crossref","unstructured":"Scovanner P, Ali S, Shah M (2007) A 3-dimensional sift descriptor and its application to action recognition. In: Proceedings of the 15th ACM International Conference on Multimedia, pp. 357\u2013360","DOI":"10.1145\/1291233.1291311"},{"key":"16269_CR33","unstructured":"Simonyan K, Zisserman A (2014) Two-stream convolutional networks for action recognition in videos. Adv Neural Inf Proces Syst 27"},{"key":"16269_CR34","unstructured":"Simonyan K, Zisserman A (2014) Very deep convolutional networks for large-scale image recognition. arXiv preprint arXiv:1409.1556"},{"key":"16269_CR35","doi-asserted-by":"crossref","unstructured":"Soliman MM, Kamal MH, Nashed MAEM, Mostafa YM, Chawky BS, Khattab D (2019) Violence recognition from videos using deep learning techniques. In: 2019 Ninth International Conference on Intelligent Computing and Information Systems (ICICIS), pp. 80\u201385. IEEE","DOI":"10.1109\/ICICIS46948.2019.9014714"},{"key":"16269_CR36","doi-asserted-by":"publisher","first-page":"39172","DOI":"10.1109\/ACCESS.2019.2906275","volume":"7","author":"W Song","year":"2019","unstructured":"Song W, Zhang D, Zhao X, Yu J, Zheng R, Wang A (2019) A novel violent video detection scheme based on modified 3d convolutional neural networks. IEEE Access 7:39172\u201339179","journal-title":"IEEE Access"},{"key":"16269_CR37","unstructured":"Soomro K, Zamir AR, Shah M (2012) Ucf101: A dataset of 101 human actions classes from videos in the wild. arXiv preprint arXiv:1212.0402"},{"key":"16269_CR38","doi-asserted-by":"crossref","unstructured":"Sudhakaran S, Lanz O (2017) Learning to detect violent videos using convolutional long short-term memory. In: 2017 14th IEEE International Conference on Advanced Video and Signal Based Surveillance (AVSS), pp. 1\u20136. IEEE","DOI":"10.1109\/AVSS.2017.8078468"},{"key":"16269_CR39","doi-asserted-by":"crossref","unstructured":"Sun C, Myers A, Vondrick C, Murphy K, Schmid C (2019) Videobert: A joint model for video and language representation learning. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 7464\u20137473","DOI":"10.1109\/ICCV.2019.00756"},{"key":"16269_CR40","doi-asserted-by":"crossref","unstructured":"Szegedy C, Ioffe S, Vanhoucke V, Alemi AA (2017) Inception-v4, inception-resnet and the impact of residual connections on learning. In: Thirty-first AAAI Conference on Artificial Intelligence","DOI":"10.1609\/aaai.v31i1.11231"},{"key":"16269_CR41","doi-asserted-by":"crossref","unstructured":"Szegedy C, Vanhoucke V, Ioffe S, Shlens J, Wojna Z (2016) Rethinking the inception architecture for computer vision. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 2818\u20132826","DOI":"10.1109\/CVPR.2016.308"},{"key":"16269_CR42","doi-asserted-by":"crossref","unstructured":"Tran D, Bourdev L, Fergus R, Torresani L, Paluri M (2015) Learning spatiotemporal features with 3d convolutional networks. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 4489\u20134497","DOI":"10.1109\/ICCV.2015.510"},{"key":"16269_CR43","doi-asserted-by":"crossref","unstructured":"Traor\u00e9 A, Akhloufi MA (2020) Violence detection in videos using deep recurrent and convolutional neural networks. In: 2020 IEEE International Conference on Systems, Man, and Cybernetics (SMC), pp. 154\u2013159. IEEE","DOI":"10.1109\/SMC42975.2020.9282971"},{"key":"16269_CR44","doi-asserted-by":"publisher","first-page":"321","DOI":"10.1016\/j.neucom.2019.12.151","volume":"435","author":"A Ullah","year":"2021","unstructured":"Ullah A, Muhammad K, Hussain T, Baik SW (2021) Conflux lstms network: A novel approach for multi-view action recognition. Neurocomputing 435:321\u2013329","journal-title":"Neurocomputing"},{"key":"16269_CR45","doi-asserted-by":"crossref","unstructured":"Wang H, Schmid C (2013) Action recognition with improved trajectories. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 3551\u20133558","DOI":"10.1109\/ICCV.2013.441"},{"key":"16269_CR46","doi-asserted-by":"crossref","unstructured":"Wang Z, She Q, Smolic A (2021) Action-net: Multipath excitation for action recognition. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 13214\u201313223","DOI":"10.1109\/CVPR46437.2021.01301"},{"key":"16269_CR47","doi-asserted-by":"crossref","unstructured":"Willems G, Tuytelaars T, Gool LV (2008) An efficient dense and scale-invariant spatio-temporal interest point detector. In: European Conference on Computer Vision, pp. 650\u2013663. Springer","DOI":"10.1007\/978-3-540-88688-4_48"},{"key":"16269_CR48","doi-asserted-by":"crossref","unstructured":"Wu H, Xiao B, Codella N, Liu M, Dai X, Yuan L, Zhang L (2021) Cvt: Introducing convolutions to vision transformers. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 22\u201331","DOI":"10.1109\/ICCV48922.2021.00009"},{"key":"16269_CR49","doi-asserted-by":"publisher","first-page":"350","DOI":"10.1016\/j.neucom.2020.04.150","volume":"441","author":"J Xu","year":"2021","unstructured":"Xu J, Song R, Wei H, Guo J, Zhou Y, Huang X (2021) A fast human action recognition network based on spatio-temporal features. Neurocomputing 441:350\u2013358","journal-title":"Neurocomputing"},{"key":"16269_CR50","doi-asserted-by":"crossref","unstructured":"Yue-Hei\u00a0Ng J, Hausknecht M, Vijayanarasimhan S, Vinyals O, Monga R, Toderici G (2015) Beyond short snippets: Deep networks for video classification. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 4694\u20134702","DOI":"10.1109\/CVPR.2015.7299101"},{"key":"16269_CR51","doi-asserted-by":"crossref","unstructured":"Zheng C, Zhu S, Mendieta M, Yang T, Chen C, Ding Z (2021) 3d human pose estimation with spatial and temporal transformers. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 11656\u201311665","DOI":"10.1109\/ICCV48922.2021.01145"},{"issue":"4","key":"16269_CR52","doi-asserted-by":"publisher","first-page":"4218","DOI":"10.1007\/s10489-022-03708-9","volume":"53","author":"T Zhenhua","year":"2023","unstructured":"Zhenhua T, Zhenche X, Pengfei W, Chang D, Weichao Z (2023) Ftcf: Full temporal cross fusion network for violence detection in videos. Applied Intelligence 53(4):4218\u20134230","journal-title":"Applied Intelligence"},{"key":"16269_CR53","doi-asserted-by":"crossref","unstructured":"Zhou P, Ding Q, Luo H, Hou X (2017) Violent interaction detection in video based on deep learning. In: Journal of Physics: Conference Series, vol. 844, p. 012044. IOP Publishing","DOI":"10.1088\/1742-6596\/844\/1\/012044"}],"container-title":["Multimedia Tools and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11042-023-16269-x.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11042-023-16269-x\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11042-023-16269-x.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,10,24]],"date-time":"2024-10-24T21:19:13Z","timestamp":1729804753000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11042-023-16269-x"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,7,24]]},"references-count":53,"journal-issue":{"issue":"12","published-online":{"date-parts":[[2024,4]]}},"alternative-id":["16269"],"URL":"https:\/\/doi.org\/10.1007\/s11042-023-16269-x","relation":{},"ISSN":["1573-7721"],"issn-type":[{"type":"electronic","value":"1573-7721"}],"subject":[],"published":{"date-parts":[[2023,7,24]]},"assertion":[{"value":"1 June 2022","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"22 April 2023","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"4 July 2023","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"24 July 2023","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare that they have no known competing financial interests or personal relationships that could have appeared to influence the work reported in this paper.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interests"}}]}}