{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,13]],"date-time":"2026-05-13T18:22:52Z","timestamp":1778696572288,"version":"3.51.4"},"reference-count":41,"publisher":"Springer Science and Business Media LLC","issue":"6","license":[{"start":{"date-parts":[[2023,10,28]],"date-time":"2023-10-28T00:00:00Z","timestamp":1698451200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,10,28]],"date-time":"2023-10-28T00:00:00Z","timestamp":1698451200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["SN COMPUT. SCI."],"DOI":"10.1007\/s42979-023-02294-y","type":"journal-article","created":{"date-parts":[[2023,10,28]],"date-time":"2023-10-28T06:02:28Z","timestamp":1698472948000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":18,"title":["CViT: A Convolution Vision Transformer for Video Abnormal Behavior Detection and Localization"],"prefix":"10.1007","volume":"4","author":[{"given":"Sanjay","family":"Roka","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4435-675X","authenticated-orcid":false,"given":"Manoj","family":"Diwakar","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2023,10,28]]},"reference":[{"key":"2294_CR1","doi-asserted-by":"publisher","DOI":"10.1016\/j.jvcir.2020.102765","volume":"67","author":"Z Hu","year":"2020","unstructured":"Hu Z, Zhang L, Li S, Sun D. Parallel spatial-temporal convolutional neural networks for anomaly detection and location in crowded scenes. J Vis Commun Image Represent. 2020;67: 102765.","journal-title":"J Vis Commun Image Represent"},{"key":"2294_CR2","doi-asserted-by":"publisher","first-page":"107842","DOI":"10.1109\/ACCESS.2021.3100678","volume":"9","author":"Z Yang","year":"2021","unstructured":"Yang Z, Liu J, Wu P. Bidirectional retrospective generation adversarial network for anomaly detection in videos. IEEE Access. 2021;9:107842\u201357.","journal-title":"IEEE Access"},{"key":"2294_CR3","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2022.108703","volume":"129","author":"MyeongAh Cho","year":"2022","unstructured":"Cho MyeongAh, Kim T, Kim WJ, Cho S, Lee S. Unsupervised video anomaly detection via normalizing flows with implicit latent features. Pattern Recogn. 2022;129: 108703.","journal-title":"Pattern Recogn"},{"key":"2294_CR4","doi-asserted-by":"publisher","first-page":"364","DOI":"10.1016\/j.neunet.2019.11.002","volume":"122","author":"M Gong","year":"2020","unstructured":"Gong M, Zeng H, Xie Y, Li H, Tang Z. Local distinguishability aggrandizing network for human anomaly detection. Neural Netw. 2020;122:364\u201373.","journal-title":"Neural Netw"},{"issue":"1","key":"2294_CR5","doi-asserted-by":"publisher","first-page":"221","DOI":"10.1109\/TPAMI.2012.59","volume":"35","author":"J Shuiwang","year":"2013","unstructured":"Shuiwang J, Ming Y, Kai Y. 3d convolutional neural networks for human action recognition. IEEE Trans Pattern Anal Mach Intell. 2013;35(1):221\u201331.","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"2294_CR6","doi-asserted-by":"crossref","unstructured":"Du T, Bourdev L, Fergus R, Torresani L, Paluri M. Learning spatiotemporal features with 3d convolutional networks. In: Proceedings of the IEEE International Conference on Computer Vision, IEEE, 2016. p. 4489\u20134497.","DOI":"10.1109\/ICCV.2015.510"},{"key":"2294_CR7","unstructured":"Dosovitskiy A, Beyer L, Kolesnikov A, Weissenborn D, Zhai X, Unterthiner T, Houlsby N. An image is worth 16x16 words: transformers for image recognition at scale. arXiv preprint arXiv:2010.11929. 2020."},{"key":"2294_CR8","doi-asserted-by":"crossref","unstructured":"Arnab A, Dehghani M, Heigold G, Sun C, Lu\u010di\u0107 M, Schmid C. Vivit: a video vision transformer. In: Proceedings of the IEEE\/CVF international conference on computer vision, IEEE, 2021. p. 6836\u20136846.","DOI":"10.1109\/ICCV48922.2021.00676"},{"key":"2294_CR9","unstructured":"Chen J, Lu Y, Yu Q, Luo X, Adeli E, Wang Y, Zhou Y. Transunet: transformers make strong encoders for medical image segmentation. arXiv preprint arXiv:2102.04306. 2021."},{"key":"2294_CR10","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1109\/TGRS.2022.3198130","volume":"60","author":"P Jin","year":"2022","unstructured":"Jin P, Mou L, Xia G-S, Zhu XX. Anomaly detection in aerial videos with transformers. IEEE Trans Geosci Remote Sens. 2022;60:1\u201313. https:\/\/doi.org\/10.1109\/TGRS.2022.3198130.","journal-title":"IEEE Trans Geosci Remote Sens"},{"key":"2294_CR11","doi-asserted-by":"crossref","unstructured":"Hasan M, Choi J, Neumann J, Roy-Chowdhury AK, Davis LS. Learning temporal regularity in video sequences. In: Proc. IEEE Conf. Comput. Vis. Pattern Recognit. (CVPR), IEEE, 2016. p. 733\u2013742.","DOI":"10.1109\/CVPR.2016.86"},{"key":"2294_CR12","doi-asserted-by":"crossref","unstructured":"Chong YS, Tay YH. Abnormal event detection in videos using spatiotemporal autoencoder. In: Advances in neural networks-ISNN 2017: 14th International Symposium, ISNN 2017, Sapporo, Hakodate, and Muroran, Hokkaido, Japan, June 21\u201326, 2017, Proceedings, Part II 14. Springer International Publishing; 2017. p. 189\u201396.","DOI":"10.1007\/978-3-319-59081-3_23"},{"key":"2294_CR13","doi-asserted-by":"crossref","unstructured":"Luo W, Liu W, Gao S. Remembering history with convolutional lstm for anomaly detection. In: 2017 IEEE ICME. IEEE; 2017. p. 439\u201344.","DOI":"10.1109\/ICME.2017.8019325"},{"key":"2294_CR14","doi-asserted-by":"crossref","unstructured":"Nguyen TN, Meunier J. Anomaly detection in video sequence with appearance-motion correspondence. In: Proceedings of the IEEE\/CVF international conference on computer vision, IEEE, 2019. p. 1273\u20131283.","DOI":"10.1109\/ICCV.2019.00136"},{"key":"2294_CR15","doi-asserted-by":"publisher","first-page":"92","DOI":"10.1016\/j.neucom.2019.08.044","volume":"369","author":"N Li","year":"2019","unstructured":"Li N, Chang F. Video anomaly detection and localization via multivariate gaussian fully convolution adversarial autoencoder. Neurocomputing. 2019;369:92\u2013105.","journal-title":"Neurocomputing"},{"key":"2294_CR16","doi-asserted-by":"publisher","DOI":"10.1016\/j.cviu.2020.102920","volume":"195","author":"Y Fan","year":"2020","unstructured":"Fan Y, Wen G, Li D, Qiu S, Levine MD, Xiao F. Video anomaly detection and localization via gaussian mixture fully convolutional variational autoencoder. Comput Vis Image Underst. 2020;195: 102920.","journal-title":"Comput Vis Image Underst"},{"key":"2294_CR17","unstructured":"Medel JR, Savakis A. Anomaly detection in video using predictive convolutional long short-term memory networks. arXiv preprint arXiv:1612.00390. 2016."},{"key":"2294_CR18","doi-asserted-by":"crossref","unstructured":"Liu W, Luo W, Lian D, Gao S. Future frame prediction for anomaly detection\u2013a new baseline. In: Proceedings of the IEEE conference on computer vision and pattern recognition, IEEE, 2018. p. 6536\u20136545.","DOI":"10.1109\/CVPR.2018.00684"},{"key":"2294_CR19","doi-asserted-by":"crossref","unstructured":"Zhao Y, Deng B, Shen C, Liu Y, Lu H, Hua XS. Spatio-temporal autoencoder for video anomaly detection. In: Proceedings of the 25th ACM international conference on multimedia, ACM, 2017. p. 1933\u20131941.","DOI":"10.1145\/3123266.3123451"},{"key":"2294_CR20","doi-asserted-by":"publisher","first-page":"123","DOI":"10.1016\/j.patrec.2019.11.024","volume":"129","author":"Y Tang","year":"2020","unstructured":"Tang Y, Zhao L, Zhang S, Gong C, Li G, Yang J. Integrating prediction and reconstruction for anomaly detection. Pattern Recogn Lett. 2020;129:123\u201330.","journal-title":"Pattern Recogn Lett"},{"key":"2294_CR21","doi-asserted-by":"publisher","first-page":"123977","DOI":"10.1109\/ACCESS.2021.3109102","volume":"9","author":"H Yuan","year":"2021","unstructured":"Yuan H, Cai Z, Zhou H, Wang Y, Chen X. TransAnomaly: video anomaly detection using video vision transformer. IEEE Access. 2021;9:123977\u201386. https:\/\/doi.org\/10.1109\/ACCESS.2021.3109102.","journal-title":"IEEE Access"},{"key":"2294_CR22","doi-asserted-by":"publisher","unstructured":"Rawat A, Singh Samant S. Comparative Analysis of Transformer based Models for Question Answering. In: 2022 2nd International CISCT, IEEE, Dehradun, India; 2022. p. 1\u20136. https:\/\/doi.org\/10.1109\/CISCT55310.2022.10046525.","DOI":"10.1109\/CISCT55310.2022.10046525"},{"key":"2294_CR23","doi-asserted-by":"publisher","first-page":"46717","DOI":"10.1109\/ACCESS.2022.3171559","volume":"10","author":"Y Lee","year":"2022","unstructured":"Lee Y, Kang P. AnoViT: unsupervised anomaly detection and localization with vision transformer-based encoder-decoder. IEEE Access. 2022;10:46717\u201324. https:\/\/doi.org\/10.1109\/ACCESS.2022.3171559.","journal-title":"IEEE Access"},{"key":"2294_CR24","doi-asserted-by":"publisher","first-page":"114683","DOI":"10.1109\/ACCESS.2022.3216930","volume":"10","author":"Y Duan","year":"2022","unstructured":"Duan Y, Xiang M, Zhou B, Fu D, Liu H. TCAD: unsupervised anomaly detection based on global local representation differences. IEEE Access. 2022;10:114683\u201393. https:\/\/doi.org\/10.1109\/ACCESS.2022.3216930.","journal-title":"IEEE Access"},{"key":"2294_CR25","unstructured":"Xiao T, Singh M, Mintun E, Darrell T, Dollar P, Girshick R. Early convolutions help transformers see better. arXiv preprint arXiv:2106.14881. 2021."},{"issue":"6","key":"2294_CR26","doi-asserted-by":"publisher","first-page":"2301","DOI":"10.1109\/TNNLS.2021.3083152","volume":"33","author":"X Wang","year":"2022","unstructured":"Wang X, et al. Robust unsupervised video anomaly detection by multipath frame prediction. IEEE Trans Neural Netw Learn Syst. 2022;33(6):2301\u201312.","journal-title":"IEEE Trans Neural Netw Learn Syst"},{"key":"2294_CR27","unstructured":"Bochkovskiy A, Wang CY, Liao HYM. Yolov4: optimal speed and accuracy of object detection. arXiv preprint arXiv:2004.10934. 2020."},{"key":"2294_CR28","doi-asserted-by":"publisher","first-page":"50312","DOI":"10.1109\/ACCESS.2020.2979869","volume":"8","author":"T Ganokratanaa","year":"2020","unstructured":"Ganokratanaa T, Aramvith S, Sebe N. Unsupervised anomaly detection and localization based on deep spatiotemporal translation network. IEEE Access. 2020;8:50312\u201329. https:\/\/doi.org\/10.1109\/ACCESS.2020.2979869.","journal-title":"IEEE Access"},{"key":"2294_CR29","doi-asserted-by":"publisher","first-page":"150903","DOI":"10.1109\/ACCESS.2021.3126335","volume":"9","author":"S Saypadith","year":"2021","unstructured":"Saypadith S, Onoye T. An approach to detect anomaly in video using deep generative network. IEEE Access. 2021;9:150903\u201310. https:\/\/doi.org\/10.1109\/ACCESS.2021.3126335.","journal-title":"IEEE Access"},{"key":"2294_CR30","doi-asserted-by":"publisher","first-page":"6208","DOI":"10.1109\/ACCESS.2022.3142247","volume":"10","author":"E Cruz-Esquivel","year":"2022","unstructured":"Cruz-Esquivel E, Guzman-Zavaleta ZJ. An examination on autoencoder designs for anomaly detection in video surveillance. IEEE Access. 2022;10:6208\u201317. https:\/\/doi.org\/10.1109\/ACCESS.2022.3142247.","journal-title":"IEEE Access"},{"issue":"1","key":"2294_CR31","doi-asserted-by":"publisher","first-page":"18","DOI":"10.1109\/TPAMI.2013.111","volume":"36","author":"W Li","year":"2014","unstructured":"Li W, Mahadevan V, Vasconcelos N. Anomaly detection and localization in crowded scenes. IEEE Trans Pattern Anal Mach Intell. 2014;36(1):18\u201332.","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"issue":"8","key":"2294_CR32","doi-asserted-by":"publisher","first-page":"5427","DOI":"10.1109\/TCSVT.2022.3148392","volume":"32","author":"S Zhang","year":"2022","unstructured":"Zhang S, et al. Influence-aware attention networks for anomaly detection in surveillance videos. IEEE Trans Circuits Syst Video Technol. 2022;32(8):5427\u201337. https:\/\/doi.org\/10.1109\/TCSVT.2022.3148392.","journal-title":"IEEE Trans Circuits Syst Video Technol"},{"key":"2294_CR33","doi-asserted-by":"crossref","unstructured":"Wang S, Zeng Y, Liu Q, Zhu C, Zhu E, Yin J. Detecting abnormality without knowing normality: A two-stage approach for unsupervised video abnormal event de-tection. In: Proc. 26th ACM Int. Conf, ACM, Multimedia. Seoul, South Korea; 2018. p. 636\u2013644.","DOI":"10.1145\/3240508.3240615"},{"key":"2294_CR34","doi-asserted-by":"publisher","first-page":"15","DOI":"10.1016\/j.ins.2020.03.034","volume":"524","author":"H Fanta","year":"2020","unstructured":"Fanta H, Shao Z, Ma L. SiTGRU: single-tunnelled gated recurrent unit for abnormality detection. Inf Sci. 2020;524:15\u201332.","journal-title":"Inf Sci"},{"issue":"8","key":"2294_CR35","doi-asserted-by":"publisher","first-page":"3572","DOI":"10.1109\/TNNLS.2021.3053563","volume":"33","author":"J Yu","year":"2022","unstructured":"Yu J, Lee Y, Yow KC, Jeon M, Pedrycz W. Abnormal event detection and localization via adversarial event prediction. IEEE Trans Neural Netw Learn Syst. 2022;33(8):3572\u201386. https:\/\/doi.org\/10.1109\/TNNLS.2021.3053563.","journal-title":"IEEE Trans Neural Netw Learn Syst"},{"key":"2294_CR36","doi-asserted-by":"publisher","first-page":"270","DOI":"10.1016\/j.neucom.2019.11.087","volume":"383","author":"X Hu","year":"2020","unstructured":"Hu X, Dai J, Huang YP, Yang HM, Zhang L, Chen WM, Yang GK, Zhang DW. A weakly supervised framework for abnormal behavior detection and localization. Neurocomputing. 2020;383:270\u201381.","journal-title":"Neurocomputing"},{"key":"2294_CR37","unstructured":"Mehta S, Rastegari M. Mobilevit: light-weight, general-purpose, and mobile-friendly vision transformer. arXiv preprint arXiv:2110.02178. 2021"},{"key":"2294_CR38","doi-asserted-by":"crossref","unstructured":"He K, Sun J. Convolutional neural networks at constrained time cost. In: Proceedings of the IEEE conference on computer vision and pattern recognition. 2015. p. 5353\u20135360.","DOI":"10.1109\/CVPR.2015.7299173"},{"key":"2294_CR39","unstructured":"https:\/\/ai.stackexchange.com\/questions\/22969\/what-is-the-time-complexity-of-the-upsampling-stage-of-the-u-net?rq=1. Accessed 15 Feb 2023"},{"key":"2294_CR40","unstructured":"Rabe MN, Staats C. Self-attention does not need $ O (n^ 2) $ memory. arXiv preprint arXiv:2112.05682. 2021."},{"issue":"1","key":"2294_CR41","doi-asserted-by":"publisher","first-page":"393","DOI":"10.1109\/TII.2019.2938527","volume":"16","author":"R Nawaratne","year":"2020","unstructured":"Nawaratne R, Alahakoon D, De Silva D, Yu X. Spatiotemporal anomaly detection using deep learning for real-time video surveillance. IEEE Trans Industr Inf. 2020;16(1):393\u2013402. https:\/\/doi.org\/10.1109\/TII.2019.2938527.","journal-title":"IEEE Trans Industr Inf"}],"container-title":["SN Computer Science"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s42979-023-02294-y.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s42979-023-02294-y\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s42979-023-02294-y.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,10,31]],"date-time":"2024-10-31T23:59:23Z","timestamp":1730419163000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s42979-023-02294-y"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,10,28]]},"references-count":41,"journal-issue":{"issue":"6","published-online":{"date-parts":[[2023,11]]}},"alternative-id":["2294"],"URL":"https:\/\/doi.org\/10.1007\/s42979-023-02294-y","relation":{},"ISSN":["2661-8907"],"issn-type":[{"value":"2661-8907","type":"electronic"}],"subject":[],"published":{"date-parts":[[2023,10,28]]},"assertion":[{"value":"28 February 2023","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"1 September 2023","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"28 October 2023","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare no conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of Interest"}}],"article-number":"829"}}