{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,11]],"date-time":"2026-05-11T22:38:04Z","timestamp":1778539084714,"version":"3.51.4"},"reference-count":110,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2026,3,1]],"date-time":"2026-03-01T00:00:00Z","timestamp":1772323200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2026,3,1]],"date-time":"2026-03-01T00:00:00Z","timestamp":1772323200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/legal\/tdmrep-license"},{"start":{"date-parts":[[2026,3,1]],"date-time":"2026-03-01T00:00:00Z","timestamp":1772323200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-017"},{"start":{"date-parts":[[2026,3,1]],"date-time":"2026-03-01T00:00:00Z","timestamp":1772323200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"},{"start":{"date-parts":[[2026,3,1]],"date-time":"2026-03-01T00:00:00Z","timestamp":1772323200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-012"},{"start":{"date-parts":[[2026,3,1]],"date-time":"2026-03-01T00:00:00Z","timestamp":1772323200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2026,3,1]],"date-time":"2026-03-01T00:00:00Z","timestamp":1772323200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-004"}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Knowledge-Based Systems"],"published-print":{"date-parts":[[2026,3]]},"DOI":"10.1016\/j.knosys.2026.115466","type":"journal-article","created":{"date-parts":[[2026,2,2]],"date-time":"2026-02-02T16:34:02Z","timestamp":1770050042000},"page":"115466","update-policy":"https:\/\/doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":1,"special_numbering":"C","title":["Hierarchical vision-language model with comprehensive language description for video anomaly detection"],"prefix":"10.1016","volume":"337","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-4309-1585","authenticated-orcid":false,"given":"Muaz","family":"Al Radi","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0036-2875","authenticated-orcid":false,"given":"Sajid","family":"Javed","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"78","reference":[{"issue":"5","key":"10.1016\/j.knosys.2026.115466_bib0001","first-page":"2293","article-title":"A survey of single-scene video anomaly detection","volume":"44","author":"Ramachandra","year":"2020","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"10.1016\/j.knosys.2026.115466_bib0002","series-title":"IEEE CVPR","first-page":"6479","article-title":"Real-world anomaly detection in surveillance videos","author":"Sultani","year":"2018"},{"key":"10.1016\/j.knosys.2026.115466_bib0003","series-title":"Video anomaly detection: a systematic review of issues and prospects","first-page":"127726","author":"Samaila","year":"2024"},{"issue":"10","key":"10.1016\/j.knosys.2026.115466_bib0004","doi-asserted-by":"crossref","first-page":"2537","DOI":"10.1109\/TIFS.2019.2900907","article-title":"Anomalynet: an anomaly detection network for video surveillance","volume":"14","author":"Zhou","year":"2019","journal-title":"IEEE Trans. Inf. Forensics Secur."},{"issue":"11","key":"10.1016\/j.knosys.2026.115466_bib0005","doi-asserted-by":"crossref","first-page":"5024","DOI":"10.3390\/s23115024","article-title":"Deep learning-based anomaly detection in video surveillance: a survey","volume":"23","author":"Duong","year":"2023","journal-title":"Sensors"},{"issue":"5","key":"10.1016\/j.knosys.2026.115466_bib0006","doi-asserted-by":"crossref","first-page":"18","DOI":"10.1109\/MSP.2010.937393","article-title":"Video anomaly identification","volume":"27","author":"Saligrama","year":"2010","journal-title":"IEEE Signal Process. Mag."},{"key":"10.1016\/j.knosys.2026.115466_bib0007","doi-asserted-by":"crossref","first-page":"12635","DOI":"10.1007\/s11042-022-13954-1","article-title":"Analysis of anomaly detection in surveillance video: recent trends and future vision","volume":"82","author":"Raja","year":"2023","journal-title":"Multimed. Tools Appl."},{"key":"10.1016\/j.knosys.2026.115466_bib0008","doi-asserted-by":"crossref","DOI":"10.1016\/j.compind.2023.103990","article-title":"Industrial anomaly detection with domain shift: a real-world dataset and masked multi-scale reconstruction","volume":"151","author":"Zhang","year":"2023","journal-title":"Comput. Ind."},{"key":"10.1016\/j.knosys.2026.115466_bib0009","doi-asserted-by":"crossref","DOI":"10.1016\/j.eswa.2024.123718","article-title":"Memory-enhanced spatial-temporal encoding framework for industrial anomaly detection system","volume":"250","author":"Liu","year":"2024","journal-title":"Expert Syst. Appl."},{"key":"10.1016\/j.knosys.2026.115466_bib0010","doi-asserted-by":"crossref","first-page":"117788","DOI":"10.1109\/ACCESS.2023.3325896","article-title":"Deep learning technologies for time series anomaly detection in healthcare: a review","volume":"11","author":"Yang","year":"2023","journal-title":"IEEE Access"},{"issue":"2","key":"10.1016\/j.knosys.2026.115466_bib0011","doi-asserted-by":"crossref","first-page":"1073","DOI":"10.1109\/TETCI.2024.3358103","article-title":"Skeletal video anomaly detection using deep learning: survey, challenges, and future directions","volume":"8","author":"Mishra","year":"2024","journal-title":"IEEE Trans. Emerging Top. Comput. Intell."},{"issue":"6","key":"10.1016\/j.knosys.2026.115466_bib0012","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1145\/3417989","article-title":"Anomaly detection in road traffic using visual surveillance: a survey","volume":"53","author":"Santhosh","year":"2020","journal-title":"Acm Comput. Surv. (CSUR)"},{"key":"10.1016\/j.knosys.2026.115466_bib0013","series-title":"Multimedia Tools and Applications","first-page":"1","article-title":"A comprehensive analysis of real-time video anomaly detection methods for human and vehicular movement","author":"Pathirannahalage","year":"2024"},{"key":"10.1016\/j.knosys.2026.115466_bib0014","doi-asserted-by":"crossref","unstructured":"M. Abdalla, S. Javed, M.A. Radi, A. Ulhaq, N. Werghi, Video anomaly detection in 10 years: a survey and outlook,2024, arXiv: 2405.19387.","DOI":"10.1007\/s00521-025-11659-8"},{"key":"10.1016\/j.knosys.2026.115466_bib0015","series-title":"IEEE CVPR","article-title":"Tevad: improved video anomaly detection with captions","author":"Chen","year":"2023"},{"key":"10.1016\/j.knosys.2026.115466_bib0016","series-title":"ICCV","first-page":"4975","article-title":"Weakly-supervised video anomaly detection with robust temporal feature magnitude learning","author":"Tian","year":"2021"},{"key":"10.1016\/j.knosys.2026.115466_bib0017","series-title":"IEEE CVPR","first-page":"26296","article-title":"Improved baselines with visual instruction tuning","author":"Liu","year":"2024"},{"key":"10.1016\/j.knosys.2026.115466_bib0018","series-title":"IEEE CVPR","article-title":"Mist: multiple instance self-training framework for video anomaly detection","author":"Feng","year":"2021"},{"key":"10.1016\/j.knosys.2026.115466_bib0019","series-title":"IEEE CVPR","first-page":"17385","article-title":"Multi-scale video anomaly detection by multi-grained spatio-temporal representation learning","author":"Zhang","year":"2024"},{"key":"10.1016\/j.knosys.2026.115466_bib0020","series-title":"IEEE CVPR","first-page":"15984","article-title":"Self-distilled masked auto-encoders are efficient video anomaly detectors","author":"Ristea","year":"2024"},{"key":"10.1016\/j.knosys.2026.115466_bib0021","series-title":"IEEE CVPR","first-page":"18297","article-title":"Open-vocabulary video anomaly detection","author":"Wu","year":"2024"},{"key":"10.1016\/j.knosys.2026.115466_bib0022","series-title":"IEEE CVPR","first-page":"18868","article-title":"Mulde: multiscale log-density estimation via denoising score matching for video anomaly detection","author":"Micorek","year":"2024"},{"key":"10.1016\/j.knosys.2026.115466_bib0023","series-title":"IEEE CVPR","first-page":"18899","article-title":"Text prompt with normality guidance for weakly supervised video anomaly detection","author":"Yang","year":"2024"},{"key":"10.1016\/j.knosys.2026.115466_bib0024","series-title":"IEEE CVPR","first-page":"18319","article-title":"Prompt-enhanced multiple instance learning for weakly supervised video anomaly detection","author":"Chen","year":"2024"},{"key":"10.1016\/j.knosys.2026.115466_bib0025","series-title":"CVPR","first-page":"16271","article-title":"Exploiting completeness and uncertainty of pseudo labels for weakly supervised video anomaly detection","author":"Zhang","year":"2023"},{"key":"10.1016\/j.knosys.2026.115466_bib0026","series-title":"IEEE CVPR","first-page":"12416","article-title":"Collaborative learning of anomalies with privacy (clap) for unsupervised video anomaly detection: a new baseline","author":"Al-Lahham","year":"2024"},{"key":"10.1016\/j.knosys.2026.115466_bib0027","series-title":"IEEE CVPR","first-page":"14744","article-title":"Generative cooperative learning for unsupervised video anomaly detection","author":"Zaheer","year":"2022"},{"issue":"7","key":"10.1016\/j.knosys.2026.115466_bib0028","first-page":"2609","article-title":"A deep one-class neural network for anomalous event detection in complex scenes","volume":"31","author":"Wu","year":"2019","journal-title":"IEEE Trans. Neural Netw. Learn. Syst."},{"key":"10.1016\/j.knosys.2026.115466_bib0029","series-title":"IEEE CVPR","first-page":"11996","article-title":"Learning regularity in skeleton trajectories for anomaly detection in videos","author":"Morais","year":"2019"},{"key":"10.1016\/j.knosys.2026.115466_bib0030","unstructured":"P. Wu, X. Zhou, G. Pang, L. Zhou, Q. Yan, P. Wang, Y. Zhang, Vadclip: adapting vision-language models for weakly supervised video anomaly detection, 2023, arXiv: 2308.11681."},{"key":"10.1016\/j.knosys.2026.115466_bib0031","series-title":"IEEE CVPR","article-title":"Harnessing large language models for training-free video anomaly detection","author":"Zanella","year":"2024"},{"key":"10.1016\/j.knosys.2026.115466_bib0032","series-title":"CVPR","first-page":"24500","article-title":"Generating anomalies for video anomaly detection with prompt-based feature mapping","author":"Liu","year":"2023"},{"key":"10.1016\/j.knosys.2026.115466_bib0033","doi-asserted-by":"crossref","first-page":"215","DOI":"10.1007\/s11760-020-01740-1","article-title":"Residual spatiotemporal autoencoder for unsupervised video anomaly detection","volume":"15","author":"Deepak","year":"2021","journal-title":"Signal Image Video Process."},{"issue":"2","key":"10.1016\/j.knosys.2026.115466_bib0034","doi-asserted-by":"crossref","first-page":"36","DOI":"10.3390\/jimaging4020036","article-title":"An overview of deep learning based methods for unsupervised and semi-supervised anomaly detection in videos","volume":"4","author":"Kiran","year":"2018","journal-title":"J. Imaging"},{"issue":"6","key":"10.1016\/j.knosys.2026.115466_bib0035","doi-asserted-by":"crossref","first-page":"2301","DOI":"10.1109\/TNNLS.2021.3083152","article-title":"Robust unsupervised video anomaly detection by multipath frame prediction","volume":"33","author":"Wang","year":"2021","journal-title":"IEEE Trans. Neural Netw. Learn. Syst."},{"key":"10.1016\/j.knosys.2026.115466_bib0036","doi-asserted-by":"crossref","first-page":"2395","DOI":"10.1109\/TIP.2019.2948286","article-title":"Bman: bidirectional multi-scale aggregation networks for abnormal event detection","volume":"29","author":"Lee","year":"2019","journal-title":"IEEE Trans. Image Process."},{"key":"10.1016\/j.knosys.2026.115466_bib0037","series-title":"IEEE CVPR","first-page":"14372","article-title":"Learning memory-guided normality for anomaly detection","author":"Park","year":"2020"},{"issue":"8","key":"10.1016\/j.knosys.2026.115466_bib0038","doi-asserted-by":"crossref","first-page":"5625","DOI":"10.1109\/TPAMI.2024.3369699","article-title":"Vision-language models for vision tasks: a survey","volume":"46","author":"Zhang","year":"2024","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"issue":"9","key":"10.1016\/j.knosys.2026.115466_bib0039","doi-asserted-by":"crossref","first-page":"2337","DOI":"10.1007\/s11263-022-01653-1","article-title":"Learning to prompt for vision-language models","volume":"130","author":"Zhou","year":"2022","journal-title":"Int. J. Comput. Vis."},{"key":"10.1016\/j.knosys.2026.115466_bib0040","series-title":"IEEE CVPR","first-page":"16816","article-title":"Conditional prompt learning for vision-language models","author":"Zhou","year":"2022"},{"issue":"2","key":"10.1016\/j.knosys.2026.115466_bib0041","doi-asserted-by":"crossref","first-page":"581","DOI":"10.1007\/s11263-023-01891-x","article-title":"Clip-adapter: better vision-language models with feature adapters","volume":"132","author":"Gao","year":"2024","journal-title":"Int. J. Comput. Vis."},{"key":"10.1016\/j.knosys.2026.115466_bib0042","series-title":"IEEE ICIP","article-title":"Clip-tsa: clip-assisted temporal self-attention for weakly-supervised video anomaly detection","author":"Joo","year":"2023"},{"key":"10.1016\/j.knosys.2026.115466_bib0043","series-title":"ECCV","article-title":"Not only look, but also listen: learning multimodal violence detection under weak supervision","author":"Wu","year":"2020"},{"key":"10.1016\/j.knosys.2026.115466_bib0044","series-title":"2017 IEEE ICCV","first-page":"341","article-title":"A revisit of sparse coding based anomaly detection in stacked rnn framework","author":"Luo","year":"2017"},{"key":"10.1016\/j.knosys.2026.115466_bib0045","series-title":"2010 IEEE Computer Society Conference on Computer Vision and Pattern Recognition","first-page":"1975","article-title":"Anomaly detection in crowded scenes","author":"Mahadevan","year":"2010"},{"key":"10.1016\/j.knosys.2026.115466_bib0046","series-title":"IEEE ICIP","article-title":"Unbiased multiple instance learning for weakly supervised video anomaly detection","author":"Lv","year":"2023"},{"key":"10.1016\/j.knosys.2026.115466_bib0047","series-title":"ICML","first-page":"8748","article-title":"Learning transferable visual models from natural language supervision","author":"Radford","year":"2021"},{"key":"10.1016\/j.knosys.2026.115466_bib0048","series-title":"ICML","first-page":"19730","article-title":"Blip-2: bootstrapping language-image pre-training with frozen image encoders and large language models","author":"Li","year":"2023"},{"key":"10.1016\/j.knosys.2026.115466_bib0049","unstructured":"J. Achiam, S. Adler, S. Agarwal, L. Ahmad, I. Akkaya, F.L. Aleman, D. Almeida, J. Altenschmidt, S. Altman, S. Anadkat et al., Gpt-4 technical report, 2023, arXiv: 2303.08774."},{"key":"10.1016\/j.knosys.2026.115466_bib0050","unstructured":"J. Carreira, E. Noland, C. Hillier, A. Zisserman, A short note on the kinetics-700 human action dataset, 2019, arXiv: 1907.06987."},{"key":"10.1016\/j.knosys.2026.115466_bib0051","series-title":"ECCV","article-title":"Microsoft coco: common objects in context","author":"Lin","year":"2014"},{"issue":"1","key":"10.1016\/j.knosys.2026.115466_bib0052","doi-asserted-by":"crossref","first-page":"101","DOI":"10.1186\/s40537-021-00492-0","article-title":"Text data augmentation for deep learning","volume":"8","author":"Shorten","year":"2021","journal-title":"J. Big Data"},{"key":"10.1016\/j.knosys.2026.115466_bib0053","series-title":"IEEE ICIP","article-title":"Temporal convolutional network with complementary inner bag loss for weakly supervised anomaly detection","author":"Zhang","year":"2019"},{"key":"10.1016\/j.knosys.2026.115466_bib0054","series-title":"IEEE CVPR","article-title":"Graph convolutional label noise cleaner: train a plug-and-play action classifier for anomaly detection","author":"Zhong","year":"2019"},{"key":"10.1016\/j.knosys.2026.115466_bib0055","series-title":"ICCV","first-page":"13598","article-title":"Ted-spad: temporal distinctiveness for self-supervised privacy-preservation for video anomaly detection","author":"Fioresi","year":"2023"},{"key":"10.1016\/j.knosys.2026.115466_bib0056","doi-asserted-by":"crossref","DOI":"10.1016\/j.patcog.2023.109567","article-title":"Rareanom: a benchmark video dataset for rare type anomalies","volume":"140","author":"Thakare","year":"2023","journal-title":"Pattern Recognit."},{"key":"10.1016\/j.knosys.2026.115466_bib0057","series-title":"ICCV","article-title":"Memorizing normality to detect anomaly: memory-augmented deep autoencoder for unsupervised anomaly detection","author":"Gong","year":"2019"},{"key":"10.1016\/j.knosys.2026.115466_bib0058","series-title":"ICCV","first-page":"5527","article-title":"Feature prediction diffusion model for video anomaly detection","author":"Yan","year":"2023"},{"key":"10.1016\/j.knosys.2026.115466_bib0059","series-title":"IEEE CVPR","article-title":"Eval: explainable video anomaly localization","author":"Singh","year":"2023"},{"key":"10.1016\/j.knosys.2026.115466_bib0060","doi-asserted-by":"crossref","DOI":"10.1016\/j.knosys.2025.113530","article-title":"Anomaly detection method of surveillance video based on global-local information","volume":"317","author":"Wu","year":"2025","journal-title":"Knowl. Based Syst."},{"key":"10.1016\/j.knosys.2026.115466_bib0061","doi-asserted-by":"crossref","DOI":"10.1016\/j.knosys.2025.113600","article-title":"Learning opposite prompts for weakly supervised video anomaly detection","volume":"324","author":"Qiu","year":"2025","journal-title":"Knowl. Based Syst."},{"key":"10.1016\/j.knosys.2026.115466_bib0062","article-title":"Generalized cross entropy loss for training deep neural networks with noisy labels","volume":"31","author":"Zhang","year":"2018","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.knosys.2026.115466_bib0063","series-title":"WACV","first-page":"1290","article-title":"A multi-class hinge loss for conditional gans","author":"Kavalerov","year":"2021"},{"key":"10.1016\/j.knosys.2026.115466_bib0064","doi-asserted-by":"crossref","DOI":"10.1016\/j.patcog.2024.110550","article-title":"Video anomaly detection guided by clustering learning","volume":"153","author":"Qiu","year":"2024","journal-title":"Pattern Recognit."},{"key":"10.1016\/j.knosys.2026.115466_bib0065","doi-asserted-by":"crossref","DOI":"10.1016\/j.knosys.2024.111978","article-title":"Vpe-wsvad: visual prompt exemplars for weakly-supervised video anomaly detection","volume":"299","author":"Su","year":"2024","journal-title":"Knowl. Based Syst."},{"key":"10.1016\/j.knosys.2026.115466_bib0066","doi-asserted-by":"crossref","DOI":"10.1016\/j.knosys.2022.109348","article-title":"Attention-based anomaly detection in multi-view surveillance videos","volume":"252","author":"Li","year":"2022","journal-title":"Knowl. Based Syst."},{"key":"10.1016\/j.knosys.2026.115466_bib0067","doi-asserted-by":"crossref","DOI":"10.1016\/j.knosys.2023.110872","article-title":"Prime: privacy-preserving video anomaly detection via motion exemplar guidance","volume":"278","author":"Su","year":"2023","journal-title":"Knowl. Based Syst."},{"key":"10.1016\/j.knosys.2026.115466_bib0068","series-title":"IEEE CVPR","first-page":"16271","article-title":"Exploiting completeness and uncertainty of pseudo labels for weakly supervised video anomaly detection","author":"Zhang","year":"2023"},{"key":"10.1016\/j.knosys.2026.115466_bib0069","first-page":"1395","article-title":"Self-training multi-sequence learning with transformer for weakly supervised video anomaly detection","volume":"36","author":"Li","year":"2022","journal-title":"Proc. AAAI Conf. Artif. Intell."},{"key":"10.1016\/j.knosys.2026.115466_bib0070","series-title":"IEEE CVPR","article-title":"Hierarchical semantic contrast for scene-aware video anomaly detection","author":"Sun","year":"2023"},{"issue":"6","key":"10.1016\/j.knosys.2026.115466_bib0071","doi-asserted-by":"crossref","first-page":"2313","DOI":"10.1109\/TNNLS.2021.3130074","article-title":"Mocca: multilayer one-class classification for anomaly detection","volume":"33","author":"Massoli","year":"2021","journal-title":"IEEE Trans. Neural Netw. Learn. Syst."},{"key":"10.1016\/j.knosys.2026.115466_bib0072","series-title":"ICCV","first-page":"8201","article-title":"Gods: generalized one-class discriminative subspaces for anomaly detection","author":"Wang","year":"2019"},{"key":"10.1016\/j.knosys.2026.115466_bib0073","series-title":"IEEE CVPR","first-page":"8121","article-title":"Gmflow: learning optical flow via global matching","author":"Xu","year":"2022"},{"key":"10.1016\/j.knosys.2026.115466_bib0074","series-title":"IEEE CVPR","first-page":"747","article-title":"Bfo meets hog: feature extraction based on histograms of oriented pdf gradients for image classification","author":"Kobayashi","year":"2013"},{"key":"10.1016\/j.knosys.2026.115466_bib0075","series-title":"ICCV","first-page":"4489","article-title":"Learning spatiotemporal features with 3d convolutional networks","author":"Tran","year":"2015"},{"key":"10.1016\/j.knosys.2026.115466_bib0076","series-title":"IEEE CVPR","article-title":"Quo vadis, action recognition? a new model and the kinetics dataset","author":"Carreira","year":"2017"},{"key":"10.1016\/j.knosys.2026.115466_bib0077","series-title":"IEEE CVPR","first-page":"733","article-title":"Learning temporal regularity in video sequences","author":"Hasan","year":"2016"},{"key":"10.1016\/j.knosys.2026.115466_bib0078","series-title":"Iccv","first-page":"13588","article-title":"A hybrid video anomaly detection framework via memory-augmented flow reconstruction and flow-guided frame prediction","author":"Liu","year":"2021"},{"issue":"10","key":"10.1016\/j.knosys.2026.115466_bib0079","doi-asserted-by":"crossref","first-page":"2222","DOI":"10.1109\/TNNLS.2016.2582924","article-title":"Lstm: a search space odyssey","volume":"28","author":"Greff","year":"2016","journal-title":"IEEE Trans Neural Netw Learn Syst"},{"issue":"1","key":"10.1016\/j.knosys.2026.115466_bib0080","doi-asserted-by":"crossref","first-page":"87","DOI":"10.1109\/TPAMI.2022.3152247","article-title":"A survey on vision transformer","volume":"45","author":"Han","year":"2022","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"10.1016\/j.knosys.2026.115466_bib0081","series-title":"IEEE CVPR","first-page":"14592","article-title":"Video event restoration based on keyframes for video anomaly detection","author":"Yang","year":"2023"},{"key":"10.1016\/j.knosys.2026.115466_bib0082","series-title":"ICCV","first-page":"10330","article-title":"Video anomaly detection via sequentially learning multiple pretext tasks","author":"Shi","year":"2023"},{"key":"10.1016\/j.knosys.2026.115466_bib0083","series-title":"CVPR","first-page":"12416","article-title":"Collaborative learning of anomalies with privacy (clap) for unsupervised video anomaly detection: a new baseline","author":"Al-Lahham","year":"2024"},{"issue":"7","key":"10.1016\/j.knosys.2026.115466_bib0084","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1145\/3645101","article-title":"Generalized video anomaly event detection: systematic taxonomy and comparison of deep models","volume":"56","author":"Liu","year":"2024","journal-title":"ACM Comput. Surv."},{"issue":"10","key":"10.1016\/j.knosys.2026.115466_bib0085","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1145\/3729222","article-title":"Networking systems for video anomaly detection: a tutorial and survey","volume":"57","author":"Liu","year":"2025","journal-title":"ACM Comput. Surv."},{"key":"10.1016\/j.knosys.2026.115466_bib0086","series-title":"ICASSP 2022-2022 IEEE International Conference on Acoustics, Speech and Signal Processing","first-page":"1980","article-title":"Look, listen and pay more attention: fusing multi-modal information for video violence detection","author":"Wei","year":"2022"},{"key":"10.1016\/j.knosys.2026.115466_bib0087","doi-asserted-by":"crossref","first-page":"2178","DOI":"10.1109\/LSP.2022.3216500","article-title":"Msaf: multimodal supervise-attention enhanced fusion for video anomaly detection","volume":"29","author":"Wei","year":"2022","journal-title":"IEEE Signal Process. Lett."},{"key":"10.1016\/j.knosys.2026.115466_bib0088","series-title":"Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing (EMNLP-IJCNLP)","first-page":"6382","article-title":"Eda: easy data augmentation techniques for boosting performance on text classification tasks","author":"Wei","year":"2019"},{"issue":"11","key":"10.1016\/j.knosys.2026.115466_bib0089","doi-asserted-by":"crossref","first-page":"39","DOI":"10.1145\/219717.219748","article-title":"Wordnet: a lexical database for english","volume":"38","author":"Miller","year":"1995","journal-title":"Commun. ACM"},{"key":"10.1016\/j.knosys.2026.115466_bib0090","series-title":"IEEE CVPR","first-page":"6545","article-title":"Fine-tuned clip models are efficient video learners","author":"Rasheed","year":"2023"},{"key":"10.1016\/j.knosys.2026.115466_bib0091","article-title":"Attention is all you need","volume":"30","author":"Vaswani","year":"2017","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.knosys.2026.115466_bib0092","unstructured":"B. Gao, L. Pavel, On the properties of the softmax function with application in game theory and reinforcement learning, arXiv: 1704.00805, 2017."},{"issue":"1","key":"10.1016\/j.knosys.2026.115466_bib0093","doi-asserted-by":"crossref","first-page":"45","DOI":"10.1023\/B:MACH.0000008084.60811.49","article-title":"Support vector data description","volume":"54","author":"Tax","year":"2004","journal-title":"Mach. Learn."},{"issue":"7","key":"10.1016\/j.knosys.2026.115466_bib0094","doi-asserted-by":"crossref","first-page":"1443","DOI":"10.1162\/089976601750264965","article-title":"Estimating the support of a high-dimensional distribution","volume":"13","author":"Sch\u00f6lkopf","year":"2001","journal-title":"Neural Comput."},{"issue":"7","key":"10.1016\/j.knosys.2026.115466_bib0095","article-title":"Adaptive subgradient methods for online learning and stochastic optimization","volume":"12","author":"Duchi","year":"2011","journal-title":"J. Mach. Learn. Res."},{"key":"10.1016\/j.knosys.2026.115466_bib0096","series-title":"Proceedings of the 25th ACM SIGKDD International Conference on Knowledge Discovery & Data Mining","first-page":"2623","article-title":"Optuna: a next-generation hyperparameter optimization framework","author":"Akiba","year":"2019"},{"key":"10.1016\/j.knosys.2026.115466_bib0097","series-title":"ICCV","first-page":"173","article-title":"Dance with self-attention: a new look of conditional random fields on anomaly detection in videos","author":"Purwanto","year":"2021"},{"key":"10.1016\/j.knosys.2026.115466_bib0098","series-title":"IEEE CVPR","first-page":"12137","article-title":"Look around for anomalies: weakly-supervised anomaly detection via context-motion relational learning","author":"Cho","year":"2023"},{"key":"10.1016\/j.knosys.2026.115466_bib0099","series-title":"ICCV","first-page":"2720","article-title":"Abnormal event detection at 150 fps in matlab","author":"Lu","year":"2013"},{"key":"10.1016\/j.knosys.2026.115466_bib0100","series-title":"IEEE CVPR","first-page":"15180","article-title":"Imagebind: one embedding space to bind them all","author":"Girdhar","year":"2023"},{"key":"10.1016\/j.knosys.2026.115466_bib0101","unstructured":"A. Grattafiori, A. Dubey, A. Jauhri, A. Pandey, A. Kadian, A. Al-Dahle, A. Letman, A. Mathur, A. Schelten, A. Vaughan et al., The llama 3 herd of models, arXiv: 2407.21783, 2024."},{"key":"10.1016\/j.knosys.2026.115466_bib0102","unstructured":"H. Lu, W. Liu, B. Zhang, B. Wang, K. Dong, B. Liu, J. Sun, T. Ren, Z. Li, H. Yang et al., Deepseek-vl: towards real-world vision-language understanding, arXiv: 2403.05525, 2024."},{"key":"10.1016\/j.knosys.2026.115466_bib0103","series-title":"Proceedings of the Computer Vision and Pattern Recognition Conference","first-page":"24265","article-title":"Just dance with pi! a poly-modal inductor for weakly-supervised video anomaly detection","author":"Majhi","year":"2025"},{"key":"10.1016\/j.knosys.2026.115466_bib0104","doi-asserted-by":"crossref","first-page":"1705","DOI":"10.1109\/LSP.2020.3025688","article-title":"A self-reasoning framework for anomaly detection using video-level labels","volume":"27","author":"Zaheer","year":"2020","journal-title":"IEEE Signal Process. Lett."},{"key":"10.1016\/j.knosys.2026.115466_bib0105","series-title":"Cleaning label noise with clusters for minimally supervised anomaly detection","author":"Zaheer","year":"2021"},{"key":"10.1016\/j.knosys.2026.115466_bib0106","series-title":"2021 17th IEEE International Conference on Advanced Video and Signal Based Surveillance (AVSS)","first-page":"1","article-title":"Dam: dissimilarity attention module for weakly-supervised video anomaly detection","author":"Majhi","year":"2021"},{"key":"10.1016\/j.knosys.2026.115466_bib0107","series-title":"Proceedings of the Computer Vision and Pattern Recognition Conference","first-page":"29203","article-title":"Anomize: better open vocabulary video anomaly detection","author":"Li","year":"2025"},{"key":"10.1016\/j.knosys.2026.115466_bib0108","series-title":"Proceedings of the Computer Vision and Pattern Recognition Conference","first-page":"13843","article-title":"Holmes-vau: towards long-term video anomaly understanding at any granularity","author":"Zhang","year":"2025"},{"key":"10.1016\/j.knosys.2026.115466_bib0109","series-title":"Proceedings of the IEEE\/CVF International Conference on Computer Vision","first-page":"10275","article-title":"Mixture of experts guided by gaussian splatters matters: a new approach to weakly-supervised video anomaly detection","author":"Amicantonio","year":"2025"},{"key":"10.1016\/j.knosys.2026.115466_bib0110","series-title":"ECCV","article-title":"Not only look, but also listen: learning multimodal violence detection under weak supervision","author":"Wu","year":"2020"}],"container-title":["Knowledge-Based Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0950705126002091?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0950705126002091?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2026,2,26]],"date-time":"2026-02-26T07:35:04Z","timestamp":1772091304000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S0950705126002091"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,3]]},"references-count":110,"alternative-id":["S0950705126002091"],"URL":"https:\/\/doi.org\/10.1016\/j.knosys.2026.115466","relation":{},"ISSN":["0950-7051"],"issn-type":[{"value":"0950-7051","type":"print"}],"subject":[],"published":{"date-parts":[[2026,3]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"Hierarchical vision-language model with comprehensive language description for video anomaly detection","name":"articletitle","label":"Article Title"},{"value":"Knowledge-Based Systems","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.knosys.2026.115466","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2026 Elsevier B.V. All rights are reserved, including those for text and data mining, AI training, and similar technologies.","name":"copyright","label":"Copyright"}],"article-number":"115466"}}