{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,15]],"date-time":"2026-04-15T03:19:07Z","timestamp":1776223147470,"version":"3.50.1"},"reference-count":49,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2026,9,1]],"date-time":"2026-09-01T00:00:00Z","timestamp":1788220800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2026,9,1]],"date-time":"2026-09-01T00:00:00Z","timestamp":1788220800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/legal\/tdmrep-license"},{"start":{"date-parts":[[2026,9,1]],"date-time":"2026-09-01T00:00:00Z","timestamp":1788220800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-017"},{"start":{"date-parts":[[2026,9,1]],"date-time":"2026-09-01T00:00:00Z","timestamp":1788220800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"},{"start":{"date-parts":[[2026,9,1]],"date-time":"2026-09-01T00:00:00Z","timestamp":1788220800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-012"},{"start":{"date-parts":[[2026,9,1]],"date-time":"2026-09-01T00:00:00Z","timestamp":1788220800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2026,9,1]],"date-time":"2026-09-01T00:00:00Z","timestamp":1788220800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-004"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62472368"],"award-info":[{"award-number":["62472368"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62302427"],"award-info":[{"award-number":["62302427"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100015308","name":"Xinjiang Uygur Autonomous Region Department of Science and Technology","doi-asserted-by":"publisher","award":["2022TSYCLJ0036"],"award-info":[{"award-number":["2022TSYCLJ0036"]}],"id":[{"id":"10.13039\/501100015308","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100002855","name":"Ministry of Science and Technology of the People&apos;s Republic of China","doi-asserted-by":"publisher","award":["2022ZD0115800"],"award-info":[{"award-number":["2022ZD0115800"]}],"id":[{"id":"10.13039\/501100002855","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100008845","name":"Xinjiang University","doi-asserted-by":"publisher","award":["XJDX2025YJS181"],"award-info":[{"award-number":["XJDX2025YJS181"]}],"id":[{"id":"10.13039\/501100008845","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Neural Networks"],"published-print":{"date-parts":[[2026,9]]},"DOI":"10.1016\/j.neunet.2026.108951","type":"journal-article","created":{"date-parts":[[2026,4,8]],"date-time":"2026-04-08T07:36:17Z","timestamp":1775633777000},"page":"108951","update-policy":"https:\/\/doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":0,"special_numbering":"C","title":["TrCLIP-VAD : Weak supervised video anomaly detection by improving CLIP training with text rewriting"],"prefix":"10.1016","volume":"201","author":[{"ORCID":"https:\/\/orcid.org\/0009-0001-5643-8322","authenticated-orcid":false,"given":"Shengjie","family":"Shen","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-3110-4957","authenticated-orcid":false,"given":"Ziteng","family":"Guo","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yahui","family":"Li","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Liejun","family":"Wang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6412-334X","authenticated-orcid":false,"given":"Zhiqing","family":"Guo","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"78","reference":[{"key":"10.1016\/j.neunet.2026.108951_bib0001","doi-asserted-by":"crossref","unstructured":"Ahamed, M. A., & Cheng, Q. (2024). TimeMachine: A time series is worth 4 Mambas for long-term forecasting. arXiv preprint arXiv: 2403.09898.","DOI":"10.3233\/FAIA240677"},{"key":"10.1016\/j.neunet.2026.108951_bib0002","series-title":"Proceedings of the ACL workshop on intrinsic and extrinsic evaluation measures for machine translation and\/or summarization","first-page":"65","article-title":"Meteor: An automatic metric for MT evaluation with improved correlation with human judgments","author":"Banerjee","year":"2005"},{"key":"10.1016\/j.neunet.2026.108951_bib0003","series-title":"Proceedings of the IEEE conference on computer vision and pattern recognition","first-page":"6299","article-title":"Quo vadis, action recognition? A new model and the kinetics dataset","author":"Carreira","year":"2017"},{"key":"10.1016\/j.neunet.2026.108951_bib0004","series-title":"Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition","first-page":"5549","article-title":"TEVAD: Improved video anomaly detection with captions","author":"Chen","year":"2023"},{"key":"10.1016\/j.neunet.2026.108951_bib0005","doi-asserted-by":"crossref","first-page":"35544","DOI":"10.52202\/075280-1544","article-title":"Improving clip training with language rewrites","volume":"36","author":"Fan","year":"2023","journal-title":"Advances in Neural Information Processing Systems"},{"key":"10.1016\/j.neunet.2026.108951_bib0006","series-title":"Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition","first-page":"14009","article-title":"Mist: Multiple instance self-training framework for video anomaly detection","author":"Feng","year":"2021"},{"key":"10.1016\/j.neunet.2026.108951_bib0007","doi-asserted-by":"crossref","unstructured":"Gao, T., Yao, X., & Chen, D. (2021). SimCSE: Simple contrastive learning of sentence embeddings. arXiv preprint arXiv: 2104.08821.","DOI":"10.18653\/v1\/2021.emnlp-main.552"},{"key":"10.1016\/j.neunet.2026.108951_bib0008","unstructured":"Gu, A., & Dao, T. (2023). Mamba: Linear-time sequence modeling with selective state spaces. arXiv preprint arXiv: 2312.00752."},{"key":"10.1016\/j.neunet.2026.108951_bib0009","series-title":"Proceedings of the IEEE conference on computer vision and pattern recognition","first-page":"733","article-title":"Learning temporal regularity in video sequences","author":"Hasan","year":"2016"},{"key":"10.1016\/j.neunet.2026.108951_bib0010","doi-asserted-by":"crossref","DOI":"10.1016\/j.neunet.2025.107583","article-title":"Prototype-guided and dynamic-aware video anomaly detection","volume":"189","author":"Huang","year":"2025","journal-title":"Neural Networks"},{"key":"10.1016\/j.neunet.2026.108951_bib0011","series-title":"2023\u202fIEEE international conference on image processing (ICIP)","first-page":"3230","article-title":"CLIP-TSA: Clip-assisted temporal self-attention for weakly-supervised video anomaly detection","author":"Joo","year":"2023"},{"key":"10.1016\/j.neunet.2026.108951_bib0012","series-title":"European conference on computer vision","first-page":"105","article-title":"Prompting visual-language models for efficient video understanding","author":"Ju","year":"2022"},{"key":"10.1016\/j.neunet.2026.108951_bib0013","series-title":"Proceedings of the IEEE international conference on computer vision","first-page":"706","article-title":"Dense-captioning events in videos","author":"Krishna","year":"2017"},{"key":"#cr-split#-10.1016\/j.neunet.2026.108951_bib0014.1","unstructured":"Lee, J., & Toutanova, K. (2018). Pre-training of deep bidirectional transformers for language understanding. 3"},{"key":"#cr-split#-10.1016\/j.neunet.2026.108951_bib0014.2","unstructured":"(8) arXiv preprint arXiv: 1810.04805."},{"key":"10.1016\/j.neunet.2026.108951_bib0015","doi-asserted-by":"crossref","DOI":"10.1016\/j.neunet.2024.106138","article-title":"Cross-modality integration framework with prediction, perception and discrimination for video anomaly detection","volume":"172","author":"Li","year":"2024","journal-title":"Neural Networks"},{"key":"10.1016\/j.neunet.2026.108951_bib0016","doi-asserted-by":"crossref","DOI":"10.1016\/j.neunet.2024.106509","article-title":"An informative dual forknet for video anomaly detection","volume":"179","author":"Li","year":"2024","journal-title":"Neural Networks"},{"key":"10.1016\/j.neunet.2026.108951_bib0017","series-title":"Text summarization branches out","first-page":"74","article-title":"Rouge: A package for automatic evaluation of summaries","author":"Lin","year":"2004"},{"key":"10.1016\/j.neunet.2026.108951_bib0018","series-title":"Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition","first-page":"17949","article-title":"SwinBERT: End-to-end transformers with sparse attention for video captioning","author":"Lin","year":"2022"},{"key":"10.1016\/j.neunet.2026.108951_bib0019","series-title":"2018 25th IEEE international conference on image processing (ICIP)","first-page":"2281","article-title":"Temporal attention network for action proposal","author":"Liu","year":"2018"},{"key":"10.1016\/j.neunet.2026.108951_bib0020","series-title":"Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition","first-page":"8022","article-title":"Unbiased multiple instance learning for weakly supervised video anomaly detection","author":"Lv","year":"2023"},{"key":"10.1016\/j.neunet.2026.108951_bib0021","doi-asserted-by":"crossref","unstructured":"Lyu, J., Zhao, M., Hu, J., Huang, X., Chen, Y., & Du, S. (2025). VadMamba: Exploring state space models for fast video anomaly detection. arXiv preprint arXiv: 2503.21169.","DOI":"10.1109\/ICME59968.2025.11209020"},{"issue":"2","key":"10.1016\/j.neunet.2026.108951_bib0022","doi-asserted-by":"crossref","first-page":"127","DOI":"10.1504\/IJAACS.2024.137006","article-title":"Automated anomaly detection and multi-label anomaly classification in crowd scenes based on optimal thresholding and deep learning strategy","volume":"17","author":"Modi","year":"2024","journal-title":"International Journal of Autonomous and Adaptive Communications Systems"},{"key":"10.1016\/j.neunet.2026.108951_bib0023","series-title":"European conference on computer vision","first-page":"1","article-title":"Expanding language-image pretrained models for general video recognition","author":"Ni","year":"2022"},{"key":"10.1016\/j.neunet.2026.108951_bib0024","series-title":"Proceedings of the 40th annual meeting of the association for computational linguistics","first-page":"311","article-title":"Bleu: A method for automatic evaluation of machine translation","author":"Papineni","year":"2002"},{"issue":"4","key":"10.1016\/j.neunet.2026.108951_bib0025","doi-asserted-by":"crossref","first-page":"369","DOI":"10.1504\/IJAACS.2024.139403","article-title":"An abnormal behaviour recognition of MOOC online learning based on multidimensional data mining","volume":"17","author":"Qu","year":"2024","journal-title":"International Journal of Autonomous and Adaptive Communications Systems"},{"key":"10.1016\/j.neunet.2026.108951_bib0026","series-title":"International conference on machine learning","first-page":"8748","article-title":"Learning transferable visual models from natural language supervision","author":"Radford","year":"2021"},{"issue":"1","key":"10.1016\/j.neunet.2026.108951_bib0027","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1504\/IJAACS.2024.135931","article-title":"IoT-based vehicular accident detection using a deep learning model","volume":"17","author":"Rani","year":"2024","journal-title":"International Journal of Autonomous and Adaptive Communications Systems"},{"key":"10.1016\/j.neunet.2026.108951_bib0028","first-page":"582","article-title":"Support vector method for novelty detection","volume":"12","author":"Sch\u00f6lkopf","year":"1999","journal-title":"Advances in Neural Information Processing Systems"},{"key":"10.1016\/j.neunet.2026.108951_bib0029","series-title":"Proceedings of the IEEE conference on computer vision and pattern recognition","first-page":"6479","article-title":"Real-world anomaly detection in surveillance videos","author":"Sultani","year":"2018"},{"key":"10.1016\/j.neunet.2026.108951_bib0030","series-title":"Proceedings of the IEEE\/CVF international conference on computer vision","first-page":"4975","article-title":"Weakly-supervised video anomaly detection with robust temporal feature magnitude learning","author":"Tian","year":"2021"},{"key":"10.1016\/j.neunet.2026.108951_bib0031","unstructured":"Touvron, H., Lavril, T., Izacard, G., Martinet, X., Lachaux, M.-A., Lacroix, T., Rozi\u00e8re, B., Goyal, N., Hambro, E., Azhar, F. et al. (2023). LLAMA: Open and efficient foundation language models. arXiv preprint arXiv: 2302.13971."},{"key":"10.1016\/j.neunet.2026.108951_bib0032","series-title":"European conference on computer vision","first-page":"20","article-title":"Temporal segment networks: Towards good practices for deep action recognition","author":"Wang","year":"2016"},{"key":"10.1016\/j.neunet.2026.108951_bib0033","series-title":"Proceedings of the IEEE\/CVF international conference on computer vision","first-page":"6847","article-title":"End-to-end dense video captioning with parallel decoding","author":"Wang","year":"2021"},{"key":"10.1016\/j.neunet.2026.108951_bib0034","series-title":"Proceedings of the IEEE conference on computer vision and pattern recognition","first-page":"7794","article-title":"Non-local neural networks","author":"Wang","year":"2018"},{"key":"10.1016\/j.neunet.2026.108951_bib0035","series-title":"Proceedings of the IEEE\/CVF international conference on computer vision","first-page":"4581","article-title":"VATEX: A large-scale, high-quality multilingual dataset for video-and-language research","author":"Wang","year":"2019"},{"key":"10.1016\/j.neunet.2026.108951_bib0036","series-title":"Computer vision\u2013ECCV 2020: 16th european conference, glasgow, UK, august 23\u201328, 2020, proceedings, part XXX 16","first-page":"322","article-title":"Not only look, but also listen: Learning multimodal violence detection under weak supervision","author":"Wu","year":"2020"},{"key":"10.1016\/j.neunet.2026.108951_bib0037","doi-asserted-by":"crossref","first-page":"1674","DOI":"10.1109\/TMM.2022.3147369","article-title":"Weakly supervised audio-visual violence detection","volume":"25","author":"Wu","year":"2022","journal-title":"IEEE Transactions on Multimedia"},{"key":"10.1016\/j.neunet.2026.108951_bib0038","unstructured":"Wu, P., Pan, C., Yan, Y., Pang, G., Wang, P., & Zhang, Y. (2024a). Deep learning for video anomaly detection: A review. arXiv preprint arXiv: 2409.05383."},{"key":"10.1016\/j.neunet.2026.108951_bib0039","series-title":"Proceedings of the AAAI conference on artificial intelligence","first-page":"6074","article-title":"VadCLIP: Adapting vision-language models for weakly supervised video anomaly detection","volume":"vol. 38","author":"Wu","year":"2024"},{"key":"10.1016\/j.neunet.2026.108951_bib0040","series-title":"Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition","first-page":"18899","article-title":"Text prompt with normality guidance for weakly supervised video anomaly detection","author":"Yang","year":"2024"},{"key":"10.1016\/j.neunet.2026.108951_bib0041","series-title":"Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition","first-page":"18527","article-title":"Harnessing large language models for training-free video anomaly detection","author":"Zanella","year":"2024"},{"key":"10.1016\/j.neunet.2026.108951_bib0042","doi-asserted-by":"crossref","DOI":"10.1016\/j.eswa.2025.127154","article-title":"MSTAgent-VAD: Multi-scale video anomaly detection using time agent mechanism for segments\u2019 temporal context mining","volume":"276","author":"Zhao","year":"2025","journal-title":"Expert Systems with Applications"},{"key":"10.1016\/j.neunet.2026.108951_bib0043","series-title":"Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition","first-page":"1237","article-title":"Graph convolutional label noise cleaner: Train a plug-and-play action classifier for anomaly detection","author":"Zhong","year":"2019"},{"key":"10.1016\/j.neunet.2026.108951_bib0044","series-title":"Proceedings of the AAAI conference on artificial intelligence","first-page":"3769","article-title":"Dual memory units with uncertainty regulation for weakly supervised video anomaly detection","volume":"vol. 37","author":"Zhou","year":"2023"},{"issue":"9","key":"10.1016\/j.neunet.2026.108951_bib0045","doi-asserted-by":"crossref","first-page":"2337","DOI":"10.1007\/s11263-022-01653-1","article-title":"Learning to prompt for vision-language models","volume":"130","author":"Zhou","year":"2022","journal-title":"International Journal of Computer Vision"},{"issue":"12","key":"10.1016\/j.neunet.2026.108951_bib0046","doi-asserted-by":"crossref","first-page":"13642","DOI":"10.1109\/TCSVT.2024.3450734","article-title":"Batchnorm-based weakly supervised video anomaly detection","volume":"34","author":"Zhou","year":"2024","journal-title":"IEEE Transactions on Circuits and Systems for Video Technology"},{"key":"10.1016\/j.neunet.2026.108951_bib0047","series-title":"Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition","first-page":"11175","article-title":"ZegCLIP: Towards adapting clip for zero-shot semantic segmentation","author":"Zhou","year":"2023"},{"key":"10.1016\/j.neunet.2026.108951_bib0048","unstructured":"Zhu, L., Liao, B., Zhang, Q., Wang, X., Liu, W., & Wang, X. (2024). Vision Mamba: Efficient visual representation learning with bidirectional state space model. arXiv preprint arXiv: 2401.09417."}],"container-title":["Neural Networks"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0893608026004120?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0893608026004120?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2026,4,15]],"date-time":"2026-04-15T02:32:48Z","timestamp":1776220368000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S0893608026004120"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,9]]},"references-count":49,"alternative-id":["S0893608026004120"],"URL":"https:\/\/doi.org\/10.1016\/j.neunet.2026.108951","relation":{},"ISSN":["0893-6080"],"issn-type":[{"value":"0893-6080","type":"print"}],"subject":[],"published":{"date-parts":[[2026,9]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"TrCLIP-VAD : Weak supervised video anomaly detection by improving CLIP training with text rewriting","name":"articletitle","label":"Article Title"},{"value":"Neural Networks","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.neunet.2026.108951","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2026 Elsevier Ltd. All rights are reserved, including those for text and data mining, AI training, and similar technologies.","name":"copyright","label":"Copyright"}],"article-number":"108951"}}