{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,6]],"date-time":"2026-05-06T16:18:41Z","timestamp":1778084321353,"version":"3.51.4"},"publisher-location":"Singapore","reference-count":23,"publisher":"Springer Nature Singapore","isbn-type":[{"value":"9789819681693","type":"print"},{"value":"9789819681709","type":"electronic"}],"license":[{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-981-96-8170-9_10","type":"book-chapter","created":{"date-parts":[[2025,6,14]],"date-time":"2025-06-14T18:50:10Z","timestamp":1749927010000},"page":"121-133","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["MDVAD: Multimodal Diffusion for\u00a0Video Anomaly Detection"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0009-0000-0177-6802","authenticated-orcid":false,"given":"Kijung","family":"Lee","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2773-7670","authenticated-orcid":false,"given":"Youngwan","family":"Jo","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9881-5731","authenticated-orcid":false,"given":"Sunghyun","family":"Ahn","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5196-6193","authenticated-orcid":false,"given":"Sanghyun","family":"Park","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,6,15]]},"reference":[{"key":"10_CR1","doi-asserted-by":"crossref","unstructured":"Acsintoae, A., et al.: UBnormal: new benchmark for supervised open-set video anomaly detection. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 20143\u201320153 (2022)","DOI":"10.1109\/CVPR52688.2022.01951"},{"key":"10_CR2","doi-asserted-by":"publisher","first-page":"103656","DOI":"10.1016\/j.cviu.2023.103656","volume":"229","author":"A B\u0103rb\u0103l\u0103u","year":"2022","unstructured":"B\u0103rb\u0103l\u0103u, A., et al.: SSMTL++: revisiting self-supervised multi-task learning for video anomaly detection. Comput. Vis. Image Underst. 229, 103656 (2022)","journal-title":"Comput. Vis. Image Underst."},{"key":"10_CR3","doi-asserted-by":"crossref","unstructured":"Carreira, J., Zisserman, A.: Quo Vadis, action recognition? A new model and the kinetics dataset. In: proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 6299\u20136308 (2017)","DOI":"10.1109\/CVPR.2017.502"},{"key":"10_CR4","doi-asserted-by":"crossref","unstructured":"Chen, W., Ma, K.T., Yew, Z.J., Hur, M., Khoo, D.A.A.: TEVAD: improved video anomaly detection with captions. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 5548\u20135558 (2023)","DOI":"10.1109\/CVPRW59228.2023.00587"},{"key":"10_CR5","doi-asserted-by":"crossref","unstructured":"Duan, H., Zhao, Y., Chen, K., Lin, D., Dai, B.: Revisiting skeleton-based action recognition. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 2969\u20132978 (2022)","DOI":"10.1109\/CVPR52688.2022.00298"},{"key":"10_CR6","doi-asserted-by":"publisher","first-page":"110817","DOI":"10.1016\/j.patcog.2024.110817","volume":"156","author":"A Flaborea","year":"2024","unstructured":"Flaborea, A., di Melendugno, G., D\u2019arrigo, S., Sterpa, M.A., Sampieri, A., Galasso, F.: Contracting skeletal kinematics for human-related video anomaly detection. Pattern Recogn. 156, 110817 (2024)","journal-title":"Pattern Recogn."},{"key":"10_CR7","doi-asserted-by":"crossref","unstructured":"Gao, T., Yao, X., Chen, D.: SimCSE: simple contrastive learning of sentence embeddings. arXiv preprint arXiv:2104.08821 (2021)","DOI":"10.18653\/v1\/2021.emnlp-main.552"},{"key":"10_CR8","unstructured":"Ge, Z., Liu, S., Wang, F., Li, Z., Sun, J.: YOLOx: exceeding yolo series in 2021. arXiv preprint arXiv:2107.08430 (2021)"},{"key":"10_CR9","first-page":"4505","volume":"44","author":"MI Georgescu","year":"2020","unstructured":"Georgescu, M.I., Ionescu, R.T., Khan, F.S., Popescu, M.C., Shah, M.: A background-agnostic framework with adversarial training for abnormal event detection in video. IEEE Trans. Pattern Anal. Mach. Intell. 44, 4505\u20134523 (2020)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"10_CR10","unstructured":"Kanu-Asiegbu, A.M., Vasudevan, R., Du, X.: BiPOCO: bi-directional trajectory prediction with pose constraints for pedestrian anomaly detection. arXiv preprint arXiv:2207.02281 (2022)"},{"key":"10_CR11","doi-asserted-by":"publisher","first-page":"482","DOI":"10.1016\/j.neucom.2021.12.023","volume":"490","author":"N Li","year":"2022","unstructured":"Li, N., Chang, F., Liu, C.: Human-related anomalous event detection via spatial-temporal graph convolutional autoencoder with embedded long short-term memory network. Neurocomputing 490, 482\u2013494 (2022)","journal-title":"Neurocomputing"},{"key":"10_CR12","doi-asserted-by":"crossref","unstructured":"Lin, K., et al.: SwinBERT: end-to-end transformers with sparse attention for video captioning. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 17949\u201317958 (2022)","DOI":"10.1109\/CVPR52688.2022.01742"},{"issue":"11","key":"10_CR13","doi-asserted-by":"publisher","first-page":"7505","DOI":"10.1109\/TPAMI.2021.3129349","volume":"44","author":"W Luo","year":"2021","unstructured":"Luo, W., Liu, W., Lian, D., Gao, S.: Future frame prediction network for video anomaly detection. IEEE Trans. Pattern Anal. Mach. Intell. 44(11), 7505\u20137520 (2021)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"10_CR14","doi-asserted-by":"crossref","unstructured":"Markovitz, A., Sharir, G., Friedman, I., Zelnik-Manor, L., Avidan, S.: Graph embedded pose clustering for anomaly detection. In: 2020 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 10536\u201310544 (2019)","DOI":"10.1109\/CVPR42600.2020.01055"},{"key":"10_CR15","doi-asserted-by":"crossref","unstructured":"Morais, R., Le, V., Tran, T., Saha, B., Mansour, M.R., Venkatesh, S.: Learning regularity in skeleton trajectories for anomaly detection in videos. In: 2019 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 11988\u201311996 (2019)","DOI":"10.1109\/CVPR.2019.01227"},{"key":"10_CR16","doi-asserted-by":"crossref","unstructured":"Ristea, N.C., et al.: Self-distilled masked auto-encoders are efficient video anomaly detectors. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 15984\u201315995 (2024)","DOI":"10.1109\/CVPR52733.2024.01513"},{"key":"10_CR17","doi-asserted-by":"crossref","unstructured":"Rombach, R., Blattmann, A., Lorenz, D., Esser, P., Ommer, B.: High-resolution image synthesis with latent diffusion models. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp. 10684\u201310695 (2022)","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"10_CR18","doi-asserted-by":"crossref","unstructured":"Sun, S., Gong, X.: Hierarchical semantic contrast for scene-aware video anomaly detection. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 22846\u201322856 (2023)","DOI":"10.1109\/CVPR52729.2023.02188"},{"issue":"10","key":"10_CR19","doi-asserted-by":"publisher","first-page":"3349","DOI":"10.1109\/TPAMI.2020.2983686","volume":"43","author":"J Wang","year":"2020","unstructured":"Wang, J., et al.: Deep high-resolution representation learning for visual recognition. IEEE Trans. Pattern Anal. Mach. Intell. 43(10), 3349\u20133364 (2020)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"10_CR20","doi-asserted-by":"crossref","unstructured":"Yan, C., Zhang, S., Liu, Y., Pang, G., Wang, W.: Feature prediction diffusion model for video anomaly detection. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 5527\u20135537 (2023)","DOI":"10.1109\/ICCV51070.2023.00509"},{"key":"10_CR21","doi-asserted-by":"crossref","unstructured":"Zhang, M., et al.: Multi-scale video anomaly detection by multi-grained spatio-temporal representation learning. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 17385\u201317394 (2024)","DOI":"10.1109\/CVPR52733.2024.01646"},{"key":"10_CR22","doi-asserted-by":"publisher","unstructured":"Zhang, Y., et al.: Bytetrack: multi-object tracking by associating every detection box. In: European Conference on Computer Vision, pp. 1\u201321. Springer (2022). https:\/\/doi.org\/10.1007\/978-3-031-20047-2_1","DOI":"10.1007\/978-3-031-20047-2_1"},{"key":"10_CR23","doi-asserted-by":"crossref","unstructured":"Zhang, Y., Zhou, D., Chen, S., Gao, S., Ma, Y.: Single-image crowd counting via multi-column convolutional neural network. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 589\u2013597 (2016)","DOI":"10.1109\/CVPR.2016.70"}],"container-title":["Lecture Notes in Computer Science","Advances in Knowledge Discovery and Data Mining"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-96-8170-9_10","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,14]],"date-time":"2025-06-14T18:50:16Z","timestamp":1749927016000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-96-8170-9_10"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025]]},"ISBN":["9789819681693","9789819681709"],"references-count":23,"URL":"https:\/\/doi.org\/10.1007\/978-981-96-8170-9_10","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025]]},"assertion":[{"value":"15 June 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"PAKDD","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Pacific-Asia Conference on Knowledge Discovery and Data Mining","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Sydney, NSW","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Australia","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"10 June 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"13 June 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"pakdd2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/pakdd2025.org\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}