{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,5]],"date-time":"2026-06-05T16:10:51Z","timestamp":1780675851877,"version":"3.54.1"},"reference-count":32,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","license":[{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"name":"State Key Development Program in 14th Five-Year","award":["2021YFF0900701"],"award-info":[{"award-number":["2021YFF0900701"]}]},{"name":"State Key Development Program in 14th Five-Year","award":["2021YFF0602103"],"award-info":[{"award-number":["2021YFF0602103"]}]},{"name":"State Key Development Program in 14th Five-Year","award":["2021YFF0602102"],"award-info":[{"award-number":["2021YFF0602102"]}]},{"name":"State Key Development Program in 14th Five-Year","award":["2021QY1702"],"award-info":[{"award-number":["2021QY1702"]}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61801441"],"award-info":[{"award-number":["61801441"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Signal Process. Lett."],"published-print":{"date-parts":[[2024]]},"DOI":"10.1109\/lsp.2024.3356910","type":"journal-article","created":{"date-parts":[[2024,1,22]],"date-time":"2024-01-22T18:53:06Z","timestamp":1705949586000},"page":"476-480","source":"Crossref","is-referenced-by-count":11,"title":["Violent Video Recognition Based on Global-Local Visual and Audio Contrastive Learning"],"prefix":"10.1109","volume":"31","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-0917-001X","authenticated-orcid":false,"given":"Zihao","family":"Liu","sequence":"first","affiliation":[{"name":"State Key Laboratory of Media Convergence and Communication, Communication University of China, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3481-7820","authenticated-orcid":false,"given":"Xiaoyu","family":"Wu","sequence":"additional","affiliation":[{"name":"State Key Laboratory of Media Convergence and Communication, Communication University of China, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7809-1932","authenticated-orcid":false,"given":"Shengjin","family":"Wang","sequence":"additional","affiliation":[{"name":"Department of Electronic Engineering, Tsinghua University, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-9262-2117","authenticated-orcid":false,"given":"Yimeng","family":"Shang","sequence":"additional","affiliation":[{"name":"State Key Laboratory of Media Convergence and Communication, Communication University of China, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/SMC42975.2020.9282971"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2019.8682833"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1142\/s0218001422550023"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1145\/3465481.3470059"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP40776.2020.9054018"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2020.2992617"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.3390\/electronics10212654"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/LSP.2022.3227818"},{"key":"ref9","article-title":"Representation learning with contrastive predictive coding","author":"Oord","year":"2018"},{"key":"ref10","first-page":"8748","article-title":"Learning transferable visual models from natural language supervision","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Radford","year":"2021"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/LSP.2022.3178899"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/LSP.2023.3313090"},{"key":"ref13","first-page":"7774","article-title":"Cooperative learning of audio and video models from self-supervised synchronization","volume-title":"Proc. 32nd Int. Conf. Neural Inf. Process. Syst.","author":"Korbar","year":"2018"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01229"},{"key":"ref15","first-page":"25","article-title":"Self-supervised multimodal versatile networks","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Alayrac","year":"2020"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v37i8.26162"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00129"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/iccv.2019.00630"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00718"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-18913-5_48"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00675"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1007\/s11042-014-1984-4"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW.2012.6239348"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58548-8_5"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/AVSS.2017.8078468"},{"key":"ref27","article-title":"Improving video violence recognition with human interaction learning on 3D skeleton point clouds","author":"Su","year":"2023"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1117\/1.JEI.31.4.043030"},{"key":"ref29","first-page":"1177","article-title":"Special video classification based on multitask learning and multimodal feature fusion","volume":"32","author":"Wu","year":"2020","journal-title":"Opt. Precis. Eng."},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1371\/journal.pone.0276939"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2022.09.090"},{"key":"ref32","first-page":"3225","article-title":"Special video recognition based on semantic embedding learning","volume":"51","author":"Wu","year":"2023","journal-title":"Acta Electronica Sinica"}],"container-title":["IEEE Signal Processing Letters"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/97\/10380231\/10411036.pdf?arnumber=10411036","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,2,6]],"date-time":"2024-02-06T22:37:57Z","timestamp":1707259077000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10411036\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024]]},"references-count":32,"URL":"https:\/\/doi.org\/10.1109\/lsp.2024.3356910","relation":{},"ISSN":["1070-9908","1558-2361"],"issn-type":[{"value":"1070-9908","type":"print"},{"value":"1558-2361","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024]]}}}