{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,9,24]],"date-time":"2025-09-24T09:40:22Z","timestamp":1758706822808,"version":"3.40.4"},"reference-count":97,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"5","license":[{"start":{"date-parts":[[2025,5,1]],"date-time":"2025-05-01T00:00:00Z","timestamp":1746057600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2025,5,1]],"date-time":"2025-05-01T00:00:00Z","timestamp":1746057600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,5,1]],"date-time":"2025-05-01T00:00:00Z","timestamp":1746057600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62125603","62373043"],"award-info":[{"award-number":["62125603","62373043"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Young Elite Scientists Sponsorship Program by CAST","award":["2023QNRC001"],"award-info":[{"award-number":["2023QNRC001"]}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Pattern Anal. Mach. Intell."],"published-print":{"date-parts":[[2025,5]]},"DOI":"10.1109\/tpami.2025.3538675","type":"journal-article","created":{"date-parts":[[2025,2,4]],"date-time":"2025-02-04T18:35:08Z","timestamp":1738694108000},"page":"3863-3877","source":"Crossref","is-referenced-by-count":2,"title":["Transferable Unintentional Action Localization With Language-Guided Intention Translation"],"prefix":"10.1109","volume":"47","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-1553-5441","authenticated-orcid":false,"given":"Jinglin","family":"Xu","sequence":"first","affiliation":[{"name":"School of Intelligence Science and Technology, University of Science and Technology, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3952-8753","authenticated-orcid":false,"given":"Yongming","family":"Rao","sequence":"additional","affiliation":[{"name":"Department of Automation, Tsinghua University, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7701-234X","authenticated-orcid":false,"given":"Jie","family":"Zhou","sequence":"additional","affiliation":[{"name":"Department of Automation, Tsinghua University, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6121-5529","authenticated-orcid":false,"given":"Jiwen","family":"Lu","sequence":"additional","affiliation":[{"name":"Department of Automation, Tsinghua University, Beijing, China"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-46484-8_2"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.502"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00685"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00630"},{"article-title":"A large scale urban surveillance video dataset for multiple-object tracking and behavior analysis","year":"2019","author":"Yin","key":"ref5"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.23919\/cje.2021.00.350"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1049\/cje.2021.00.455"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.23919\/cje.2021.00.093"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.4018\/978-1-6684-3662-2.ch076"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1049\/cje.2020.00.088"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2013.441"},{"article-title":"Two-stream convolutional networks for action recognition in videos","year":"2014","author":"Simonyan","key":"ref12"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.510"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.213"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.502"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2017.2712608"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00067"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00269"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00446"},{"article-title":"Videomae: Masked autoencoders are data-efficient learners for self-supervised video pre-training","year":"2022","author":"Tong","key":"ref20"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.293"},{"article-title":"Temporal activity detection in untrimmed videos with recurrent neural networks","year":"2016","author":"Montes","key":"ref22"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.317"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00399"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2021.3090167"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2021.3132058"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/cac53003.2021.9727692"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01136"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.emnlp-main.544"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19772-7_29"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/WACV.2019.00161"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00039"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2019.2927118"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00643"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00986"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.239"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00805"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1145\/3025171.3025213"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00635"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58452-8_41"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01325"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.02042"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2017\/180"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2017.2728788"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00711"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00100"},{"article-title":"Video understanding using multimodal deep learning","year":"2020","author":"Nagrani","key":"ref47"},{"key":"ref48","first-page":"8748","article-title":"Learning transferable visual models from natural language supervision","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Radford"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-022-01653-1"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01631"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-023-01891-x"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19812-0_5"},{"article-title":"Mind the gap: Understanding the modality gap in multi-modal contrastive representation learning","year":"2022","author":"Liang","key":"ref53"},{"article-title":"UniCLIP: Unified framework for contrastive language-image pre-training","year":"2022","author":"Lee","key":"ref54"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00276"},{"key":"ref56","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01629"},{"key":"ref57","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00679"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.02586"},{"key":"ref59","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01755"},{"key":"ref60","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00444"},{"article-title":"Open-vocabulary panoptic segmentation with maskclip","year":"2022","author":"Ding","key":"ref61"},{"key":"ref62","doi-asserted-by":"publisher","DOI":"10.1109\/icassp49660.2025.10888259"},{"key":"ref63","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00682"},{"key":"ref64","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01519"},{"key":"ref65","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00490"},{"key":"ref66","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01501"},{"key":"ref67","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01496"},{"key":"ref68","doi-asserted-by":"publisher","DOI":"10.1145\/3474085.3479207"},{"key":"ref69","first-page":"17980","article-title":"Scaling up vision-language pre-training for image captioning","volume-title":"Proc. IEEE\/CVF Conf. Comput. Vis. Pattern Recognit.","author":"Hu"},{"article-title":"Actionclip: A new paradigm for video action recognition","year":"2021","author":"Wang","key":"ref70"},{"key":"ref71","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19833-5_7"},{"key":"ref72","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19772-7_1"},{"key":"ref73","doi-asserted-by":"publisher","DOI":"10.1109\/ICCVW60793.2023.00302"},{"article-title":"Ucsd anomaly detection dataset","year":"2013","author":"Weixin Li","key":"ref74"},{"key":"ref75","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2013.338"},{"key":"ref76","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00684"},{"key":"ref77","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00678"},{"key":"ref78","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58577-8_20"},{"key":"ref79","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01951"},{"key":"ref80","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00179"},{"key":"ref81","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00803"},{"key":"ref82","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00136"},{"key":"ref83","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01219"},{"key":"ref84","doi-asserted-by":"publisher","DOI":"10.1109\/ICCVW54120.2021.00028"},{"key":"ref85","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01255"},{"key":"ref86","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01333"},{"key":"ref87","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00493"},{"key":"ref88","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01433"},{"key":"ref89","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00321"},{"article-title":"BiPOCO: Bi-directional trajectory prediction with pose constraints for pedestrian anomaly detection","year":"2022","author":"Kanu-Asiegbu","key":"ref90"},{"key":"ref91","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01104"},{"key":"ref92","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2022.3166278"},{"key":"ref93","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2022.3163544"},{"key":"ref94","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW56347.2022.00384"},{"key":"ref95","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.01058"},{"article-title":"Representation learning with contrastive predictive coding","year":"2018","author":"Oord","key":"ref96"},{"article-title":"SGDR: Stochastic gradient descent with warm restarts","year":"2016","author":"Loshchilov","key":"ref97"}],"container-title":["IEEE Transactions on Pattern Analysis and Machine Intelligence"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/34\/10958761\/10872809.pdf?arnumber=10872809","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,4,14]],"date-time":"2025-04-14T18:18:52Z","timestamp":1744654732000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10872809\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,5]]},"references-count":97,"journal-issue":{"issue":"5"},"URL":"https:\/\/doi.org\/10.1109\/tpami.2025.3538675","relation":{},"ISSN":["0162-8828","2160-9292","1939-3539"],"issn-type":[{"type":"print","value":"0162-8828"},{"type":"electronic","value":"2160-9292"},{"type":"electronic","value":"1939-3539"}],"subject":[],"published":{"date-parts":[[2025,5]]}}}