{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,8,21]],"date-time":"2025-08-21T17:23:33Z","timestamp":1755797013381,"version":"3.40.5"},"reference-count":64,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","license":[{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"name":"National Key Research and Development Program of China","award":["2022YFE0133100"],"award-info":[{"award-number":["2022YFE0133100"]}]},{"name":"Hong Kong RGC General Research Fund","award":["152211\/23E","15216424\/24E"],"award-info":[{"award-number":["152211\/23E","15216424\/24E"]}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62102327"],"award-info":[{"award-number":["62102327"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"name":"PolyU Internal Fund","award":["P0043932"],"award-info":[{"award-number":["P0043932"]}]},{"DOI":"10.13039\/100030672","name":"NVIDIA AI Technology Center, University of Florida","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100030672","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Multimedia"],"published-print":{"date-parts":[[2025]]},"DOI":"10.1109\/tmm.2024.3521718","type":"journal-article","created":{"date-parts":[[2024,12,23]],"date-time":"2024-12-23T19:23:06Z","timestamp":1734981786000},"page":"2293-2303","source":"Crossref","is-referenced-by-count":3,"title":["Vision-Language Meets the Skeleton: Progressively Distillation With Cross-Modal Knowledge for 3D Action Representation Learning"],"prefix":"10.1109","volume":"27","author":[{"ORCID":"https:\/\/orcid.org\/0009-0008-5752-1639","authenticated-orcid":false,"given":"Yang","family":"Chen","sequence":"first","affiliation":[{"name":"School of Information and Communication Engineering, University of Electronic Science and Technology of China, Chengdu, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-1404-7183","authenticated-orcid":false,"given":"Tian","family":"He","sequence":"additional","affiliation":[{"name":"School of Automation Engineering, University of Electronic Science and Technology of China, Chengdu, China"}]},{"given":"Junfeng","family":"Fu","sequence":"additional","affiliation":[{"name":"School of Information and Communication Engineering, University of Electronic Science and Technology of China, Chengdu, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7007-4740","authenticated-orcid":false,"given":"Ling","family":"Wang","sequence":"additional","affiliation":[{"name":"School of Information and Communication Engineering, University of Electronic Science and Technology of China, Chengdu, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0449-4525","authenticated-orcid":false,"given":"Jingcai","family":"Guo","sequence":"additional","affiliation":[{"name":"Department of Computing, The Hong Kong Polytechnic University, Hong Kong, China"}]},{"given":"Ting","family":"Hu","sequence":"additional","affiliation":[{"name":"No.1 Orthopedic Hospital of Chengdu, Chengdu, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5532-9530","authenticated-orcid":false,"given":"Hong","family":"Cheng","sequence":"additional","affiliation":[{"name":"School of Automation Engineering, University of Electronic Science and Technology of China, Chengdu, China"}]}],"member":"263","reference":[{"doi-asserted-by":"publisher","key":"ref1","DOI":"10.1109\/CVPR52729.2023.00626"},{"doi-asserted-by":"publisher","key":"ref2","DOI":"10.1109\/MMUL.2012.24"},{"doi-asserted-by":"publisher","key":"ref3","DOI":"10.1109\/ICCV48922.2021.01311"},{"doi-asserted-by":"publisher","key":"ref4","DOI":"10.1109\/CVPR52688.2022.01955"},{"doi-asserted-by":"publisher","key":"ref5","DOI":"10.1109\/ICCV51070.2023.00941"},{"doi-asserted-by":"publisher","key":"ref6","DOI":"10.1109\/CVPR46437.2021.00471"},{"doi-asserted-by":"publisher","key":"ref7","DOI":"10.1109\/ICCV51070.2023.01279"},{"doi-asserted-by":"publisher","key":"ref8","DOI":"10.1109\/CVPR52729.2023.00234"},{"key":"ref9","first-page":"8748","article-title":"Learning transferable visual models from natural language supervision","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Radford","year":"2021"},{"year":"2021","author":"Wang","article-title":"ActionCLIP: A new paradigm for video action recognition","key":"ref10"},{"doi-asserted-by":"publisher","key":"ref11","DOI":"10.1109\/ICCV51070.2023.00943"},{"doi-asserted-by":"publisher","key":"ref12","DOI":"10.1007\/978-3-031-72970-6_3"},{"key":"ref13","first-page":"49250","article-title":"Visual instruction tuning","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Liu","year":"2023"},{"doi-asserted-by":"publisher","key":"ref14","DOI":"10.1145\/2964284.2967191"},{"doi-asserted-by":"publisher","key":"ref15","DOI":"10.1016\/j.patcog.2017.02.030"},{"doi-asserted-by":"publisher","key":"ref16","DOI":"10.1007\/978-3-319-46487-9_50"},{"doi-asserted-by":"publisher","key":"ref17","DOI":"10.1109\/ICCV.2017.233"},{"doi-asserted-by":"publisher","key":"ref18","DOI":"10.1109\/TPAMI.2022.3157033"},{"doi-asserted-by":"publisher","key":"ref19","DOI":"10.1109\/ICCV48922.2021.01317"},{"doi-asserted-by":"publisher","key":"ref20","DOI":"10.1109\/ICMEW59549.2023.00045"},{"doi-asserted-by":"publisher","key":"ref21","DOI":"10.1109\/TMM.2023.3253048"},{"doi-asserted-by":"publisher","key":"ref22","DOI":"10.1109\/TMM.2021.3129616"},{"doi-asserted-by":"publisher","key":"ref23","DOI":"10.1109\/TMM.2023.3307933"},{"doi-asserted-by":"publisher","key":"ref24","DOI":"10.1109\/TMM.2024.3405712"},{"doi-asserted-by":"publisher","key":"ref25","DOI":"10.1109\/JIOT.2022.3190020"},{"key":"ref26","first-page":"4904","article-title":"Scaling up visual and vision-language representation learning with noisy text supervision","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Jia","year":"2021"},{"doi-asserted-by":"publisher","key":"ref27","DOI":"10.1109\/IJCNN54540.2023.10191686"},{"doi-asserted-by":"publisher","key":"ref28","DOI":"10.1109\/ICCVW60793.2023.00086"},{"doi-asserted-by":"publisher","key":"ref29","DOI":"10.1109\/ICME55011.2023.00452"},{"doi-asserted-by":"publisher","key":"ref30","DOI":"10.1007\/978-3-031-20047-2_21"},{"doi-asserted-by":"publisher","key":"ref31","DOI":"10.1109\/CVPR52729.2023.00640"},{"doi-asserted-by":"publisher","key":"ref32","DOI":"10.1109\/ICME55011.2023.00014"},{"doi-asserted-by":"publisher","key":"ref33","DOI":"10.1109\/ICCV48922.2021.00935"},{"doi-asserted-by":"publisher","key":"ref34","DOI":"10.1109\/CVPR52729.2023.00673"},{"doi-asserted-by":"publisher","key":"ref35","DOI":"10.1109\/CVPR52688.2022.01594"},{"key":"ref36","first-page":"78496","article-title":"CWCL: Cross-modal transfer with continuously weighted contrastive loss","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Srinivasa","year":"2023"},{"doi-asserted-by":"publisher","key":"ref37","DOI":"10.1109\/CVPR46437.2021.01274"},{"doi-asserted-by":"publisher","key":"ref38","DOI":"10.1609\/aaai.v38i3.27955"},{"doi-asserted-by":"publisher","key":"ref39","DOI":"10.1609\/aaai.v38i16.29789"},{"doi-asserted-by":"publisher","key":"ref40","DOI":"10.1609\/aaai.v36i1.19957"},{"year":"2018","author":"Oord","article-title":"Representation learning with contrastive predictive coding","key":"ref41"},{"doi-asserted-by":"publisher","key":"ref42","DOI":"10.1109\/ICCV48922.2021.00650"},{"doi-asserted-by":"publisher","key":"ref43","DOI":"10.1109\/CVPR42600.2020.01465"},{"doi-asserted-by":"publisher","key":"ref44","DOI":"10.1109\/CVPR.2016.115"},{"doi-asserted-by":"publisher","key":"ref45","DOI":"10.1109\/TPAMI.2019.2916873"},{"year":"2017","author":"Chunhui","article-title":"PKU-MMD: A large scale benchmark for continuous multi-modal human action understanding","key":"ref46"},{"doi-asserted-by":"publisher","key":"ref47","DOI":"10.1007\/978-3-031-19772-7_3"},{"doi-asserted-by":"publisher","key":"ref48","DOI":"10.1007\/978-3-031-20062-5_42"},{"doi-asserted-by":"publisher","key":"ref49","DOI":"10.1109\/CVPR52729.2023.01807"},{"doi-asserted-by":"publisher","key":"ref50","DOI":"10.1109\/TIP.2023.3338410"},{"doi-asserted-by":"publisher","key":"ref51","DOI":"10.1145\/3581783.3612449"},{"doi-asserted-by":"publisher","key":"ref52","DOI":"10.1145\/3581783.3611774"},{"doi-asserted-by":"publisher","key":"ref53","DOI":"10.1609\/aaai.v32i1.12328"},{"doi-asserted-by":"publisher","key":"ref54","DOI":"10.1109\/CVPR42600.2020.00026"},{"doi-asserted-by":"publisher","key":"ref55","DOI":"10.1007\/978-3-030-58571-6_3"},{"doi-asserted-by":"publisher","key":"ref56","DOI":"10.1145\/3394171.3413548"},{"doi-asserted-by":"publisher","key":"ref57","DOI":"10.1109\/ICCV48922.2021.01308"},{"doi-asserted-by":"publisher","key":"ref58","DOI":"10.1145\/3474085.3475307"},{"doi-asserted-by":"publisher","key":"ref59","DOI":"10.1007\/978-3-031-19809-0_11"},{"doi-asserted-by":"publisher","key":"ref60","DOI":"10.1609\/aaai.v37i1.25127"},{"doi-asserted-by":"publisher","key":"ref61","DOI":"10.1609\/aaai.v32i1.11853"},{"doi-asserted-by":"publisher","key":"ref62","DOI":"10.1109\/CVPR42600.2020.00965"},{"doi-asserted-by":"publisher","key":"ref63","DOI":"10.1609\/aaai.v37i3.25451"},{"issue":"11","key":"ref64","first-page":"2579","article-title":"Visualizing data using t-SNE","volume":"9","author":"Maaten","year":"2008","journal-title":"J. Mach. Learn. Res."}],"container-title":["IEEE Transactions on Multimedia"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/6046\/10844992\/10812782.pdf?arnumber=10812782","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,5,13]],"date-time":"2025-05-13T17:47:37Z","timestamp":1747158457000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10812782\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025]]},"references-count":64,"URL":"https:\/\/doi.org\/10.1109\/tmm.2024.3521718","relation":{},"ISSN":["1520-9210","1941-0077"],"issn-type":[{"type":"print","value":"1520-9210"},{"type":"electronic","value":"1941-0077"}],"subject":[],"published":{"date-parts":[[2025]]}}}