{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,9,11]],"date-time":"2025-09-11T22:39:06Z","timestamp":1757630346880,"version":"3.44.0"},"reference-count":57,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"9","license":[{"start":{"date-parts":[[2025,9,1]],"date-time":"2025-09-01T00:00:00Z","timestamp":1756684800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2025,9,1]],"date-time":"2025-09-01T00:00:00Z","timestamp":1756684800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,9,1]],"date-time":"2025-09-01T00:00:00Z","timestamp":1756684800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62303428","U22B2034"],"award-info":[{"award-number":["62303428","U22B2034"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"name":"\"Pioneer\" and \"Leading Goose\" Research and Development Program of Zhejiang","award":["2024C01104"],"award-info":[{"award-number":["2024C01104"]}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Circuits Syst. Video Technol."],"published-print":{"date-parts":[[2025,9]]},"DOI":"10.1109\/tcsvt.2025.3553525","type":"journal-article","created":{"date-parts":[[2025,3,21]],"date-time":"2025-03-21T20:51:28Z","timestamp":1742590288000},"page":"8807-8818","source":"Crossref","is-referenced-by-count":0,"title":["M<sup>3<\/sup>CS: Multi-Target Masked Point Modeling With Learnable Codebook and Siamese Decoders"],"prefix":"10.1109","volume":"35","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-2848-6079","authenticated-orcid":false,"given":"Qibo","family":"Qiu","sequence":"first","affiliation":[{"name":"State Key Laboratory of CAD&#x0026;CG, Zhejiang University, Hangzhou, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Honghui","family":"Yang","sequence":"additional","affiliation":[{"name":"State Key Laboratory of CAD&#x0026;CG, Zhejiang University, Hangzhou, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jian","family":"Jiang","sequence":"additional","affiliation":[{"name":"China Mobile (Zhejiang) Research and Innovation Institute, Hangzhou, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Shun","family":"Zhang","sequence":"additional","affiliation":[{"name":"Zhejiang Lab, Hangzhou, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7832-2518","authenticated-orcid":false,"given":"Haochao","family":"Ying","sequence":"additional","affiliation":[{"name":"Department of Big Data in Health Science, School of Public Health, and the Department of Breast Surgery and Oncology, Second Affiliated Hospital, School of Medicine, Zhejiang University, Hangzhou, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7720-1749","authenticated-orcid":false,"given":"Haiming","family":"Gao","sequence":"additional","affiliation":[{"name":"ZJU-Hangzhou Global Scientific and Technological Innovation Center, Zhejiang University, Hangzhou, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6399-292X","authenticated-orcid":false,"given":"Wenxiao","family":"Wang","sequence":"additional","affiliation":[{"name":"School of Software Technology, Zhejiang University, Hangzhou, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-9107-2354","authenticated-orcid":false,"given":"Xiaofei","family":"He","sequence":"additional","affiliation":[{"name":"State Key Laboratory of CAD&#x0026;CG, Zhejiang University, Hangzhou, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1145\/3240508.3240621"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/QoMEX48832.2020.9123121"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2021.3082763"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00047"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2023.3237328"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01553"},{"key":"ref7","first-page":"1298","article-title":"data2vec: A general framework for self-supervised learning in speech, vision and language","volume-title":"Proc. Int. Conf. Mach. Learn. (ICML)","volume":"162","author":"Baevski"},{"key":"ref8","article-title":"BERT: Pre-training of deep bidirectional transformers for language understanding","author":"Devlin","year":"2018","journal-title":"arXiv:1810.04805"},{"key":"ref9","article-title":"Self-supervised learning for pre-training 3D point clouds: A survey","author":"Fei","year":"2023","journal-title":"arXiv:2305.04691"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00907"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-20086-1_35"},{"key":"ref12","first-page":"27061","article-title":"Point-M2AE: Multi-scale masked autoencoders for hierarchical point cloud pre-training","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Zhang"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2024.3405069"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2023.3285803"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-023-01852-4"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00213"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/MRA.2012.2206675"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2020.3023051"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2024.3364175"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2024.3370001"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1145\/3581783.3612224"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2020.3048623"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00660"},{"key":"ref24","article-title":"Explore in-context learning for 3D point cloud understanding","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"36","author":"Fang"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2023.3340740"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01871"},{"key":"ref27","article-title":"Discrete variational autoencoders","author":"Rolfe","year":"2016","journal-title":"arXiv:1609.02200"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1016\/j.eswa.2023.122563"},{"key":"ref29","article-title":"Neural machine translation of rare words with subword units","author":"Sennrich","year":"2015","journal-title":"arXiv:1508.07909"},{"key":"ref30","article-title":"An image is worth 16\u00d716 words: Transformers for image recognition at scale","author":"Dosovitskiy","year":"2020","journal-title":"arXiv:2010.11929"},{"key":"ref31","article-title":"Neural discrete representation learning","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"30","author":"van den Oord"},{"key":"ref32","first-page":"8821","article-title":"Zero-shot text-to-image generation","volume-title":"Proc. Int. Conf. Mach. Learn. (ICML)","author":"Ramesh"},{"key":"ref33","article-title":"Categorical reparameterization with Gumbel-Softmax","author":"Jang","year":"2016","journal-title":"arXiv:1611.01144"},{"key":"ref34","article-title":"BEiT: BERT pre-training of image transformers","author":"Bao","year":"2021","journal-title":"arXiv:2106.08254"},{"key":"ref35","article-title":"Mean teachers are better role models: Weight-averaged consistency targets improve semi-supervised deep learning results","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"30","author":"Tarvainen"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.264"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.572"},{"key":"ref38","article-title":"ShapeNet: An information-rich 3D model repository","author":"Chang","year":"2015","journal-title":"arXiv:1512.03012"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00167"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298801"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1145\/2980179.2980238"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00964"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00815"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00647"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00542"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-20086-1_38"},{"key":"ref47","article-title":"PointGPT: Auto-regressively generative pre-training from point clouds","author":"Chen","year":"2023","journal-title":"arXiv:2305.11487"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01980"},{"article-title":"Autoencoders as cross-modal teachers: Can pretrained 2D image transformers help 3D representation learning?","volume-title":"Proc. 11th Int. Conf. Learn. Represent.","author":"Dong","key":"ref49"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1145\/3581783.3612106"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.02085"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00910"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2023.3317998"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i7.28522"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.170"},{"key":"ref56","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2023.3282568"},{"key":"ref57","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00765"}],"container-title":["IEEE Transactions on Circuits and Systems for Video Technology"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/76\/11154820\/10937188.pdf?arnumber=10937188","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,10]],"date-time":"2025-09-10T17:49:14Z","timestamp":1757526554000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10937188\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,9]]},"references-count":57,"journal-issue":{"issue":"9"},"URL":"https:\/\/doi.org\/10.1109\/tcsvt.2025.3553525","relation":{},"ISSN":["1051-8215","1558-2205"],"issn-type":[{"type":"print","value":"1051-8215"},{"type":"electronic","value":"1558-2205"}],"subject":[],"published":{"date-parts":[[2025,9]]}}}