{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,2]],"date-time":"2026-06-02T05:35:27Z","timestamp":1780378527953,"version":"3.54.1"},"reference-count":80,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","license":[{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Multimedia"],"published-print":{"date-parts":[[2025]]},"DOI":"10.1109\/tmm.2025.3581747","type":"journal-article","created":{"date-parts":[[2025,6,25]],"date-time":"2025-06-25T14:24:15Z","timestamp":1750861455000},"page":"6382-6396","source":"Crossref","is-referenced-by-count":4,"title":["PointMT: Efficient Point Cloud Analysis With Hybrid MLP-Transformer Architecture"],"prefix":"10.1109","volume":"27","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-9480-5827","authenticated-orcid":false,"given":"Qiang","family":"Zheng","sequence":"first","affiliation":[{"name":"State Key Laboratory for Strength and Vibration of Mechanical Structures, School of Aerospace Engineering, Xi&#x2019;an Jiaotong University, Xi&#x2019;an, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Chao","family":"Zhang","sequence":"additional","affiliation":[{"name":"State Key Laboratory for Strength and Vibration of Mechanical Structures, School of Aerospace Engineering, Xi&#x2019;an Jiaotong University, Xi&#x2019;an, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3104-7904","authenticated-orcid":false,"given":"Jian","family":"Sun","sequence":"additional","affiliation":[{"name":"State Key Laboratory for Strength and Vibration of Mechanical Structures, School of Aerospace Engineering, Xi&#x2019;an Jiaotong University, Xi&#x2019;an, China"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.114"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1007\/s10489-021-02840-2"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2015.7353481"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00268"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.701"},{"key":"ref6","first-page":"1","article-title":"PointNet++: Deep hierarchical feature learning on point sets in a metric space","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"30","author":"Qi","year":"2017"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00985"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1145\/3326362"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2021.3116304"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.2010.11929"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00986"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-20053-3_5"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2022.3198318"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.1706.03762"},{"key":"ref16","first-page":"1","article-title":"BEit: BERT pre-training of image transformers","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Bao","year":"2022"},{"key":"ref17","first-page":"213","article-title":"End-to-end object detection with transformers","volume-title":"Proc. IEEE Eur. Conf. Comput. Vis.","author":"Nicolas","year":"2020"},{"key":"ref18","first-page":"1","article-title":"Deformable DETR: Deformable transformers for end-to-end object detection","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Zhu","year":"2021"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00363"},{"key":"ref20","first-page":"1","article-title":"Sparse DETR: Efficient end-to-end object detection with learnable sparsity","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Roh","year":"2022"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00681"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2022.108663"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00061"},{"key":"ref24","article-title":"Transformers meet visual learning understanding: A comprehensive review","author":"Yang","year":"2022"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1145\/3474085.3475272"},{"key":"ref26","article-title":"Understanding video transformers for segmentation: A survey of application and interpretability","author":"Karim","year":"2023"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01362"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.16"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00641"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.02090"},{"key":"ref31","first-page":"828","article-title":"PointCNN: Convolution on x-transformed points","volume-title":"Proc. 32nd Int. Conf. Neural Inf. Process. Syst.","author":"Li","year":"2018"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01237-3_6"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00274"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00651"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00319"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00760"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i07.6725"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00492"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1109\/ICPR56361.2022.9956238"},{"key":"ref40","first-page":"23872","article-title":"CoFiNet: Reliable coarse-to-fine correspondences for robust pointcloud registration","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"34","author":"Yu","year":"2021"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00178"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1007\/s41095-021-0229-5"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01595"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01009"},{"key":"ref45","first-page":"33330","article-title":"Point transformer v2: Grouped vector attention and partition-based pooling","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"35","author":"Wu","year":"2022"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00831"},{"key":"ref47","article-title":"Collect-and-distribute transformer for 3D point cloud analysis","author":"Qiu","year":"2023"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00110"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2023.3347722"},{"key":"ref50","first-page":"32653","article-title":"Pointmamba: A simple state space model for point cloud analysis","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Liang","year":"2024"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v39i10.33098"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-20086-1_35"},{"key":"ref53","first-page":"27061","article-title":"Point-M2ae: Multi-scale masked autoencoders for hierarchical point cloud pre-training","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"35","author":"Zhang","year":"2022"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2023.3317998"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2023.3284591"},{"key":"ref56","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58580-8_34"},{"key":"ref57","first-page":"1","article-title":"Contrast with reconstruct: Contrastive 3D representation learning guided by generative pretraining","volume-title":"Proc. 40th Int. Conf. Mach. Learn.","author":"Qi","year":"2023"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19812-0_35"},{"key":"ref59","doi-asserted-by":"publisher","DOI":"10.1049\/cit2.12239"},{"key":"ref60","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2024.3382512"},{"key":"ref61","first-page":"1","article-title":"Pointcnn: Convolution on X-transformed points","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"31","author":"Li","year":"2018"},{"key":"ref62","doi-asserted-by":"publisher","DOI":"10.1016\/j.knosys.2022.108769"},{"key":"ref63","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00910"},{"key":"ref64","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2024.3358695"},{"key":"ref65","first-page":"23192","article-title":"Pointnext: Revisiting pointnet with improved training and scaling strategies","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"35","author":"Qian","year":"2022"},{"key":"ref66","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2024.3374580"},{"key":"ref67","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00095"},{"key":"ref68","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2023.3304892"},{"key":"ref69","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2024.3486612"},{"key":"ref70","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2022.3162301"},{"key":"ref71","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2023.3312855"},{"key":"ref72","doi-asserted-by":"publisher","DOI":"10.1145\/3528223.3530166"},{"key":"ref73","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01837"},{"key":"ref74","doi-asserted-by":"publisher","DOI":"10.1109\/3DV.2017.00067"},{"key":"ref75","doi-asserted-by":"publisher","DOI":"10.1145\/3197517.3201301"},{"key":"ref76","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00571"},{"key":"ref77","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00563"},{"key":"ref78","doi-asserted-by":"publisher","DOI":"10.1109\/TGRS.2023.3341894"},{"key":"ref79","doi-asserted-by":"publisher","DOI":"10.1109\/WACV57701.2024.00352"},{"key":"ref80","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2024.3363244"}],"container-title":["IEEE Transactions on Multimedia"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/6046\/10844992\/11050981.pdf?arnumber=11050981","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,30]],"date-time":"2025-09-30T13:03:00Z","timestamp":1759237380000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11050981\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025]]},"references-count":80,"URL":"https:\/\/doi.org\/10.1109\/tmm.2025.3581747","relation":{},"ISSN":["1520-9210","1941-0077"],"issn-type":[{"value":"1520-9210","type":"print"},{"value":"1941-0077","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025]]}}}