{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,8]],"date-time":"2026-06-08T15:57:36Z","timestamp":1780934256732,"version":"3.54.1"},"reference-count":44,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2026,12,1]],"date-time":"2026-12-01T00:00:00Z","timestamp":1796083200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2026,12,1]],"date-time":"2026-12-01T00:00:00Z","timestamp":1796083200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/legal\/tdmrep-license"},{"start":{"date-parts":[[2026,12,1]],"date-time":"2026-12-01T00:00:00Z","timestamp":1796083200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-017"},{"start":{"date-parts":[[2026,12,1]],"date-time":"2026-12-01T00:00:00Z","timestamp":1796083200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"},{"start":{"date-parts":[[2026,12,1]],"date-time":"2026-12-01T00:00:00Z","timestamp":1796083200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-012"},{"start":{"date-parts":[[2026,12,1]],"date-time":"2026-12-01T00:00:00Z","timestamp":1796083200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2026,12,1]],"date-time":"2026-12-01T00:00:00Z","timestamp":1796083200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-004"}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Pattern Recognition"],"published-print":{"date-parts":[[2026,12]]},"DOI":"10.1016\/j.patcog.2026.114059","type":"journal-article","created":{"date-parts":[[2026,5,20]],"date-time":"2026-05-20T23:21:33Z","timestamp":1779319293000},"page":"114059","update-policy":"https:\/\/doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":0,"special_numbering":"PA","title":["Zeromix: Multimodal mixing for zero-shot 3D point cloud classification"],"prefix":"10.1016","volume":"180","author":[{"ORCID":"https:\/\/orcid.org\/0009-0001-4067-5462","authenticated-orcid":false,"given":"Qin","family":"Li","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8295-0496","authenticated-orcid":false,"given":"Guoqing","family":"Zhou","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"78","reference":[{"key":"10.1016\/j.patcog.2026.114059_bib0001","series-title":"Proceedings of the 38th International Conference on Machine Learning","first-page":"8748","article-title":"Learning transferable visual models from natural language supervision","author":"Radford","year":"2021"},{"key":"10.1016\/j.patcog.2026.114059_bib0002","series-title":"ICLR","article-title":"Language-driven Semantic Segmentation","author":"Li","year":"2022"},{"key":"10.1016\/j.patcog.2026.114059_bib0003","series-title":"CVPR","first-page":"18113","article-title":"GroupViT: semantic segmentation emerges from text supervision","author":"Xu","year":"2022"},{"key":"10.1016\/j.patcog.2026.114059_bib0004","series-title":"International Conference on Learning Representations","article-title":"Open-vocabulary object detection via vision and language knowledge distillation","author":"Gu","year":"2021"},{"key":"10.1016\/j.patcog.2026.114059_bib0005","series-title":"2022 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR)","first-page":"10955","article-title":"Grounded language-image pre-training","author":"Li","year":"2022"},{"key":"10.1016\/j.patcog.2026.114059_bib0006","series-title":"ICASSP 2022-2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","first-page":"976","article-title":"Audioclip: Extending clip to image, text and audio","author":"Guzhov","year":"2022"},{"key":"10.1016\/j.patcog.2026.114059_bib0007","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1016\/j.isprsjprs.2020.10.019","article-title":"Universal SAR and optical image registration via a novel SIFT framework based on nonlinear diffusion and a polar spatial-frequency descriptor","volume":"171","author":"Yu","year":"2021","journal-title":"ISPRS J. Photogramm. Remote Sens."},{"key":"10.1016\/j.patcog.2026.114059_bib0008","first-page":"1","article-title":"A Novel iterative self-organizing pixel matrix entanglement classifier for remote sensing imagery","volume":"62","author":"Zhou","year":"2024","journal-title":"IEEE Trans. Geosci. Remote Sens."},{"key":"10.1016\/j.patcog.2026.114059_bib0009","series-title":"CVPR","first-page":"8542","article-title":"PointCLIP: point cloud understanding by CLIP","author":"Zhang","year":"2022"},{"key":"10.1016\/j.patcog.2026.114059_bib0010","series-title":"ICCV","first-page":"2639","article-title":"PointCLIP V2: prompting CLIP and GPT for powerful 3D open-world learning","author":"Zhu","year":"2023"},{"key":"10.1016\/j.patcog.2026.114059_bib0011","series-title":"ICCV","first-page":"22100","article-title":"CLIP2Point: transfer CLIP to point cloud classification with image-depth pre-training","author":"Huang","year":"2023"},{"key":"10.1016\/j.patcog.2026.114059_bib0012","doi-asserted-by":"crossref","first-page":"296","DOI":"10.1016\/j.isprsjprs.2024.01.017","article-title":"LiDeNeRF: neural radiance field reconstruction with depth prior provided by LiDAR point cloud","volume":"208","author":"Wei","year":"2024","journal-title":"ISPRS J. Photogramm. Remote Sens."},{"key":"10.1016\/j.patcog.2026.114059_bib0013","doi-asserted-by":"crossref","DOI":"10.1016\/j.neucom.2025.131188","article-title":"MVF-PointCLIP: training-free multi-view fusion pointCLIP for zero-shot 3D classification","volume":"653","author":"Dai","year":"2025","journal-title":"Neurocomputing"},{"key":"10.1016\/j.patcog.2026.114059_bib0014","series-title":"CVPR","first-page":"3728","article-title":"Cross-modal 3D representation with multi-view images and point clouds","author":"Zhou","year":"2025"},{"key":"10.1016\/j.patcog.2026.114059_bib0015","first-page":"4670","article-title":"CLIP-GS: unifying vision-language representation with 3D gaussian splatting","author":"Jiao","year":"2025","journal-title":"ICCV"},{"key":"10.1016\/j.patcog.2026.114059_bib0016","series-title":"Proceedings of the IEEE\/CVF Winter Conference on Applications of Computer Vision","first-page":"5766","article-title":"BlendCLIP: bridging synthetic and real domains for zero-shot 3D object classification with multimodal pretraining","author":"Khoche","year":"2026"},{"key":"10.1016\/j.patcog.2026.114059_bib0017","series-title":"CVPR","first-page":"77","article-title":"PointNet: deep learning on point sets for 3D classification and segmentation","author":"Charles","year":"2017"},{"key":"10.1016\/j.patcog.2026.114059_bib0018","article-title":"Pointnet++: deep hierarchical feature learning on point sets in a metric space","volume":"30","author":"Qi","year":"2017","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.patcog.2026.114059_bib0019","article-title":"Attention is all you need","volume":"30","author":"Vaswani","year":"2017","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.patcog.2026.114059_bib0020","series-title":"International Conference on Learning Representations","article-title":"An image is worth 16x16 words: transformers for image recognition at scale","author":"Dosovitskiy","year":"2021"},{"key":"10.1016\/j.patcog.2026.114059_bib0021","series-title":"CVPR","first-page":"19291","article-title":"Point-BERT: pre-training 3D point cloud transformers with masked point modeling","author":"Yu","year":"2022"},{"key":"10.1016\/j.patcog.2026.114059_bib0022","series-title":"International Conference on Learning Representations","article-title":"Rethinking network design and local geometry in point cloud: a simple residual MLP framework","author":"Ma","year":"2022"},{"key":"10.1016\/j.patcog.2026.114059_bib0023","series-title":"Proceedings of the IEEE\/CVF International Conference on Computer Vision","first-page":"16259","article-title":"Point transformer","author":"Zhao","year":"2021"},{"key":"10.1016\/j.patcog.2026.114059_bib0024","series-title":"CVPR","first-page":"16928","article-title":"Fast point transformer","author":"Park","year":"2022"},{"key":"10.1016\/j.patcog.2026.114059_bib0025","first-page":"33330","article-title":"Point transformer v2: grouped vector attention and partition-based pooling","volume":"35","author":"Wu","year":"2022","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.patcog.2026.114059_bib0026","first-page":"1","article-title":"ECA-MobileNetV3(Large)+SegNet model for binary sugarcane classification of remotely sensed images","volume":"60","author":"Zhou","year":"2022","journal-title":"IEEE Trans. Geosci. Remote Sens."},{"key":"10.1016\/j.patcog.2026.114059_bib0027","first-page":"1","article-title":"Threshold attention network for semantic segmentation of remote sensing images","volume":"61","author":"Long","year":"2023","journal-title":"IEEE Trans. Geosci. Remote Sens."},{"key":"10.1016\/j.patcog.2026.114059_bib0028","series-title":"CVPR","first-page":"1179","article-title":"ULIP: learning a unified representation of language, images, and point clouds for 3D understanding","author":"Xue","year":"2023"},{"key":"10.1016\/j.patcog.2026.114059_bib0029","first-page":"1","article-title":"Study on pixel entanglement theory for imagery classification","volume":"60","author":"Zhou","year":"2022","journal-title":"IEEE Trans. Geosci. Remote Sens."},{"key":"10.1016\/j.patcog.2026.114059_bib0030","series-title":"Proceedings of the 40th International Conference on Machine Learning","first-page":"28223","article-title":"Contrast with reconstruct: contrastive 3D representation learning guided by generative pretraining","volume":"202","author":"Qi","year":"2023"},{"key":"10.1016\/j.patcog.2026.114059_bib0031","series-title":"CVPR","first-page":"15979","article-title":"Masked autoencoders are scalable vision learners","author":"He","year":"2022"},{"key":"10.1016\/j.patcog.2026.114059_bib0032","series-title":"International Conference on Learning Representations","article-title":"Mixup: beyond empirical risk minimization","author":"Zhang","year":"2018"},{"key":"10.1016\/j.patcog.2026.114059_bib0033","series-title":"ICCV","first-page":"6022","article-title":"CutMix: regularization strategy to train strong classifiers with localizable features","author":"Yun","year":"2019"},{"key":"10.1016\/j.patcog.2026.114059_bib0034","series-title":"Computer Vision-ECCV 2020: 16th European Conference","first-page":"330","article-title":"Pointmixup: augmentation for point clouds","author":"Chen","year":"2020"},{"key":"10.1016\/j.patcog.2026.114059_bib0035","doi-asserted-by":"crossref","first-page":"58","DOI":"10.1016\/j.neucom.2022.07.049","article-title":"Pointcutmix: regularization strategy for point cloud classification","volume":"505","author":"Zhang","year":"2022","journal-title":"Neurocomputing"},{"key":"10.1016\/j.patcog.2026.114059_bib0036","series-title":"2021 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS)","first-page":"3391","article-title":"Part-aware data augmentation for 3D object detection in point cloud","author":"Choi","year":"2021"},{"key":"10.1016\/j.patcog.2026.114059_bib0037","series-title":"2021 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR)","first-page":"15895","article-title":"Regularization strategy for point cloud via rigidly mixed sample","author":"Lee","year":"2021"},{"key":"10.1016\/j.patcog.2026.114059_bib0038","series-title":"2021 International Conference on 3d Vision (3dv)","first-page":"116","article-title":"Mix3d: out-of-context data augmentation for 3d scenes","author":"Nekrasov","year":"2021"},{"key":"10.1016\/j.patcog.2026.114059_bib0039","series-title":"Proceedings of the 36th International Conference on Neural Information Processing Systems","article-title":"SageMix: saliency-guided mixup for point clouds","author":"Lee","year":"2022"},{"key":"10.1016\/j.patcog.2026.114059_bib0040","series-title":"ICCV","first-page":"13910","article-title":"MixCycle: mixup assisted semi-supervised 3D single object tracking with cycle consistency","author":"Wu","year":"2023"},{"key":"10.1016\/j.patcog.2026.114059_bib0041","series-title":"Data Mining for Co-location Patterns: Principles and Applications","author":"Zhou","year":"2022"},{"key":"10.1016\/j.patcog.2026.114059_bib0042","series-title":"Computer Vision - ECCV 2022","first-page":"529","article-title":"SLIP: self-supervision meets language-image pre-training","author":"Mu","year":"2022"},{"key":"10.1016\/j.patcog.2026.114059_bib0043","series-title":"CVPR","article-title":"Point transformer V3: simpler, faster, stronger","author":"Wu","year":"2024"},{"key":"10.1016\/j.patcog.2026.114059_bib0044","series-title":"ICCV","first-page":"6410","article-title":"KPConv: flexible and deformable convolution for point clouds","author":"Thomas","year":"2019"}],"container-title":["Pattern Recognition"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0031320326010241?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0031320326010241?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2026,6,8]],"date-time":"2026-06-08T14:57:38Z","timestamp":1780930658000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S0031320326010241"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,12]]},"references-count":44,"alternative-id":["S0031320326010241"],"URL":"https:\/\/doi.org\/10.1016\/j.patcog.2026.114059","relation":{},"ISSN":["0031-3203"],"issn-type":[{"value":"0031-3203","type":"print"}],"subject":[],"published":{"date-parts":[[2026,12]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"Zeromix: Multimodal mixing for zero-shot 3D point cloud classification","name":"articletitle","label":"Article Title"},{"value":"Pattern Recognition","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.patcog.2026.114059","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2026 Elsevier Ltd. All rights are reserved, including those for text and data mining, AI training, and similar technologies.","name":"copyright","label":"Copyright"}],"article-number":"114059"}}