{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,10]],"date-time":"2026-06-10T21:29:11Z","timestamp":1781126951135,"version":"3.54.1"},"publisher-location":"Cham","reference-count":98,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031198113","type":"print"},{"value":"9783031198120","type":"electronic"}],"license":[{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2022]]},"DOI":"10.1007\/978-3-031-19812-0_36","type":"book-chapter","created":{"date-parts":[[2022,10,29]],"date-time":"2022-10-29T14:03:42Z","timestamp":1667052222000},"page":"620-640","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":73,"title":["PointMixer: MLP-Mixer for\u00a0Point Cloud Understanding"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-8978-6702","authenticated-orcid":false,"given":"Jaesung","family":"Choe","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9743-8495","authenticated-orcid":false,"given":"Chunghyun","family":"Park","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5031-7653","authenticated-orcid":false,"given":"Francois","family":"Rameau","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5541-409X","authenticated-orcid":false,"given":"Jaesik","family":"Park","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9626-5983","authenticated-orcid":false,"given":"In So","family":"Kweon","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"297","published-online":{"date-parts":[[2022,10,30]]},"reference":[{"issue":"2","key":"36_CR1","doi-asserted-by":"publisher","first-page":"153","DOI":"10.1007\/s11263-016-0902-9","volume":"120","author":"H Aan\u00e6s","year":"2016","unstructured":"Aan\u00e6s, H., Jensen, R.R., Vogiatzis, G., Tola, E., Dahl, A.B.: Large-scale data for multiple-view stereopsis. Int. J. Comput. Vis. (IJCV) 120(2), 153\u2013168 (2016)","journal-title":"Int. J. Comput. Vis. (IJCV)"},{"key":"36_CR2","doi-asserted-by":"crossref","unstructured":"Armeni, I., et al.: 3D semantic parsing of large-scale indoor spaces. In: Proceedings of the IEEE International Conference on Computer Vision and Pattern Recognition (2016)","DOI":"10.1109\/CVPR.2016.170"},{"key":"36_CR3","unstructured":"Bello, I.: Lambdanetworks: modeling long-range interactions without attention. In: International Conference on Learning Representations (2020)"},{"key":"36_CR4","doi-asserted-by":"crossref","unstructured":"Chen, C.F.R., Fan, Q., Panda, R.: Crossvit: cross-attention multi-scale vision transformer for image classification. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV), pp. 357\u2013366 (2021)","DOI":"10.1109\/ICCV48922.2021.00041"},{"key":"36_CR5","unstructured":"Chen, S., Xie, E., Ge, C., Liang, D., Luo, P.: CycleMLP: a MLP-like architecture for dense prediction. In: International Conference on Learning Representations (ICLR) (2022)"},{"issue":"3","key":"36_CR6","doi-asserted-by":"publisher","first-page":"4672","DOI":"10.1109\/LRA.2021.3068712","volume":"6","author":"J Choe","year":"2021","unstructured":"Choe, J., Joo, K., Imtiaz, T., Kweon, I.S.: Volumetric propagation network: stereo-lidar fusion for long-range depth estimation. IEEE Robot. Autom. Lett. 6(3), 4672\u20134679 (2021)","journal-title":"IEEE Robot. Autom. Lett."},{"key":"36_CR7","unstructured":"Choe, J., Joung, B., Rameau, F., Park, J., Kweon, I.S.: Deep point cloud reconstruction. In: International Conference on Learning Representations (ICLR) (2022)"},{"key":"36_CR8","doi-asserted-by":"crossref","unstructured":"Choy, C., Gwak, J., Savarese, S.: 4D spatio-temporal convnets: minkowski convolutional neural networks. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 3075\u20133084 (2019)","DOI":"10.1109\/CVPR.2019.00319"},{"key":"36_CR9","doi-asserted-by":"crossref","unstructured":"Dai, A., Chang, A.X., Savva, M., Halber, M., Funkhouser, T., Nie\u00dfner, M.: Scannet: richly-annotated 3D reconstructions of indoor scenes. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 5828\u20135839 (2017)","DOI":"10.1109\/CVPR.2017.261"},{"key":"36_CR10","doi-asserted-by":"crossref","unstructured":"d\u2019Ascoli, S., Touvron, H., Leavitt, M.L., Morcos, A.S., Biroli, G., Sagun, L.: Convit: improving vision transformers with soft convolutional inductive biases. In: International Conference on Machine Learning, pp. 2286\u20132296. PMLR (2021)","DOI":"10.1088\/1742-5468\/ac9830"},{"key":"36_CR11","unstructured":"Devlin, J., Chang, M.W., Lee, K., Toutanova, K.: Bert: pre-training of deep bidirectional transformers for language understanding. In: Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long and Short Papers), pp. 4171\u20134186 (2019)"},{"key":"36_CR12","unstructured":"Ding, X., Xia, C., Zhang, X., Chu, X., Han, J., Ding, G.: Repmlp: re-parameterizing convolutions into fully-connected layers for image recognition. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR) (2022)"},{"key":"36_CR13","unstructured":"Dosovitskiy, A., et al.: An image is worth 16x16 words: transformers for image recognition at scale. arXiv preprint arXiv:2010.11929 (2020)"},{"key":"36_CR14","doi-asserted-by":"crossref","unstructured":"Graham, B., Engelcke, M., van der Maaten, L.: 3D semantic segmentation with submanifold sparse convolutional networks. In: CVPR (2018)","DOI":"10.1109\/CVPR.2018.00961"},{"key":"36_CR15","doi-asserted-by":"crossref","unstructured":"Graham, B., van der Maaten, L.: Submanifold sparse convolutional networks. arXiv preprint arXiv:1706.01307 (2017)","DOI":"10.1109\/CVPR.2018.00961"},{"key":"36_CR16","doi-asserted-by":"crossref","unstructured":"Guo, J., et al.: Hire-MLP: vision MLP via hierarchical rearrangement. arXiv preprint arXiv:2108.13341 (2021)","DOI":"10.1109\/CVPR52688.2022.00090"},{"issue":"2","key":"36_CR17","doi-asserted-by":"publisher","first-page":"187","DOI":"10.1007\/s41095-021-0229-5","volume":"7","author":"MH Guo","year":"2021","unstructured":"Guo, M.H., Cai, J.X., Liu, Z.N., Mu, T.J., Martin, R.R., Hu, S.M.: PCT: point cloud transformer. Comput. Vis. Media 7(2), 187\u2013199 (2021)","journal-title":"Comput. Vis. Media"},{"issue":"12","key":"36_CR18","doi-asserted-by":"publisher","first-page":"4338","DOI":"10.1109\/TPAMI.2020.3005434","volume":"43","author":"Y Guo","year":"2020","unstructured":"Guo, Y., Wang, H., Hu, Q., Liu, H., Liu, L., Bennamoun, M.: Deep learning for 3D point clouds: a survey. IEEE Trans. Pattern Anal. Mach. Intell. (PAMI) 43(12), 4338\u20134364 (2020)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell. (PAMI)"},{"key":"36_CR19","unstructured":"Han, K., Xiao, A., Wu, E., Guo, J., Xu, C., Wang, Y.: Transformer in transformer. arXiv preprint arXiv:2103.00112 (2021)"},{"key":"36_CR20","doi-asserted-by":"crossref","unstructured":"Handa, A., Whelan, T., McDonald, J., Davison, A.J.: A benchmark for RGB-D visual odometry, 3D reconstruction and slam. In: IEEE International Conference on Robotics and Automation (ICRA) (2014)","DOI":"10.1109\/ICRA.2014.6907054"},{"key":"36_CR21","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 770\u2013778 (2016)","DOI":"10.1109\/CVPR.2016.90"},{"key":"36_CR22","unstructured":"Hendrycks, D., Gimpel, K.: Gaussian error linear units (GELUs). arXiv preprint arXiv:1606.08415 (2016)"},{"key":"36_CR23","unstructured":"Hou, Q., Jiang, Z., Yuan, L., Cheng, M.M., Yan, S., Feng, J.: Vision permutator: a permutable MLP-like architecture for visual recognition. arXiv preprint arXiv:2106.12368 (2021)"},{"key":"36_CR24","doi-asserted-by":"crossref","unstructured":"Hu, H., Zhang, Z., Xie, Z., Lin, S.: Local relation networks for image recognition. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV), pp. 3464\u20133473 (2019)","DOI":"10.1109\/ICCV.2019.00356"},{"key":"36_CR25","doi-asserted-by":"crossref","unstructured":"Hu, Q., et al.: RandLA-Net: efficient semantic segmentation of large-scale point clouds. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 11108\u201311117 (2020)","DOI":"10.1109\/CVPR42600.2020.01112"},{"key":"36_CR26","doi-asserted-by":"crossref","unstructured":"Huang, Q., Wang, W., Neumann, U.: Recurrent slice networks for 3D segmentation of point clouds. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR) (2018)","DOI":"10.1109\/CVPR.2018.00278"},{"key":"36_CR27","doi-asserted-by":"crossref","unstructured":"Jensen, R., Dahl, A., Vogiatzis, G., Tola, E., Aan\u00e6s, H.: Large scale multi-view stereopsis evaluation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR) (2014)","DOI":"10.1109\/CVPR.2014.59"},{"key":"36_CR28","doi-asserted-by":"crossref","unstructured":"Jiang, L., Zhao, H., Liu, S., Shen, X., Fu, C.W., Jia, J.: Hierarchical point-edge interaction network for point cloud semantic segmentation. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV), pp. 10433\u201310441 (2019)","DOI":"10.1109\/ICCV.2019.01053"},{"issue":"3","key":"36_CR29","doi-asserted-by":"publisher","first-page":"535","DOI":"10.1109\/TBDATA.2019.2921572","volume":"7","author":"J Johnson","year":"2019","unstructured":"Johnson, J., Douze, M., J\u00e9gou, H.: Billion-scale similarity search with GPUs. IEEE Trans. Big Data 7(3), 535\u2013547 (2019)","journal-title":"IEEE Trans. Big Data"},{"key":"36_CR30","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3505244","volume":"54","author":"S Khan","year":"2021","unstructured":"Khan, S., Naseer, M., Hayat, M., Zamir, S.W., Khan, F.S., Shah, M.: Transformers in vision: a survey. ACM Comput. Surv. (CSUR) 54, 1\u201341 (2021)","journal-title":"ACM Comput. Surv. (CSUR)"},{"key":"36_CR31","doi-asserted-by":"crossref","unstructured":"Klokov, R., Lempitsky, V.: Escape from cells: deep KD-networks for the recognition of 3D point cloud models. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV) (2017)","DOI":"10.1109\/ICCV.2017.99"},{"issue":"4","key":"36_CR32","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3072959.3073599","volume":"36","author":"A Knapitsch","year":"2017","unstructured":"Knapitsch, A., Park, J., Zhou, Q.Y., Koltun, V.: Tanks and temples: benchmarking large-scale scene reconstruction. ACM Trans. Graph. (ToG) 36(4), 1\u201313 (2017)","journal-title":"ACM Trans. Graph. (ToG)"},{"key":"36_CR33","doi-asserted-by":"crossref","unstructured":"Komarichev, A., Zhong, Z., Hua, J.: A-CNN: annularly convolutional neural networks on point clouds. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 7421\u20137430 (2019)","DOI":"10.1109\/CVPR.2019.00760"},{"key":"36_CR34","unstructured":"Lee, J., Choy, C., Park, J.: Putting 3D spatially sparse networks on a diet. arXiv preprint arXiv:2112.01316 (2021)"},{"key":"36_CR35","unstructured":"Li, J., Hassani, A., Walton, S., Shi, H.: ConvMLP: hierarchical convolutional MLPs for vision. arXiv preprint arXiv:2109.04454 (2021)"},{"key":"36_CR36","doi-asserted-by":"crossref","unstructured":"Li, J., Chen, B.M., Lee, G.H.: So-net: self-organizing network for point cloud analysis. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR) (2018)","DOI":"10.1109\/CVPR.2018.00979"},{"key":"36_CR37","doi-asserted-by":"crossref","unstructured":"Li, R., Li, X., Heng, P.A., Fu, C.W.: Point cloud upsampling via disentangled refinement. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR) (2021)","DOI":"10.1109\/CVPR46437.2021.00041"},{"key":"36_CR38","unstructured":"Li, Y., Bu, R., Sun, M., Wu, W., Di, X., Chen, B.: PointCNN: convolution on X-transformed points. In: Advances in Neural Information Processing Systems, vol. 31 (2018)"},{"key":"36_CR39","unstructured":"Lian, D., Yu, Z., Sun, X., Gao, S.: AS-MLP: an axial shifted MLP architecture for vision. In: International Conference on Learning Representations (2022)"},{"key":"36_CR40","doi-asserted-by":"crossref","unstructured":"Lin, Y., et al.: FPConv: learning local flattening for point convolution. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 4293\u20134302 (2020)","DOI":"10.1109\/CVPR42600.2020.00435"},{"key":"36_CR41","unstructured":"Liu, H., Dai, Z., So, D., Le, Q.V.: Pay attention to MLPs. In: Beygelzimer, A., Dauphin, Y., Liang, P., Vaughan, J.W. (eds.) Advances in Neural Information Processing Systems (NeurIPS) (2021)"},{"key":"36_CR42","doi-asserted-by":"crossref","unstructured":"Liu, Y., Fan, B., Meng, G., Lu, J., Xiang, S., Pan, C.: Densepoint: learning densely contextual representation for efficient point cloud processing. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV), pp. 5239\u20135248 (2019)","DOI":"10.1109\/ICCV.2019.00534"},{"key":"36_CR43","doi-asserted-by":"crossref","unstructured":"Liu, Y., Fan, B., Xiang, S., Pan, C.: Relation-shape convolutional neural network for point cloud analysis. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 8895\u20138904 (2019)","DOI":"10.1109\/CVPR.2019.00910"},{"key":"36_CR44","doi-asserted-by":"crossref","unstructured":"Liu, Z., et al.: Swin transformer: hierarchical vision transformer using shifted windows. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV), pp. 10012\u201310022 (2021)","DOI":"10.1109\/ICCV48922.2021.00986"},{"key":"36_CR45","unstructured":"Liu, Z., Tang, H., Lin, Y., Han, S.: Point-voxel CNN for efficient 3D deep learning. In: Advances in Neural Information Processing Systems (NeurIPS), vol. 32 (2019)"},{"key":"36_CR46","doi-asserted-by":"crossref","unstructured":"Long, J., Shelhamer, E., Darrell, T.: Fully convolutional networks for semantic segmentation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 3431\u20133440 (2015)","DOI":"10.1109\/CVPR.2015.7298965"},{"key":"36_CR47","unstructured":"Ma, X., Qin, C., You, H., Ran, H., Fu, Y.: Rethinking network design and local geometry in point cloud: a simple residual MLP framework. In: International Conference on Learning Representations (ICLR) (2022)"},{"key":"36_CR48","doi-asserted-by":"crossref","unstructured":"Mao, J., Wang, X., Li, H.: Interpolated convolutional networks for 3D point cloud understanding. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV), pp. 1578\u20131587 (2019)","DOI":"10.1109\/ICCV.2019.00166"},{"key":"36_CR49","doi-asserted-by":"crossref","unstructured":"Mao, J., et al.: Voxel transformer for 3D object detection. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV), pp. 3164\u20133173 (2021)","DOI":"10.1109\/ICCV48922.2021.00315"},{"key":"36_CR50","doi-asserted-by":"crossref","unstructured":"Maturana, D., Scherer, S.: Voxnet: a 3D convolutional neural network for real-time object recognition. In: IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS), pp. 922\u2013928. IEEE (2015)","DOI":"10.1109\/IROS.2015.7353481"},{"key":"36_CR51","doi-asserted-by":"crossref","unstructured":"Mazur, K., Lempitsky, V.: Cloud transformers: a universal approach to point cloud processing tasks. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV), pp. 10715\u201310724, October 2021","DOI":"10.1109\/ICCV48922.2021.01054"},{"key":"36_CR52","unstructured":"Muja, M., Lowe, D.G.: Fast approximate nearest neighbors with automatic algorithm configuration. VISAPP (1) 2(331\u2013340), 2 (2009)"},{"key":"36_CR53","doi-asserted-by":"crossref","unstructured":"Noh, H., Hong, S., Han, B.: Learning deconvolution network for semantic segmentation. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV), pp. 1520\u20131528 (2015)","DOI":"10.1109\/ICCV.2015.178"},{"key":"36_CR54","doi-asserted-by":"crossref","unstructured":"Park, C., Jeong, Y., Cho, M., Park, J.: Fast point transformer. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 16949\u201316958 (2022)","DOI":"10.1109\/CVPR52688.2022.01644"},{"key":"36_CR55","unstructured":"Qi, C.R., Su, H., Mo, K., Guibas, L.J.: Pointnet: deep learning on point sets for 3D classification and segmentation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 652\u2013660 (2017)"},{"key":"36_CR56","unstructured":"Qi, C.R., Yi, L., Su, H., Guibas, L.J.: Pointnet++: deep hierarchical feature learning on point sets in a metric space. arXiv preprint arXiv:1706.02413 (2017)"},{"key":"36_CR57","doi-asserted-by":"crossref","unstructured":"Qi, X., Liao, R., Jia, J., Fidler, S., Urtasun, R.: 3D graph neural networks for RGBD semantic segmentation. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV), pp. 5199\u20135208 (2017)","DOI":"10.1109\/ICCV.2017.556"},{"key":"36_CR58","unstructured":"Qian, G., Hammoud, H., Li, G., Thabet, A., Ghanem, B.: Assanet: an anisotropic separable set abstraction for efficient point cloud representation learning. In: Advances in Neural Information Processing Systems (NeurIPS), vol. 34 (2021)"},{"key":"36_CR59","unstructured":"Radford, A., Narasimhan, K., Salimans, T., Sutskever, I.: Improving language understanding by generative pre-training (2018)"},{"key":"36_CR60","unstructured":"Ramachandran, P., Parmar, N., Vaswani, A., Bello, I., Levskaya, A., Shlens, J.: Stand-alone self-attention in vision models. In: Proceedings of the 33rd International Conference on Neural Information Processing Systems, pp. 68\u201380 (2019)"},{"key":"36_CR61","doi-asserted-by":"crossref","unstructured":"Simonovsky, M., Komodakis, N.: Dynamic edge-conditioned filters in convolutional neural networks on graphs. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 3693\u20133702 (2017)","DOI":"10.1109\/CVPR.2017.11"},{"key":"36_CR62","unstructured":"Simonyan, K., Zisserman, A.: Very deep convolutional networks for large-scale image recognition. arXiv preprint arXiv:1409.1556 (2014)"},{"key":"36_CR63","doi-asserted-by":"crossref","unstructured":"Su, H., et al.: Splatnet: sparse lattice networks for point cloud processing. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 2530\u20132539 (2018)","DOI":"10.1109\/CVPR.2018.00268"},{"key":"36_CR64","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"685","DOI":"10.1007\/978-3-030-58604-1_41","volume-title":"Computer Vision \u2013 ECCV 2020","author":"H Tang","year":"2020","unstructured":"Tang, H., et al.: Searching efficient 3D architectures with sparse point-voxel convolution. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.-M. (eds.) ECCV 2020. LNCS, vol. 12373, pp. 685\u2013702. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58604-1_41"},{"key":"36_CR65","unstructured":"Tay, Y., Bahri, D., Metzler, D., Juan, D., Zhao, Z., Zheng, C.: Synthesizer: rethinking self-attention in transformer models. In: ICML (2021)"},{"key":"36_CR66","unstructured":"Tay, Y., Dehghani, M., Bahri, D., Metzler, D.: Efficient transformers: a survey. arXiv preprint arXiv:2009.06732 (2020)"},{"key":"36_CR67","doi-asserted-by":"crossref","unstructured":"Thomas, H., Qi, C.R., Deschaud, J.E., Marcotegui, B., Goulette, F., Guibas, L.J.: Kpconv: flexible and deformable convolution for point clouds. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV), pp. 6411\u20136420 (2019)","DOI":"10.1109\/ICCV.2019.00651"},{"key":"36_CR68","unstructured":"Tolstikhin, I.O., et al.: MLP-mixer: an all-MLP architecture for vision. In: Advances in Neural Information Processing Systems (NeurIPS), vol. 34 (2021)"},{"key":"36_CR69","doi-asserted-by":"crossref","unstructured":"Touvron, H., et al.: Resmlp: feedforward networks for image classification with data-efficient training. arXiv preprint arXiv:2105.03404 (2021)","DOI":"10.1109\/TPAMI.2022.3206148"},{"key":"36_CR70","unstructured":"Touvron, H., Cord, M., Douze, M., Massa, F., Sablayrolles, A., J\u00e9gou, H.: Training data-efficient image transformers & distillation through attention. In: International Conference on Machine Learning, pp. 10347\u201310357. PMLR (2021)"},{"key":"36_CR71","unstructured":"Trockman, A., Kolter, J.Z.: Patches are all you need? arXiv preprint arXiv:2201.09792 (2022)"},{"key":"36_CR72","doi-asserted-by":"crossref","unstructured":"Vaswani, A., Ramachandran, P., Srinivas, A., Parmar, N., Hechtman, B., Shlens, J.: Scaling local self-attention for parameter efficient visual backbones. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 12894\u201312904 (2021)","DOI":"10.1109\/CVPR46437.2021.01270"},{"key":"36_CR73","unstructured":"Vaswani, A., et al.: Attention is all you need. In: Advances in Neural Information Processing Systems (NeurIPS) (2017)"},{"key":"36_CR74","doi-asserted-by":"crossref","unstructured":"Wang, C., Samari, B., Siddiqi, K.: Local spectral graph convolution for point set feature learning. In: Proceedings of the European Conference on Computer Vision (ECCV), pp. 52\u201366 (2018)","DOI":"10.1007\/978-3-030-01225-0_4"},{"key":"36_CR75","doi-asserted-by":"crossref","unstructured":"Wang, L., Huang, Y., Hou, Y., Zhang, S., Shan, J.: Graph attention convolution for point cloud semantic segmentation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 10296\u201310305 (2019)","DOI":"10.1109\/CVPR.2019.01054"},{"key":"36_CR76","doi-asserted-by":"crossref","unstructured":"Wang, S., Suo, S., Ma, W.C., Pokrovsky, A., Urtasun, R.: Deep parametric continuous convolutional neural networks. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 2589\u20132597 (2018)","DOI":"10.1109\/CVPR.2018.00274"},{"key":"36_CR77","doi-asserted-by":"crossref","unstructured":"Wang, W., et al.: Pyramid vision transformer: a versatile backbone for dense prediction without convolutions. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV), pp. 568\u2013578 (2021)","DOI":"10.1109\/ICCV48922.2021.00061"},{"issue":"5","key":"36_CR78","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3326362","volume":"38","author":"Y Wang","year":"2019","unstructured":"Wang, Y., Sun, Y., Liu, Z., Sarma, S.E., Bronstein, M.M., Solomon, J.M.: Dynamic graph CNN for learning on point clouds. ACM Trans. Graph. (ToG) 38(5), 1\u201312 (2019)","journal-title":"ACM Trans. Graph. (ToG)"},{"key":"36_CR79","doi-asserted-by":"crossref","unstructured":"Wu, W., Qi, Z., Fuxin, L.: Pointconv: deep convolutional networks on 3D point clouds. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 9621\u20139630 (2019)","DOI":"10.1109\/CVPR.2019.00985"},{"key":"36_CR80","unstructured":"Wu, Z., et al.: 3D shapenets: a deep representation for volumetric shapes. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 1912\u20131920 (2015)"},{"key":"36_CR81","unstructured":"Xie, E., Wang, W., Yu, Z., Anandkumar, A., Alvarez, J.M., Luo, P.: Segformer: simple and efficient design for semantic segmentation with transformers. In: Advances in Neural Information Processing Systems (NeurIPS), vol. 34 (2021)"},{"key":"36_CR82","doi-asserted-by":"crossref","unstructured":"Xu, M., Zhou, Z., Qiao, Y.: Geometry sharing network for 3D point cloud classification and segmentation. In: Association for the Advancement of Artificial Intelligence (AAAI) (2020)","DOI":"10.1609\/aaai.v34i07.6938"},{"key":"36_CR83","doi-asserted-by":"crossref","unstructured":"Xu, M., Ding, R., Zhao, H., Qi, X.: Paconv: position adaptive convolution with dynamic kernel assembling on point clouds. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 3173\u20133182 (2021)","DOI":"10.1109\/CVPR46437.2021.00319"},{"key":"36_CR84","doi-asserted-by":"crossref","unstructured":"Xu, Q., Sun, X., Wu, C.Y., Wang, P., Neumann, U.: Grid-GCN for fast and scalable point cloud learning. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 5661\u20135670 (2020)","DOI":"10.1109\/CVPR42600.2020.00570"},{"key":"36_CR85","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"600","DOI":"10.1007\/978-3-030-58607-2_35","volume-title":"Computer Vision \u2013 ECCV 2020","author":"Z Yang","year":"2020","unstructured":"Yang, Z., Sun, Y., Liu, S., Qi, X., Jia, J.: CN: channel normalization for point cloud recognition. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.-M. (eds.) ECCV 2020. LNCS, vol. 12355, pp. 600\u2013616. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58607-2_35"},{"key":"36_CR86","doi-asserted-by":"crossref","unstructured":"Yao, Y., Luo, Z., Li, S., Fang, T., Quan, L.: Mvsnet: depth inference for unstructured multi-view stereo. In: European Conference on Computer Vision (ECCV) (2018)","DOI":"10.1007\/978-3-030-01237-3_47"},{"issue":"6","key":"36_CR87","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/2980179.2980238","volume":"35","author":"L Yi","year":"2016","unstructured":"Yi, L., et al.: A scalable active framework for region annotation in 3D shape collections. ACM Trans. Graph. (ToG) 35(6), 1\u201312 (2016)","journal-title":"ACM Trans. Graph. (ToG)"},{"key":"36_CR88","doi-asserted-by":"crossref","unstructured":"Yu, T., Li, X., Cai, Y., Sun, M., Li, P.: $$\\text{S}^{2}$$-MLP: spatial-shift MLP architecture for vision. arXiv preprint arXiv:2106.07477 (2021)","DOI":"10.1109\/WACV51458.2022.00367"},{"key":"36_CR89","doi-asserted-by":"crossref","unstructured":"Yu, T., Li, X., Cai, Y., Sun, M., Li, P.: $$\\text{ S}^{2}$$-MLPV2: improved spatial-shift MLP architecture for vision. In: Proceedings of the IEEE\/CVF Winter Conference on Applications of Computer Vision (WACV) (2022)","DOI":"10.1109\/WACV51458.2022.00367"},{"key":"36_CR90","doi-asserted-by":"crossref","unstructured":"Yu, W., et al.: Metaformer is actually what you need for vision. In: CVPR (2022)","DOI":"10.1109\/CVPR52688.2022.01055"},{"key":"36_CR91","doi-asserted-by":"crossref","unstructured":"Yuan, L., et al.: Tokens-to-token VIT: training vision transformers from scratch on imagenet. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV), pp. 558\u2013567 (2021)","DOI":"10.1109\/ICCV48922.2021.00060"},{"key":"36_CR92","unstructured":"Zhang, D.J., et al.: MorphMLP: a self-attention free, MLP-like backbone for image and video. arXiv preprint arXiv:2111.12527 (2021)"},{"key":"36_CR93","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"644","DOI":"10.1007\/978-3-030-58586-0_38","volume-title":"Computer Vision \u2013 ECCV 2020","author":"F Zhang","year":"2020","unstructured":"Zhang, F., Fang, J., Wah, B., Torr, P.: Deep FusionNet for point cloud semantic segmentation. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.-M. (eds.) ECCV 2020. LNCS, vol. 12369, pp. 644\u2013663. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58586-0_38"},{"key":"36_CR94","doi-asserted-by":"crossref","unstructured":"Zhao, H., Jia, J., Koltun, V.: Exploring self-attention for image recognition. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 10076\u201310085 (2020)","DOI":"10.1109\/CVPR42600.2020.01009"},{"key":"36_CR95","doi-asserted-by":"crossref","unstructured":"Zhao, H., Jiang, L., Fu, C.W., Jia, J.: Pointweb: enhancing local neighborhood features for point cloud processing. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 5565\u20135573 (2019)","DOI":"10.1109\/CVPR.2019.00571"},{"key":"36_CR96","doi-asserted-by":"crossref","unstructured":"Zhao, H., Jiang, L., Jia, J., Torr, P.H., Koltun, V.: Point transformer. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV), pp. 16259\u201316268 (2021)","DOI":"10.1109\/ICCV48922.2021.01595"},{"key":"36_CR97","unstructured":"Zhou, Q.Y., Park, J., Koltun, V.: Open3D: a modern library for 3D data processing. arXiv preprint arXiv:1801.09847 (2018)"},{"key":"36_CR98","doi-asserted-by":"crossref","unstructured":"Zhou, Y., Tuzel, O.: Voxelnet: end-to-end learning for point cloud based 3D object detection. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 4490\u20134499 (2018)","DOI":"10.1109\/CVPR.2018.00472"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2022"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-19812-0_36","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,4,27]],"date-time":"2023-04-27T13:10:10Z","timestamp":1682601010000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-19812-0_36"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022]]},"ISBN":["9783031198113","9783031198120"],"references-count":98,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-19812-0_36","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2022]]},"assertion":[{"value":"30 October 2022","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Tel Aviv","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Israel","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2022","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"23 October 2022","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"27 October 2022","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"17","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2022","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2022.ecva.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"CMT","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"5804","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"1645","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"28% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3.21","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3.91","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"From the workshops, 367 reviewed full papers have been selected for publication","order":10,"name":"additional_info_on_review_process","label":"Additional Info on Review Process","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}