{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,8,29]],"date-time":"2025-08-29T10:11:34Z","timestamp":1756462294829,"version":"3.37.3"},"reference-count":41,"publisher":"Springer Science and Business Media LLC","issue":"9","license":[{"start":{"date-parts":[[2023,12,21]],"date-time":"2023-12-21T00:00:00Z","timestamp":1703116800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,12,21]],"date-time":"2023-12-21T00:00:00Z","timestamp":1703116800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62162001","61762003"],"award-info":[{"award-number":["62162001","61762003"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100004772","name":"Natural Science Foundation of Ningxia Province","doi-asserted-by":"publisher","award":["2022AAC02041"],"award-info":[{"award-number":["2022AAC02041"]}],"id":[{"id":"10.13039\/501100004772","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Vis Comput"],"published-print":{"date-parts":[[2024,9]]},"DOI":"10.1007\/s00371-023-03191-4","type":"journal-article","created":{"date-parts":[[2023,12,21]],"date-time":"2023-12-21T18:02:03Z","timestamp":1703181723000},"page":"6655-6670","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["V$$^2$$MLP: an accurate and simple multi-view MLP network for fine-grained 3D shape recognition"],"prefix":"10.1007","volume":"40","author":[{"given":"Liang","family":"Zheng","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4247-6210","authenticated-orcid":false,"given":"Jing","family":"Bai","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Shaojin","family":"Bai","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Wenjing","family":"Li","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Bin","family":"Peng","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Tao","family":"Zhou","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2023,12,21]]},"reference":[{"key":"3191_CR1","doi-asserted-by":"crossref","unstructured":"Xiong, S., Tziafas, G., Kasaei, H.: Enhancing fine-grained 3D object recognition using hybrid multi-modal vision transformer-CNN models. In: IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS 2023) (2023)","DOI":"10.1109\/IROS55552.2023.10342235"},{"key":"3191_CR2","doi-asserted-by":"publisher","DOI":"10.1007\/s00371-023-02816-y","author":"R Wu","year":"2023","unstructured":"Wu, R., Bai, J., Li, W., Jiang, J.: DCNet: exploring fine-grained vision classification for 3D point clouds. Vis. Comput. (2023). https:\/\/doi.org\/10.1007\/s00371-023-02816-y","journal-title":"Vis. Comput."},{"key":"3191_CR3","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2023.109509","volume":"139","author":"H Shao","year":"2023","unstructured":"Shao, H., Bai, J., Wu, R., Jiang, J., Liang, H.: FGPNet: a weakly supervised fine-grained 3D point clouds classification network. Pattern Recogn. 139, 109509 (2023). https:\/\/doi.org\/10.1016\/j.patcog.2023.109509","journal-title":"Pattern Recogn."},{"key":"3191_CR4","doi-asserted-by":"publisher","first-page":"1744","DOI":"10.1109\/TIP.2020.3048623","volume":"30","author":"X Liu","year":"2021","unstructured":"Liu, X., Han, Z., Liu, Y.-S., Zwicker, M.: Fine-grained 3D shape classification with hierarchical part-view attention. IEEE Trans. Image Process. 30, 1744\u20131758 (2021)","journal-title":"IEEE Trans. Image Process."},{"key":"3191_CR5","doi-asserted-by":"crossref","unstructured":"Su, H., Maji, S., Kalogerakis, E., Learned-Miller, E.: Multi-view convolutional neural networks for 3D shape recognition. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 945\u2013953 (2015)","DOI":"10.1109\/ICCV.2015.114"},{"key":"3191_CR6","doi-asserted-by":"crossref","unstructured":"Kanezaki, A., Matsushita, Y., Nishida, Y.: RotationNet: joint object categorization and pose estimation using multiviews from unsupervised viewpoints. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 5010\u20135019 (2018)","DOI":"10.1109\/CVPR.2018.00526"},{"key":"3191_CR7","doi-asserted-by":"crossref","unstructured":"Wei, X., Yu, R., Sun, J.: View-GCN: view-based graph convolutional network for 3D shape analysis. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 1850\u20131859 (2020)","DOI":"10.1109\/CVPR42600.2020.00192"},{"key":"3191_CR8","doi-asserted-by":"crossref","unstructured":"Shilane, P., Min, P., Kazhdan, M., Funkhouser, T.: The Princeton shape benchmark. In: Proceedings Shape Modeling Applications, pp. 167\u2013178 (2004). IEEE","DOI":"10.1109\/SMI.2004.1314504"},{"key":"3191_CR9","doi-asserted-by":"crossref","unstructured":"Girshick, R., Donahue, J., Darrell, T., Malik, J.: Rich feature hierarchies for accurate object detection and semantic segmentation. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 580\u2013587 (2014)","DOI":"10.1109\/CVPR.2014.81"},{"key":"3191_CR10","doi-asserted-by":"crossref","unstructured":"Zhang, N., Donahue, J., Girshick, R., Darrell, T.: Part-based R-CNNs for fine-grained category detection. In: European Conference on Computer Vision, pp. 834\u2013849 (2014). Springer","DOI":"10.1007\/978-3-319-10590-1_54"},{"key":"3191_CR11","doi-asserted-by":"crossref","unstructured":"Zhang, H., Xu, T., Elhoseiny, M., Huang, X., Zhang, S., Elgammal, A., Metaxas, D.: Spda-cnn: Unifying semantic part detection and abstraction for fine-grained recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 1143\u20131152 (2016)","DOI":"10.1109\/CVPR.2016.129"},{"key":"3191_CR12","doi-asserted-by":"crossref","unstructured":"Xiao, T., Xu, Y., Yang, K., Zhang, J., Peng, Y., Zhang, Z.: The application of two-level attention models in deep convolutional neural network for fine-grained image classification. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 842\u2013850 (2015)","DOI":"10.1109\/CVPR.2015.7298685"},{"issue":"6","key":"3191_CR13","doi-asserted-by":"publisher","first-page":"1245","DOI":"10.1109\/TMM.2017.2648498","volume":"19","author":"B Zhao","year":"2017","unstructured":"Zhao, B., Wu, X., Feng, J., Peng, Q., Yan, S.: Diversified visual attention networks for fine-grained object classification. IEEE Trans. Multimedia 19(6), 1245\u20131256 (2017)","journal-title":"IEEE Trans. Multimedia"},{"key":"3191_CR14","doi-asserted-by":"crossref","unstructured":"Liu, F., Zou, C., Deng, X., Zuo, R., Lai, Y.-K., Ma, C., Liu, Y.-J., Wang, H.: Scenesketcher: Fine-grained image retrieval with scene sketches. In: European Conference on Computer Vision, pp. 718\u2013734 (2020). Springer","DOI":"10.1007\/978-3-030-58529-7_42"},{"key":"3191_CR15","doi-asserted-by":"crossref","unstructured":"Lin, T.-Y., RoyChowdhury, A., Maji, S.: Bilinear CNN models for fine-grained visual recognition. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 1449\u20131457 (2015)","DOI":"10.1109\/ICCV.2015.170"},{"issue":"9","key":"3191_CR16","doi-asserted-by":"publisher","first-page":"1771","DOI":"10.1007\/s00371-019-01770-y","volume":"36","author":"Y Zhu","year":"2020","unstructured":"Zhu, Y., Liu, G.: Fine-grained action recognition using multi-view attentions. Vis. Comput. 36(9), 1771\u20131781 (2020)","journal-title":"Vis. Comput."},{"key":"3191_CR17","doi-asserted-by":"publisher","first-page":"345","DOI":"10.1007\/s00371-020-02018-w","volume":"38","author":"C Lyu","year":"2022","unstructured":"Lyu, C., Hu, G., Wang, D.: Attention to fine-grained information: hierarchical multi-scale network for retinal vessel segmentation. Vis. Comput. 38, 345\u2013355 (2022)","journal-title":"Vis. Comput."},{"key":"3191_CR18","doi-asserted-by":"publisher","first-page":"811","DOI":"10.1007\/s00371-020-02052-8","volume":"38","author":"M Li","year":"2022","unstructured":"Li, M., Lei, L., Sun, H., Li, X., Kuang, G.: Fine-grained visual classification via multilayer bilinear pooling with object localization. Vis. Comput. 38, 811\u2013820 (2022)","journal-title":"Vis. Comput."},{"key":"3191_CR19","unstructured":"Qi, C.R., Su, H., Mo, K., Guibas, L.J.: PointNet: deep learning on point sets for 3d classification and segmentation. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 652\u2013660 (2017)"},{"key":"3191_CR20","doi-asserted-by":"crossref","unstructured":"Maturana, D., Scherer, S.: VoxNet: a 3D convolutional neural network for real-time object recognition. In: 2015 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS), pp. 922\u2013928 (2015). IEEE","DOI":"10.1109\/IROS.2015.7353481"},{"key":"3191_CR21","doi-asserted-by":"crossref","unstructured":"Deng, J., Dong, W., Socher, R., Li, L.-J., Li, K., Fei-Fei, L.: ImageNet: a large-scale hierarchical image database. In: 2009 IEEE Conference on Computer Vision and Pattern Recognition, pp. 248\u2013255 (2009). IEEE","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"3191_CR22","unstructured":"Savva, M., Yu, F., Su, H., Aono, M., Chen, B., Cohen-Or, D., Deng, W., Su, H., Bai, S., Bai, X., et al.: SHREC\u201916 track: largescale 3D shape retrieval from ShapeNet Core55. In: Proceedings of the Eurographics Workshop on 3D Object Retrieval, vol. 10 (2016)"},{"key":"3191_CR23","doi-asserted-by":"crossref","unstructured":"Johns, E., Leutenegger, S., Davison, A.J.: Pairwise decomposition of image sequences for active multi-view recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 3813\u20133822 (2016)","DOI":"10.1109\/CVPR.2016.414"},{"key":"3191_CR24","doi-asserted-by":"crossref","unstructured":"Feng, Y., Zhang, Z., Zhao, X., Ji, R., Gao, Y.: Group-view convolutional neural networks for 3D shape recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR), pp. 264\u2013272 (2018)","DOI":"10.1109\/CVPR.2018.00035"},{"key":"3191_CR25","doi-asserted-by":"crossref","unstructured":"Yang, Z., Wang, L.: Learning relationships for multi-view 3D object recognition. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 7505\u20137514 (2019)","DOI":"10.1109\/ICCV.2019.00760"},{"key":"3191_CR26","doi-asserted-by":"crossref","unstructured":"Yu, T., Meng, J., Yuan, J.: Multi-view harmonized bilinear network for 3D object recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 186\u2013194 (2018)","DOI":"10.1109\/CVPR.2018.00027"},{"issue":"12","key":"3191_CR27","doi-asserted-by":"publisher","first-page":"3244","DOI":"10.1109\/TVCG.2018.2866793","volume":"25","author":"S Chen","year":"2018","unstructured":"Chen, S., Zheng, L., Zhang, Y., Sun, Z., Xu, K.: VERAM: view-enhanced recurrent attention model for 3D shape classification. IEEE Trans. Vis. Comput. Graphics 25(12), 3244\u20133257 (2018)","journal-title":"IEEE Trans. Vis. Comput. Graphics"},{"key":"3191_CR28","doi-asserted-by":"crossref","unstructured":"Dai, G., Xie, J., Fang, Y.: Siamese CNN-BiLSTM architecture for 3D shape representation learning. In: IJCAI, pp. 670\u2013676 (2018)","DOI":"10.24963\/ijcai.2018\/93"},{"issue":"5","key":"3191_CR29","doi-asserted-by":"publisher","first-page":"1169","DOI":"10.1109\/TMM.2018.2875512","volume":"21","author":"C Ma","year":"2018","unstructured":"Ma, C., Guo, Y., Yang, J., An, W.: Learning multi-view representation with LSTM for 3-D shape recognition and retrieval. IEEE Trans. Multimedia 21(5), 1169\u20131182 (2018)","journal-title":"IEEE Trans. Multimedia"},{"key":"3191_CR30","first-page":"24261","volume":"34","author":"IO Tolstikhin","year":"2021","unstructured":"Tolstikhin, I.O., Houlsby, N., Kolesnikov, A., Beyer, L., Zhai, X., Unterthiner, T., Yung, J., Steiner, A., Keysers, D., Uszkoreit, J., et al.: MLP-Mixer: an all-MLP architecture for vision. Ad. Neural Inf. Process. Syst. 34, 24261\u201324272 (2021)","journal-title":"Ad. Neural Inf. Process. Syst."},{"key":"3191_CR31","first-page":"9204","volume":"34","author":"H Liu","year":"2021","unstructured":"Liu, H., Dai, Z., So, D., Le, Q.V.: Pay attention to MLPs. Adv. Neural Inf. Process. Syst. 34, 9204\u20139215 (2021)","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"3191_CR32","doi-asserted-by":"crossref","unstructured":"Tang, Y., Han, K., Guo, J., Xu, C., Li, Y., Xu, C., Wang, Y.: An image patch is a wave: phase-aware vision MLP. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 10935\u201310944 (2022)","DOI":"10.1109\/CVPR52688.2022.01066"},{"key":"3191_CR33","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 770\u2013778 (2016)","DOI":"10.1109\/CVPR.2016.90"},{"issue":"2","key":"3191_CR34","doi-asserted-by":"publisher","first-page":"187","DOI":"10.1007\/s41095-021-0229-5","volume":"7","author":"M-H Guo","year":"2021","unstructured":"Guo, M.-H., Cai, J.-X., Liu, Z.-N., Mu, T.-J., Martin, R.R., Hu, S.-M.: PCT: point cloud transformer. Comput. Vis. Media 7(2), 187\u2013199 (2021)","journal-title":"Comput. Vis. Media"},{"key":"3191_CR35","doi-asserted-by":"crossref","unstructured":"Zhao, H., Jiang, L., Jia, J., Torr, P.H., Koltun, V.: Point transformer. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 16259\u201316268 (2021)","DOI":"10.1109\/ICCV48922.2021.01595"},{"key":"3191_CR36","unstructured":"Ma, X., Qin, C., You, H., Ran, H., Fu, Y.: Rethinking network design and local geometry in point cloud: a simple residual MLP framework. In: International Conference on Learning Representations (2022)"},{"issue":"8","key":"3191_CR37","doi-asserted-by":"publisher","first-page":"3986","DOI":"10.1109\/TIP.2019.2904460","volume":"28","author":"Z Han","year":"2019","unstructured":"Han, Z., Lu, H., Liu, Z., Vong, C.-M., Liu, Y.-S., Zwicker, M., Han, J., Chen, C.P.: 3D2SeqViews: aggregating sequential views for 3D global feature learning by CNN with hierarchical attention aggregation. IEEE Trans. Image Process. 28(8), 3986\u20133999 (2019)","journal-title":"IEEE Trans. Image Process."},{"key":"3191_CR38","doi-asserted-by":"publisher","unstructured":"Han, Z., Wang, X., Vong, C.M., Liu, Y.-S., Zwicker, M., Chen, C.L.P.: 3DViewGraph: learning global features for 3D shapes from a graph of unordered views with attention. In: Proceedings of the Twenty-Eighth International Joint Conference on Artificial Intelligence, IJCAI-19, pp. 758\u2013765 (2019). https:\/\/doi.org\/10.24963\/ijcai.2019\/107","DOI":"10.24963\/ijcai.2019\/107"},{"issue":"2","key":"3191_CR39","doi-asserted-by":"publisher","first-page":"658","DOI":"10.1109\/TIP.2018.2868426","volume":"28","author":"Z Han","year":"2018","unstructured":"Han, Z., Shang, M., Liu, Z., Vong, C.-M., Liu, Y.-S., Zwicker, M., Han, J., Chen, C.P.: SeqViews2SeqLabels: learning 3D global features via aggregating sequential views by RNN with attention. IEEE Trans. Image Process. 28(2), 658\u2013672 (2018)","journal-title":"IEEE Trans. Image Process."},{"key":"3191_CR40","doi-asserted-by":"publisher","unstructured":"Han, Z., Liu, X., Liu, Y.-S., Zwicker, M.: Parts4Feature: learning 3D global features from generally semantic parts in multiple views. In: Proceedings of the Twenty-Eighth International Joint Conference on Artificial Intelligence, IJCAI-19, pp. 766\u2013773 (2019). https:\/\/doi.org\/10.24963\/ijcai.2019\/108","DOI":"10.24963\/ijcai.2019\/108"},{"key":"3191_CR41","doi-asserted-by":"crossref","unstructured":"Zhou, B., Khosla, A., Lapedriza, A., Oliva, A., Torralba, A.: Learning deep features for discriminative localization. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 2921\u20132929 (2016)","DOI":"10.1109\/CVPR.2016.319"}],"container-title":["The Visual Computer"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00371-023-03191-4.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00371-023-03191-4\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00371-023-03191-4.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,11,6]],"date-time":"2024-11-06T17:01:00Z","timestamp":1730912460000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00371-023-03191-4"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,12,21]]},"references-count":41,"journal-issue":{"issue":"9","published-print":{"date-parts":[[2024,9]]}},"alternative-id":["3191"],"URL":"https:\/\/doi.org\/10.1007\/s00371-023-03191-4","relation":{},"ISSN":["0178-2789","1432-2315"],"issn-type":[{"type":"print","value":"0178-2789"},{"type":"electronic","value":"1432-2315"}],"subject":[],"published":{"date-parts":[[2023,12,21]]},"assertion":[{"value":"29 October 2023","order":1,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"21 December 2023","order":2,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare that they have no known competing financial interests or personal relationships that could have appeared to influence the work reported in this paper.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}},{"value":"This manuscript is approved by all authors for publication. I would like to declare on behalf of my co-authors that the work described was original research that has not been published previously, and not under consideration for publication elsewhere, in whole or in part. All the authors listed have approved the manuscript that is enclosed.","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Consent for publication"}}]}}