{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,30]],"date-time":"2026-04-30T17:11:09Z","timestamp":1777569069052,"version":"3.51.4"},"reference-count":107,"publisher":"Springer Science and Business Media LLC","issue":"4","license":[{"start":{"date-parts":[[2024,11,11]],"date-time":"2024-11-11T00:00:00Z","timestamp":1731283200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,11,11]],"date-time":"2024-11-11T00:00:00Z","timestamp":1731283200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100003422","name":"Global Collaborative Research, King Abdullah University of Science and Technology","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100003422","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int J Comput Vis"],"published-print":{"date-parts":[[2025,4]]},"DOI":"10.1007\/s11263-024-02283-5","type":"journal-article","created":{"date-parts":[[2024,11,11]],"date-time":"2024-11-11T04:24:19Z","timestamp":1731299059000},"page":"2197-2226","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":10,"title":["MVTN: Learning Multi-view Transformations for 3D Understanding"],"prefix":"10.1007","volume":"133","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-3989-7540","authenticated-orcid":false,"given":"Abdullah","family":"Hamdi","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Faisal","family":"AlZahrani","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Silvio","family":"Giancola","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Bernard","family":"Ghanem","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2024,11,11]]},"reference":[{"key":"2283_CR1","doi-asserted-by":"crossref","unstructured":"Abdelreheem, A., Skorokhodov, I., Ovsjanikov, M., & Wonka, P. (2023). Satr: Zero-shot semantic segmentation of 3d shapes. In: Proceedings of the international conference on computer vision (ICCV).","DOI":"10.1109\/ICCV51070.2023.01392"},{"issue":"6","key":"2283_CR2","doi-asserted-by":"crossref","first-page":"1117","DOI":"10.1109\/TPAMI.2009.25","volume":"31","author":"CB Akg\u00fcl","year":"2009","unstructured":"Akg\u00fcl, C. B., Sankur, B., Yemez, Y., & Schmitt, F. (2009). 3d model retrieval using probability density-based shape descriptors. IEEE Transactions on Pattern Analysis and Machine Intelligence, 31(6), 1117\u20131133.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"2283_CR3","doi-asserted-by":"crossref","unstructured":"Armeni, I., Sener, O., Zamir, A.R., Jiang, H., Brilakis, I., Fischer, M., & Savarese, S. (2016). 3d semantic parsing of large-scale indoor spaces. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp. 1534\u20131543.","DOI":"10.1109\/CVPR.2016.170"},{"key":"2283_CR4","doi-asserted-by":"crossref","unstructured":"Bai, S., Bai, X., Zhou, Z., Zhang, Z., & Jan\u00a0Latecki, L. (2016). Gift: A real-time and scalable 3d shape search engine. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp. 5023\u20135032.","DOI":"10.1109\/CVPR.2016.543"},{"key":"2283_CR5","doi-asserted-by":"crossref","unstructured":"Behley, J., Garbade, M., Milioto, A., Quenzel, J., Behnke, S., Stachniss, C., & Gall, J. (2019). SemanticKITTI: A dataset for semantic scene understanding of LiDAR sequences. In: Proceeding of the IEEE\/CVF international conference on computer vision (ICCV).","DOI":"10.1109\/ICCV.2019.00939"},{"issue":"4","key":"2283_CR6","doi-asserted-by":"crossref","first-page":"3145","DOI":"10.1109\/LRA.2018.2850061","volume":"3","author":"Y Ben-Shabat","year":"2018","unstructured":"Ben-Shabat, Y., Lindenbaum, M., & Fischer, A. (2018). 3dmfv: Three-dimensional point cloud classification in real-time using convolutional neural networks. IEEE Robotics and Automation Letters, 3(4), 3145\u20133152.","journal-title":"IEEE Robotics and Automation Letters"},{"key":"2283_CR7","unstructured":"Blender online community: Blender - a 3D modelling and rendering package. Blender Foundation, Blender Institute, Amsterdam (2018). Blender Foundation. http:\/\/www.blender.org"},{"key":"2283_CR8","doi-asserted-by":"crossref","unstructured":"Bradski, G. & Grossberg, S. (1994). Recognition of 3-d objects from multiple 2-d views by a self-organizing neural architecture. In: From statistics to neural networks, pp. 349\u2013375. Springer.","DOI":"10.1007\/978-3-642-79119-2_17"},{"key":"2283_CR9","unstructured":"Brock, A., Lim, T., Ritchie, J. M., & Weston, N. (2016). Generative and discriminative voxel modeling with convolutional neural networks. arXiv preprint arXiv:1608.04236."},{"issue":"1","key":"2283_CR10","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1145\/1899404.1899405","volume":"30","author":"AM Bronstein","year":"2011","unstructured":"Bronstein, A. M., Bronstein, M. M., Guibas, L. J., & Ovsjanikov, M. (2011). Shape google: Geometric words and expressions for invariant shape retrieval. ACM Transactions on Graphics (TOG), 30(1), 1\u201320.","journal-title":"ACM Transactions on Graphics (TOG)"},{"key":"2283_CR11","doi-asserted-by":"crossref","unstructured":"Carlini, N., & Wagner, D. (2017). Towards evaluating the robustness of neural networks. In: IEEE symposium on security and privacy (SP).","DOI":"10.1109\/SP.2017.49"},{"key":"2283_CR12","doi-asserted-by":"crossref","unstructured":"Caron, M., Touvron, H., Misra, I., J\u00e9gou, H., Mairal, J., Bojanowski, P. & Joulin, A. (2021). Emerging properties in self-supervised vision transformers. In: Proceedings of the international conference on computer vision (ICCV).","DOI":"10.1109\/ICCV48922.2021.00951"},{"key":"2283_CR13","unstructured":"Chang, A. X., Funkhouser, T., Guibas, L., Hanrahan, P., Huang, Q., Li, Z., Savarese, S., Savva, M., Song, S., Su, H., Xiao, J., Yi, L. & Yu, F. (2015). ShapeNet: An information-rich 3D model repository. Technical Report arXiv:1512.03012 [cs.GR], Stanford University \u2014 Princeton University \u2014 Toyota Technological Institute at Chicago."},{"key":"2283_CR14","doi-asserted-by":"crossref","unstructured":"Chaudhuri, S. & Koltun, V. (2010). Data-driven suggestions for creativity support in 3d modeling. In: ACM SIGGRAPH Asia 2010 Papers, pp. 1\u201310.","DOI":"10.1145\/1882262.1866205"},{"key":"2283_CR15","unstructured":"Chen, W., Ling, H., Gao, J., Smith, E., Lehtinen, J., Jacobson, A. & Fidler, S. (2019). Learning to predict 3d objects with an interpolation-based differentiable renderer. In: Advances in neural information processing systems, pp. 9609\u20139619."},{"key":"2283_CR16","doi-asserted-by":"crossref","unstructured":"Chen, L.-C., Zhu, Y., Papandreou, G., Schroff, F., & Adam, H. (2018). Encoder-decoder with atrous separable convolution for semantic image segmentation. In: Proceedings of the European conference on computer vision (ECCV), pp. 801\u2013818.","DOI":"10.1007\/978-3-030-01234-2_49"},{"key":"2283_CR17","doi-asserted-by":"crossref","first-page":"223","DOI":"10.1111\/1467-8659.00669","volume":"22","author":"D-Y Chen","year":"2003","unstructured":"Chen, D.-Y., Tian, X.-P., Shen, Y.-T., & Ouhyoung, M. (2003). On visual similarity based 3d model retrieval. Computer Graphics Forum, 22, 223\u2013232.","journal-title":"Computer Graphics Forum"},{"key":"2283_CR18","doi-asserted-by":"crossref","first-page":"223","DOI":"10.1111\/1467-8659.00669","volume":"22","author":"D-Y Chen","year":"2003","unstructured":"Chen, D.-Y., Tian, X.-P., Shen, Y.-T., & Ouhyoung, M. (2003). On visual similarity based 3d model retrieval. Computer Graphics Forum, 22, 223\u2013232.","journal-title":"Computer Graphics Forum"},{"issue":"12","key":"2283_CR19","doi-asserted-by":"crossref","first-page":"3244","DOI":"10.1109\/TVCG.2018.2866793","volume":"25","author":"S Chen","year":"2018","unstructured":"Chen, S., Zheng, L., Zhang, Y., Sun, Z., & Xu, K. (2018). Veram: View-enhanced recurrent attention model for 3d shape classification. IEEE Transactions on Visualization and Computer Graphics, 25(12), 3244\u20133257.","journal-title":"IEEE Transactions on Visualization and Computer Graphics"},{"key":"2283_CR20","doi-asserted-by":"crossref","unstructured":"Choy, C., Gwak, J., & Savarese, S. (2019). 4d spatio-temporal convnets: Minkowski convolutional neural networks. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp. 3075\u20133084.","DOI":"10.1109\/CVPR.2019.00319"},{"key":"2283_CR21","unstructured":"Cohen, T. & Welling, M. (2016). Group equivariant convolutional networks. In: International conference on machine learning, pp. 2990\u20132999."},{"key":"2283_CR22","doi-asserted-by":"crossref","unstructured":"Dai, A., & Nie\u00dfner, M. (2018). 3dmv: Joint 3d-multi-view prediction for 3d semantic scene segmentation. In: Proceedings of the European conference on computer vision (ECCV), pp. 452\u2013468.","DOI":"10.1007\/978-3-030-01249-6_28"},{"key":"2283_CR23","unstructured":"Deserno, M. (2004). How to generate equidistributed points on the surface of a sphere. If Polymerforshung (Ed.), 99."},{"key":"2283_CR24","unstructured":"Dosovitskiy, A., Beyer, L., Kolesnikov, A., Weissenborn, D., Zhai, X., Unterthiner, T., Dehghani, M., Minderer, M., Heigold, G., Gelly, S., Uszkoreit, J., & Houlsby, N. (2021). An image is worth 16x16 words: Transformers for image recognition at scale. ICLR."},{"key":"2283_CR25","doi-asserted-by":"crossref","unstructured":"Esteves, C., Xu, Y., Allen-Blanchette, C. & Daniilidis, K. (2019). Equivariant multi-view networks. In: Proceedings of the IEEE international conference on computer vision, pp. 1568\u20131577.","DOI":"10.1109\/ICCV.2019.00165"},{"key":"2283_CR26","doi-asserted-by":"crossref","unstructured":"Feng, Y., Zhang, Z., Zhao, X., Ji, R. & Gao, Y. (2018). Gvcnn: Group-view convolutional neural networks for 3d shape recognition. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp. 264\u2013272.","DOI":"10.1109\/CVPR.2018.00035"},{"key":"2283_CR27","doi-asserted-by":"crossref","first-page":"8279","DOI":"10.1609\/aaai.v33i01.33018279","volume":"33","author":"Y Feng","year":"2019","unstructured":"Feng, Y., Feng, Y., You, H., Zhao, X., & Gao, Y. (2019). Meshnet: Mesh neural network for 3d shape representation. Proceedings of the AAAI conference on artificial intelligence, 33, 8279\u20138286.","journal-title":"Proceedings of the AAAI conference on artificial intelligence"},{"key":"2283_CR28","doi-asserted-by":"crossref","unstructured":"Furuya, T., & Ohbuchi, R. (2016). Deep aggregation of local 3d geometric features for 3d model retrieval. In: BMVC, vol. 7, p. 8.","DOI":"10.1145\/3095140.3095148"},{"issue":"4","key":"2283_CR29","doi-asserted-by":"crossref","first-page":"2269","DOI":"10.1109\/TIP.2011.2170081","volume":"21","author":"Y Gao","year":"2011","unstructured":"Gao, Y., Tang, J., Hong, R., Yan, S., Dai, Q., Zhang, N., & Chua, T.-S. (2011). Camera constraint-free view-based 3-d object retrieval. IEEE Transactions on Image Processing, 21(4), 2269\u20132281.","journal-title":"IEEE Transactions on Image Processing"},{"key":"2283_CR30","doi-asserted-by":"crossref","unstructured":"Garland, M., & Heckbert, P. S. (1997). Surface simplification using quadric error metrics. In: Proceedings of the 24th annual conference on computer graphics and interactive techniques, pp. 209\u2013216.","DOI":"10.1145\/258734.258849"},{"key":"2283_CR31","unstructured":"Goyal, A., Law, H., Liu, B., Newell, A., & Deng, J. (2021). Revisiting point cloud shape classification with a simple and effective baseline. In: ICML."},{"key":"2283_CR32","doi-asserted-by":"crossref","unstructured":"Graham, B., Engelcke, M., & Van Der Maaten, L. (2018). 3d semantic segmentation with submanifold sparse convolutional networks. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp. 9224\u20139232.","DOI":"10.1109\/CVPR.2018.00961"},{"key":"2283_CR33","doi-asserted-by":"crossref","unstructured":"Grenander, U. (1978). Pattern analysis: lectures in pattern theory, Vol. I. Springer","DOI":"10.1007\/978-1-4684-9354-2"},{"key":"2283_CR34","doi-asserted-by":"crossref","unstructured":"Hamdi, A., Giancola, S., & Ghanem, B. (2021). Mvtn: Multi-view transformation network for 3d shape recognition. In: Proceedings of the IEEE\/CVF international conference on computer vision (ICCV), pp. 1\u201311.","DOI":"10.1109\/ICCV48922.2021.00007"},{"key":"2283_CR35","unstructured":"Hamdi, A., Giancola, S., & Ghanem, B. (2023). Voint cloud: Multi-view point cloud representation for 3d understanding. In: International conference on learning representations. https:\/\/openreview.net\/forum?id=IpGgfpMucHj."},{"key":"2283_CR36","doi-asserted-by":"crossref","unstructured":"Hamdi, A., Melas-Kyriazi, L., Mai, J., Qian, G., Liu, R., Vondrick, C., Ghanem, B., & Vedaldi, A. (2024). Ges: Generalized exponential splatting for efficient radiance field rendering. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (CVPR).","DOI":"10.1109\/CVPR52733.2024.01873"},{"key":"2283_CR37","doi-asserted-by":"crossref","unstructured":"Hamdi, A., Muller, M., & Ghanem, B. (2020a). SADA: Semantic adversarial diagnostic attacks for autonomous applications. In: AAAI conference on artificial intelligence.","DOI":"10.1609\/aaai.v34i07.6722"},{"key":"2283_CR38","doi-asserted-by":"crossref","first-page":"241","DOI":"10.1007\/978-3-030-58610-2_15","volume-title":"Computer Vision - ECCV 2020","author":"A Hamdi","year":"2020","unstructured":"Hamdi, A., Rojas, S., Thabet, A., & Ghanem, B. (2020). Advpc: Transferable adversarial perturbations on 3d point clouds. Computer Vision - ECCV 2020 (pp. 241\u2013257). Cham: Springer."},{"issue":"4","key":"2283_CR39","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1145\/3306346.3322959","volume":"38","author":"R Hanocka","year":"2019","unstructured":"Hanocka, R., Hertz, A., Fish, N., Giryes, R., Fleishman, S., & Cohen-Or, D. (2019). Meshcnn: A network with an edge. ACM Transactions on Graphics (TOG), 38(4), 1\u201312.","journal-title":"ACM Transactions on Graphics (TOG)"},{"key":"2283_CR40","doi-asserted-by":"publisher","unstructured":"He, K., Chen, X., Xie, S., Li, Y., Doll\u00e1r, P., & Girshick, R. (2021). Masked autoencoders are scalable vision learners. arXiv. https:\/\/doi.org\/10.48550\/ARXIV.2111.06377 . https:\/\/arxiv.org\/abs\/2111.06377","DOI":"10.48550\/ARXIV.2111.06377"},{"key":"2283_CR41","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S. & Sun, J. (2015). Deep residual learning for image recognition. CoRR arXiv:1512.03385.","DOI":"10.1109\/CVPR.2016.90"},{"key":"2283_CR42","doi-asserted-by":"crossref","unstructured":"He, X., Zhou, Y., Zhou, Z., Bai, S. & Bai, X. (2018). Triplet-center loss for multi-view 3d object retrieval. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp. 1945\u20131954.","DOI":"10.1109\/CVPR.2018.00208"},{"key":"2283_CR43","unstructured":"Hegde, V., & Zadeh, R. (2016). Fusionnet: 3d object classification using multiple data representations. arXiv preprint arXiv:1607.05695."},{"key":"2283_CR44","unstructured":"Jaderberg, M., Simonyan, K., & Zisserman, A., et al. (2015). Spatial transformer networks. In: Advances in Neural Information Processing Systems, pp. 2017\u20132025."},{"key":"2283_CR45","doi-asserted-by":"crossref","unstructured":"Jaritz, M., Gu, J. & Su, H. (2019). Multi-view pointnet for 3d scene understanding. In: Proceedings of the IEEE international conference on computer vision workshops.","DOI":"10.1109\/ICCVW.2019.00494"},{"issue":"10","key":"2283_CR46","doi-asserted-by":"crossref","first-page":"5121","DOI":"10.1109\/TIP.2019.2912356","volume":"28","author":"K Jia","year":"2019","unstructured":"Jia, K., Lin, J., Tan, M., & Tao, D. (2019). Deep multi-view learning using neuron-wise correlation-maximizing regularizes. IEEE Transactions on Image Processing, 28(10), 5121\u20135134.","journal-title":"IEEE Transactions on Image Processing"},{"key":"2283_CR47","doi-asserted-by":"crossref","first-page":"8513","DOI":"10.1609\/aaai.v33i01.33018513","volume":"33","author":"J Jiang","year":"2019","unstructured":"Jiang, J., Bao, D., Chen, Z., Zhao, X., & Gao, Y. (2019). Mlvcnn: Multi-loop-view convolutional neural network for 3d shape retrieval. Proceedings of the AAAI conference on artificial intelligence, 33, 8513\u20138520.","journal-title":"Proceedings of the AAAI conference on artificial intelligence"},{"key":"2283_CR48","unstructured":"Jimenez\u00a0Rezende, D., Eslami, S. M. A., Mohamed, S., Battaglia, P., Jaderberg, M., & Heess, N. (2016). Unsupervised learning of 3d structure from images. In: Lee, D. D., Sugiyama, M., Luxburg, U. V., Guyon, I., Garnett, R. (Eds.) Advances in Neural Information Processing Systems 29, pp. 4996\u20135004. Curran Associates, Inc., http:\/\/papers.nips.cc\/paper\/6600-unsupervised-learning-of-3d-structure-from-images.pdf."},{"key":"2283_CR49","doi-asserted-by":"crossref","unstructured":"Kalogerakis, E., Averkiou, M., Maji, S., & Chaudhuri, S. (2017). 3d shape segmentation with projective convolutional networks. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp. 3779\u20133788.","DOI":"10.1109\/CVPR.2017.702"},{"key":"2283_CR50","doi-asserted-by":"crossref","unstructured":"Kanezaki, A., Matsushita, Y., & Nishida, Y. (2018). Rotationnet: Joint object categorization and pose estimation using multiviews from unsupervised viewpoints. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp. 5010\u20135019.","DOI":"10.1109\/CVPR.2018.00526"},{"key":"2283_CR51","doi-asserted-by":"crossref","unstructured":"Kato, H., Ushiku, Y., & Harada, T. (2018). Neural 3d mesh renderer. In: Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR), pp. 3907\u20133916.","DOI":"10.1109\/CVPR.2018.00411"},{"key":"2283_CR52","first-page":"156","volume":"6","author":"M Kazhdan","year":"2003","unstructured":"Kazhdan, M., Funkhouser, T., & Rusinkiewicz, S. (2003). Rotation invariant spherical harmonic representation of 3 d shape descriptors. Symposium on Geometry Processing, 6, 156\u2013164.","journal-title":"Symposium on Geometry Processing"},{"issue":"4","key":"2283_CR53","doi-asserted-by":"crossref","first-page":"139","DOI":"10.1145\/3592433","volume":"42","author":"B Kerbl","year":"2023","unstructured":"Kerbl, B., Kopanas, G., Leimk\u00fchler, T., & Drettakis, G. (2023). 3d Gaussian splatting for real-time radiance field rendering. ACM Transactions on Graphics, 42(4), 139.","journal-title":"ACM Transactions on Graphics"},{"key":"2283_CR54","doi-asserted-by":"crossref","unstructured":"Klokov, R., & Lempitsky, V. (2017). Escape from cells: Deep kd-networks for the recognition of 3d point cloud models. In: Proceedings of the IEEE international conference on computer vision, pp. 863\u2013872.","DOI":"10.1109\/ICCV.2017.99"},{"key":"2283_CR55","unstructured":"Kulkarni, T. D., Whitney, W. F., Kohli, P., & Tenenbaum, J. (2015). Deep convolutional inverse graphics network. In: Advances in Neural Information Processing Systems (NIPS), pp. 2539\u20132547."},{"key":"2283_CR56","doi-asserted-by":"crossref","unstructured":"Kundu, A., Yin, X., Fathi, A., Ross, D., Brewington, B., Funkhouser, T., & Pantofaru, C. (2020). Virtual multi-view fusion for 3d semantic segmentation. In: European conference on computer vision (ECCV), pp. 518\u2013535. Springer.","DOI":"10.1007\/978-3-030-58586-0_31"},{"key":"2283_CR57","doi-asserted-by":"crossref","unstructured":"Landrieu, L. & Boussaha, M. (2019). Point cloud oversegmentation with graph-structured deep metric learning, pp. 7440\u20137449.","DOI":"10.1109\/CVPR.2019.00762"},{"key":"2283_CR58","doi-asserted-by":"crossref","unstructured":"Landrieu, L. & Simonovsky, M. (2018). Large-scale point cloud semantic segmentation with superpoint graphs. In: Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR), pp. 4558\u20134567.","DOI":"10.1109\/CVPR.2018.00479"},{"key":"2283_CR59","unstructured":"Lenail, A. (2020). NN-SVG. http:\/\/alexlenail.me\/NN-SVG\/index.html."},{"key":"2283_CR60","doi-asserted-by":"crossref","unstructured":"Li, T.-M., Aittala, M., Durand, F., & Lehtinen, J. (2018). Differentiable Monte Carlo ray tracing through edge sampling. In: SIGGRAPH Asia 2018 Technical Papers, p. 222. ACM","DOI":"10.1145\/3272127.3275109"},{"key":"2283_CR61","unstructured":"Li, Y., Bu, R., Sun, M., Wu, W., Di, X. & Chen, B. (2018). Pointcnn: Convolution on x-transformed points. In: Advances in neural information processing systems (NIPS), pp. 820\u2013830."},{"issue":"3","key":"2283_CR62","doi-asserted-by":"crossref","first-page":"821","DOI":"10.1007\/s11042-011-0873-3","volume":"62","author":"B Li","year":"2013","unstructured":"Li, B., & Johan, H. (2013). 3d model retrieval using hybrid features and class information. Multimedia Tools and Applications, 62(3), 821\u2013846.","journal-title":"Multimedia Tools and Applications"},{"key":"2283_CR63","doi-asserted-by":"crossref","unstructured":"Lin, C.-H., Ma, W.-C., Torralba, A., & Lucey, S. (2021). Barf: Bundle-adjusting neural radiance fields. In: IEEE international conference on computer vision (ICCV).","DOI":"10.1109\/ICCV48922.2021.00569"},{"key":"2283_CR64","doi-asserted-by":"crossref","unstructured":"Liu, Y., Fan, B., Xiang, S., & Pan, C. (2019). Relation-shape convolutional neural network for point cloud analysis. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp. 8895\u20138904.","DOI":"10.1109\/CVPR.2019.00910"},{"key":"2283_CR65","doi-asserted-by":"crossref","unstructured":"Liu, S., Li, T., Chen, W., & Li, H. (2019). Soft rasterizer: A differentiable renderer for image-based 3d reasoning. In: Proceedings of the IEEE international conference on computer vision, pp. 7708\u20137717.","DOI":"10.1109\/ICCV.2019.00780"},{"key":"2283_CR66","unstructured":"Liu, Z., Tang, H., Lin, Y. & Han, S. (2019). Point-voxel CNN for efficient 3d deep learning. In: Advances in neural information processing systems, pp. 965\u2013975."},{"key":"2283_CR67","doi-asserted-by":"crossref","unstructured":"Liu, R., Wu, R., Van\u00a0Hoorick, B., Tokmakov, P., Zakharov, S., & Vondrick, C. (2023). Zero-1-to-3: Zero-shot one image to 3d object. arXiv preprint arXiv:2303.11328.","DOI":"10.1109\/ICCV51070.2023.00853"},{"key":"2283_CR68","doi-asserted-by":"crossref","unstructured":"Loper, M. M., & Black, M. J. (2014). Opendr: An approximate differentiable renderer. In: European conference on computer vision (ECCV), pp. 154\u2013169. Springer.","DOI":"10.1007\/978-3-319-10584-0_11"},{"key":"2283_CR69","unstructured":"Loshchilov, I., & Hutter, F. (2017). Decoupled weight decay regularization. arXiv preprint arXiv:1711.05101."},{"key":"2283_CR70","doi-asserted-by":"crossref","unstructured":"Maturana, D., & Scherer, S. (2015). Voxnet: A 3d convolutional neural network for real-time object recognition. In: 2015 IEEE\/RSJ international conference on intelligent robots and systems (IROS), pp. 922\u2013928. IEEE.","DOI":"10.1109\/IROS.2015.7353481"},{"key":"2283_CR71","doi-asserted-by":"crossref","unstructured":"Michel, O., Bar-On, R., Liu, R., Benaim, S., & Hanocka, R. (2022). Text2mesh: Text-driven neural stylization for meshes. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (CVPR), pp. 13492\u201313502.","DOI":"10.1109\/CVPR52688.2022.01313"},{"key":"2283_CR72","doi-asserted-by":"crossref","unstructured":"Mildenhall, B., Srinivasan, P.P., Tancik, M., Barron, J.T., Ramamoorthi, R., & Ng, R. (2020). Nerf: Representing scenes as neural radiance fields for view synthesis. In: European conference computer vision, pp. 405\u2013421. Springer","DOI":"10.1007\/978-3-030-58452-8_24"},{"issue":"4","key":"2283_CR73","doi-asserted-by":"crossref","first-page":"807","DOI":"10.1145\/571647.571648","volume":"21","author":"R Osada","year":"2002","unstructured":"Osada, R., Funkhouser, T., Chazelle, B., & Dobkin, D. (2002). Shape distributions. ACM Transactions on Graphics (TOG), 21(4), 807\u2013832.","journal-title":"ACM Transactions on Graphics (TOG)"},{"key":"2283_CR74","unstructured":"Paszke, A., Gross, S., Chintala, S., Chanan, G., Yang, E., DeVito, Z., Lin, Z., Desmaison, A., Antiga, L., & Lerer, A. (2017). Automatic differentiation in pytorch. In: NIPS-W."},{"key":"2283_CR75","unstructured":"Poole, B., Jain, A., Barron, J. T., & Mildenhall, B. (2022). Dreamfusion: Text-to-3d using 2d diffusion. Represent: international conference learning."},{"key":"2283_CR76","unstructured":"Qi, C. R., Su, H., Mo, K., & Guibas, L. J. (2017a). Pointnet: Deep learning on point sets for 3d classification and segmentation. In: Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR), pp. 652\u2013660."},{"key":"2283_CR77","doi-asserted-by":"crossref","unstructured":"Qi, C. R., Su, H., Nie\u00dfner, M., Dai, A., Yan, M. & Guibas, L. J. (2016). Volumetric and multi-view CNNS for object classification on 3d data. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp. 5648\u20135656.","DOI":"10.1109\/CVPR.2016.609"},{"key":"2283_CR78","doi-asserted-by":"crossref","unstructured":"Qi, C. R., Su, H., Nie\u00dfner, M., Dai, A., Yan, M., & Guibas, L. J. (2016). Volumetric and multi-view CNNS for object classification on 3d data. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp. 5648\u20135656.","DOI":"10.1109\/CVPR.2016.609"},{"key":"2283_CR79","unstructured":"Qi, C. R., Yi, L., Su, H., & Guibas, L. J. (2017b). Pointnet++: Deep hierarchical feature learning on point sets in a metric space. In: Advances in Neural Information Processing Systems (NIPS), pp. 5099\u20135108."},{"key":"2283_CR80","unstructured":"Qian, G., Mai, J., Hamdi, A., Ren, J., Siarohin, A., Li, B., Lee, H.-Y., Skorokhodov, I., Wonka, P., Tulyakov, S., & Ghanem, B. (2023). Magic123: One image to high-quality 3d object generation using both 2d and 3d diffusion priors. arXiv preprint arXiv:2306.17843."},{"key":"2283_CR81","unstructured":"Radford, A., Kim, J.W., Hallacy, C., Ramesh, A., Goh, G., Agarwal, S., Sastry, G., Askell, A., Mishkin, P., & Clark, J., et al. (2021). Learning transferable visual models from natural language supervision. In: International conference on machine learning, pp. 8748\u20138763. PMLR"},{"key":"2283_CR82","unstructured":"Ramesh, A., Pavlov, M., Goh, G., Gray, S., Voss, C., Radford, A., Chen, M., & Sutskever, I. (2021). Zero-shot text-to-image generation. In: International conference on machine learning, pp. 8821\u20138831. PMLR"},{"key":"2283_CR83","unstructured":"Ravi, N., Reizenstein, J., Novotny, D., Gordon, T., Lo, W.-Y., Johnson, J. & Gkioxari, G. (2020). Accelerating 3d deep learning with pytorch3d. arXiv:2007.08501."},{"key":"2283_CR84","doi-asserted-by":"crossref","unstructured":"Rombach, R., Blattmann, A., Lorenz, D., Esser, P., & Ommer, B. (2022). High-resolution image synthesis with latent diffusion models. In: IEEE conference on computer vision pattern recognition, pp. 10684\u201310695.","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"2283_CR85","doi-asserted-by":"crossref","unstructured":"Russakovsky, O., Deng, J., Su, H., Krause, J., Satheesh, S., Ma, S., Huang, Z., Karpathy, A., Khosla, A., Bernstein, M. S., Berg, A. C., & Li, F. (2014). Imagenet large scale visual recognition challenge. CoRR arXiv:1409.0575.","DOI":"10.1007\/s11263-015-0816-y"},{"key":"2283_CR86","doi-asserted-by":"publisher","unstructured":"Sfikas, K., Theoharis, T. & Pratikakis, I. (2017). Exploiting the PANORAMA representation for convolutional neural network classification and retrieval. In: Pratikakis, I., Dupont, F., Ovsjanikov, M. (Eds.) Eurographics workshop on 3D object retrieval, pp. 1\u20137. The Eurographics Association, https:\/\/doi.org\/10.2312\/3dor.20171045.","DOI":"10.2312\/3dor.20171045"},{"key":"2283_CR87","doi-asserted-by":"crossref","unstructured":"Su, H., Maji, S., Kalogerakis, E., & Learned-Miller, E. (2015). Multi-view convolutional neural networks for 3d shape recognition. In: Proceedings of the IEEE international conference on computer vision, pp. 945\u2013953.","DOI":"10.1109\/ICCV.2015.114"},{"issue":"May","key":"2283_CR88","first-page":"1027","volume":"8","author":"M Sugiyama","year":"2007","unstructured":"Sugiyama, M. (2007). Dimensionality reduction of multimodal labeled data by local fisher discriminant analysis. Journal of Machine Learning Research, 8(May), 1027\u20131061.","journal-title":"Journal of Machine Learning Research"},{"key":"2283_CR89","doi-asserted-by":"crossref","unstructured":"Thomas, H., Qi, C.R., Deschaud, J.-E., Marcotegui, B., Goulette, F., & Guibas, L. J. (2019). Kpconv: Flexible and deformable convolution for point clouds. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp. 6411\u20136420.","DOI":"10.1109\/ICCV.2019.00651"},{"key":"2283_CR90","doi-asserted-by":"crossref","unstructured":"Truong, P., Rakotosaona, M.-J., Manhardt, F., & Tombari, F. (2023). Sparf: Neural radiance fields from sparse and noisy poses. In: IEEE\/CVF conference on computer vision and pattern recognition, CVPR.","DOI":"10.1109\/CVPR52729.2023.00408"},{"key":"2283_CR91","doi-asserted-by":"crossref","unstructured":"Uy, M. A., Pham, Q.-H., Hua, B.-S., Nguyen, D. T. & Yeung, S.-K. (2019). Revisiting point cloud classification: A new benchmark dataset and classification model on real-world data. In: International conference on computer vision (ICCV).","DOI":"10.1109\/ICCV.2019.00167"},{"key":"2283_CR92","doi-asserted-by":"crossref","unstructured":"Wang, Y., Sun, Y., Liu, Z., Sarma, S. E., Bronstein, M. M., & Solomon, J. M. (2019). Dynamic graph CNN for learning on point clouds. ACM Transactions on Graphics (TOG).","DOI":"10.1145\/3326362"},{"key":"2283_CR93","doi-asserted-by":"crossref","unstructured":"Wang, W., Yu, R., Huang, Q. & Neumann, U. (2018). SGPN: Similarity group proposal network for 3d point cloud instance segmentation. In: Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR), pp. 2569\u20132578.","DOI":"10.1109\/CVPR.2018.00272"},{"issue":"3","key":"2283_CR94","doi-asserted-by":"crossref","first-page":"2902","DOI":"10.1109\/LRA.2019.2922582","volume":"4","author":"BH Wang","year":"2019","unstructured":"Wang, B. H., Chao, W.-L., Wang, Y., Hariharan, B., Weinberger, K. Q., & Campbell, M. (2019). Ldls: 3-d object segmentation through label diffusion from 2-d images. IEEE Robotics and Automation Letters, 4(3), 2902\u20132909.","journal-title":"IEEE Robotics and Automation Letters"},{"key":"2283_CR95","doi-asserted-by":"crossref","unstructured":"Wei, X., Yu, R., & Sun, J. (2020). View-gcn: View-based graph convolutional network for 3d shape analysis. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp. 1850\u20131859.","DOI":"10.1109\/CVPR42600.2020.00192"},{"key":"2283_CR96","doi-asserted-by":"crossref","unstructured":"Wiles, O., Gkioxari, G., Szeliski, R. & Johnson, J. (2020). Synsin: End-to-end view synthesis from a single image. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp. 7467\u20137477.","DOI":"10.1109\/CVPR42600.2020.00749"},{"key":"2283_CR97","unstructured":"Woo, M., Neider, J., Davis, T., & Shreiner, D. (1998). OpenGL programming guide: The official guide to learning OpenGL, Release 1. Addison-wesley."},{"key":"2283_CR98","unstructured":"Wu, Z., Song, S., Khosla, A., Yu, F., Zhang, L., Tang, X. & Xiao, J. (2015). 3d shapenets: A deep representation for volumetric shapes. In: 2015 IEEE conference on computer vision and pattern recognition (CVPR), pp. 1912\u20131920."},{"key":"2283_CR99","unstructured":"Wu, Z., Song, S., Khosla, A., Yu, F., Zhang, L., Tang, X. & Xiao, J. (2015). 3d shapenets: A deep representation for volumetric shapes. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp. 1912\u20131920."},{"key":"2283_CR100","doi-asserted-by":"crossref","unstructured":"Wu, J., Tenenbaum, J. B., & Kohli, P. (2017). Neural scene de-rendering. In: IEEE conference on computer vision and pattern recognition (CVPR).","DOI":"10.1109\/CVPR.2017.744"},{"key":"2283_CR101","doi-asserted-by":"crossref","unstructured":"Xu, Y., Fan, T., Xu, M., Zeng, L., & Qiao, Y. (2018). Spidercnn: Deep learning on point sets with parameterized convolutional filters. In: Proceedings of the European conference on computer vision (ECCV), pp. 87\u2013102.","DOI":"10.1007\/978-3-030-01237-3_6"},{"key":"2283_CR102","doi-asserted-by":"crossref","unstructured":"Yang, Z. & Wang, L. (2019). Learning relationships for multi-view 3d object recognition. In: Proceedings of the IEEE international conference on computer vision, pp. 7505\u20137514.","DOI":"10.1109\/ICCV.2019.00760"},{"issue":"6","key":"2283_CR103","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1145\/2980179.2980238","volume":"35","author":"L Yi","year":"2016","unstructured":"Yi, L., Kim, V. G., Ceylan, D., Shen, I.-C., Yan, M., Su, H., Lu, C., Huang, Q., Sheffer, A., & Guibas, L. (2016). A scalable active framework for region annotation in 3d shape collections. ACM Transactions on Graphics (ToG), 35(6), 1\u201312.","journal-title":"ACM Transactions on Graphics (ToG)"},{"key":"2283_CR104","doi-asserted-by":"crossref","unstructured":"You, H., Feng, Y., Ji, R. & Gao, Y. (2018). Pvnet: A joint convolutional network of point cloud and multi-view for 3d shape recognition. In: Proceedings of the 26th ACM international conference on multimedia, pp. 1310\u20131318.","DOI":"10.1145\/3240508.3240702"},{"key":"2283_CR105","doi-asserted-by":"crossref","unstructured":"Yu, Y., & Smith, W. A. (2018). Inverserendernet: Learning single image inverse rendering. arXiv preprint arXiv:1811.12328.","DOI":"10.1109\/CVPR.2019.00327"},{"key":"2283_CR106","doi-asserted-by":"crossref","unstructured":"Yu, T., Meng, J. & Yuan, J. (2018). Multi-view harmonized bilinear network for 3d object recognition. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp. 186\u2013194.","DOI":"10.1109\/CVPR.2018.00027"},{"key":"2283_CR107","doi-asserted-by":"crossref","unstructured":"Zhao, H., Jiang, L., Jia, J., Torr, P., & Koltun, V. (2020). Point transformer. arXiv preprint arXiv:2012.09164.","DOI":"10.1109\/ICCV48922.2021.01595"}],"container-title":["International Journal of Computer Vision"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11263-024-02283-5.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11263-024-02283-5\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11263-024-02283-5.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,3,30]],"date-time":"2025-03-30T22:17:31Z","timestamp":1743373051000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11263-024-02283-5"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,11,11]]},"references-count":107,"journal-issue":{"issue":"4","published-print":{"date-parts":[[2025,4]]}},"alternative-id":["2283"],"URL":"https:\/\/doi.org\/10.1007\/s11263-024-02283-5","relation":{},"ISSN":["0920-5691","1573-1405"],"issn-type":[{"value":"0920-5691","type":"print"},{"value":"1573-1405","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,11,11]]},"assertion":[{"value":"22 February 2024","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"20 October 2024","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"11 November 2024","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}