{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,1]],"date-time":"2026-05-01T02:51:21Z","timestamp":1777603881076,"version":"3.51.4"},"reference-count":178,"publisher":"Springer Science and Business Media LLC","issue":"12","license":[{"start":{"date-parts":[[2024,10,12]],"date-time":"2024-10-12T00:00:00Z","timestamp":1728691200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0"},{"start":{"date-parts":[[2024,10,12]],"date-time":"2024-10-12T00:00:00Z","timestamp":1728691200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0"}],"funder":[{"DOI":"10.13039\/501100007129","name":"Shandong Provincial Natural Science Foundation","doi-asserted-by":"crossref","award":["ZR2021QF062"],"award-info":[{"award-number":["ZR2021QF062"]}],"id":[{"id":"10.13039\/501100007129","id-type":"DOI","asserted-by":"crossref"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Artif Intell Rev"],"DOI":"10.1007\/s10462-024-10937-6","type":"journal-article","created":{"date-parts":[[2024,10,12]],"date-time":"2024-10-12T05:02:05Z","timestamp":1728709325000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":5,"title":["Advances in text-guided 3D editing: a survey"],"prefix":"10.1007","volume":"57","author":[{"given":"Lihua","family":"Lu","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ruyang","family":"Li","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xiaohui","family":"Zhang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Hui","family":"Wei","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Guoguang","family":"Du","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Binqiang","family":"Wang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2024,10,12]]},"reference":[{"key":"10937_CR1","unstructured":"Achiam J, Adler S, Agarwal S, Ahmad L, Akkaya I, Aleman FL, Almeida D, Altenschmidt J, Altman S, Anadkat S et al (2023) GPT-4 technical report. arXiv preprint. arXiv:2303.08774"},{"key":"10937_CR2","doi-asserted-by":"crossref","unstructured":"Achlioptas P, Huang I, Sung M, Tulyakov S, Guibas L (2023) Shapetalk: A language dataset and framework for 3D shape edits and deformations. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 12685\u201312694","DOI":"10.1109\/CVPR52729.2023.01220"},{"key":"10937_CR3","doi-asserted-by":"crossref","unstructured":"Aliev K-A, Sevastopolsky A, Kolos M, Ulyanov D, Lempitsky V (2020) Neural point-based graphics. In: Proceeding of the 16th European conference on computer vision. Springer, pp 696\u2013712","DOI":"10.1007\/978-3-030-58542-6_42"},{"issue":"4","key":"10937_CR4","doi-asserted-by":"publisher","first-page":"71","DOI":"10.1145\/3197517.3201301","volume":"37","author":"M Atzmon","year":"2018","unstructured":"Atzmon M, Maron H, Lipman Y (2018) Point convolutional neural networks by extension operators. ACM Trans Graph 37(4):71","journal-title":"ACM Trans Graph"},{"issue":"4","key":"10937_CR5","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3592450","volume":"42","author":"O Avrahami","year":"2023","unstructured":"Avrahami O, Fried O, Lischinski D (2023) Blended latent diffusion. ACM Trans Graph (TOG) 42(4):1\u201311","journal-title":"ACM Trans Graph (TOG)"},{"key":"10937_CR6","doi-asserted-by":"crossref","unstructured":"Barron JT, Mildenhall B, Verbin D, Srinivasan PP, Hedman P (2022) Mip-NeRF 360: unbounded anti-aliased neural radiance fields. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 5470\u20135479","DOI":"10.1109\/CVPR52688.2022.00539"},{"key":"10937_CR7","unstructured":"Betker J, Goh G, Jing L, Brooks T, Wang J, Li L, Ouyang L, Zhuang J, Lee J, Guo Y (2023) Improving image generation with better captions. Computer Science. https:\/\/www.cdn.openai.com\/papers\/dall-e-3.pdf"},{"key":"10937_CR8","unstructured":"Bi\u0144kowski M, Sutherland DJ, Arbel M, Gretton A (2018) Demystifying mmd gans. arXiv preprint arXiv:1801.01401"},{"key":"10937_CR9","doi-asserted-by":"crossref","unstructured":"Brooks T, Holynski A, Efros AA (2023) InstructPix2Pix: learning to follow image editing instructions. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 18392\u201318402","DOI":"10.1109\/CVPR52729.2023.01764"},{"key":"10937_CR10","doi-asserted-by":"publisher","first-page":"829","DOI":"10.1007\/s00371-018-1550-6","volume":"34","author":"G Bui","year":"2018","unstructured":"Bui G, Le T, Morago B, Duan Y (2018) Point-based rendering enhancement via deep learning. Vis Comput 34:829\u2013841","journal-title":"Vis Comput"},{"key":"10937_CR11","doi-asserted-by":"crossref","unstructured":"Cao T, Kreis K, Fidler S, Sharp N, Yin K (2023) TexFusion: synthesizing 3D textures with text-guided image diffusion models. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp 4169\u20134181","DOI":"10.1109\/ICCV51070.2023.00385"},{"key":"10937_CR12","doi-asserted-by":"crossref","unstructured":"Chan ER, Lin CZ, Chan MA, Nagano K, Pan B, De\u00a0Mello S, Gallo O, Guibas LJ, Tremblay J, Khamis S (2022) Efficient geometry-aware 3d generative adversarial networks. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 16123\u201316133","DOI":"10.1109\/CVPR52688.2022.01565"},{"key":"10937_CR20","doi-asserted-by":"crossref","unstructured":"Chen A, Xu Z, Geiger A, Yu J, Su H (2022) TensoRF: tensorial radiance fields. In: European conference on computer vision. Springer, pp 333\u2013350","DOI":"10.1007\/978-3-031-19824-3_20"},{"key":"10937_CR13","doi-asserted-by":"crossref","unstructured":"Chen DZ, Siddiqui Y, Lee H-Y, Tulyakov S, Nie\u00dfner M (2023a) Text2Tex: text-driven texture synthesis via diffusion models. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp 18558\u201318568","DOI":"10.1109\/ICCV51070.2023.01701"},{"key":"10937_CR14","unstructured":"Chen Y, Chen A, Chen S, Yi R (2023b) Plasticine3D: non-rigid 3D editting with text guidance. arXiv preprint. arXiv:2312.10111"},{"key":"10937_CR15","doi-asserted-by":"crossref","unstructured":"Chen R, Chen Y, Jiao N, Jia K (2023c) Fantasia3D: disentangling geometry and appearance for high-quality text-to-3d content creation. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp 22246\u201322256","DOI":"10.1109\/ICCV51070.2023.02033"},{"key":"10937_CR18","unstructured":"Chen Y, Shao G, Shum KC, Hua B-S, Yeung S-K (2023d) Advances in 3D neural stylization: a survey. arXiv preprint. arXiv:2311.18328"},{"key":"10937_CR16","doi-asserted-by":"crossref","unstructured":"Chen Y, Chen Z, Zhang C, Wang F, Yang X, Wang Y, Cai Z, Yang L, Liu H, Lin G (2024a) GaussianEditor: swift and controllable 3D editing with Gaussian splatting. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 21476\u201321485","DOI":"10.1109\/CVPR52733.2024.02029"},{"key":"10937_CR19","doi-asserted-by":"crossref","unstructured":"Chen M, Xie J, Laina I, Vedaldi A (2024b) Shap-Editor: instruction-guided latent 3D editing in seconds. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 26456\u201326466","DOI":"10.1109\/CVPR52733.2024.02498"},{"key":"10937_CR17","unstructured":"Cheng X, Yang T, Wang J, Li Y, Zhang L, Zhang J, Yuan L (2023) Progressive3d: Progressively local editing for text-to-3d content creation with complex semantic prompts. arXiv preprint arXiv:2310.11784"},{"key":"10937_CR21","doi-asserted-by":"crossref","unstructured":"Chiang P-Z, Tsai M-S, Tseng H-Y, Lai W-S, Chiu W-C (2022) Stylizing 3D scene via implicit representation and hypernetwork. In: Proceedings of the IEEE\/CVF winter conference on applications of computer vision, pp 1475\u20131484","DOI":"10.1109\/WACV51458.2022.00029"},{"issue":"240","key":"10937_CR22","first-page":"1","volume":"24","author":"A Chowdhery","year":"2023","unstructured":"Chowdhery A, Narang S, Devlin J, Bosma M, Mishra G, Roberts A, Barham P, Chung HW, Sutton C, Gehrmann S (2023) PALM: scaling language modeling with pathways. J Mach Learn Res 24(240):1\u2013113","journal-title":"J Mach Learn Res"},{"key":"10937_CR23","doi-asserted-by":"crossref","unstructured":"Choy CB, Xu D, Gwak J, Chen K, Savarese S (2016) 3d-r2n2: a unified approach for single and multi-view 3D object reconstruction. In: Computer Vision\u2013ECCV 2016: 14th European conference, Amsterdam, The Netherlands, 11\u201314 October 2016, Proceedings, Part VIII 14. Springer, pp 628\u2013644","DOI":"10.1007\/978-3-319-46484-8_38"},{"issue":"4","key":"10937_CR24","doi-asserted-by":"publisher","first-page":"81","DOI":"10.1109\/MITS.2024.3381793","volume":"16","author":"C Cui","year":"2024","unstructured":"Cui C, Ma Y, Cao X, Ye W, Wang Z (2024) Receive, reason, and react: drive as you say, with large language models in autonomous vehicles. IEEE Intell Transp Syst Mag 16(4):81\u201394","journal-title":"IEEE Intell Transp Syst Mag"},{"key":"10937_CR25","doi-asserted-by":"crossref","unstructured":"Curless B, Levoy M (1996) A volumetric method for building complex models from range images. In: Proceedings of the 23rd annual conference on computer graphics and interactive techniques, pp 303\u2013312","DOI":"10.1145\/237170.237269"},{"key":"10937_CR26","unstructured":"Dai P, Tan F, Yu X, Zhang Y, Qi X (2024) Go-Nerf: generating virtual objects in neural radiance fields. arXiv preprint arXiv:2401.05750 (2024)"},{"key":"10937_CR27","doi-asserted-by":"crossref","unstructured":"Decatur D, Lang I, Aberman K, Hanocka R (2024) 3d paintbrush: Local stylization of 3d shapes with cascaded score distillation. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 4473\u20134483","DOI":"10.1109\/CVPR52733.2024.00428"},{"key":"10937_CR28","doi-asserted-by":"crossref","unstructured":"Deitke M, Schwenk D, Salvador J, Weihs L, Michel O, VanderBilt E, Schmidt L, Ehsani K, Kembhavi A, Farhadi A (2023) Objaverse: a universe of annotated 3D objects. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 13142\u201313153","DOI":"10.1109\/CVPR52729.2023.01263"},{"key":"10937_CR29","doi-asserted-by":"crossref","unstructured":"Dihlmann J-N, Engelhardt A, Lensch H (2024) SIGNeRF: scene integrated generation for neural radiance fields. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 6679\u20136688","DOI":"10.1109\/CVPR52733.2024.00638"},{"key":"10937_CR30","unstructured":"Dong J, Wang Y-X (2024) ViCA-NeRF: view-consistency-aware 3D editing of neural radiance fields. In: NIPS '23: Proceedings of the 37th international conference on neural information processing systems, vol 30, pp 61466\u201361477"},{"key":"10937_CR31","unstructured":"Dosovitskiy A, Beyer L, Kolesnikov A, Weissenborn D, Zhai X, Unterthiner T, Dehghani M, Minderer M, Heigold G, Gelly S et al (2020) An image is worth 16\u00d716 words: transformers for image recognition at scale. arXiv preprint. arXiv:2010.11929"},{"key":"10937_CR33","doi-asserted-by":"crossref","unstructured":"Fan Z, Jiang Y, Wang P, Gong X, Xu D, Wang Z (2022) Unified implicit neural stylization. In: European conference on computer vision. Springer, pp 636\u2013654","DOI":"10.1007\/978-3-031-19784-0_37"},{"key":"10937_CR32","unstructured":"Fang S, Wang Y, Yang Y, Tsai Y-H, Ding W, Zhou S, Yang M-H (2023) Editing 3D scenes via text prompts without retraining. arXiv e-prints. arXiv: 2309.04917"},{"key":"10937_CR34","unstructured":"Foo LG, Rahmani H, Liu J (2023) AI-generated content (AIGC) for various data modalities: a survey. arXiv preprint. arXiv:2308.14177"},{"key":"10937_CR35","doi-asserted-by":"crossref","unstructured":"Fridovich-Keil S, Yu A, Tancik M, Chen Q, Recht B, Kanazawa A (2022) Plenoxels: radiance fields without neural networks. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 5501\u20135510","DOI":"10.1109\/CVPR52688.2022.00542"},{"key":"10937_CR36","doi-asserted-by":"crossref","unstructured":"Gafni O, Polyak A, Ashual O, Sheynin S, Parikh D, Taigman Y (2022) Make-a-scene: scene-based text-to-image generation with human priors. In: European conference on computer vision. Springer, pp 89\u2013106","DOI":"10.1007\/978-3-031-19784-0_6"},{"issue":"4","key":"10937_CR37","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3528223.3530164","volume":"41","author":"R Gal","year":"2022","unstructured":"Gal R, Patashnik O, Maron H, Bermano AH, Chechik G, Cohen-Or D (2022a) StyleGAN-NADA: clip-guided domain adaptation of image generators. ACM Trans Graph (TOG) 41(4):1\u201313","journal-title":"ACM Trans Graph (TOG)"},{"key":"10937_CR38","unstructured":"Gal R, Alaluf Y, Atzmon Y, Patashnik O, Bermano AH, Chechik G, Cohen-Or D (2022b) An image is worth one word: personalizing text-to-image generation using textual inversion. arXiv preprint. arXiv:2208.01618"},{"key":"10937_CR39","first-page":"9936","volume":"33","author":"J Gao","year":"2020","unstructured":"Gao J, Chen W, Xiang T, Jacobson A, McGuire M, Fidler S (2020) Learning deformable tetrahedral meshes for 3D reconstruction. Adv Neural Inf Process Syst 33:9936\u20139947","journal-title":"Adv Neural Inf Process Syst"},{"key":"10937_CR40","doi-asserted-by":"crossref","unstructured":"Gao W, Aigerman N, Groueix T, Kim V, Hanocka R (2023) Textdeformer: Geometry manipulation using text guidance. In: ACM SIGGRAPH 2023 conference proceedings, pp 1\u201311","DOI":"10.1145\/3588432.3591552"},{"key":"10937_CR41","doi-asserted-by":"crossref","unstructured":"Gao C, Jiang B, Li X, Zhang Y, Yu Q (2024) Genesistex: adapting image denoising diffusion to texture space. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 4620\u20134629","DOI":"10.1109\/CVPR52733.2024.00442"},{"key":"10937_CR42","doi-asserted-by":"crossref","unstructured":"Ge S, Park T, Zhu J-Y, Huang J-B (2023) Expressive text-to-image generation with rich text. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp 7545\u20137556","DOI":"10.1109\/ICCV51070.2023.00694"},{"key":"10937_CR43","doi-asserted-by":"crossref","unstructured":"H\u00e4ne C, Tulsiani S, Malik J (2017) Hierarchical surface prediction for 3D object reconstruction. In: 2017 International conference on 3D vision (3DV). IEEE, pp 412\u2013420","DOI":"10.1109\/3DV.2017.00054"},{"issue":"4","key":"10937_CR44","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3306346.3322959","volume":"38","author":"R Hanocka","year":"2019","unstructured":"Hanocka R, Hertz A, Fish N, Giryes R, Fleishman S, Cohen-Or D (2019) MeshCNN: a network with an edge. ACM Trans Graph (ToG) (ToG) 38(4):1\u201312","journal-title":"ACM Trans Graph (ToG)"},{"key":"10937_CR45","doi-asserted-by":"crossref","unstructured":"Haque A, Tancik M, Efros AA, Holynski A, Kanazawa A (2023) Instruct-NeRF2NeRF: editing 3D scenes with instructions. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp 19740\u201319750","DOI":"10.1109\/ICCV51070.2023.01808"},{"key":"10937_CR49","doi-asserted-by":"crossref","unstructured":"He K, Zhang X, Ren S, Sun J (2016) Deep residual learning for image recognition. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 770\u2013778","DOI":"10.1109\/CVPR.2016.90"},{"key":"10937_CR46","doi-asserted-by":"crossref","unstructured":"He R, Huang S, Nie X, Hui T, Liu L, Dai J, Han J, Li G, Liu S (2024) Customize your NeRF: adaptive source driven 3D scene editing via local-global iterative training. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 6966\u20136975","DOI":"10.1109\/CVPR52733.2024.00665"},{"key":"10937_CR47","unstructured":"Hertz A, Mokady R, Tenenbaum J, Aberman K, Pritch Y, Cohen-Or D (2022) Prompt-to-prompt image editing with cross attention control. arXiv preprint. arXiv:2208.01626"},{"key":"10937_CR48","unstructured":"Heusel M, Ramsauer H, Unterthiner T, Nessler B, Hochreiter S (2017) GANs trained by a two time-scale update rule converge to a local nash equilibrium. In: NIPS'17: proceedings of the 31st international conference on neural information processing systems, vol 30, pp 6629\u2013640"},{"key":"10937_CR52","unstructured":"Ho J, Salimans T (2022) Classifier-free diffusion guidance. arXiv preprint. arXiv:2207.12598"},{"key":"10937_CR50","first-page":"6840","volume":"33","author":"J Ho","year":"2020","unstructured":"Ho J, Jain A, Abbeel P (2020) Denoising diffusion probabilistic models. Adv Neural Inf Process Syst 33:6840\u20136851","journal-title":"Adv Neural Inf Process Syst"},{"key":"10937_CR51","doi-asserted-by":"crossref","unstructured":"Hoffman J, Hu T, Kanyuk P, Marshall S, Nguyen G, Schroers H, Witting P (2023) Creating elemental characters: from sparks to fire. In: ACM SIGGRAPH 2023 Talks, pp 1\u20132","DOI":"10.1145\/3587421.3595467"},{"key":"10937_CR53","unstructured":"Hu EJ, Shen Y, Wallis P, Allen-Zhu Z, Li Y, Wang S, Wang L, Chen W (2021) LORA: low-rank adaptation of large language models. arXiv preprint. arXiv:2106.09685"},{"key":"10937_CR54","unstructured":"Huang Y, Huang J, Liu Y, Yan M, Lv J, Liu J, Xiong W, Zhang H, Chen S, Cao L (2024a) Diffusion model-based image editing: a survey. arXiv preprint. arXiv:2402.17525"},{"key":"10937_CR55","doi-asserted-by":"crossref","unstructured":"Huang Q, Liao Y, Hao Y, Zhou P (2024b) Noise-NeRF: hide information in neural radiance fields using trainable noise. arXiv preprint. arXiv:2401.01216","DOI":"10.1007\/978-3-031-72335-3_22"},{"key":"10937_CR56","doi-asserted-by":"crossref","unstructured":"Hu T, Xu X, Liu S, Jia J (2023) Point2Pix: photo-realistic point cloud rendering via neural radiance fields. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 8349\u20138358","DOI":"10.1109\/CVPR52729.2023.00807"},{"key":"10937_CR57","doi-asserted-by":"crossref","unstructured":"Hyung J, Hwang S, Kim D, Lee H, Choo J (2023) Local 3D editing via 3D distillation of clip knowledge. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 12674\u201312684","DOI":"10.1109\/CVPR52729.2023.01219"},{"key":"10937_CR58","unstructured":"Jun H, Nichol A (2023) Shap-E: generating conditional 3D implicit functions. arXiv preprint. arXiv:2305.02463"},{"key":"10937_CR59","unstructured":"Kamata H, Sakuma Y, Hayakawa A, Ishii M, Narihira T (2023) Instruct 3D-to-3D: Text instruction guided 3D-to-3D conversion. arXiv preprint. arXiv:2303.15780"},{"key":"10937_CR60","doi-asserted-by":"crossref","unstructured":"Karim N, Khalid U, Iqbal H, Hua J, Chen C (2023) Free-editor: Zero-shot text-driven 3D scene editing. arXiv preprint. arXiv:2312.13663","DOI":"10.1007\/978-3-031-72989-8_25"},{"key":"10937_CR61","doi-asserted-by":"crossref","unstructured":"Karras T, Laine S, Aila T (2019) A style-based generator architecture for generative adversarial networks. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 4401\u20134410","DOI":"10.1109\/CVPR.2019.00453"},{"key":"10937_CR62","doi-asserted-by":"crossref","unstructured":"Kato H, Ushiku Y, Harada T (2018) Neural 3D mesh renderer. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 3907\u20133916","DOI":"10.1109\/CVPR.2018.00411"},{"key":"10937_CR63","doi-asserted-by":"crossref","unstructured":"Kawar B, Zada S, Lang O, Tov O, Chang H, Dekel T, Mosseri I, Irani M (2023) Imagic: text-based real image editing with diffusion models. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 6007\u20136017","DOI":"10.1109\/CVPR52729.2023.00582"},{"issue":"4","key":"10937_CR64","doi-asserted-by":"publisher","first-page":"139","DOI":"10.1145\/3592433","volume":"42","author":"B Kerbl","year":"2023","unstructured":"Kerbl B, Kopanas G, Leimk\u00fchler T, Drettakis G (2023) 3D Gaussian splatting for real-time radiance field rendering. ACM Trans Graph (TOG) 42(4):139\u20131","journal-title":"ACM Trans Graph (TOG)"},{"key":"10937_CR65","doi-asserted-by":"crossref","unstructured":"Khalid U, Iqbal H, Karim N, Hua J, Chen C (2023) Latenteditor: Text driven local editing of 3D scenes. arXiv preprint. arXiv:2312.09313","DOI":"10.1007\/978-3-031-73039-9_21"},{"key":"10937_CR66","doi-asserted-by":"crossref","unstructured":"Kirillov A, Mintun E, Ravi N, Mao H, Rolland C, Gustafson L, Xiao T, Whitehead S, Berg AC, Lo W-Y (2023) Segment anything. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp 4015\u20134026","DOI":"10.1109\/ICCV51070.2023.00371"},{"key":"10937_CR67","doi-asserted-by":"crossref","unstructured":"Kumari N, Zhang B, Zhang R, Shechtman E, Zhu J-Y (2023) Multi-concept customization of text-to-image diffusion. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 1931\u20131941","DOI":"10.1109\/CVPR52729.2023.00192"},{"key":"10937_CR68","doi-asserted-by":"crossref","unstructured":"Lassner C, Zollhofer M (2021) Pulsar: efficient sphere-based neural rendering. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 1440\u20131449","DOI":"10.1109\/CVPR46437.2021.00149"},{"key":"10937_CR69","first-page":"30923","volume":"35","author":"J Lei","year":"2022","unstructured":"Lei J, Zhang Y, Jia K (2022) TANGO: text-driven photorealistic and robust 3d stylization via lighting decomposition. Adv Neural Inf Process Syst 35:30923\u201330936","journal-title":"Adv Neural Inf Process Syst"},{"key":"10937_CR72","doi-asserted-by":"crossref","unstructured":"Li C, Feng BY, Fan Z, Pan P, Wang Z (2023a) StegaNeRF: embedding invisible information within neural radiance fields. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp 441\u2013453","DOI":"10.1109\/ICCV51070.2023.00047"},{"key":"10937_CR73","unstructured":"Li J, Liu S, Liu Z, Wang Y, Zheng K, Xu J, Li J, Zhu J (2023b) InstructPix2NeRF: instructed 3d portrait editing from a single image. arXiv preprint. arXiv:2311.02826"},{"key":"10937_CR74","doi-asserted-by":"crossref","unstructured":"Li Y, Liu H, Wu Q, Mu F, Yang J, Gao J, Li C, Lee YJ (2023c) GLIGEN: open-set grounded text-to-image generation. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 22511\u201322521","DOI":"10.1109\/CVPR52729.2023.02156"},{"key":"10937_CR79","unstructured":"Li C, Zhang C, Waghwase A, Lee L-H, Rameau F, Yang Y, Bae S-H, Hong CS (2023d) Generative ai meets 3d: A survey on text-to-3d in aigc era. arXiv preprint arXiv:2305.06131"},{"key":"10937_CR71","doi-asserted-by":"crossref","unstructured":"Li Y, Dou Y, Shi Y, Lei Y, Chen X, Zhang Y, Zhou P, Ni B (2024a) FocalDreamer: text-driven 3D editing via focal-fusion assembly. In: Proceedings of the AAAI conference on artificial intelligence, vol 38, pp 3279\u20133287","DOI":"10.1609\/aaai.v38i4.28113"},{"key":"10937_CR78","unstructured":"Li X, Zhang Q, Kang D, Cheng W, Gao, Y, Zhang J, Liang Z, Liao J, Cao Y-P, Shan Y (2024b) Advances in 3D generation: a survey. arXiv preprint. arXiv:2401.17807"},{"key":"10937_CR70","unstructured":"Liao JZZLJ, Cao Y-P, Shan Y (2024) Advances in 3D generation: a survey. arXiv preprint. arXiv:2401.17807"},{"key":"10937_CR75","doi-asserted-by":"crossref","unstructured":"Lin C-H, Gao J, Tang L, Takikawa T, Zeng X, Huang X, Kreis K, Fidler S, Liu M-Y, Lin T-Y (2023) MAGIC3D: high-resolution text-to-3d content creation. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 300\u2013309","DOI":"10.1109\/CVPR52729.2023.00037"},{"key":"10937_CR76","doi-asserted-by":"crossref","unstructured":"Liu S, Li T, Chen W, Li H (2019) Soft rasterizer: a differentiable renderer for image-based 3d reasoning. In: Proceedings of the IEEE\/CVF International conference on computer vision, pp 7708\u20137717","DOI":"10.1109\/ICCV.2019.00780"},{"key":"10937_CR77","unstructured":"Liu Y, Lin C, Zeng Z, Long X, Liu L, Komura T, Wang W (2023) SyncDreamer: generating multiview-consistent images from a single-view image. arXiv preprint arXiv:2309.03453"},{"key":"10937_CR80","doi-asserted-by":"crossref","unstructured":"Loper MM, Black MJ (2014) OpenDR: an approximate differentiable renderer. In: Computer vision\u2014ECCV 2014: 13th European conference, Zurich, Switzerland,  6\u201312 September 2014, Proceedings, Part VII 13. Springer, pp 154\u2013169","DOI":"10.1007\/978-3-319-10584-0_11"},{"key":"10937_CR81","doi-asserted-by":"crossref","unstructured":"Lorensen WE, Cline HE (1998) Marching cubes: a high resolution 3D surface construction algorithm. In: Seminal graphics: pioneering efforts that shaped the field, pp 347\u2013353","DOI":"10.1145\/280811.281026"},{"key":"10937_CR82","doi-asserted-by":"crossref","unstructured":"L\u00fcddecke T, Ecker A (2022) Image segmentation using text and image prompts. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 7086\u20137096","DOI":"10.1109\/CVPR52688.2022.00695"},{"key":"10937_CR83","doi-asserted-by":"crossref","unstructured":"Lugmayr A, Danelljan M, Romero A, Yu F, Timofte R, Van\u00a0Gool L (2022) RePaint: inpainting using denoising diffusion probabilistic models. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 11461\u201311471","DOI":"10.1109\/CVPR52688.2022.01117"},{"key":"10937_CR84","doi-asserted-by":"crossref","unstructured":"Luo Z, Guo Q, Cheung KC, See S, Wan R (2023) CopyRNeRF: protecting the copyright of neural radiance fields. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp 22401\u201322411","DOI":"10.1109\/ICCV51070.2023.02047"},{"key":"10937_CR87","doi-asserted-by":"crossref","unstructured":"Ma Y, Zhang X, Sun X, Ji J, Wang H, Jiang G, Zhuang W, Ji R (2023) X-MESH: towards fast and accurate text-driven 3D stylization via dynamic textual guidance. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp 2749\u20132760","DOI":"10.1109\/ICCV51070.2023.00258"},{"key":"10937_CR85","unstructured":"Manukyan H, Sargsyan A, Atanyan B, Wang Z, Navasardyan S, Shi H (2023) HD-Painter: high-resolution and prompt-faithful text-guided image inpainting with diffusion models. arXiv preprint. arXiv:2312.14091"},{"key":"10937_CR86","doi-asserted-by":"crossref","unstructured":"Maturana D, Scherer S (2015) VoxNet: a 3D convolutional neural network for real-time object recognition. In: 2015 IEEE\/RSJ international conference on intelligent robots and systems (IROS). IEEE, pp 922\u2013928","DOI":"10.1109\/IROS.2015.7353481"},{"key":"10937_CR88","doi-asserted-by":"crossref","unstructured":"Memery S, Cedron O, Subr K (2023) Generating parametric brdfs from natural language descriptions. In: Computer graphics forum, vol 42. Wiley Online Library, p 14980","DOI":"10.1111\/cgf.14980"},{"key":"10937_CR89","doi-asserted-by":"crossref","unstructured":"Meng C, Rombach R, Gao R, Kingma D, Ermon S, Ho J, Salimans T (2023) On distillation of guided diffusion models. In: Proceedings of the IEEE\/CVF Conference on computer vision and pattern recognition, pp 14297\u201314306","DOI":"10.1109\/CVPR52729.2023.01374"},{"key":"10937_CR90","doi-asserted-by":"crossref","unstructured":"Metzer G, Richardson E, Patashnik O, Giryes R, Cohen-Or D (2023) Latent-Nerf for shape-guided generation of 3D shapes and textures. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 12663\u201312673","DOI":"10.1109\/CVPR52729.2023.01218"},{"key":"10937_CR91","doi-asserted-by":"crossref","unstructured":"Michel O, Bar-On R, Liu R, Benaim S, Hanocka R (2022) Text2mesh: text-driven neural stylization for meshes. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 13492\u201313502","DOI":"10.1109\/CVPR52688.2022.01313"},{"key":"10937_CR92","doi-asserted-by":"crossref","unstructured":"Mikaeili A, Perel O, Safaee M, Cohen-Or D, Mahdavi-Amiri A (2023) SKED: sketch-guided text-based 3d editing. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp 14607\u201314619","DOI":"10.1109\/ICCV51070.2023.01343"},{"issue":"1","key":"10937_CR93","doi-asserted-by":"publisher","first-page":"99","DOI":"10.1145\/3503250","volume":"65","author":"B Mildenhall","year":"2021","unstructured":"Mildenhall B, Srinivasan PP, Tancik M, Barron JT, Ramamoorthi R, Ng R (2021) NERF: representing scenes as neural radiance fields for view synthesis. Commun ACM 65(1):99\u2013106","journal-title":"Commun ACM"},{"key":"10937_CR94","doi-asserted-by":"crossref","unstructured":"Mirzaei A, Aumentado-Armstrong T, Brubaker MA, Kelly J, Levinshtein, A, Derpanis KG, Gilitschenski I (2023a) Reference-guided controllable inpainting of neural radiance fields. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp 17815\u201317825","DOI":"10.1109\/ICCV51070.2023.01633"},{"key":"10937_CR95","doi-asserted-by":"crossref","unstructured":"Mirzaei A, Aumentado-Armstrong T, Brubaker MA, Kelly J, Levinshtein A, Derpanis KG, Gilitschenski I (2023b) Watch your steps: local image and scene editing by text instructions. arXiv preprint. arXiv:2308.08947","DOI":"10.1007\/978-3-031-72920-1_7"},{"key":"10937_CR96","doi-asserted-by":"crossref","unstructured":"Mokady R, Hertz A, Aberman K, Pritch Y, Cohen-Or D (2023) Null-text inversion for editing real images using guided diffusion models. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 6038\u20136047","DOI":"10.1109\/CVPR52729.2023.00585"},{"key":"10937_CR97","doi-asserted-by":"crossref","unstructured":"Mou C, Wang X, Xie L, Wu Y, Zhang J, Qi Z, Shan Y (2024) T2I-Adapter: learning adapters to dig out more controllable ability for text-to-image diffusion models. In: Proceedings of the AAAI conference on artificial intelligence, vol 38, pp 4296\u20134304","DOI":"10.1609\/aaai.v38i5.28226"},{"issue":"4","key":"10937_CR98","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3528223.3530127","volume":"41","author":"T M\u00fcller","year":"2022","unstructured":"M\u00fcller T, Evans A, Schied C, Keller A (2022) Instant neural graphics primitives with a multiresolution hash encoding. ACM Trans Graph (ToG) 41(4):1\u201315","journal-title":"ACM Trans Graph (ToG)"},{"key":"10937_CR99","doi-asserted-by":"crossref","unstructured":"Munkberg J, Hasselgren J, Shen T, Gao J, Chen W, Evans A, M\u00fcller T, Fidler S (2022) Extracting triangular 3D models, materials, and lighting from images. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 8280\u20138290","DOI":"10.1109\/CVPR52688.2022.00810"},{"key":"10937_CR100","doi-asserted-by":"crossref","unstructured":"Nalbach O, Arabadzhiyska E, Mehta D, Seidel H-P, Ritschel T (2017) Deep shading: convolutional neural networks for screen space shading. In: Computer graphics forum, vol 36. Wiley Online Library, pp 65\u201378","DOI":"10.1111\/cgf.13225"},{"key":"10937_CR101","doi-asserted-by":"crossref","unstructured":"Newcombe RA, Izadi S, Hilliges O, Molyneaux D, Kim D, Davison AJ, Kohi P, Shotton J, Hodges S, Fitzgibbon A (2011) Kinectfusion: real-time dense surface mapping and tracking. In: 2011 10th IEEE international symposium on mixed and augmented reality. IEEE, pp 127\u2013136","DOI":"10.1109\/ISMAR.2011.6092378"},{"key":"10937_CR102","unstructured":"Nichol AQ, Dhariwal P, Ramesh A, Shyam P, Mishkin P, Mcgrew B, Sutskever I, Chen M (2022) Glide: towards photorealistic image generation and editing with text-guided diffusion models. In: International conference on machine learning. PMLR, pp 16784\u201316804"},{"key":"10937_CR103","unstructured":"Oh Y, Choi J, Kim Y, Park M, Shin C, Yoon S (2023) Controldreamer: Stylized 3D generation with multi-view controlnet. arXiv preprint. arXiv:2312.01129"},{"key":"10937_CR104","unstructured":"OpenAI (2023) GPT-4V(ision) system card. OpenAI"},{"key":"10937_CR105","doi-asserted-by":"crossref","unstructured":"Oppenlaender J (2022) The creativity of text-to-image generation. In: Proceedings of the 25th international academic mindtrek conference, pp 192\u2013202","DOI":"10.1145\/3569219.3569352"},{"key":"10937_CR106","unstructured":"Palandra F, Sanchietti A, Baieri D, Rodol\u00e0 E (2024) GSEDIT: efficient text-guided editing of 3D objects via Gaussian splatting. arXiv preprint. arXiv:2403.05154"},{"key":"10937_CR107","doi-asserted-by":"crossref","unstructured":"Park JJ, Florence P, Straub J, Newcombe R, Lovegrove S (2019) DEEPSDF: learning continuous signed distance functions for shape representation. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 165\u2013174","DOI":"10.1109\/CVPR.2019.00025"},{"key":"10937_CR108","unstructured":"Park J, Kwon G, Ye JC (2023) ED-NERF: efficient text-guided editing of 3D scene using latent space nerf. arXiv preprint. arXiv:2310.02712"},{"key":"10937_CR109","doi-asserted-by":"crossref","unstructured":"Patashnik O, Wu Z, Shechtman E, Cohen-Or D, Lischinski D (2021) STYLECLIP: text-driven manipulation of stylegan imagery. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp 2085\u20132094","DOI":"10.1109\/ICCV48922.2021.00209"},{"key":"10937_CR110","doi-asserted-by":"crossref","unstructured":"Pfister H, Zwicker M, Van\u00a0Baar J, Gross M (2000) SURFELS: surface elements as rendering primitives. In: Proceedings of the 27th annual conference on computer graphics and interactive techniques, pp 335\u2013342","DOI":"10.1145\/344779.344936"},{"key":"10937_CR111","unstructured":"Poole B, Jain A, Barron JT, Mildenhall B (2022) DREAMFUSION: text-to-3d using 2D diffusion. arXiv preprint. arXiv:2209.14988"},{"key":"10937_CR112","unstructured":"Qi CR, Su H, Mo K, Guibas LJ (2017a) PointNet: deep learning on point sets for 3D classification and segmentation. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 652\u2013660"},{"key":"10937_CR113","unstructured":"Qi CR, Yi L, Su H, Guibas LJ (2017b) PointNet++: deep hierarchical feature learning on point sets in a metric space. In: NIPS'17: Proceedings of the 31st international conference on neural information processing systems, vol 30, pp 5105\u20135114"},{"key":"10937_CR114","unstructured":"Radford A, Kim JW, Hallacy C, Ramesh A, Goh G, Agarwal S, Sastry G, Askell A, Mishkin P, Clark J (2021) Learning transferable visual models from natural language supervision. In: International conference on machine learning. PMLR, pp 8748\u20138763"},{"key":"10937_CR115","doi-asserted-by":"crossref","unstructured":"Raj A, Kaza S, Poole B, Niemeyer M, Ruiz N, Mildenhall B, Zada S, Aberman K, Rubinstein M, Barron J (2023) DreamBooth3D: subject-driven text-to-3d generation. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp 2349\u20132359","DOI":"10.1109\/ICCV51070.2023.00223"},{"key":"10937_CR116","doi-asserted-by":"crossref","unstructured":"Rakhimov R, Ardelean A-T, Lempitsky V, Burnaev E (2022) Npbg++: Accelerating neural point-based graphics. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 15969\u201315979","DOI":"10.1109\/CVPR52688.2022.01550"},{"key":"10937_CR117","unstructured":"Ramesh A, Dhariwal P, Nichol A, Chu C, Chen M (2022) Hierarchical text-conditional image generation with clip latents. arXiv preprint arXiv:2204.06125 1(2), 3"},{"key":"10937_CR118","unstructured":"Reed S, Akata Z, Yan X, Logeswaran L, Schiele B, Lee H (2016) Generative adversarial text to image synthesis. In: International conference on machine learning. PMLR, pp 1060\u20131069"},{"key":"10937_CR120","unstructured":"Ren J, Pan L, Tang J, Zhang C, Cao A, Zeng G, Liu Z (2023) DreamGaussian4D: generative 4D gaussian splatting. arXiv preprint. arXiv:2312.17142"},{"key":"10937_CR119","unstructured":"Ren T, Liu S, Zeng A, Lin J, Li K, Cao H, Chen J, Huang X, Chen Y, Yan F et al (2024) Grounded SAM: assembling open-world models for diverse visual tasks. arXiv preprint. arXiv:2401.14159"},{"key":"10937_CR121","doi-asserted-by":"crossref","unstructured":"Richardson E, Metzer G, Alaluf Y, Giryes R, Cohen-Or D (2023) Texture: text-guided texturing of 3D shapes. In: ACM SIGGRAPH 2023 conference proceedings, pp 1\u201311","DOI":"10.1145\/3588432.3591503"},{"key":"10937_CR122","doi-asserted-by":"crossref","unstructured":"Riegler G, Osman\u00a0Ulusoy A, Geiger A (2017) OctNet: learning deep 3D representations at high resolutions. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 3577\u20133586","DOI":"10.1109\/CVPR.2017.701"},{"key":"10937_CR123","doi-asserted-by":"crossref","unstructured":"Rombach R, Blattmann A, Lorenz D, Esser P, Ommer B (2022) High-resolution image synthesis with latent diffusion models. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 10684\u201310695","DOI":"10.1109\/CVPR52688.2022.01042"},{"issue":"4","key":"10937_CR124","first-page":"1","volume":"41","author":"D R\u00fcckert","year":"2022","unstructured":"R\u00fcckert D, Franke L, Stamminger M (2022) Adop: Approximate differentiable one-pixel point rendering. ACM Trans Graph (ToG) (ToG) 41(4):1\u201314","journal-title":"ACM Trans Graph (ToG)"},{"key":"10937_CR125","doi-asserted-by":"crossref","unstructured":"Ruiz N, Li Y, Jampani V, Pritch Y, Rubinstein M, Aberman K (2023) Dreambooth: fine tuning text-to-image diffusion models for subject-driven generation. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 22500\u201322510","DOI":"10.1109\/CVPR52729.2023.02155"},{"key":"10937_CR126","first-page":"36479","volume":"35","author":"C Saharia","year":"2022","unstructured":"Saharia C, Chan W, Saxena S, Li L, Whang J, Denton EL, Ghasemipour K, Gontijo Lopes R, Karagol Ayan B, Salimans T (2022) Photorealistic text-to-image diffusion models with deep language understanding. Adv Neural Inf Process Syst 35:36479\u201336494","journal-title":"Adv Neural Inf Process Syst"},{"key":"10937_CR127","doi-asserted-by":"crossref","unstructured":"Sanghi A, Chu H, Lambourne JG, Wang Y, Cheng C-Y, Fumero M, Malekshan KR (2022) Clip-forge: towards zero-shot text-to-shape generation. In: Proceedings of the IEEE\/CVF Conference on computer vision and pattern recognition, pp 18603\u201318613","DOI":"10.1109\/CVPR52688.2022.01805"},{"key":"10937_CR128","doi-asserted-by":"crossref","unstructured":"Sella E, Fiebelman G, Hedman P, Averbuch-Elor H (2023) VOX-E: text-guided voxel editing of 3d objects. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp 430\u2013440","DOI":"10.1109\/ICCV51070.2023.00046"},{"key":"10937_CR129","unstructured":"Shahbazi M, Claessens L, Niemeyer M, Collins E, Tonioni A, Van\u00a0Gool L, Tombari F (2024) INSERF: text-driven generative object insertion in neural 3D scenes. arXiv preprint. arXiv:2401.05335"},{"key":"10937_CR130","doi-asserted-by":"crossref","unstructured":"Shi S, Guo C, Jiang L, Wang Z, Shi J, Wang X, Li H (2020) PV-RCNN: point-voxel feature set abstraction for 3D object detection. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 10529\u201310538","DOI":"10.1109\/CVPR42600.2020.01054"},{"key":"10937_CR131","unstructured":"Shi Z, Peng S, Xu Y, Geiger A, Liao Y, Shen Y (2022) Deep generative models on 3D representations: a survey. arXiv preprint. arXiv:2210.15663"},{"key":"10937_CR133","unstructured":"Shi Y, Wang P, Ye J, Long M, Li K, Yang X (2023) MVDREAM: multi-view diffusion for 3D generation. arXiv preprint. arXiv:2308.16512"},{"issue":"4","key":"10937_CR132","doi-asserted-by":"publisher","first-page":"279","DOI":"10.1016\/0167-8396(87)90003-3","volume":"4","author":"LA Shirman","year":"1987","unstructured":"Shirman LA, Sequin CH (1987) Local surface interpolation with B\u00e9zier patches. Computer Aid Geom Des 4(4):279\u2013295","journal-title":"Computer Aid Geom Des"},{"key":"10937_CR134","doi-asserted-by":"crossref","unstructured":"Shum KC, Kim J, Hua, B-S, Nguyen DT, Yeung S-K (2024) Language-driven object fusion into neural radiance fields with pose-conditioned dataset updates. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 5176\u20135187","DOI":"10.1109\/CVPR52733.2024.00495"},{"key":"10937_CR135","unstructured":"Song L, Cao L, Gu J, Jiang Y, Yuan J, Tang H (2023a) Efficient-NeRF2NeRF: streamlining text-driven 3D editing with multiview correspondence-enhanced diffusion models. arXiv preprint. arXiv:2312.08563"},{"key":"10937_CR136","doi-asserted-by":"crossref","unstructured":"Song H, Choi S, Do H, Lee C, Kim T (2023b) Blending-NeRF: text-driven localized editing in neural radiance fields. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp 14383\u201314393","DOI":"10.1109\/ICCV51070.2023.01323"},{"key":"10937_CR137","doi-asserted-by":"crossref","unstructured":"Stutz D, Geiger A (2018) Learning 3D shape completion from laser scan data with weak supervision. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 1955\u20131964","DOI":"10.1109\/CVPR.2018.00209"},{"key":"10937_CR138","doi-asserted-by":"crossref","unstructured":"Sun C, Sun M, Chen H-T (2022) Direct voxel grid optimization: super-fast convergence for radiance fields reconstruction. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 5459\u20135469","DOI":"10.1109\/CVPR52688.2022.00538"},{"key":"10937_CR139","doi-asserted-by":"crossref","unstructured":"Suvorov R, Logacheva E, Mashikhin A, Remizova A, Ashukha A, Silvestrov A, Kong N, Goka H, Park K, Lempitsky V (2022) Resolution-robust large mask inpainting with Fourier convolutions. In: Proceedings of the IEEE\/CVF winter conference on applications of computer vision, pp 2149\u20132159","DOI":"10.1109\/WACV51458.2022.00323"},{"key":"10937_CR140","unstructured":"Tang J, Ren J, Zhou H, Liu Z, Zeng G (2023) DreamGaussian: generative gaussian splatting for efficient 3D content creation. arXiv preprint. arXiv:2309.16653"},{"key":"10937_CR141","doi-asserted-by":"crossref","unstructured":"Taniguchi D (2019) AR-Net: immersive augmented reality with real-time neural style transfer. In: ACM SIGGRAPH 2019 virtual, augmented, and mixed reality, pp 1\u20131","DOI":"10.1145\/3306449.3328803"},{"key":"10937_CR142","doi-asserted-by":"crossref","unstructured":"Tatarchenko M, Dosovitskiy A, Brox T (2017) Octree generating networks: efficient convolutional architectures for high-resolution 3D outputs. In: Proceedings of the IEEE international conference on computer vision, pp 2088\u20132096","DOI":"10.1109\/ICCV.2017.230"},{"issue":"4","key":"10937_CR143","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3306346.3323035","volume":"38","author":"J Thies","year":"2019","unstructured":"Thies J, Zollh\u00f6fer M, Nie\u00dfner M (2019) Deferred neural rendering: image synthesis using neural textures. ACM Trans Graph (TOG) 38(4):1\u201312","journal-title":"ACM Trans Graph (TOG)"},{"key":"10937_CR144","unstructured":"Vaswani A, Shazeer N, Parmar N, Uszkoreit J, Jones L, Gomez AN, Kaiser \u0141, Polosukhin I (2017) Attention is all you need. In: Advances in neural information processing systems, vol 30"},{"key":"10937_CR152","doi-asserted-by":"crossref","unstructured":"Wang N, Zhang Y, Li Z, Fu Y, Liu W, Jiang Y-G (2018) Pixel2Mesh: generating 3D mesh models from single RGB images. In: Proceedings of the European conference on computer vision, pp 52\u201367","DOI":"10.1007\/978-3-030-01252-6_4"},{"issue":"5","key":"10937_CR146","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3326362","volume":"38","author":"Y Wang","year":"2019","unstructured":"Wang Y, Sun Y, Liu Z, Sarma SE, Bronstein MM, Solomon JM (2019) Dynamic graph CNN for learning on point clouds. ACM Trans Graph (TOG) 38(5):1\u201312","journal-title":"ACM Trans Graph (TOG)"},{"key":"10937_CR147","doi-asserted-by":"crossref","unstructured":"Wang C, Chai M, He M, Chen D, Liao J (2022) Clip-Nerf: text-and-image driven manipulation of neural radiance fields. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 3835\u20133844","DOI":"10.1109\/CVPR52688.2022.00381"},{"key":"10937_CR145","unstructured":"Wang Z, Li M, Chen C (2023a) LucidDreaming: controllable object-centric 3D generation. arXiv preprint. arXiv:2312.00588"},{"issue":"8","key":"10937_CR149","doi-asserted-by":"publisher","first-page":"4983","DOI":"10.1109\/TVCG.2023.3283400","volume":"30","author":"C Wang","year":"2023","unstructured":"Wang C, Jiang R, Chai M, He M, Chen D, Liao J (2023b) NeRF-Art: text-driven neural radiance fields stylization. IEEE Trans Vis Comput Graph 30(8):4983\u20134996","journal-title":"IEEE Trans Vis Comput Graph"},{"key":"10937_CR151","doi-asserted-by":"crossref","unstructured":"Wang D, Zhang T, Abboud A, S\u00fcsstrunk S (2023c) InpaintNerf360: text-guided 3D inpainting on unbounded neural radiance fields. arXiv preprint. arXiv:2305.15094","DOI":"10.1109\/CVPR52733.2024.01205"},{"key":"10937_CR148","doi-asserted-by":"crossref","unstructured":"Wang J, Fang J, Zhang X, Xie L, Tian Q (2024a) GaussianEditor: editing 3D Gaussians delicately with text instructions. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 20902\u201320911","DOI":"10.1109\/CVPR52733.2024.01975"},{"key":"10937_CR150","doi-asserted-by":"crossref","unstructured":"Wang Y, Yi X, Wu Z, Zhao N, Chen L, Zhang H (2024b) View-consistent 3D editing with gaussian splatting. arXiv preprint. arXiv:2403.11868","DOI":"10.1007\/978-3-031-72761-0_23"},{"key":"10937_CR153","doi-asserted-by":"crossref","unstructured":"Wen C, Zhang Y, Li Z, Fu Y (2019) Pixel2Mesh++: multi-view 3d mesh generation via deformation. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp 1042\u20131051","DOI":"10.1109\/ICCV.2019.00113"},{"key":"10937_CR157","unstructured":"Wu Z, Song S, Khosla A, Yu F, Zhang L, Tang, X, Xiao J (2015) 3D ShapeNets: a deep representation for volumetric shapes. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 1912\u20131920"},{"key":"10937_CR160","unstructured":"Wu J, Zhang C, Xue T, Freeman B, Tenenbaum J (2016) Learning a probabilistic latent space of object shapes via 3D generative-adversarial modeling. In: NIPS'16: Proceedings of the 30th international conference on neural information processing systems, vol 29, pp 82\u201390"},{"key":"10937_CR156","doi-asserted-by":"crossref","unstructured":"Wu W, Qi Z, Fuxin L (2019) PointConv: deep convolutional networks on 3D point clouds. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 9621\u20139630","DOI":"10.1109\/CVPR.2019.00985"},{"key":"10937_CR154","unstructured":"Wu J, Bian J-W, Li X, Wang G, Reid I, Torr P, Prisacariu VA (2024a) GaussCtrl: multi-view consistent text-driven 3D Gaussian splatting editing. arXiv preprint.arXiv:2403.08733"},{"key":"10937_CR155","unstructured":"Wu J, Liu X, Wu C, Gao X, Liu J, Liu X, Zhao C, Feng H, Ding E, Wang J (2024b) TEXRO: generating delicate textures of 3D models by recursive optimization. arXiv preprint. arXiv:2403.15009"},{"key":"10937_CR158","doi-asserted-by":"crossref","unstructured":"Wu T, Yang G, Li Z, Zhang K, Liu Z, Guibas L, Lin D, Wetzstein G (2024c) GPT-4V(ision) is a human-aligned evaluator for text-to-3D generation. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 22227\u201322238","DOI":"10.1109\/CVPR52733.2024.02098"},{"key":"10937_CR159","doi-asserted-by":"crossref","unstructured":"Wu G, Yi T, Fang J, Xie L, Zhang X, Wei W, Liu W, Tian Q, Wang X (2024d) 4D Gaussian splatting for real-time dynamic scene rendering. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 20310\u201320320","DOI":"10.1109\/CVPR52733.2024.01920"},{"key":"10937_CR161","doi-asserted-by":"crossref","unstructured":"Xiao G, Yin T, Freeman WT, Durand F, Han S (2023) FastComposer: tuning-free multi-subject image generation with localized attention. arXiv preprint. arXiv:2305.10431","DOI":"10.1007\/s11263-024-02227-z"},{"key":"10937_CR162","doi-asserted-by":"crossref","unstructured":"Xie S, Zhang Z, Lin Z, Hinz T, Zhang K (2023) SmartBrush: text and shape guided object inpainting with diffusion model. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 22428\u201322437","DOI":"10.1109\/CVPR52729.2023.02148"},{"key":"10937_CR165","doi-asserted-by":"crossref","unstructured":"Xu T, Zhang P, Huang Q, Zhang H, Gan Z, Huang X, He X (2018) AttnGAN: fine-grained text to image generation with attentional generative adversarial networks. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 1316\u20131324","DOI":"10.1109\/CVPR.2018.00143"},{"key":"10937_CR164","doi-asserted-by":"crossref","unstructured":"Xu Q, Xu Z, Philip J, Bi S, Shu Z, Sunkavalli K, Neumann U (2022) Point-Nerf: point-based neural radiance fields. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 5438\u20135448","DOI":"10.1109\/CVPR52688.2022.00536"},{"key":"10937_CR163","unstructured":"Xu S, Huang Y, Pan J, Ma Z, Chai J (2023) Inversion-free image editing with natural language. arXiv preprint. arXiv:2312.04965"},{"key":"10937_CR166","doi-asserted-by":"crossref","unstructured":"Yang B, Bao C, Zeng J, Bao H, Zhang Y, Cui Z, Zhang G (2022) Neumesh: learning disentangled neural mesh-based implicit field for geometry and texture editing. In: European conference on computer vision. Springer, pp 597\u2013614","DOI":"10.1007\/978-3-031-19787-1_34"},{"key":"10937_CR167","unstructured":"Yang Z, Yang H, Pan Z, Zhu X, Zhang L (2023) Real-time photorealistic dynamic scene representation and rendering with 4D Gaussian splatting. arXiv preprint. arXiv:2310.10642"},{"issue":"6","key":"10937_CR168","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3355089.3356513","volume":"38","author":"W Yifan","year":"2019","unstructured":"Yifan W, Serena F, Wu S, \u00d6ztireli C, Sorkine-Hornung O (2019) Differentiable surface splatting for point-based geometry processing. ACM Trans Graph (TOG) 38(6):1\u201314","journal-title":"ACM Trans Graph (TOG)"},{"key":"10937_CR169","unstructured":"Yin Y, Fu Z, Yang F, Lin G (2023) Or-Nerf: object removing from 3D scenes guided by multiview segmentation with neural radiance fields. arXiv preprint. arXiv:2305.10503"},{"key":"10937_CR170","doi-asserted-by":"crossref","unstructured":"Zeng X, Chen X, Qi Z, Liu W, Zhao Z, Wang Z, Fu B, Liu Y, Yu G (2024) Paint3D: paint anything 3D with lighting-less texture diffusion models. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 4252\u20134262","DOI":"10.1109\/CVPR52733.2024.00407"},{"key":"10937_CR171","doi-asserted-by":"crossref","unstructured":"Zhang L, Rao A, Agrawala M (2023) Adding conditional control to text-to-image diffusion models. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp 3836\u20133847","DOI":"10.1109\/ICCV51070.2023.00355"},{"key":"10937_CR172","first-page":"9251","volume":"33","author":"Y Zhou","year":"2020","unstructured":"Zhou Y, Wu C, Li Z, Cao C, Ye Y, Saragih J, Li H, Sheikh Y (2020) Fully convolutional mesh autoencoder using efficient spatially varying kernels. Adv Neural Inf Process Syst 33:9251\u20139262","journal-title":"Adv Neural Inf Process Syst"},{"key":"10937_CR173","doi-asserted-by":"crossref","unstructured":"Zhou X, He Y, Yu FR, Li J, Li Y (2023) Repaint-Nerf: nerf editting via semantic masks and diffusion models. In: Proceedings of the thirty-second international joint conference on artificial intelligence, pp 1813\u20131821","DOI":"10.24963\/ijcai.2023\/201"},{"key":"10937_CR174","doi-asserted-by":"publisher","first-page":"1729","DOI":"10.1109\/LSP.2021.3107777","volume":"28","author":"J Zhu","year":"2021","unstructured":"Zhu J, Zhang Y, Zhang X, Cao X (2021) Gaussian model for 3D mesh steganography. IEEE Signal Process Lett 28:1729\u20131733","journal-title":"IEEE Signal Process Lett"},{"key":"10937_CR176","doi-asserted-by":"crossref","unstructured":"Zhuang J, Wang C, Lin L, Liu L, Li G (2023) DREAMEDITOR: text-driven 3D scene editing with neural fields. In: SIGGRAPH Asia 2023 conference papers, pp 1\u201310","DOI":"10.1145\/3610548.3618190"},{"issue":"4","key":"10937_CR175","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3658205","volume":"43","author":"J Zhuang","year":"2024","unstructured":"Zhuang J, Kang D, Cao Y-P, Li G, Lin L, Shan Y (2024) Tip-Editor: an accurate 3D editor following both text-prompts and image-prompts. ACM Trans Graph (TOG) 43(4):1\u201312","journal-title":"ACM Trans Graph (TOG)"},{"key":"10937_CR177","doi-asserted-by":"publisher","first-page":"8","DOI":"10.1016\/j.patrec.2024.07.002","volume":"185","author":"D Zimny","year":"2024","unstructured":"Zimny D, Waczy\u0144ska J, Trzci\u0144ski T, Spurek P (2024) Points2Nerf: generating neural radiance fields from 3D point cloud. Pattern Recogn Lett 185:8\u201314","journal-title":"Pattern Recogn Lett"},{"key":"10937_CR178","doi-asserted-by":"crossref","unstructured":"Zwicker M, Pfister H, Van\u00a0Baar J, Gross M (2001) Surface splatting. In: Proceedings of the 28th annual conference on computer graphics and interactive techniques, pp 371\u2013378","DOI":"10.1145\/383259.383300"}],"container-title":["Artificial Intelligence Review"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10462-024-10937-6.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10462-024-10937-6\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10462-024-10937-6.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,11,13]],"date-time":"2024-11-13T10:11:34Z","timestamp":1731492694000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10462-024-10937-6"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10,12]]},"references-count":178,"journal-issue":{"issue":"12","published-online":{"date-parts":[[2024,12]]}},"alternative-id":["10937"],"URL":"https:\/\/doi.org\/10.1007\/s10462-024-10937-6","relation":{},"ISSN":["1573-7462"],"issn-type":[{"value":"1573-7462","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,10,12]]},"assertion":[{"value":"29 August 2024","order":1,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"12 October 2024","order":2,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare that they have no competing interests.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Competing interests"}},{"value":"Not applicable.","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethical approval"}},{"value":"Not applicable.","order":4,"name":"Ethics","group":{"name":"EthicsHeading","label":"Informed consent"}}],"article-number":"321"}}