{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,2]],"date-time":"2026-04-02T11:18:49Z","timestamp":1775128729116,"version":"3.50.1"},"publisher-location":"Cham","reference-count":51,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031726293","type":"print"},{"value":"9783031726309","type":"electronic"}],"license":[{"start":{"date-parts":[[2024,12,5]],"date-time":"2024-12-05T00:00:00Z","timestamp":1733356800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,12,5]],"date-time":"2024-12-05T00:00:00Z","timestamp":1733356800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-72630-9_4","type":"book-chapter","created":{"date-parts":[[2024,12,4]],"date-time":"2024-12-04T05:18:58Z","timestamp":1733289538000},"page":"55-71","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":23,"title":["GaussCtrl: Multi-view Consistent Text-Driven 3D Gaussian Splatting Editing"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0009-0007-0942-5418","authenticated-orcid":false,"given":"Jing","family":"Wu","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2046-3363","authenticated-orcid":false,"given":"Jia-Wang","family":"Bian","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3797-5082","authenticated-orcid":false,"given":"Xinghui","family":"Li","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7760-1339","authenticated-orcid":false,"given":"Guangrun","family":"Wang","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7790-6423","authenticated-orcid":false,"given":"Ian","family":"Reid","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0009-0006-0259-5732","authenticated-orcid":false,"given":"Philip","family":"Torr","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0630-6129","authenticated-orcid":false,"given":"Victor Adrian","family":"Prisacariu","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,12,5]]},"reference":[{"key":"4_CR1","doi-asserted-by":"crossref","unstructured":"Barron, J.T., Mildenhall, B., Verbin, D., Srinivasan, P.P., Hedman, P.: Mip-nerf 360: Unbounded anti-aliased neural radiance fields. In: CVPR (2022)","DOI":"10.1109\/CVPR52688.2022.00539"},{"key":"4_CR2","doi-asserted-by":"crossref","unstructured":"Brooks, T., Holynski, A., Efros, A.A.: Instructpix2pix: Learning to follow image editing instructions. In: CVPR (2023)","DOI":"10.1109\/CVPR52729.2023.01764"},{"key":"4_CR3","doi-asserted-by":"crossref","unstructured":"Cao, M., Wang, X., Qi, Z., Shan, Y., Qie, X., Zheng, Y.: Masactrl: Tuning-free mutual self-attention control for consistent image synthesis and editing. arXiv preprint arXiv:2304.08465 (2023)","DOI":"10.1109\/ICCV51070.2023.02062"},{"key":"4_CR4","unstructured":"Dhariwal, P., Nichol, A.: Diffusion models beat gans on image synthesis. ArXiv abs\/2105.05233 (2021), https:\/\/api.semanticscholar.org\/CorpusID:234357997"},{"key":"4_CR5","unstructured":"Dong, J., Wang, Y.X.: Vica-nerf: View-consistency-aware 3d editing of neural radiance fields. In: Thirty-seventh Conference on Neural Information Processing Systems (2023)"},{"key":"4_CR6","unstructured":"Epstein, D., Jabri, A., Poole, B., Efros, A.A., Holynski, A.: Diffusion self-guidance for controllable image generation (2023)"},{"key":"4_CR7","doi-asserted-by":"crossref","unstructured":"Gal, R., Patashnik, O., Maron, H., Chechik, G., Cohen-Or, D.: Stylegan-nada: Clip-guided domain adaptation of image generators (2021)","DOI":"10.1145\/3528223.3530164"},{"key":"4_CR8","doi-asserted-by":"publisher","unstructured":"Gatys, L.A., Ecker, A.S., Bethge, M.: Image style transfer using convolutional neural networks. In: 2016 IEEE Conference on Computer Vision and Pattern Recognition (CVPR), pp. 2414\u20132423 (2016). https:\/\/doi.org\/10.1109\/CVPR.2016.265","DOI":"10.1109\/CVPR.2016.265"},{"key":"4_CR9","doi-asserted-by":"crossref","unstructured":"Haque, A., Tancik, M., Efros, A., Holynski, A., Kanazawa, A.: Instruct-nerf2nerf: Editing 3d scenes with instructions. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision (2023)","DOI":"10.1109\/ICCV51070.2023.01808"},{"key":"4_CR10","doi-asserted-by":"crossref","unstructured":"Hertz, A., Aberman, K., Cohen-Or, D.: Delta denoising score (2023)","DOI":"10.1109\/ICCV51070.2023.00221"},{"key":"4_CR11","unstructured":"Hertz, A., Mokady, R., Tenenbaum, J., Aberman, K., Pritch, Y., Cohen-Or, D.: Prompt-to-prompt image editing with cross attention control (2022)"},{"key":"4_CR12","unstructured":"Ho, J.: Classifier-free diffusion guidance. ArXiv:abs\/2207.12598 (2022)"},{"key":"4_CR13","unstructured":"Ho, J., Jain, A., Abbeel, P.: Denoising diffusion probabilistic models. arXiv preprint arxiv:2006.11239 (2020)"},{"key":"4_CR14","unstructured":"Hu, E.J., et al.: LoRA: Low-rank adaptation of large language models. In: International Conference on Learning Representations (2022).https:\/\/openreview.net\/forum?id=nZeVKeeFYf9"},{"key":"4_CR15","doi-asserted-by":"crossref","unstructured":"Huang, Y.H., He, Y., Yuan, Y.J., Lai, Y.K., Gao, L.: Stylizednerf: Consistent 3d scene stylization as stylized nerf via 2d-3d mutual learning. In: Computer Vision and Pattern Recognition (CVPR) (2022)","DOI":"10.1109\/CVPR52688.2022.01780"},{"key":"4_CR16","doi-asserted-by":"crossref","unstructured":"Kerbl, B., Kopanas, G., Leimk\u00fchler, T., Drettakis, G.: 3d gaussian splatting for real-time radiance field rendering. ACM Trans. Graph. 42(4) (July 2023). https:\/\/repo-sam.inria.fr\/fungraph\/3d-gaussian-splatting\/","DOI":"10.1145\/3592433"},{"key":"4_CR17","doi-asserted-by":"crossref","unstructured":"Kirillov, A., et al.: Segment anything. arXiv:2304.02643 (2023)","DOI":"10.1109\/ICCV51070.2023.00371"},{"key":"4_CR18","unstructured":"Li, B., Weinberger, K.Q., Belongie, S., Koltun, V., Ranftl, R.: Language-driven semantic segmentation. In: International Conference on Learning Representations (2022). https:\/\/openreview.net\/forum?id=RriDjddCLN"},{"key":"4_CR19","unstructured":"Li*, C., et\u00a0al.: Elevater: A benchmark and toolkit for evaluating language-augmented visual models. arXiv preprint arXiv:2204.08790 (2022)"},{"key":"4_CR20","doi-asserted-by":"crossref","unstructured":"Li, F., Zhang, H., Liu, S., Guo, J., Ni, L.M., Zhang, L.: Dn-detr: Accelerate detr training by introducing query denoising. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 13619\u201313627 (2022)","DOI":"10.1109\/CVPR52688.2022.01325"},{"key":"4_CR21","doi-asserted-by":"crossref","unstructured":"Li*, L.H., et al.: Grounded language-image pre-training. In: CVPR (2022)","DOI":"10.1109\/CVPR52688.2022.01069"},{"key":"4_CR22","doi-asserted-by":"crossref","unstructured":"Liu, K., et al.: Stylerf: Zero-shot 3d style transfer of neural radiance fields (2023)","DOI":"10.1109\/CVPR52729.2023.00806"},{"key":"4_CR23","unstructured":"Liu, S., et al.: DAB-DETR: Dynamic anchor boxes are better queries for DETR. In: International Conference on Learning Representations (2022). https:\/\/openreview.net\/forum?id=oMI9PjOb9Jl"},{"key":"4_CR24","doi-asserted-by":"crossref","unstructured":"Liu, S., et\u00a0al.: Grounding dino: Marrying dino with grounded pre-training for open-set object detection. arXiv preprint arXiv:2303.05499 (2023)","DOI":"10.1007\/978-3-031-72970-6_3"},{"key":"4_CR25","unstructured":"Luo, C.: Understanding diffusion models: A unified perspective. ArXiv abs\/2208.11970 (2022). https:\/\/api.semanticscholar.org\/CorpusID:251799923"},{"key":"4_CR26","doi-asserted-by":"crossref","unstructured":"Mildenhall, B., Srinivasan, P.P., Tancik, M., Barron, J.T., Ramamoorthi, R., Ng, R.: Nerf: Representing scenes as neural radiance fields for view synthesis (2020)","DOI":"10.1007\/978-3-030-58452-8_24"},{"key":"4_CR27","doi-asserted-by":"crossref","unstructured":"Mokady, R., Hertz, A., Aberman, K., Pritch, Y., Cohen-Or, D.: Null-text inversion for editing real images using guided diffusion models. arXiv preprint arXiv:2211.09794 (2022)","DOI":"10.1109\/CVPR52729.2023.00585"},{"key":"4_CR28","doi-asserted-by":"crossref","unstructured":"Mou, C., Wang, X., Song, J., Shan, Y., Zhang, J.: Diffeditor: Boosting accuracy and flexibility on diffusion-based image editing. arXiv preprint arXiv:2402.02583 (2023)","DOI":"10.1109\/CVPR52733.2024.00811"},{"key":"4_CR29","unstructured":"Mou, C., Wang, X., Song, J., Shan, Y., Zhang, J.: Dragondiffusion: Enabling drag-style manipulation on diffusion models. arXiv preprint arXiv:2307.02421 (2023)"},{"key":"4_CR30","unstructured":"Nichol, A., Dhariwal, P.: Improved denoising diffusion probabilistic models. ArXiv:abs\/2102.09672 (2021). https:\/\/api.semanticscholar.org\/CorpusID:231979499"},{"key":"4_CR31","unstructured":"Nie, S., Guo, H.A., Lu, C., Zhou, Y., Zheng, C., Li, C.: The blessing of randomness: Sde beats ode in general diffusion-based image editing. arXiv preprint arXiv:2311.01410 (2023)"},{"key":"4_CR32","doi-asserted-by":"crossref","unstructured":"Pan, X., Tewari, A., Leimk\u00fchler, T., Liu, L., Meka, A., Theobalt, C.: Drag your gan: Interactive point-based manipulation on the generative image manifold. In: ACM SIGGRAPH 2023 Conference Proceedings (2023)","DOI":"10.1145\/3588432.3591500"},{"key":"4_CR33","unstructured":"von Platen, P., et al.: Diffusers: State-of-the-art diffusion models. https:\/\/github.com\/huggingface\/diffusers (2022)"},{"key":"4_CR34","unstructured":"Poole, B., Jain, A., Barron, J.T., Mildenhall, B.: Dreamfusion: Text-to-3d using 2d diffusion. arXiv preprint arXiv:2209.14988 (2022)"},{"key":"4_CR35","unstructured":"Ramesh, A., Dhariwal, P., Nichol, A., Chu, C., Chen, M.: Hierarchical text-conditional image generation with clip latents. ArXiv:abs\/2204.06125 (2022). https:\/\/api.semanticscholar.org\/CorpusID:248097655"},{"key":"4_CR36","doi-asserted-by":"crossref","unstructured":"Rombach, R., Blattmann, A., Lorenz, D., Esser, P., Ommer, B.: High-resolution image synthesis with latent diffusion models. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 10684\u201310695 (2022)","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"4_CR37","doi-asserted-by":"crossref","unstructured":"Ruiz, N., Li, Y., Jampani, V., Pritch, Y., Rubinstein, M., Aberman, K.: Dreambooth: Fine tuning text-to-image diffusion models for subject-driven generation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (2023)","DOI":"10.1109\/CVPR52729.2023.02155"},{"key":"4_CR38","unstructured":"Saharia, C., et al.: Photorealistic text-to-image diffusion models with deep language understanding. ArXiv:abs\/2205.11487 (2022). https:\/\/api.semanticscholar.org\/CorpusID:248986576"},{"key":"4_CR39","doi-asserted-by":"crossref","unstructured":"Shi, Y., Xue, C., Pan, J., Zhang, W., Tan, V.Y., Bai, S.: Dragdiffusion: Harnessing diffusion models for interactive point-based image editing. arXiv preprint arXiv:2306.14435 (2023)","DOI":"10.1109\/CVPR52733.2024.00844"},{"key":"4_CR40","unstructured":"Sohl-Dickstein, J., Weiss, E.A., Maheswaranathan, N., Ganguli, S.: Deep unsupervised learning using nonequilibrium thermodynamics, p. 2256\u20132265. ICML\u201915, JMLR.org (2015)"},{"key":"4_CR41","unstructured":"Song, J., Meng, C., Ermon, S.: Denoising diffusion implicit models. arXiv:2010.02502 (October 2020). https:\/\/arxiv.org\/abs\/2010.02502"},{"key":"4_CR42","doi-asserted-by":"crossref","unstructured":"Tancik, M., et al.: Nerfstudio: a modular framework for neural radiance field development. In: ACM SIGGRAPH 2023 Conference Proceedings. SIGGRAPH \u201923 (2023)","DOI":"10.1145\/3588432.3591516"},{"key":"4_CR43","unstructured":"Vachha, C., Haque, A.: Instruct-gs2gs: Editing 3d gaussian splats with instructions (2024). https:\/\/instruct-gs2gs.github.io\/"},{"key":"4_CR44","unstructured":"Wang, C., Jiang, R., Chai, M., He, M., Chen, D., Liao, J.: Nerf-art: Text-driven neural radiance fields stylization. arXiv preprint arXiv:2212.08070 (2022)"},{"key":"4_CR45","doi-asserted-by":"crossref","unstructured":"Yao, Y., et al.: Blendedmvs: a large-scale dataset for generalized multi-view stereo networks. In: Computer Vision and Pattern Recognition (CVPR) (2020)","DOI":"10.1109\/CVPR42600.2020.00186"},{"key":"4_CR46","unstructured":"Zhang, H., et al.: Dino: Detr with improved denoising anchor boxes for end-to-end object detection (2022)"},{"key":"4_CR47","unstructured":"Zhang, H., et al.: Glipv2: Unifying localization and vision-language understanding. arXiv preprint arXiv:2206.05836 (2022)"},{"key":"4_CR48","doi-asserted-by":"crossref","unstructured":"Zhang, K., et al.: Arf: Artistic radiance fields (2022)","DOI":"10.1007\/978-3-031-19821-2_41"},{"key":"4_CR49","doi-asserted-by":"crossref","unstructured":"Zhang, L., Rao, A., Agrawala, M.: Adding conditional control to text-to-image diffusion models (2023)","DOI":"10.1109\/ICCV51070.2023.00355"},{"key":"4_CR50","doi-asserted-by":"crossref","unstructured":"Zhuang, J., Wang, C., Liu, L., Lin, L., Li, G.: Dreameditor: Text-driven 3d scene editing with neural fields. arXiv preprint arXiv:2306.13455 (2023)","DOI":"10.1145\/3610548.3618190"},{"key":"4_CR51","doi-asserted-by":"publisher","unstructured":"Zwicker, M., Pfister, H., van Baar, J., Gross, M.: Ewa volume splatting. In: Proceedings Visualization, 2001. VIS \u201901, pp. 29\u2013538 (2001). https:\/\/doi.org\/10.1109\/VISUAL.2001.964490","DOI":"10.1109\/VISUAL.2001.964490"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2024"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-72630-9_4","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,1,12]],"date-time":"2026-01-12T06:38:21Z","timestamp":1768199901000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-72630-9_4"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,12,5]]},"ISBN":["9783031726293","9783031726309"],"references-count":51,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-72630-9_4","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,12,5]]},"assertion":[{"value":"5 December 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Milan","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 September 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4 October 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2024.ecva.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}