{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,7]],"date-time":"2026-04-07T19:45:49Z","timestamp":1775591149805,"version":"3.50.1"},"publisher-location":"Cham","reference-count":52,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031732416","type":"print"},{"value":"9783031732423","type":"electronic"}],"license":[{"start":{"date-parts":[[2024,10,29]],"date-time":"2024-10-29T00:00:00Z","timestamp":1730160000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,10,29]],"date-time":"2024-10-29T00:00:00Z","timestamp":1730160000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-73242-3_26","type":"book-chapter","created":{"date-parts":[[2024,10,28]],"date-time":"2024-10-28T09:15:43Z","timestamp":1730106943000},"page":"463-479","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":18,"title":["GVGEN: Text-to-3D Generation with Volumetric Representation"],"prefix":"10.1007","author":[{"given":"Xianglong","family":"He","sequence":"first","affiliation":[]},{"given":"Junyi","family":"Chen","sequence":"additional","affiliation":[]},{"given":"Sida","family":"Peng","sequence":"additional","affiliation":[]},{"given":"Di","family":"Huang","sequence":"additional","affiliation":[]},{"given":"Yangguang","family":"Li","sequence":"additional","affiliation":[]},{"given":"Xiaoshui","family":"Huang","sequence":"additional","affiliation":[]},{"given":"Chun","family":"Yuan","sequence":"additional","affiliation":[]},{"given":"Wanli","family":"Ouyang","sequence":"additional","affiliation":[]},{"given":"Tong","family":"He","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,10,29]]},"reference":[{"key":"26_CR1","unstructured":"Achiam, J., et\u00a0al.: GPT-4 technical report. arXiv preprint arXiv:2303.08774 (2023)"},{"key":"26_CR2","doi-asserted-by":"crossref","unstructured":"Barron, J.T., Mildenhall, B., Tancik, M., Hedman, P., Martin-Brualla, R., Srinivasan, P.P.: Mip-NeRF: a multiscale representation for anti-aliasing neural radiance fields. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 5855\u20135864 (2021)","DOI":"10.1109\/ICCV48922.2021.00580"},{"key":"26_CR3","doi-asserted-by":"crossref","unstructured":"Cao, A., Johnson, J.: HexPlane: a fast representation for dynamic scenes. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 130\u2013141 (2023)","DOI":"10.1109\/CVPR52729.2023.00021"},{"key":"26_CR4","doi-asserted-by":"crossref","unstructured":"Chang, J.H.R., Chen, W.Y., Ranjan, A., Yi, K.M., Tuzel, O.: Pointersect: neural rendering with cloud-ray intersection. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 8359\u20138369 (2023)","DOI":"10.1109\/CVPR52729.2023.00808"},{"key":"26_CR5","series-title":"LNCS","doi-asserted-by":"publisher","first-page":"333","DOI":"10.1007\/978-3-031-19824-3_20","volume-title":"ECCV 2022","author":"A Chen","year":"2022","unstructured":"Chen, A., Xu, Z., Geiger, A., Yu, J., Su, H.: TensoRF: tensorial radiance fields. In: Avidan, S., Brostow, G., Ciss\u00e9, M., Farinella, G.M., Hassner, T. (eds.) ECCV 2022. LNCS, vol. 13692, pp. 333\u2013350. Springer, Cham (2022). https:\/\/doi.org\/10.1007\/978-3-031-19824-3_20"},{"key":"26_CR6","unstructured":"Chen, G., Wang, W.: A survey on 3D Gaussian splatting. arXiv preprint arXiv:2401.03890 (2024)"},{"key":"26_CR7","doi-asserted-by":"crossref","unstructured":"Chen, R., Chen, Y., Jiao, N., Jia, K.: Fantasia3D: disentangling geometry and appearance for high-quality text-to-3D content creation. arXiv preprint arXiv:2303.13873 (2023)","DOI":"10.1109\/ICCV51070.2023.02033"},{"key":"26_CR8","doi-asserted-by":"crossref","unstructured":"Chen, Y., et al.: GaussianEditor: swift and controllable 3D editing with Gaussian splatting. arXiv preprint arXiv:2311.14521 (2023)","DOI":"10.1109\/CVPR52733.2024.02029"},{"key":"26_CR9","doi-asserted-by":"crossref","unstructured":"Chen, Z., Wang, F., Liu, H.: Text-to-3D using Gaussian splatting. arXiv preprint arXiv:2309.16585 (2023)","DOI":"10.1109\/CVPR52733.2024.02022"},{"key":"26_CR10","doi-asserted-by":"crossref","unstructured":"Cheng, Y.C., Lee, H.Y., Tulyakov, S., Schwing, A.G., Gui, L.Y.: SDFusion: multimodal 3D shape completion, reconstruction, and generation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 4456\u20134465 (2023)","DOI":"10.1109\/CVPR52729.2023.00433"},{"key":"26_CR11","doi-asserted-by":"crossref","unstructured":"Deitke, M., et\u00a0al.: Objaverse-XL: a universe of 10m+ 3D objects. In: Advances in Neural Information Processing Systems, vol. 36 (2024)","DOI":"10.1109\/CVPR52729.2023.01263"},{"key":"26_CR12","doi-asserted-by":"crossref","unstructured":"Deitke, M., et al.: Objaverse: a universe of annotated 3D objects. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 13142\u201313153 (2023)","DOI":"10.1109\/CVPR52729.2023.01263"},{"key":"26_CR13","unstructured":"Dhariwal, P., Nichol, A.: Diffusion models beat GANs on image synthesis. In: Advances in Neural Information Processing Systems, vol. 34, pp. 8780\u20138794 (2021)"},{"key":"26_CR14","unstructured":"He, Z., Wang, T.: OpenLRM: Open-source large reconstruction models (2023). https:\/\/github.com\/3DTopia\/OpenLRM"},{"key":"26_CR15","unstructured":"Hong, Y., et al.: LRM: large reconstruction model for single image to 3D. arXiv preprint arXiv:2311.04400 (2023)"},{"key":"26_CR16","doi-asserted-by":"crossref","unstructured":"Huang, Z., et\u00a0al.: EpiDiff: enhancing multi-view synthesis via localized epipolar-constrained diffusion. arXiv preprint arXiv:2312.06725 (2023)","DOI":"10.1109\/CVPR52733.2024.00934"},{"key":"26_CR17","doi-asserted-by":"crossref","unstructured":"Jain, A., Mildenhall, B., Barron, J.T., Abbeel, P., Poole, B.: Zero-shot text-guided object generation with dream fields. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 867\u2013876 (2022)","DOI":"10.1109\/CVPR52688.2022.00094"},{"key":"26_CR18","unstructured":"Jun, H., Nichol, A.: Shap-E: generating conditional 3D implicit functions. arXiv preprint arXiv:2305.02463 (2023)"},{"key":"26_CR19","doi-asserted-by":"crossref","unstructured":"Kerbl, B., Kopanas, G., Leimk\u00fchler, T., Drettakis, G.: 3D Gaussian splatting for real-time radiance field rendering. ACM Trans. Graph. 42(4) (2023)","DOI":"10.1145\/3592433"},{"key":"26_CR20","unstructured":"Li, J., et al.: Instant3D: fast text-to-3D with sparse-view generation and large reconstruction model. arXiv preprint arXiv:2311.06214 (2023)"},{"key":"26_CR21","unstructured":"Li, J., Li, D., Savarese, S., Hoi, S.: BLIP-2: bootstrapping language-image pre-training with frozen image encoders and large language models. arXiv preprint arXiv:2301.12597 (2023)"},{"key":"26_CR22","unstructured":"Li, W., Chen, R., Chen, X., Tan, P.: SweetDreamer: aligning geometric priors in 2D diffusion for consistent text-to-3D. arXiv preprint arXiv:2310.02596 (2023)"},{"key":"26_CR23","doi-asserted-by":"crossref","unstructured":"Liang, Y., Yang, X., Lin, J., Li, H., Xu, X., Chen, Y.: LucidDreamer: towards high-fidelity text-to-3d generation via interval score matching (2023)","DOI":"10.1109\/CVPR52733.2024.00623"},{"key":"26_CR24","unstructured":"Liu, Y., et al.: SyncDreamer: generating multiview-consistent images from a single-view image. arXiv preprint arXiv:2309.03453 (2023)"},{"key":"26_CR25","doi-asserted-by":"publisher","unstructured":"Liu, Y., Li, Q., Sun, Z.: One-shot face reenactment with dense correspondence estimation. Mach. Intell. Res. 1\u201313 (2023). https:\/\/doi.org\/10.1007\/s11633-023-1433-9. https:\/\/www.mi-research.net\/en\/article\/doi\/10.1007\/s11633-023-1433-9","DOI":"10.1007\/s11633-023-1433-9"},{"key":"26_CR26","doi-asserted-by":"crossref","unstructured":"Liu, Z., et al.: UniDream: unifying diffusion priors for relightable text-to-3D generation. arXiv preprint arXiv:2312.08754 (2023)","DOI":"10.1007\/978-3-031-72652-1_5"},{"key":"26_CR27","doi-asserted-by":"crossref","unstructured":"Long, X., et\u00a0al.: Wonder3D: single image to 3D using cross-domain diffusion. arXiv preprint arXiv:2310.15008 (2023)","DOI":"10.1109\/CVPR52733.2024.00951"},{"key":"26_CR28","unstructured":"Luo, T., Rockwell, C., Lee, H., Johnson, J.: Scalable 3D captioning with pretrained models. In: Advances in Neural Information Processing Systems, vol. 36 (2024)"},{"key":"26_CR29","doi-asserted-by":"crossref","unstructured":"Melas-Kyriazi, L., Rupprecht, C., Vedaldi, A.: PC2: projection-conditioned point cloud diffusion for single-image 3D reconstruction. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 12923\u201312932 (2023)","DOI":"10.1109\/CVPR52729.2023.01242"},{"issue":"1","key":"26_CR30","doi-asserted-by":"publisher","first-page":"99","DOI":"10.1145\/3503250","volume":"65","author":"B Mildenhall","year":"2021","unstructured":"Mildenhall, B., Srinivasan, P.P., Tancik, M., Barron, J.T., Ramamoorthi, R., Ng, R.: NeRF: representing scenes as neural radiance fields for view synthesis. Commun. ACM 65(1), 99\u2013106 (2021)","journal-title":"Commun. ACM"},{"key":"26_CR31","doi-asserted-by":"crossref","unstructured":"Mohammad\u00a0Khalid, N., Xie, T., Belilovsky, E., Popa, T.: Clip-mesh: generating textured meshes from text using pretrained image-text models. In: SIGGRAPH Asia 2022 Conference Papers, pp.\u00a01\u20138 (2022)","DOI":"10.1145\/3550469.3555392"},{"issue":"4","key":"26_CR32","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3528223.3530127","volume":"41","author":"T M\u00fcller","year":"2022","unstructured":"M\u00fcller, T., Evans, A., Schied, C., Keller, A.: Instant neural graphics primitives with a multiresolution hash encoding. ACM Trans. Graph. (ToG) 41(4), 1\u201315 (2022)","journal-title":"ACM Trans. Graph. (ToG)"},{"key":"26_CR33","unstructured":"Nichol, A., Jun, H., Dhariwal, P., Mishkin, P., Chen, M.: Point-E: a system for generating 3D point clouds from complex prompts. arXiv preprint arXiv:2212.08751 (2022)"},{"key":"26_CR34","unstructured":"Ntavelis, E., Siarohin, A., Olszewski, K., Wang, C., Gool, L.V., Tulyakov, S.: Autodecoding latent 3D diffusion models. In: Advances in Neural Information Processing Systems, vol. 36 (2024)"},{"key":"26_CR35","unstructured":"Poole, B., Jain, A., Barron, J.T., Mildenhall, B.: DreamFusion: text-to-3D using 2D diffusion. arXiv preprint arXiv:2209.14988 (2022)"},{"key":"26_CR36","unstructured":"Radford, A., et\u00a0al.: Learning transferable visual models from natural language supervision. In: International Conference on Machine Learning, pp. 8748\u20138763. PMLR (2021)"},{"key":"26_CR37","unstructured":"Ren, J., et al.: DreamGaussian4D: generative 4D Gaussian splatting. arXiv preprint arXiv:2312.17142 (2023)"},{"key":"26_CR38","doi-asserted-by":"crossref","unstructured":"Rombach, R., Blattmann, A., Lorenz, D., Esser, P., Ommer, B.: High-resolution image synthesis with latent diffusion models. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 10684\u201310695 (2022)","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"26_CR39","unstructured":"Shi, R., et al.: Zero123++: a single image to consistent multi-view diffusion base model. arXiv preprint arXiv:2310.15110 (2023)"},{"key":"26_CR40","unstructured":"Shi, Y., Wang, P., Ye, J., Long, M., Li, K., Yang, X.: MVDream: multi-view diffusion for 3D generation. arXiv preprint arXiv:2308.16512 (2023)"},{"key":"26_CR41","unstructured":"Tang, J., Ren, J., Zhou, H., Liu, Z., Zeng, G.: DreamGaussian: generative Gaussian splatting for efficient 3D content creation. arXiv preprint arXiv:2309.16653 (2023)"},{"key":"26_CR42","unstructured":"Tang, Z., et al.: VolumeDiffusion: flexible text-to-3D generation with efficient volumetric encoder. arXiv preprint arXiv:2312.11459 (2023)"},{"key":"26_CR43","unstructured":"Wang, Z., et al.: ProlificDreamer: high-fidelity and diverse text-to-3D generation with variational score distillation. In: Advances in Neural Information Processing Systems, vol. 36 (2024)"},{"key":"26_CR44","doi-asserted-by":"crossref","unstructured":"Wu, G., et al.: 4D Gaussian splatting for real-time dynamic scene rendering. arXiv preprint arXiv:2310.08528 (2023)","DOI":"10.1109\/CVPR52733.2024.01920"},{"key":"26_CR45","doi-asserted-by":"crossref","unstructured":"Wu, Z., Wang, Y., Feng, M., Xie, H., Mian, A.: Sketch and text guided diffusion model for colored point cloud generation. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 8929\u20138939 (2023)","DOI":"10.1109\/ICCV51070.2023.00820"},{"key":"26_CR46","unstructured":"Xu, D., et al.: AGG: amortized generative 3D Gaussians for single image to 3D. arXiv preprint arXiv:2401.04099 (2024)"},{"key":"26_CR47","doi-asserted-by":"crossref","unstructured":"Xu, Q., et al.: Point-NeRF: point-based neural radiance fields. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 5438\u20135448 (2022)","DOI":"10.1109\/CVPR52688.2022.00536"},{"key":"26_CR48","unstructured":"Yi, T., et al.: GaussianDreamer: fast generation from text to 3D Gaussian splatting with point cloud priors. arXiv preprint arXiv:2310.08529 (2023)"},{"key":"26_CR49","unstructured":"Yin, Y., Xu, D., Wang, Z., Zhao, Y., Wei, Y.: 4DGen: grounded 4D content generation with spatial-temporal consistency. arXiv preprint arXiv:2312.17225 (2023)"},{"key":"26_CR50","unstructured":"Yu, X., Guo, Y.C., Li, Y., Liang, D., Zhang, S.H., Qi, X.: Text-to-3D with classifier score distillation. arXiv preprint arXiv:2310.19415 (2023)"},{"issue":"3","key":"26_CR51","doi-asserted-by":"publisher","first-page":"495","DOI":"10.1007\/s11633-023-1474-0","volume":"21","author":"H Zhao","year":"2024","unstructured":"Zhao, H., Zhang, J., Chen, Z., Yuan, B., Tao, D.: On robust cross-view consistency in self-supervised monocular depth estimation. Mach. Intell. Res. 21(3), 495\u2013513 (2024)","journal-title":"Mach. Intell. Res."},{"key":"26_CR52","doi-asserted-by":"crossref","unstructured":"Zou, Z.X., et al.: Triplane meets gaussian splatting: fast and generalizable single-view 3D reconstruction with transformers. arXiv preprint arXiv:2312.09147 (2023)","DOI":"10.1109\/CVPR52733.2024.00983"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2024"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-73242-3_26","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,11,30]],"date-time":"2024-11-30T10:29:33Z","timestamp":1732962573000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-73242-3_26"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10,29]]},"ISBN":["9783031732416","9783031732423"],"references-count":52,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-73242-3_26","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,10,29]]},"assertion":[{"value":"29 October 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Milan","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 September 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4 October 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2024.ecva.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}