{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,17]],"date-time":"2026-03-17T18:24:40Z","timestamp":1773771880739,"version":"3.50.1"},"publisher-location":"Cham","reference-count":112,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031726323","type":"print"},{"value":"9783031726330","type":"electronic"}],"license":[{"start":{"date-parts":[[2024,11,22]],"date-time":"2024-11-22T00:00:00Z","timestamp":1732233600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,11,22]],"date-time":"2024-11-22T00:00:00Z","timestamp":1732233600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-72633-0_1","type":"book-chapter","created":{"date-parts":[[2024,11,21]],"date-time":"2024-11-21T07:54:38Z","timestamp":1732175678000},"page":"1-20","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":66,"title":["GRM: Large Gaussian Reconstruction Model for\u00a0Efficient 3D Reconstruction 
and\u00a0Generation"],"prefix":"10.1007","author":[{"given":"Yinghao","family":"Xu","sequence":"first","affiliation":[]},{"given":"Zifan","family":"Shi","sequence":"additional","affiliation":[]},{"given":"Wang","family":"Yifan","sequence":"additional","affiliation":[]},{"given":"Hansheng","family":"Chen","sequence":"additional","affiliation":[]},{"given":"Ceyuan","family":"Yang","sequence":"additional","affiliation":[]},{"given":"Sida","family":"Peng","sequence":"additional","affiliation":[]},{"given":"Yujun","family":"Shen","sequence":"additional","affiliation":[]},{"given":"Gordon","family":"Wetzstein","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,11,22]]},"reference":[{"key":"1_CR1","doi-asserted-by":"crossref","unstructured":"Abdal, R., et al.: Gaussian shell maps for efficient 3D human generation. arXiv preprint arXiv:2311.17857 (2023)","DOI":"10.1109\/CVPR52733.2024.00902"},{"key":"1_CR2","doi-asserted-by":"crossref","unstructured":"Anciukevi\u010dius, T., et al.: RenderDiffusion: image diffusion for 3D reconstruction, inpainting and generation. In: IEEE Conference on Computer Vision and Pattern Recognition (2023)","DOI":"10.1109\/CVPR52729.2023.01213"},{"key":"1_CR3","unstructured":"Beltagy, I., Peters, M.E., Cohan, A.: LongFormer: the long-document transformer. arXiv preprint arXiv:2004.05150 (2020)"},{"key":"1_CR4","unstructured":"Brock, A., Donahue, J., Simonyan, K.: Large scale GAN training for high fidelity natural image synthesis. arXiv preprint arXiv:1809.11096 (2018)"},{"key":"1_CR5","doi-asserted-by":"crossref","unstructured":"Chan, E.R., et\u00a0al.: Efficient geometry-aware 3D generative adversarial networks. 
In: IEEE Conference on Computer Vision and Pattern Recognition (2022)","DOI":"10.1109\/CVPR52688.2022.01565"},{"key":"1_CR6","doi-asserted-by":"crossref","unstructured":"Chan, E.R., Monteiro, M., Kellnhofer, P., Wu, J., Wetzstein, G.: pi-GAN: periodic implicit generative adversarial networks for 3D-aware image synthesis. In: IEEE Conference on Computer Vision and Pattern Recognition (2021)","DOI":"10.1109\/CVPR46437.2021.00574"},{"key":"1_CR7","doi-asserted-by":"crossref","unstructured":"Chan, E.R., et al.: Generative novel view synthesis with 3D-aware diffusion models. International Conference on Computer Vision (2023)","DOI":"10.1109\/ICCV51070.2023.00389"},{"key":"1_CR8","doi-asserted-by":"crossref","unstructured":"Charatan, D., Li, S., Tagliasacchi, A., Sitzmann, V.: pixelSplat: 3D Gaussian splats from image pairs for scalable generalizable 3D reconstruction. arXiv preprint arXiv:2312.12337 (2023)","DOI":"10.1109\/CVPR52733.2024.01840"},{"key":"1_CR9","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"333","DOI":"10.1007\/978-3-031-19824-3_20","volume-title":"Computer Vision \u2013 ECCV 2022","author":"A Chen","year":"2022","unstructured":"Chen, A., Xu, Z., Geiger, A., Yu, J., Su, H.: TensoRF: tensorial radiance fields. In: Avidan, S., Brostow, G., Ciss\u00e9, M., Farinella, G.M., Hassner, T. (eds.) ECCV 2022. LNCS, vol. 13692, pp. 333\u2013350. Springer, Cham (2022). https:\/\/doi.org\/10.1007\/978-3-031-19824-3_20"},{"key":"1_CR10","doi-asserted-by":"crossref","unstructured":"Chen, A., et al.: MVSNeRF: Fast generalizable radiance field reconstruction from multi-view stereo. In: International Conference on Computer Vision (2021)","DOI":"10.1109\/ICCV48922.2021.01386"},{"key":"1_CR11","unstructured":"Chen, G., Wang, W.: A survey on 3D gaussian splatting. 
arXiv preprint arXiv:2401.03890 (2024)"},{"key":"1_CR12","doi-asserted-by":"crossref","unstructured":"Chen, H., et al.: Single-stage diffusion NeRF: a unified approach to 3D generation and reconstruction. arXiv preprint arXiv:2304.06714 (2023)","DOI":"10.1109\/ICCV51070.2023.00229"},{"key":"1_CR13","doi-asserted-by":"crossref","unstructured":"Chen, R., Chen, Y., Jiao, N., Jia, K.: Fantasia3D: disentangling geometry and appearance for high-quality text-to-3D content creation. arXiv preprint arXiv:2303.13873 (2023)","DOI":"10.1109\/ICCV51070.2023.02033"},{"key":"1_CR14","doi-asserted-by":"crossref","unstructured":"Chen, Z., Wang, F., Liu, H.: Text-to-3D using Gaussian splatting. arXiv preprint arXiv:2309.16585 (2023)","DOI":"10.1109\/CVPR52733.2024.02022"},{"key":"1_CR15","unstructured":"Chung, J., Lee, S., Nam, H., Lee, J., Lee, K.M.: LucidDreamer: domain-free generation of 3D gaussian splatting scenes. arXiv preprint arXiv:2311.13384 (2023)"},{"key":"1_CR16","unstructured":"Dhariwal, P., Nichol, A.: Diffusion models beat GANs on image synthesis. In: Advances in Neural Information Processing Systems, vol. 34, pp. 8780\u20138794 (2021)"},{"key":"1_CR17","unstructured":"Dosovitskiy, A., et\u00a0al.: An image is worth 16 $$\\times $$ 16 words: transformers for image recognition at scale. arXiv preprint arXiv:2010.11929 (2020)"},{"key":"1_CR18","doi-asserted-by":"crossref","unstructured":"Downs, L., et al.: Google scanned objects: a high-quality dataset of 3D scanned household items. In: 2022 International Conference on Robotics and Automation (ICRA), pp. 2553\u20132560. IEEE (2022)","DOI":"10.1109\/ICRA46639.2022.9811809"},{"key":"1_CR19","doi-asserted-by":"crossref","unstructured":"Fei, B., Xu, J., Zhang, R., Zhou, Q., Yang, W., He, Y.: 3D Gaussian as a new vision era: a survey. 
arXiv preprint arXiv:2402.07181 (2024)","DOI":"10.1109\/TVCG.2024.3397828"},{"key":"1_CR20","unstructured":"Gao, J., et al.: GET3D: a generative model of high quality 3D textured shapes learned from images. In: Advances in Neural Information Processing Systems (2022)"},{"key":"1_CR21","unstructured":"Goodfellow, I., et al.: Generative adversarial nets. In: Advances in Neural Information Processing Systems (2014)"},{"key":"1_CR22","unstructured":"Gu, J., Liu, L., Wang, P., Theobalt, C.: StyleNeRF: a style-based 3d-aware generator for high-resolution image synthesis. arXiv preprint arXiv:2110.08985 (2021)"},{"key":"1_CR23","unstructured":"Gu, J., et al.: NeRFDiff: single-image view synthesis with nerf-guided distillation from 3D-aware diffusion. In: International Conference on Machine Learning (2023)"},{"key":"1_CR24","unstructured":"Gupta, A., Xiong, W., Nie, Y., Jones, I., O\u011fuz, B.: 3DGen: triplane latent diffusion for textured mesh generation. arXiv preprint arXiv:2303.05371 (2023)"},{"key":"1_CR25","doi-asserted-by":"crossref","unstructured":"Hertz, A., Aberman, K., Cohen-Or, D.: Delta denoising score. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 2328\u20132337 (2023)","DOI":"10.1109\/ICCV51070.2023.00221"},{"key":"1_CR26","unstructured":"Heusel, M., Ramsauer, H., Unterthiner, T., Nessler, B., Hochreiter, S.: GANs trained by a two time-scale update rule converge to a local nash equilibrium. In: Advances in Neural Information Processing Systems, vol. 30 (2017)"},{"key":"1_CR27","unstructured":"Ho, J., Jain, A., Abbeel, P.: Denoising diffusion probabilistic models. In: Advances in Neural Information Processing Systems (2020)"},{"key":"1_CR28","unstructured":"Hong, Y., et al.: LRM: large reconstruction model for single image to 3D. 
arXiv preprint arXiv:2311.04400 (2023)"},{"key":"1_CR29","doi-asserted-by":"crossref","unstructured":"Hu, L., et al.: GaussianAvatar: towards realistic human avatar modeling from a single video via animatable 3D gaussians. arXiv preprint arXiv:2312.02134 (2023)","DOI":"10.1109\/CVPR52733.2024.00067"},{"key":"1_CR30","doi-asserted-by":"crossref","unstructured":"Jain, A., Mildenhall, B., Barron, J.T., Abbeel, P., Poole, B.: Zero-shot text-guided object generation with dream fields. In: IEEE Conference on Computer Vision and Pattern Recognition (2022)","DOI":"10.1109\/CVPR52688.2022.00094"},{"key":"1_CR31","doi-asserted-by":"crossref","unstructured":"Jain, A., Tancik, M., Abbeel, P.: Putting NeRF on a diet: semantically consistent few-shot view synthesis. In: International Conference on Computer Vision (2021)","DOI":"10.1109\/ICCV48922.2021.00583"},{"key":"1_CR32","unstructured":"Jia, Y.B.: Pl\u00fccker coordinates for lines in the space. Problem Solver Techniques for Applied Computer Science, Com-S-477\/577 Course Handout (2020)"},{"key":"1_CR33","unstructured":"Jiang, H., Jiang, Z., Zhao, Y., Huang, Q.: LEAP: liberate sparse-view 3D modeling from camera poses. In: International Conference on Learning Representation (2024)"},{"key":"1_CR34","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"694","DOI":"10.1007\/978-3-319-46475-6_43","volume-title":"Computer Vision \u2013 ECCV 2016","author":"J Johnson","year":"2016","unstructured":"Johnson, J., Alahi, A., Fei-Fei, L.: Perceptual losses for real-time style transfer and super-resolution. In: Leibe, B., Matas, J., Sebe, N., Welling, M. (eds.) ECCV 2016. LNCS, vol. 9906, pp. 694\u2013711. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-46475-6_43"},{"key":"1_CR35","unstructured":"Jun, H., Nichol, A.: Shap-E: generating conditional 3D implicit functions. 
arXiv preprint arXiv:2305.02463 (2023)"},{"key":"1_CR36","doi-asserted-by":"crossref","unstructured":"Kang, M., et al.: Scaling up GANs for text-to-image synthesis. In: IEEE Conference on Computer Vision and Pattern Recognition (2023)","DOI":"10.1109\/CVPR52729.2023.00976"},{"key":"1_CR37","doi-asserted-by":"crossref","unstructured":"Karnewar, A., Vedaldi, A., Novotny, D., Mitra, N.J.: HoloDiffusion: training a 3D diffusion model using 2D images. In: IEEE Conference on Computer Vision and Pattern Recognition (2023)","DOI":"10.1109\/CVPR52729.2023.01767"},{"key":"1_CR38","unstructured":"Karras, T., Aila, T., Laine, S., Lehtinen, J.: Progressive growing of GANs for improved quality, stability, and variation. In: International Conference on Learning Representation (2018)"},{"key":"1_CR39","unstructured":"Karras, T., et al.: Alias-free generative adversarial networks. In: Advances in Neural Information Processing Systems (2021)"},{"key":"1_CR40","doi-asserted-by":"crossref","unstructured":"Karras, T., Laine, S., Aila, T.: A style-based generator architecture for generative adversarial networks. In: IEEE Conference on Computer Vision and Pattern Recognition (2019)","DOI":"10.1109\/CVPR.2019.00453"},{"key":"1_CR41","doi-asserted-by":"crossref","unstructured":"Karras, T., Laine, S., Aittala, M., Hellsten, J., Lehtinen, J., Aila, T.: Analyzing and improving the image quality of StyleGAN. In: IEEE Conference on Computer Vision and Pattern Recognition (2020)","DOI":"10.1109\/CVPR42600.2020.00813"},{"key":"1_CR42","doi-asserted-by":"crossref","unstructured":"Kerbl, B., Kopanas, G., Leimk\u00fchler, T., Drettakis, G.: 3D gaussian splatting for real-time radiance field rendering. ACM Trans. Graph. 
42(4) (2023)","DOI":"10.1145\/3592433"},{"key":"1_CR43","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"596","DOI":"10.1007\/978-3-031-19824-3_35","volume-title":"Computer Vision \u2013 ECCV 2022","author":"L Keselman","year":"2022","unstructured":"Keselman, L., Hebert, M.: Approximate differentiable rendering with algebraic surfaces. In: Avidan, S., Brostow, G., Ciss\u00e9, M., Farinella, G.M., Hassner, T. (eds.) ECCV 2022. LNCS, vol. 13692, pp. 596\u2013614. Springer, Cham (2022). https:\/\/doi.org\/10.1007\/978-3-031-19824-3_35"},{"key":"1_CR44","unstructured":"Kirillov, A., et\u00a0al.: Segment anything. arXiv preprint arXiv:2304.02643 (2023)"},{"key":"1_CR45","unstructured":"Li, J., et al.: Instant3D: fast text-to-3D with sparse-view generation and large reconstruction model (2023). https:\/\/arxiv.org\/abs\/2311.06214"},{"key":"1_CR46","unstructured":"Li, X., Wang, H., Tseng, K.K.: GaussianDiffusion: 3D gaussian splatting for denoising diffusion probabilistic models with structured noise. arXiv preprint arXiv:2311.11221 (2023)"},{"key":"1_CR47","doi-asserted-by":"crossref","unstructured":"Li, Z., Zheng, Z., Wang, L., Liu, Y.: Animatable gaussians: learning pose-dependent gaussian maps for high-fidelity human avatar modeling. arXiv preprint arXiv:2311.16096 (2023)","DOI":"10.1109\/CVPR52733.2024.01864"},{"key":"1_CR48","doi-asserted-by":"crossref","unstructured":"Liang, Y., Yang, X., Lin, J., Li, H., Xu, X., Chen, Y.: LucidDreamer: towards high-fidelity text-to-3D generation via interval score matching. arXiv preprint arXiv:2311.11284 (2023)","DOI":"10.1109\/CVPR52733.2024.00623"},{"key":"1_CR49","doi-asserted-by":"crossref","unstructured":"Lin, C.H., et al.: Magic3D: high-resolution text-to-3D content creation. In: IEEE Conference on Computer Vision and Pattern Recognition, pp. 
300\u2013309 (2023)","DOI":"10.1109\/CVPR52729.2023.00037"},{"key":"1_CR50","doi-asserted-by":"crossref","unstructured":"Lin, K.E., Yen-Chen, L., Lai, W.S., Lin, T.Y., Shih, Y.C., Ramamoorthi, R.: Vision transformer for NeRF-based view synthesis from a single input image. In: IEEE Winter Conference on Applications of Computer Vision (2023)","DOI":"10.1109\/WACV56688.2023.00087"},{"key":"1_CR51","doi-asserted-by":"crossref","unstructured":"Ling, H., Kim, S.W., Torralba, A., Fidler, S., Kreis, K.: Align your gaussians: text-to-4D with dynamic 3D gaussians and composed diffusion models. arXiv preprint arXiv:2312.13763 (2023)","DOI":"10.1109\/CVPR52733.2024.00819"},{"key":"1_CR52","doi-asserted-by":"crossref","unstructured":"Liu, M., et al.: One-2-3-45++: fast single image to 3D objects with consistent multi-view generation and 3D diffusion. arXiv preprint arXiv:2311.07885 (2023)","DOI":"10.1109\/CVPR52733.2024.00960"},{"key":"1_CR53","unstructured":"Liu, M., et al.: One-2-3-45: any single image to 3D mesh in 45 seconds without per-shape optimization (2023)"},{"key":"1_CR54","doi-asserted-by":"crossref","unstructured":"Liu, R., Wu, R., Van\u00a0Hoorick, B., Tokmakov, P., Zakharov, S., Vondrick, C.: Zero-1-to-3: zero-shot one image to 3D object. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 9298\u20139309 (2023)","DOI":"10.1109\/ICCV51070.2023.00853"},{"key":"1_CR55","unstructured":"Liu, Y., et al.: SyncDreamer: generating multiview-consistent images from a single-view image. In: The Twelfth International Conference on Learning Representations (2023)"},{"key":"1_CR56","doi-asserted-by":"crossref","unstructured":"Liu, Z., et al.: Swin transformer: hierarchical vision transformer using shifted windows. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 
10012\u201310022 (2021)","DOI":"10.1109\/ICCV48922.2021.00986"},{"key":"1_CR57","doi-asserted-by":"crossref","unstructured":"Long, X., et\u00a0al.: Wonder3D: single image to 3D using cross-domain diffusion. arXiv preprint arXiv:2310.15008 (2023)","DOI":"10.1109\/CVPR52733.2024.00951"},{"key":"1_CR58","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"210","DOI":"10.1007\/978-3-031-19824-3_13","volume-title":"Computer Vision \u2013 ECCV 2022","author":"X Long","year":"2022","unstructured":"Long, X., Lin, C., Wang, P., Komura, T., Wang, W.: SparseNeuS: fast generalizable neural surface reconstruction from sparse views. In: Avidan, S., Brostow, G., Ciss\u00e9, M., Farinella, G.M., Hassner, T. (eds.) ECCV 2022. LNCS, vol. 13692, pp. 210\u2013227. Springer, Cham (2022)"},{"key":"1_CR59","unstructured":"Loshchilov, I., Hutter, F.: Decoupled weight decay regularization. arXiv preprint arXiv:1711.05101 (2017)"},{"key":"1_CR60","doi-asserted-by":"crossref","unstructured":"Luiten, J., Kopanas, G., Leibe, B., Ramanan, D.: Dynamic 3D gaussians: tracking by persistent dynamic view synthesis. arXiv preprint arXiv:2308.09713 (2023)","DOI":"10.1109\/3DV62453.2024.00044"},{"key":"1_CR61","doi-asserted-by":"crossref","unstructured":"Mescheder, L., Oechsle, M., Niemeyer, M., Nowozin, S., Geiger, A.: Occupancy networks: learning 3D reconstruction in function space. In: IEEE Conference on Computer Vision and Pattern Recognition (2019)","DOI":"10.1109\/CVPR.2019.00459"},{"key":"1_CR62","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"405","DOI":"10.1007\/978-3-030-58452-8_24","volume-title":"Computer Vision \u2013 ECCV 2020","author":"B Mildenhall","year":"2020","unstructured":"Mildenhall, B., Srinivasan, P.P., Tancik, M., Barron, J.T., Ramamoorthi, R., Ng, R.: NeRF: representing scenes as neural radiance fields for view synthesis. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.-M. (eds.) 
ECCV 2020. LNCS, vol. 12346, pp. 405\u2013421. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58452-8_24"},{"key":"1_CR63","doi-asserted-by":"publisher","unstructured":"M\u00fcller, T., Evans, A., Schied, C., Keller, A.: Instant neural graphics primitives with a multiresolution hash encoding. ACM Trans. Graph. 41(4), 102:1\u2013102:15 (2022). https:\/\/doi.org\/10.1145\/3528223.3530127","DOI":"10.1145\/3528223.3530127"},{"key":"1_CR64","doi-asserted-by":"crossref","unstructured":"Nguyen-Phuoc, T., Li, C., Theis, L., Richardt, C., Yang, Y.L.: HoloGAN: unsupervised learning of 3D representations from natural images. In: International Conference on Computer Vision (2019)","DOI":"10.1109\/ICCV.2019.00768"},{"key":"1_CR65","unstructured":"Nichol, A., Jun, H., Dhariwal, P., Mishkin, P., Chen, M.: Point-E: a system for generating 3d point clouds from complex prompts. arXiv preprint arXiv:2212.08751 (2022)"},{"key":"1_CR66","doi-asserted-by":"crossref","unstructured":"Niemeyer, M., Geiger, A.: GIRAFFE: representing scenes as compositional generative neural feature fields. In: IEEE Conference on Computer Vision and Pattern Recognition (2021)","DOI":"10.1109\/CVPR46437.2021.01129"},{"key":"1_CR67","unstructured":"Ntavelis, E., Siarohin, A., Olszewski, K., Wang, C., Van\u00a0Gool, L., Tulyakov, S.: Autodecoding latent 3D diffusion models. arXiv preprint arXiv:2307.05445 (2023)"},{"key":"1_CR68","doi-asserted-by":"crossref","unstructured":"Park, J.J., Florence, P., Straub, J., Newcombe, R., Lovegrove, S.: DeepSDF: learning continuous signed distance functions for shape representation. In: IEEE Conference on Computer Vision and Pattern Recognition (2019)","DOI":"10.1109\/CVPR.2019.00025"},{"key":"1_CR69","unstructured":"Po, R., et\u00a0al.: State of the art on diffusion models for visual computing. arXiv preprint arXiv:2310.07204 (2023)"},{"key":"1_CR70","unstructured":"Poole, B., Jain, A., Barron, J.T., Mildenhall, B.: DreamFusion: text-to-3D using 2D diffusion. 
In: The Eleventh International Conference on Learning Representations (2022)"},{"key":"1_CR71","doi-asserted-by":"crossref","unstructured":"Qian, S., Kirschstein, T., Schoneveld, L., Davoli, D., Giebenhain, S., Nie\u00dfner, M.: GaussianAvatars: photorealistic head avatars with rigged 3D gaussians. arXiv preprint arXiv:2312.02069 (2023)","DOI":"10.1109\/CVPR52733.2024.01919"},{"key":"1_CR72","unstructured":"Radford, A., et\u00a0al.: Learning transferable visual models from natural language supervision. In: International Conference on Machine Learning, pp. 8748\u20138763. PMLR (2021)"},{"key":"1_CR73","unstructured":"Ren, J., et al.: DreamGaussian4D: generative 4D Gaussian splatting. arXiv preprint arXiv:2312.17142 (2023)"},{"key":"1_CR74","doi-asserted-by":"crossref","unstructured":"Rombach, R., Blattmann, A., Lorenz, D., Esser, P., Ommer, B.: High-resolution image synthesis with latent diffusion models. In: IEEE Conference on Computer Vision and Pattern Recognition (2022)","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"1_CR75","doi-asserted-by":"crossref","unstructured":"Saito, S., Schwartz, G., Simon, T., Li, J., Nam, G.: Relightable gaussian codec avatars. arXiv preprint arXiv:2312.03704 (2023)","DOI":"10.1109\/CVPR52733.2024.00021"},{"key":"1_CR76","unstructured":"Schwarz, K., Liao, Y., Niemeyer, M., Geiger, A.: GRAF: generative radiance fields for 3D-aware image synthesis. In: Advances in Neural Information Processing Systems (2020)"},{"key":"1_CR77","doi-asserted-by":"crossref","unstructured":"Shen, B., et al.: GINA-3D: learning to generate implicit neural assets in the wild. In: IEEE Conference on Computer Vision and Pattern Recognition, pp. 4913\u20134926 (2023)","DOI":"10.1109\/CVPR52729.2023.00476"},{"key":"1_CR78","unstructured":"Shi, R., et al.: Zero123++: a single image to consistent multi-view diffusion base model. 
arXiv preprint arXiv:2310.15110 (2023)"},{"key":"1_CR79","doi-asserted-by":"crossref","unstructured":"Shi, W., et al.: Real-time single image and video super-resolution using an efficient sub-pixel convolutional neural network. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 1874\u20131883 (2016)","DOI":"10.1109\/CVPR.2016.207"},{"key":"1_CR80","unstructured":"Shi, Y., Wang, P., Ye, J., Mai, L., Li, K., Yang, X.: MVDream: multi-view diffusion for 3D generation. In: The Twelfth International Conference on Learning Representations (2023)"},{"key":"1_CR81","unstructured":"Shi, Z., Peng, S., Xu, Y., Andreas, G., Liao, Y., Shen, Y.: Deep generative models on 3D representations: a survey. arXiv preprint arXiv:2210.15663 (2022)"},{"key":"1_CR82","doi-asserted-by":"crossref","unstructured":"Shue, J.R., Chan, E.R., Po, R., Ankner, Z., Wu, J., Wetzstein, G.: 3D neural field generation using triplane diffusion. In: IEEE Conference on Computer Vision and Pattern Recognition (2023)","DOI":"10.1109\/CVPR52729.2023.02000"},{"key":"1_CR83","unstructured":"Sitzmann, V., Martel, J., Bergman, A., Lindell, D., Wetzstein, G.: Implicit neural representations with periodic activation functions. In: Advances in Neural Information Processing Systems, vol. 33, pp. 7462\u20137473 (2020)"},{"key":"1_CR84","unstructured":"Sitzmann, V., Rezchikov, S., Freeman, B., Tenenbaum, J., Durand, F.: Light field networks: neural scene representations with single-evaluation rendering. In: NeurIPS (2021)"},{"key":"1_CR85","unstructured":"Sitzmann, V., Zollh\u00f6fer, M., Wetzstein, G.: Scene representation networks: continuous 3D-structure-aware neural scene representations. In: Advances in Neural Information Processing Systems, vol. 32 (2019)"},{"key":"1_CR86","unstructured":"Skorokhodov, I., Siarohin, A., Xu, Y., Ren, J., Lee, H.Y., Wonka, P., Tulyakov, S.: 3D generation on ImageNet. In: International Conference on Learning Representations (2023). 
https:\/\/openreview.net\/forum?id=U2WjB9xxZ9q"},{"key":"1_CR87","unstructured":"Skorokhodov, I., Tulyakov, S., Wang, Y., Wonka, P.: EpiGRAF: rethinking training of 3D GANs. In: Advances in Neural Information Processing Systems (2022)"},{"key":"1_CR88","unstructured":"Song, Y., Sohl-Dickstein, J., Kingma, D.P., Kumar, A., Ermon, S., Poole, B.: Score-based generative modeling through stochastic differential equations. arXiv preprint arXiv:2011.13456 (2020)"},{"key":"1_CR89","doi-asserted-by":"crossref","unstructured":"Szymanowicz, S., Rupprecht, C., Vedaldi, A.: Splatter image: ultra-fast single-view 3D reconstruction. arXiv preprint arXiv:2312.13150 (2023)","DOI":"10.1109\/CVPR52733.2024.00972"},{"key":"1_CR90","doi-asserted-by":"crossref","unstructured":"Szymanowicz, S., Rupprecht, C., Vedaldi, A.: Viewset diffusion:(0-) image-conditioned 3D generative models from 2D data. arXiv preprint arXiv:2306.07881 (2023)","DOI":"10.1109\/ICCV51070.2023.00814"},{"key":"1_CR91","doi-asserted-by":"crossref","unstructured":"Tang, J., Chen, Z., Chen, X., Wang, T., Zeng, G., Liu, Z.: LGM: large multi-view gaussian model for high-resolution 3D content creation. arXiv preprint arXiv:2402.05054 (2024)","DOI":"10.1007\/978-3-031-73235-5_1"},{"key":"1_CR92","unstructured":"Tang, J., Ren, J., Zhou, H., Liu, Z., Zeng, G.: DreamGaussian: generative gaussian splatting for efficient 3D content creation. arXiv preprint arXiv:2309.16653 (2023)"},{"key":"1_CR93","doi-asserted-by":"crossref","unstructured":"Tang, J., et al.: Make-it-3D: high-fidelity 3D creation from a single image with diffusion prior. arXiv preprint arXiv:2303.14184 (2023)","DOI":"10.1109\/ICCV51070.2023.02086"},{"key":"1_CR94","doi-asserted-by":"crossref","unstructured":"Tewari, A., et\u00a0al.: Advances in neural rendering. In: Computer Graphics Forum, pp. 
703\u2013735 (2022)","DOI":"10.1111\/cgf.14507"},{"key":"1_CR95","unstructured":"Tewari, A., et al.: Diffusion with forward models: solving stochastic inverse problems without direct supervision. In: Advances in Neural Information Processing Systems, vol. 36 (2024)"},{"key":"1_CR96","unstructured":"Tosi, F., et al.: How NeRFs and 3D gaussian splatting are reshaping SLAM: a survey. arXiv preprint arXiv:2402.13255 (2024)"},{"key":"1_CR97","doi-asserted-by":"crossref","unstructured":"Wang, H., Du, X., Li, J., Yeh, R.A., Shakhnarovich, G.: Score Jacobian chaining: lifting pretrained 2D diffusion models for 3D generation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 12619\u201312629 (2023)","DOI":"10.1109\/CVPR52729.2023.01214"},{"key":"1_CR98","unstructured":"Wang, P., et al.: PF-LRM: pose-free large reconstruction model for joint pose and shape prediction. arXiv preprint arXiv:2311.12024 (2023)"},{"key":"1_CR99","doi-asserted-by":"crossref","unstructured":"Wang, Q., et al.: IBRNet: learning multi-view image-based rendering. In: IEEE Conference on Computer Vision and Pattern Recognition (2021)","DOI":"10.1109\/CVPR46437.2021.00466"},{"key":"1_CR100","unstructured":"Wang, Z., et al.: ProlificDreamer: high-fidelity and diverse text-to-3d generation with variational score distillation. arXiv preprint arXiv:2305.16213 (2023)"},{"key":"1_CR101","doi-asserted-by":"crossref","unstructured":"Wu, G., et al.: 4D gaussian splatting for real-time dynamic scene rendering. arXiv preprint arXiv:2310.08528 (2023)","DOI":"10.1109\/CVPR52733.2024.01920"},{"key":"1_CR102","unstructured":"Xu, D., et al.: AGG: amortized generative 3D gaussians for single image to 3D. arXiv preprint arXiv:2401.04099 (2024)"},{"key":"1_CR103","doi-asserted-by":"crossref","unstructured":"Xu, Y., et\u00a0al.: DisCoScene: spatially disentangled generative radiance fields for controllable 3d-aware scene synthesis. 
In: IEEE Conference on Computer Vision and Pattern Recognition (2023)","DOI":"10.1109\/CVPR52729.2023.00428"},{"key":"1_CR104","doi-asserted-by":"crossref","unstructured":"Xu, Y., Peng, S., Yang, C., Shen, Y., Zhou, B.: 3D-aware image synthesis via learning structural and textural representations. In: IEEE Conference on Computer Vision and Pattern Recognition (2022)","DOI":"10.1109\/CVPR52688.2022.01788"},{"key":"1_CR105","unstructured":"Xu, Y., et\u00a0al.: DMV3D: denoising multi-view diffusion using 3D large reconstruction model. arXiv preprint arXiv:2311.09217 (2023)"},{"key":"1_CR106","unstructured":"Yang, Z., Yang, H., Pan, Z., Zhu, X., Zhang, L.: Real-time photorealistic dynamic scene representation and rendering with 4D gaussian splatting. arXiv preprint arXiv:2310.10642 (2023)"},{"key":"1_CR107","doi-asserted-by":"crossref","unstructured":"Yang, Z., Gao, X., Zhou, W., Jiao, S., Zhang, Y., Jin, X.: Deformable 3D gaussians for high-fidelity monocular dynamic scene reconstruction. arXiv preprint arXiv:2309.13101 (2023)","DOI":"10.1109\/CVPR52733.2024.01922"},{"key":"1_CR108","doi-asserted-by":"crossref","unstructured":"Yu, A., Ye, V., Tancik, M., Kanazawa, A.: PixelNeRF: neural radiance fields from one or few images. In: IEEE Conference on Computer Vision and Pattern Recognition (2021)","DOI":"10.1109\/CVPR46437.2021.00455"},{"key":"1_CR109","doi-asserted-by":"crossref","unstructured":"Zhang, K., et al.: ARF: artistic radiance fields (2022)","DOI":"10.1007\/978-3-031-19821-2_41"},{"key":"1_CR110","unstructured":"Zhu, J., Yang, C., Zheng, K., Xu, Y., Shi, Z., Shen, Y.: Exploring sparse MoE in GANs for text-conditioned image synthesis. arXiv preprint arXiv:2309.03904 (2023)"},{"key":"1_CR111","unstructured":"Zielonka, W., Bagautdinov, T., Saito, S., Zollh\u00f6fer, M., Thies, J., Romero, J.: Drivable 3D Gaussian avatars. 
arXiv preprint arXiv:2311.08581 (2023)"},{"key":"1_CR112","doi-asserted-by":"crossref","unstructured":"Zou, Z.X., et al.: Triplane meets Gaussian splatting: fast and generalizable single-view 3D reconstruction with transformers. arXiv preprint arXiv:2312.09147 (2023)","DOI":"10.1109\/CVPR52733.2024.00983"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2024"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-72633-0_1","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,11,21]],"date-time":"2024-11-21T08:04:06Z","timestamp":1732176246000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-72633-0_1"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,11,22]]},"ISBN":["9783031726323","9783031726330"],"references-count":112,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-72633-0_1","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,11,22]]},"assertion":[{"value":"22 November 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Milan","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference 
Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 September 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4 October 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2024.ecva.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}