{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,13]],"date-time":"2026-05-13T17:44:10Z","timestamp":1778694250649,"version":"3.51.4"},"publisher-location":"Cham","reference-count":64,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031732317","type":"print"},{"value":"9783031732324","type":"electronic"}],"license":[{"start":{"date-parts":[[2024,9,30]],"date-time":"2024-09-30T00:00:00Z","timestamp":1727654400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,9,30]],"date-time":"2024-09-30T00:00:00Z","timestamp":1727654400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-73232-4_25","type":"book-chapter","created":{"date-parts":[[2024,9,29]],"date-time":"2024-09-29T06:01:53Z","timestamp":1727589713000},"page":"439-457","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":135,"title":["SV3D: Novel Multi-view Synthesis and\u00a03D Generation from\u00a0a\u00a0Single Image Using Latent Video Diffusion"],"prefix":"10.1007","author":[{"given":"Vikram","family":"Voleti","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Chun-Han","family":"Yao","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Mark","family":"Boss","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Adam","family":"Letts","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"David","family":"Pankratz","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Dmitry","family":"Tochilkin","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Christian","family":"Laforte","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Robin","family":"Rombach","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Varun","family":"Jampani","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2024,9,30]]},"reference":[{"key":"25_CR1","doi-asserted-by":"crossref","unstructured":"Barron, J.T., Mildenhall, B., Tancik, M., Hedman, P., Martin-Brualla, R., Srinivasan, P.P.: Mip-NeRF: a multiscale representation for anti-aliasing neural radiance fields. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 5855\u20135864 (2021)","DOI":"10.1109\/ICCV48922.2021.00580"},{"key":"25_CR2","unstructured":"Blattmann, A., et\u00a0al.: Stable Video Diffusion: scaling latent video diffusion models to large datasets. arXiv preprint arXiv:2311.15127 (2023)"},{"key":"25_CR3","doi-asserted-by":"crossref","unstructured":"Blattmann, A., et al.: Align your Latents: high-resolution video synthesis with latent diffusion models. arXiv:2304.08818 (2023)","DOI":"10.1109\/CVPR52729.2023.02161"},{"key":"25_CR4","doi-asserted-by":"crossref","unstructured":"Boss, M., Braun, R., Jampani, V., Barron, J.T., Liu, C., Lensch, H.P.: NeRD: neural reflectance decomposition from image collections. In: IEEE International Conference on Computer Vision (ICCV) (2021)","DOI":"10.1109\/ICCV48922.2021.01245"},{"key":"25_CR5","unstructured":"Boss, M., et al.: SAMURAI: shape and material from unconstrained real-world arbitrary image collections. In: Advances in Neural Information Processing Systems (NeurIPS) (2022)"},{"key":"25_CR6","doi-asserted-by":"crossref","unstructured":"Chan, E.R., et al.: GeNVS: generative novel view synthesis with 3D-aware diffusion models. In: International Conference on Computer Vision (2023)","DOI":"10.1109\/ICCV51070.2023.00389"},{"issue":"4","key":"25_CR7","doi-asserted-by":"publisher","first-page":"1421","DOI":"10.1214\/aos\/1176351046","volume":"16","author":"DB Cline","year":"1988","unstructured":"Cline, D.B.: Admissibile kernel estimators of a multivariate density. Ann. Stat. 16(4), 1421\u20131427 (1988)","journal-title":"Ann. Stat."},{"key":"25_CR8","doi-asserted-by":"crossref","unstructured":"Deitke, M., et\u00a0al.: Objaverse-XL: a universe of 10m+ 3D objects. arXiv preprint arXiv:2307.05663 (2023)","DOI":"10.1109\/CVPR52729.2023.01263"},{"key":"25_CR9","doi-asserted-by":"crossref","unstructured":"Deitke, M., et al.: Objaverse: a universe of annotated 3D objects. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 13142\u201313153 (2023)","DOI":"10.1109\/CVPR52729.2023.01263"},{"key":"25_CR10","doi-asserted-by":"crossref","unstructured":"Downs, L., et al.: Google scanned objects: a high-quality dataset of 3D scanned household items. In: 2022 International Conference on Robotics and Automation (ICRA), pp. 2553\u20132560. IEEE (2022)","DOI":"10.1109\/ICRA46639.2022.9811809"},{"key":"25_CR11","doi-asserted-by":"crossref","unstructured":"Eftekhar, A., Sax, A., Bachmann, R., Malik, J., Zamir, A.: Omnidata: a scalable pipeline for making multi-task mid-level vision datasets from 3D scans. In: IEEE International Conference on Computer Vision (ICCV) (2021)","DOI":"10.1109\/ICCV48922.2021.01061"},{"key":"25_CR12","unstructured":"Girdhar, R., et al.: EMU VIDEO: factorizing text-to-video generation by explicit image conditioning (2023). https:\/\/emu-video.metademolab.com\/assets\/emu_video.pdf"},{"key":"25_CR13","unstructured":"Hasselgren, J., Hofmann, N., Munkberg, J.: Shape, light, and material decomposition from images using monte carlo rendering and denoising. In: Advances in Neural Information Processing Systems (NeurIPS) (2022)"},{"key":"25_CR14","unstructured":"Ho, J., Jain, A., Abbeel, P.: Denoising diffusion probabilistic models. In: Advances in Neural Information Processing Systems (2020)"},{"key":"25_CR15","unstructured":"Hong, Y., et al.: LRM: Large reconstruction model for single image to 3D. In: International Conference on Learning Representations (2024)"},{"key":"25_CR16","unstructured":"Jun, H., Nichol, A.: Shap-E: Generating conditional 3D implicit functions (2023)"},{"issue":"2","key":"25_CR17","doi-asserted-by":"publisher","first-page":"358","DOI":"10.1109\/4.996","volume":"23","author":"N Kanopoulos","year":"1988","unstructured":"Kanopoulos, N., Vasanthavada, N., Baker, R.L.: Design of an image edge detection filter using the sobel operator. IEEE J. Solid-State Circuits 23(2), 358\u2013367 (1988)","journal-title":"IEEE J. Solid-State Circuits"},{"key":"25_CR18","doi-asserted-by":"crossref","unstructured":"Kant, Y., et al.: Spad : spatially aware multiview diffusers. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (2024)","DOI":"10.1109\/CVPR52733.2024.00956"},{"key":"25_CR19","unstructured":"Karras, T., Aittala, M., Aila, T., Laine, S.: Elucidating the Design Space of Diffusion-Based Generative Models. arXiv:2206.00364 (2022)"},{"key":"25_CR20","unstructured":"Kingma, D.P., Ba, J.: Adam: a method for stochastic optimization. arXiv preprint arXiv:1412.6980 (2014)"},{"key":"25_CR21","doi-asserted-by":"crossref","unstructured":"Kong, X., Liu, S., Lyu, X., Taher, M., Qi, X., Davison, A.J.: Eschernet: a generative model for scalable view synthesis. arXiv preprint arXiv:2402.03908 (2024)","DOI":"10.1109\/CVPR52733.2024.00908"},{"key":"25_CR22","doi-asserted-by":"crossref","unstructured":"Kwak, J.G., Dong, E., Jin, Y., Ko, H., Mahajan, S., Yi, K.M.: Vivid-1-to-3: novel view synthesis with video diffusion models. In: IEEE Conference on Computer Vision and Pattern Recognition (2024)","DOI":"10.1109\/CVPR52733.2024.00647"},{"key":"25_CR23","doi-asserted-by":"crossref","unstructured":"Lin, C.H., et al.: Magic3D: High-resolution text-to-3D content creation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 300\u2013309 (2023)","DOI":"10.1109\/CVPR52729.2023.00037"},{"key":"25_CR24","doi-asserted-by":"crossref","unstructured":"Liu, M., et al.: One-2-3-45++: fast single image to 3D objects with consistent multi-view generation and 3D diffusion. arXiv preprint arXiv:2311.07885 (2023)","DOI":"10.1109\/CVPR52733.2024.00960"},{"key":"25_CR25","unstructured":"Liu, M., et al.: One-2-3-45: any single image to 3D mesh in 45 seconds without per-shape optimization. In: Advances in Neural Information Processing Systems, vol. 36 (2023)"},{"key":"25_CR26","doi-asserted-by":"crossref","unstructured":"Liu, R., Wu, R., Hoorick, B.V., Tokmakov, P., Zakharov, S., Vondrick, C.: Zero-1-to-3: zero-shot one image to 3D object. In: International Conference on Computer Vision (2023)","DOI":"10.1109\/ICCV51070.2023.00853"},{"key":"25_CR27","unstructured":"Liu, Y., et al.: SyncDreamer: generating multiview-consistent images from a single-view image. arXiv preprint arXiv:2309.03453 (2023)"},{"key":"25_CR28","doi-asserted-by":"crossref","unstructured":"Long, X., et\u00a0al.: Wonder3D: single image to 3D using cross-domain diffusion. arXiv preprint arXiv:2310.15008 (2023)","DOI":"10.1109\/CVPR52733.2024.00951"},{"key":"25_CR29","unstructured":"Melas-Kyriazi, L., et al.: IM-3D: iterative multiview diffusion and reconstruction for high-quality 3D generation. arXiv preprint arXiv:2402.08682 (2024)"},{"key":"25_CR30","unstructured":"Mercier, A., et al.: HexaGen3D: stablediffusion is just one step away from fast and diverse Text-to-3D generation. arXiv preprint arXiv:2401.07727 (2024)"},{"key":"25_CR31","doi-asserted-by":"crossref","unstructured":"Metzer, G., Richardson, E., Patashnik, O., Giryes, R., Cohen-Or, D.: Latent-NeRF for shape-guided generation of 3D shapes and textures. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 12663\u201312673 (2023)","DOI":"10.1109\/CVPR52729.2023.01218"},{"key":"25_CR32","doi-asserted-by":"crossref","unstructured":"Mildenhall, B., Srinivasan, P.P., Tancik, M., Barron, J.T., Ramamoorthi, R., Ng, R.: NeRF: representing scenes as neural radiance fields for view synthesis. In: European Conference on Computer Vision, pp. 405\u2013421 (2020)","DOI":"10.1007\/978-3-030-58452-8_24"},{"key":"25_CR33","doi-asserted-by":"publisher","DOI":"10.1145\/3528223.3530127","volume-title":"Instant Neural Graphics Primitives with a Multiresolution Hash Encoding","author":"T M\u00fcller","year":"2022","unstructured":"M\u00fcller, T., Evans, A., Schied, C., Keller, A.: Instant Neural Graphics Primitives with a Multiresolution Hash Encoding. ACM Trans, Graph (2022)"},{"key":"25_CR34","unstructured":"Nichol, A., Jun, H., Dhariwal, P., Mishkin, P., Chen, M.: Point-E: A System for Generating 3D Point Clouds from Complex Prompts (2022)"},{"key":"25_CR35","doi-asserted-by":"crossref","unstructured":"Niemeyer, M., Barron, J.T., Mildenhall, B., Sajjadi, M.S.M., Geiger, A., Radwan, N.: RegNeRF: regularizing neural radiance fields for view synthesis from sparse inputs. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR) (2022)","DOI":"10.1109\/CVPR52688.2022.00540"},{"key":"25_CR36","unstructured":"Oquab, M., et\u00a0al.: Dinov2: learning robust visual features without supervision. arXiv preprint arXiv:2304.07193 (2023)"},{"key":"25_CR37","unstructured":"Poole, B., Jain, A., Barron, J.T., Mildenhall, B.: Dreamfusion: text-to-3D using 2D diffusion. arXiv (2022)"},{"key":"25_CR38","unstructured":"Qian, G et\u00a0al.: Magic123: one image to high-quality 3D object generation using both 2D and 3D diffusion priors. arXiv preprint arXiv:2306.17843 (2023)"},{"key":"25_CR39","unstructured":"Radford, A., et\u00a0al.: Learning transferable visual models from natural language supervision. In: International Conference on Machine Learning, pp. 8748\u20138763. PMLR (2021)"},{"key":"25_CR40","doi-asserted-by":"crossref","unstructured":"Rombach, R., Blattmann, A., Lorenz, D., Esser, P., Ommer, B.: High-resolution image synthesis with latent diffusion models. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp. 10684\u201310695 (2022)","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"25_CR41","doi-asserted-by":"crossref","unstructured":"Ruiz, N., Li, Y., Jampani, V., Pritch, Y., Rubinstein, M., Aberman, K.: DreamBooth: fine tuning text-to-image dissusion models for subject-driven generation. arXiv preprint arXiv:2208.12242 (2022)","DOI":"10.1109\/CVPR52729.2023.02155"},{"key":"25_CR42","doi-asserted-by":"crossref","unstructured":"Saharia, C., et al.: Photorealistic text-to-image diffusion models with deep language understanding. arXiv:2205.11487 (2022)","DOI":"10.1145\/3528233.3530757"},{"key":"25_CR43","first-page":"25278","volume":"35","author":"C Schuhmann","year":"2022","unstructured":"Schuhmann, C., et al.: LAION-5B: an open large-scale dataset for training next generation image-text models. Adv. Neural. Inf. Process. Syst. 35, 25278\u201325294 (2022)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"25_CR44","unstructured":"Shen, T., Gao, J., Yin, K., Liu, M.Y., Fidler, S.: Deep marching tetrahedra: a hybrid representation for high-resolution 3D shape synthesis. In: Advances in Neural Information Processing Systems (NeurIPS) (2021)"},{"key":"25_CR45","unstructured":"Shi, R., et al.: Zero123++: a single image to consistent multi-view diffusion base model. arXiv preprint arXiv:2310.15110 (2023)"},{"key":"25_CR46","unstructured":"Shi, Y., Wang, P., Ye, J., Long, M., Li, K., Yang, X.: MVDream: multi-view diffusion for 3d generation. arXiv preprint arXiv:2308.16512 (2023)"},{"key":"25_CR47","unstructured":"Song, Y., Ermon, S.: Improved Techniques for Training Score-Based Generative Models. arXiv:2006.09011 (2020)"},{"key":"25_CR48","unstructured":"Song, Y., Sohl-Dickstein, J., Kingma, D.P., Kumar, A., Ermon, S., Poole, B.: Score-based generative modeling through stochastic differential equations. arXiv preprint arXiv:2011.13456 (2020)"},{"key":"25_CR49","unstructured":"StabilityAI: Stable Zero123 (2023). https:\/\/stability.ai\/news\/stable-zero123-3d-generation"},{"key":"25_CR50","unstructured":"Tang, J., Ren, J., Zhou, H., Liu, Z., Zeng, G.: DreamGaussian: generative Gaussian splatting for efficient 3D content creation. arXiv preprint arXiv:2309.16653 (2023)"},{"key":"25_CR51","doi-asserted-by":"crossref","unstructured":"Tang, S., et al.: Mvdiffusion++: a dense high-resolution multi-view diffusion model for single or sparse-view 3D object reconstruction. arXiv preprint arXiv:2402.12712 (2024)","DOI":"10.1007\/978-3-031-72640-8_10"},{"key":"25_CR52","doi-asserted-by":"crossref","unstructured":"Tsai, Y.T., Shih, Z.C.: All-frequency precomputed radiance transfer using spherical radial basis functions and clustered tensor approximation. ACM Trans. Graph. (ToG) 25(3), 976 (2006)","DOI":"10.1145\/1141911.1141981"},{"key":"25_CR53","unstructured":"Voleti, V., Jolicoeur-Martineau, A., Pal, C.: MCVD: masked conditional video diffusion for prediction, generation, and interpolation. In: (NeurIPS) Advances in Neural Information Processing Systems (2022)"},{"key":"25_CR54","unstructured":"Watson, D., Chan, W., Martin-Brualla, R., Ho, J., Tagliasacchi, A., Norouzi, M.: Novel view synthesis with diffusion models (2022)"},{"key":"25_CR55","unstructured":"Weng, H., et al.: Consistent123: improve consistency for one image to 3D object synthesis (2023)"},{"key":"25_CR56","doi-asserted-by":"crossref","unstructured":"Wu, T., et\u00a0al.: Omniobject3D: large-vocabulary 3D object dataset for realistic perception, reconstruction and generation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 803\u2013814 (2023)","DOI":"10.1109\/CVPR52729.2023.00084"},{"key":"25_CR57","doi-asserted-by":"crossref","unstructured":"Yang, J., Cheng, Z., Duan, Y., Ji, P., Li, H.: ConsistNet: enforcing 3D consistency for multi-view images diffusion. arXiv preprint arXiv:2310.10343 (2023)","DOI":"10.1109\/CVPR52733.2024.00676"},{"key":"25_CR58","unstructured":"Yao, C.H., et al.: ARTIC3D: learning robust articulated 3D shapes from noisy web image collections. In: Advances in Neural Information Processing Systems, vol. 36 (2024)"},{"key":"25_CR59","doi-asserted-by":"crossref","unstructured":"Ye, J., Wang, P., Li, K., Shi, Y., Wang, H.: Consistent-1-to-3: consistent image to 3D view synthesis via geometry-aware diffusion models. In: 3DV (2024)","DOI":"10.1109\/3DV62453.2024.00027"},{"key":"25_CR60","unstructured":"Young, J.: xatlas. https:\/\/github.com\/jpcy\/xatlas (2024)"},{"key":"25_CR61","unstructured":"Yu, Z., Peng, S., Niemeyer, M., Sattler, T., Geiger, A.: MonoSDF: exploring monocular geometric cues for neural implicit surface reconstruction. In: Advances in Neural Information Processing Systems (NeurIPS) (2022)"},{"key":"25_CR62","doi-asserted-by":"crossref","unstructured":"Zhang, K., Luan, F., Wang, Q., Bala, K., Snavely, N.: PhySG: inverse rendering with spherical gaussians for physics-based material editing and relighting. In: The IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR) (2021)","DOI":"10.1109\/CVPR46437.2021.00541"},{"key":"25_CR63","doi-asserted-by":"crossref","unstructured":"Zhang, R., Isola, P., Efros, A.A., Shechtman, E., Wang, O.: The unreasonable effectiveness of deep features as a perceptual metric. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 586\u2013595 (2018)","DOI":"10.1109\/CVPR.2018.00068"},{"key":"25_CR64","doi-asserted-by":"crossref","unstructured":"Zheng, C., Vedaldi, A.: Free3D: consistent novel view synthesis without 3D representation. arXiv preprint arXiv:2312.04551 (2023)","DOI":"10.1109\/CVPR52733.2024.00928"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2024"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-73232-4_25","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,11,28]],"date-time":"2024-11-28T21:15:46Z","timestamp":1732828546000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-73232-4_25"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,9,30]]},"ISBN":["9783031732317","9783031732324"],"references-count":64,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-73232-4_25","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,9,30]]},"assertion":[{"value":"30 September 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Milan","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 September 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4 October 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2024.ecva.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}