{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,1]],"date-time":"2026-05-01T17:43:17Z","timestamp":1777657397260,"version":"3.51.4"},"publisher-location":"Cham","reference-count":92,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031730382","type":"print"},{"value":"9783031730399","type":"electronic"}],"license":[{"start":{"date-parts":[[2024,10,31]],"date-time":"2024-10-31T00:00:00Z","timestamp":1730332800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,10,31]],"date-time":"2024-10-31T00:00:00Z","timestamp":1730332800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-73039-9_24","type":"book-chapter","created":{"date-parts":[[2024,10,30]],"date-time":"2024-10-30T14:57:07Z","timestamp":1730300227000},"page":"416-435","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":4,"title":["Viewpoint Textual Inversion: Discovering Scene Representations and\u00a03D View Control in\u00a02D Diffusion Models"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-0823-2848","authenticated-orcid":false,"given":"James","family":"Burgess","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6785-8146","authenticated-orcid":false,"given":"Kuan-Chieh","family":"Wang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0529-0628","authenticated-orcid":false,"given":"Serena","family":"Yeung-Levy","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2024,10,31]]},"reference":[{"key":"24_CR1","doi-asserted-by":"crossref","unstructured":"Alaluf, Y., Richardson, E., Metzer, G., Cohen-Or, D.: A neural space-time representation for text-to-image personalization. arXiv preprint arXiv:2305.15391 (2023)","DOI":"10.1145\/3618322"},{"key":"24_CR2","doi-asserted-by":"crossref","unstructured":"Anciukevi\u010dius, T., et al.: Renderdiffusion: image diffusion for 3d reconstruction, inpainting and generation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 12608\u201312618 (2023)","DOI":"10.1109\/CVPR52729.2023.01213"},{"key":"24_CR3","doi-asserted-by":"crossref","unstructured":"Avrahami, O., Aberman, K., Fried, O., Cohen-Or, D., Lischinski, D.: Break-a-scene: extracting multiple concepts from a single image. arXiv preprint arXiv:2305.16311 (2023)","DOI":"10.1145\/3610548.3618154"},{"key":"24_CR4","unstructured":"Ba, J.L., Kiros, J.R., Hinton, G.E.: Layer normalization. arXiv preprint arXiv:1607.06450 (2016)"},{"key":"24_CR5","unstructured":"Brooks, T., et al.: Video generation models as world simulators (2024). https:\/\/openai.com\/research\/video-generation-models-as-world-simulators"},{"key":"24_CR6","doi-asserted-by":"crossref","unstructured":"Chan, E.R., et al.: Generative novel view synthesis with 3d-aware diffusion models. arXiv preprint arXiv:2304.02602 (2023)","DOI":"10.1109\/ICCV51070.2023.00389"},{"key":"24_CR7","unstructured":"Chang, A.X., et\u00a0al.: Shapenet: an information-rich 3d model repository. arXiv preprint arXiv:1512.03012 (2015)"},{"key":"24_CR8","doi-asserted-by":"crossref","unstructured":"Chen, A., et al.: Mvsnerf: fast generalizable radiance field reconstruction from multi-view stereo. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 14124\u201314133 (2021)","DOI":"10.1109\/ICCV48922.2021.01386"},{"key":"24_CR9","unstructured":"Chen, Y., Vi\u00e9gas, F., Wattenberg, M.: Beyond surface statistics: scene representations in a latent diffusion model. arXiv preprint arXiv:2306.05720 (2023)"},{"key":"24_CR10","doi-asserted-by":"crossref","unstructured":"Cheng, T.Y., et al.: Learning continuous 3d words for text-to-image generation. arXiv preprint arXiv:2402.08654 (2024)","DOI":"10.1109\/CVPR52733.2024.00645"},{"key":"24_CR11","doi-asserted-by":"crossref","unstructured":"Chibane, J., Bansal, A., Lazova, V., Pons-Moll, G.: Stereo radiance fields (SRF): learning view synthesis for sparse views of novel scenes. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 7911\u20137920 (2021)","DOI":"10.1109\/CVPR46437.2021.00782"},{"key":"24_CR12","doi-asserted-by":"crossref","unstructured":"Dai, A., Chang, A.X., Savva, M., Halber, M., Funkhouser, T., Nie\u00dfner, M.: Scannet: richly-annotated 3d reconstructions of indoor scenes. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 5828\u20135839 (2017)","DOI":"10.1109\/CVPR.2017.261"},{"key":"24_CR13","doi-asserted-by":"crossref","unstructured":"Deitke, M., et al.: Objaverse: a universe of annotated 3d objects. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 13142\u201313153 (2023)","DOI":"10.1109\/CVPR52729.2023.01263"},{"key":"24_CR14","doi-asserted-by":"crossref","unstructured":"Deng, C., et\u00a0al.: Nerdi: single-view nerf synthesis with language-guided diffusion as general image priors. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 20637\u201320647 (2023)","DOI":"10.1109\/CVPR52729.2023.01977"},{"key":"24_CR15","doi-asserted-by":"crossref","unstructured":"Deng, K., Liu, A., Zhu, J.Y., Ramanan, D.: Depth-supervised nerf: fewer views and faster training for free. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 12882\u201312891 (2022)","DOI":"10.1109\/CVPR52688.2022.01254"},{"key":"24_CR16","doi-asserted-by":"crossref","unstructured":"El\u00a0Banani, M., et al.: Probing the 3d awareness of visual foundation models. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 21795\u201321806 (2024)","DOI":"10.1109\/CVPR52733.2024.02059"},{"key":"24_CR17","unstructured":"Epstein, D., Jabri, A., Poole, B., Efros, A., Holynski, A.: Diffusion self-guidance for controllable image generation. Adv. Neural Inf. Process. Syst. 36 (2024)"},{"key":"24_CR18","unstructured":"Gal, R., et al.: An image is worth one word: personalizing text-to-image generation using textual inversion. arXiv preprint arXiv:2208.01618 (2022)"},{"key":"24_CR19","doi-asserted-by":"crossref","unstructured":"Gal, R., Arar, M., Atzmon, Y., Bermano, A.H., Chechik, G., Cohen-Or, D.: Encoder-based domain tuning for fast personalization of text-to-image models. arXiv preprint arXiv:2302.12228 (2023)","DOI":"10.1145\/3592133"},{"key":"24_CR20","doi-asserted-by":"crossref","unstructured":"Gal, R., Arar, M., Atzmon, Y., Bermano, A.H., Chechik, G., Cohen-Or, D.: Encoder-based domain tuning for fast personalization of text-to-image models. arXiv preprint arXiv:2302.12228 (2023)","DOI":"10.1145\/3592133"},{"key":"24_CR21","unstructured":"Hedlin, E., et al.: Unsupervised semantic correspondence using stable diffusion. Adv. Neural Inf. Process. Syst. 36 (2024)"},{"key":"24_CR22","unstructured":"Hertz, A., Mokady, R., Tenenbaum, J., Aberman, K., Pritch, Y., Cohen-Or, D.: Prompt-to-prompt image editing with cross attention control. arXiv preprint arXiv:2208.01626 (2022)"},{"key":"24_CR23","unstructured":"Ho, J., et\u00a0al.: Imagen video: high definition video generation with diffusion models. arXiv preprint arXiv:2210.02303 (2022)"},{"key":"24_CR24","first-page":"6840","volume":"33","author":"J Ho","year":"2020","unstructured":"Ho, J., Jain, A., Abbeel, P.: Denoising diffusion probabilistic models. Adv. Neural. Inf. Process. Syst. 33, 6840\u20136851 (2020)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"24_CR25","unstructured":"Ho, J., Salimans, T.: Classifier-free diffusion guidance. arXiv preprint arXiv:2207.12598 (2022)"},{"key":"24_CR26","unstructured":"Hu, E.J., et al.: Lora: low-rank adaptation of large language models. arXiv preprint arXiv:2106.09685 (2021)"},{"key":"24_CR27","doi-asserted-by":"crossref","unstructured":"Jain, A., Tancik, M., Abbeel, P.: Putting nerf on a diet: semantically consistent few-shot view synthesis. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 5885\u20135894 (2021)","DOI":"10.1109\/ICCV48922.2021.00583"},{"key":"24_CR28","doi-asserted-by":"crossref","unstructured":"Jensen, R., Dahl, A., Vogiatzis, G., Tola, E., Aan\u00e6s, H.: Large scale multi-view stereopsis evaluation. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 406\u2013413 (2014)","DOI":"10.1109\/CVPR.2014.59"},{"key":"24_CR29","doi-asserted-by":"crossref","unstructured":"Karnewar, A., Vedaldi, A., Novotny, D., Mitra, N.J.: Holodiffusion: training a 3d diffusion model using 2d images. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 18423\u201318433 (2023)","DOI":"10.1109\/CVPR52729.2023.01767"},{"key":"24_CR30","unstructured":"Kingma, D.P., Welling, M.: Auto-encoding variational bayes. arXiv preprint arXiv:1312.6114 (2013)"},{"key":"24_CR31","doi-asserted-by":"crossref","unstructured":"Kumari, N., Zhang, B., Zhang, R., Shechtman, E., Zhu, J.Y.: Multi-concept customization of text-to-image diffusion. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 1931\u20131941 (2023)","DOI":"10.1109\/CVPR52729.2023.00192"},{"key":"24_CR32","doi-asserted-by":"crossref","unstructured":"Lin, K.E., Lin, Y.C., Lai, W.S., Lin, T.Y., Shih, Y.C., Ramamoorthi, R.: Vision transformer for nerf-based view synthesis from a single input image. In: Proceedings of the IEEE\/CVF Winter Conference on Applications of Computer Vision, pp. 806\u2013815 (2023)","DOI":"10.1109\/WACV56688.2023.00087"},{"key":"24_CR33","doi-asserted-by":"crossref","unstructured":"Liu, R., Wu, R., Hoorick, B.V., Tokmakov, P., Zakharov, S., Vondrick, C.: Zero-1-to-3: zero-shot one image to 3d object (2023)","DOI":"10.1109\/ICCV51070.2023.00853"},{"key":"24_CR34","unstructured":"Liu, Y., Lin, C., Zeng, Z., Long, X., Liu, L., Komura, T., Wang, W.: Syncdreamer: generating multiview-consistent images from a single-view image. arXiv preprint arXiv:2309.03453 (2023)"},{"key":"24_CR35","unstructured":"Loshchilov, I., Hutter, F.: Decoupled weight decay regularization. arXiv preprint arXiv:1711.05101 (2017)"},{"key":"24_CR36","unstructured":"Lu, C., Zhou, Y., Bao, F., Chen, J., Li, C., Zhu, J.: Dpm-solver: a fast ode solver for diffusion probabilistic model sampling in around 10 steps. arXiv preprint arXiv:2206.00927 (2022)"},{"key":"24_CR37","unstructured":"Lu, C., Zhou, Y., Bao, F., Chen, J., Li, C., Zhu, J.: DPM-solver++: fast solver for guided sampling of diffusion probabilistic models. arXiv preprint arXiv:2211.01095 (2022)"},{"key":"24_CR38","unstructured":"Luo, G., Dunlap, L., Park, D.H., Holynski, A., Darrell, T.: Diffusion hyperfeatures: searching through time and space for semantic correspondence. Adv. Neural Inf. Process. Syst. 36 (2024)"},{"key":"24_CR39","doi-asserted-by":"crossref","unstructured":"Melas-Kyriazi, L., Laina, I., Rupprecht, C., Vedaldi, A.: Realfusion: 360deg reconstruction of any object from a single image. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 8446\u20138455 (2023)","DOI":"10.1109\/CVPR52729.2023.00816"},{"key":"24_CR40","unstructured":"Meng, C., et al.: Sdedit: guided image synthesis and editing with stochastic differential equations. In: International Conference on Learning Representations (2021)"},{"key":"24_CR41","doi-asserted-by":"crossref","unstructured":"Mildenhall, B., et al.: Local light field fusion: practical view synthesis with prescriptive sampling guidelines. ACM Trans. Graph. 38(4), 1\u201314 (2019)","DOI":"10.1145\/3306346.3322980"},{"key":"24_CR42","doi-asserted-by":"crossref","unstructured":"Mou, C., et al.: T2i-adapter: learning adapters to dig out more controllable ability for text-to-image diffusion models. arXiv preprint arXiv:2302.08453 (2023)","DOI":"10.1609\/aaai.v38i5.28226"},{"key":"24_CR43","doi-asserted-by":"crossref","unstructured":"Niemeyer, M., Barron, J.T., Mildenhall, B., Sajjadi, M.S., Geiger, A., Radwan, N.: Regnerf: regularizing neural radiance fields for view synthesis from sparse inputs. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 5480\u20135490 (2022)","DOI":"10.1109\/CVPR52688.2022.00540"},{"key":"24_CR44","unstructured":"von Platen, P., et al.: Diffusers: State-of-the-Art Diffusion Models (2022)"},{"key":"24_CR45","unstructured":"Podell, D., et al.: SDXL: improving latent diffusion models for high-resolution image synthesis. arXiv preprint arXiv:2307.01952 (2023)"},{"key":"24_CR46","unstructured":"Poole, B., Jain, A., Barron, J.T., Mildenhall, B.: Dreamfusion: text-to-3d using 2d diffusion. arXiv preprint arXiv:2209.14988 (2022)"},{"key":"24_CR47","unstructured":"Radford, A., et\u00a0al.: Learning transferable visual models from natural language supervision. In: International Conference on Machine Learning, pp. 8748\u20138763. PMLR (2021)"},{"key":"24_CR48","unstructured":"Rahimi, A., Recht, B.: Random features for large-scale kernel machines. Adv. Neural Inf. Process. Syst. 20 (2007)"},{"key":"24_CR49","doi-asserted-by":"crossref","unstructured":"Reizenstein, J., Shapovalov, R., Henzler, P., Sbordone, L., Labatut, P., Novotny, D.: Common objects in 3d: large-scale learning and evaluation of real-life 3d category reconstruction. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 10901\u201310911 (2021)","DOI":"10.1109\/ICCV48922.2021.01072"},{"key":"24_CR50","unstructured":"Rezende, D.J., Mohamed, S., Wierstra, D.: Stochastic backpropagation and approximate inference in deep generative models. In: International Conference on Machine Learning, pp. 1278\u20131286. PMLR (2014)"},{"key":"24_CR51","doi-asserted-by":"crossref","unstructured":"Roessle, B., Barron, J.T., Mildenhall, B., Srinivasan, P.P., Nie\u00dfner, M.: Dense depth priors for neural radiance fields from sparse input views. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 12892\u201312901 (2022)","DOI":"10.1109\/CVPR52688.2022.01255"},{"key":"24_CR52","doi-asserted-by":"crossref","unstructured":"Rombach, R., Blattmann, A., Lorenz, D., Esser, P., Ommer, B.: High-resolution image synthesis with latent diffusion models. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 10684\u201310695 (2022)","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"24_CR53","doi-asserted-by":"crossref","unstructured":"Ronneberger, O., Fischer, P., Brox, T.: U-net: convolutional networks for biomedical image segmentation. In: Medical Image Computing and Computer-Assisted Intervention\u2013MICCAI 2015: 18th International Conference, Munich, 5\u20139 October 2015, Proceedings, Part III 18, pp. 234\u2013241. Springer, Cham (2015)","DOI":"10.1007\/978-3-319-24574-4_28"},{"key":"24_CR54","doi-asserted-by":"crossref","unstructured":"Ruiz, N., Li, Y., Jampani, V., Pritch, Y., Rubinstein, M., Aberman, K.: Dreambooth: fine tuning text-to-image diffusion models for subject-driven generation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 22500\u201322510 (2023)","DOI":"10.1109\/CVPR52729.2023.02155"},{"key":"24_CR55","unstructured":"Saharia, C., et al.: Photorealistic text-to-image diffusion models with deep language understanding. Adv. Neural. Inf. Process. Syst. 35, 36479\u201336494 (2022)"},{"key":"24_CR56","doi-asserted-by":"crossref","unstructured":"Sargent, K., et\u00a0al.: Zeronvs: zero-shot 360-degree view synthesis from a single real image. arXiv preprint arXiv:2310.17994 (2023)","DOI":"10.1109\/CVPR52733.2024.00900"},{"key":"24_CR57","doi-asserted-by":"crossref","unstructured":"Sarkar, A., Mai, H., Mahapatra, A., Lazebnik, S., Forsyth, D.A., Bhattad, A.: Shadows don\u2019t lie and lines can\u2019t bend! generative models don\u2019t know projective geometry... for now. arXiv preprint arXiv:2311.17138 (2023)","DOI":"10.1109\/CVPR52733.2024.02658"},{"key":"24_CR58","unstructured":"Schuhmann, C., et\u00a0al.: Laion-5b: an open large-scale dataset for training next generation image-text models. arXiv preprint arXiv:2210.08402 (2022)"},{"key":"24_CR59","unstructured":"Seo, J., et al.: Let 2d diffusion model know 3d-consistency for robust text-to-3d generation. arXiv preprint arXiv:2303.07937 (2023)"},{"key":"24_CR60","doi-asserted-by":"crossref","unstructured":"Seo, S., Han, D., Chang, Y., Kwak, N.: Mixnerf: modeling a ray with mixture density for novel view synthesis from sparse inputs. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 20659\u201320668 (2023)","DOI":"10.1109\/CVPR52729.2023.01979"},{"key":"24_CR61","unstructured":"Sohl-Dickstein, J., Weiss, E., Maheswaranathan, N., Ganguli, S.: Deep unsupervised learning using nonequilibrium thermodynamics. In: International Conference on Machine Learning, pp. 2256\u20132265. PMLR (2015)"},{"key":"24_CR62","unstructured":"Song, J., Meng, C., Ermon, S.: Denoising diffusion implicit models. arXiv preprint arXiv:2010.02502 (2020)"},{"key":"24_CR63","unstructured":"Song, Y., Ermon, S.: Generative modeling by estimating gradients of the data distribution. Adv. Neural Inf. Process. Syst. 32 (2019)"},{"key":"24_CR64","doi-asserted-by":"crossref","unstructured":"Sun, S.H., Huh, M., Liao, Y.H., Zhang, N., Lim, J.J.: Multi-view to novel view: synthesizing novel views with self-learned confidence. In: Proceedings of the European Conference on Computer Vision (ECCV), pp. 155\u2013171 (2018)","DOI":"10.1007\/978-3-030-01219-9_10"},{"key":"24_CR65","doi-asserted-by":"crossref","unstructured":"Szymanowicz, S., Rupprecht, C., Vedaldi, A.: Viewset diffusion:(0-) image-conditioned 3d generative models from 2d data. arXiv preprint arXiv:2306.07881 (2023)","DOI":"10.1109\/ICCV51070.2023.00814"},{"key":"24_CR66","unstructured":"Tancik, M., et al.: Fourier features let networks learn high frequency functions in low dimensional domains. Adv. Neural. Inf. Process. Syst. 33, 7537\u20137547 (2020)"},{"key":"24_CR67","doi-asserted-by":"crossref","unstructured":"Tancik, M., et al.: Nerfstudio: a modular framework for neural radiance field development. In: ACM SIGGRAPH 2023 Conference Proceedings. SIGGRAPH 2023 (2023)","DOI":"10.1145\/3588432.3591516"},{"key":"24_CR68","unstructured":"Tang, L., Jia, M., Wang, Q., Phoo, C.P., Hariharan, B.: Emergent correspondence from image diffusion. Adv. Neural Inf. Process. Syst. 36 (2024)"},{"key":"24_CR69","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"322","DOI":"10.1007\/978-3-319-46478-7_20","volume-title":"Computer Vision \u2013 ECCV 2016","author":"M Tatarchenko","year":"2016","unstructured":"Tatarchenko, M., Dosovitskiy, A., Brox, T.: Multi-view 3D models from single images with a convolutional network. In: Leibe, B., Matas, J., Sebe, N., Welling, M. (eds.) ECCV 2016. LNCS, vol. 9911, pp. 322\u2013337. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-46478-7_20"},{"key":"24_CR70","unstructured":"Tewari, A., et al.: Diffusion with forward models: solving stochastic inverse problems without direct supervision. arXiv preprint arXiv:2306.11719 (2023)"},{"key":"24_CR71","doi-asserted-by":"crossref","unstructured":"Truong, P., Rakotosaona, M.J., Manhardt, F., Tombari, F.: Sparf: neural radiance fields from sparse and noisy poses. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 4190\u20134200 (2023)","DOI":"10.1109\/CVPR52729.2023.00408"},{"key":"24_CR72","doi-asserted-by":"crossref","unstructured":"Valevski, D., Wasserman, D., Matias, Y., Leviathan, Y.: Face0: instantaneously conditioning a text-to-image model on a face. arXiv preprint arXiv:2306.06638 (2023)","DOI":"10.1145\/3610548.3618249"},{"key":"24_CR73","unstructured":"Voynov, A., Chu, Q., Cohen-Or, D., Aberman, K.: $$ p+ $$: extended textual conditioning in text-to-image generation. arXiv preprint arXiv:2303.09522 (2023)"},{"key":"24_CR74","doi-asserted-by":"crossref","unstructured":"Wang, G., Chen, Z., Loy, C.C., Liu, Z.: Sparsenerf: distilling depth ranking for few-shot novel view synthesis. arXiv preprint arXiv:2303.16196 (2023)","DOI":"10.1109\/ICCV51070.2023.00832"},{"key":"24_CR75","doi-asserted-by":"crossref","unstructured":"Wang, H., Du, X., Li, J., Yeh, R.A., Shakhnarovich, G.: Score Jacobian chaining: lifting pretrained 2d diffusion models for 3d generation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 12619\u201312629 (2023)","DOI":"10.1109\/CVPR52729.2023.01214"},{"key":"24_CR76","unstructured":"Wang, Z., Wu, S., Xie, W., Chen, M., Prisacariu, V.A.: Nerf\u2013: neural radiance fields without known camera parameters. arXiv preprint arXiv:2102.07064 (2021)"},{"key":"24_CR77","doi-asserted-by":"crossref","unstructured":"Wei, Y., Zhang, Y., Ji, Z., Bai, J., Zhang, L., Zuo, W.: Elite: encoding visual concepts into textual embeddings for customized text-to-image generation. arXiv preprint arXiv:2302.13848 (2023)","DOI":"10.1109\/ICCV51070.2023.01461"},{"key":"24_CR78","doi-asserted-by":"crossref","unstructured":"Wynn, J., Turmukhambetov, D.: Diffusionerf: Regularizing neural radiance fields with denoising diffusion models. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 4180\u20134189 (2023)","DOI":"10.1109\/CVPR52729.2023.00407"},{"key":"24_CR79","unstructured":"Xu, B., Wang, N., Chen, T., Li, M.: Empirical evaluation of rectified activations in convolutional network. arXiv preprint arXiv:1505.00853 (2015)"},{"key":"24_CR80","doi-asserted-by":"crossref","unstructured":"Xu, D., Jiang, Y., Wang, P., Fan, Z., Shi, H., Wang, Z.: Sinnerf: training neural radiance fields on complex scenes from a single image. In: European Conference on Computer Vision, pp. 736\u2013753. Springer, Cham (2022)","DOI":"10.1007\/978-3-031-20047-2_42"},{"key":"24_CR81","doi-asserted-by":"crossref","unstructured":"Xu, D., Jiang, Y., Wang, P., Fan, Z., Wang, Y., Wang, Z.: Neurallift-360: lifting an in-the-wild 2d photo to a 3d object with 360$$^{\\circ }$$ views. arXiv e-prints, pp. arXiv\u20132211 (2022)","DOI":"10.1109\/CVPR52729.2023.00435"},{"key":"24_CR82","doi-asserted-by":"crossref","unstructured":"Yang, J., Pavone, M., Wang, Y.: Freenerf: improving few-shot neural rendering with free frequency regularization. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 8254\u20138263 (2023)","DOI":"10.1109\/CVPR52729.2023.00798"},{"key":"24_CR83","unstructured":"Yoo, P., Guo, J., Matsuo, Y., Gu, S.S.: Dreamsparse: escaping from Plato\u2019s cave with 2d frozen diffusion model given sparse views. arXiv preprint arXiv:2306.03414 (2023)"},{"key":"24_CR84","doi-asserted-by":"crossref","unstructured":"Yu, A., Ye, V., Tancik, M., Kanazawa, A.: pixelnerf: Neural radiance fields from one or few images. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 4578\u20134587 (2021)","DOI":"10.1109\/CVPR46437.2021.00455"},{"key":"24_CR85","unstructured":"Yu, Z., et al.: Sdfstudio: a unified framework for surface reconstruction (2022). https:\/\/github.com\/autonomousvision\/sdfstudio"},{"key":"24_CR86","unstructured":"Yu, Z., Peng, S., Niemeyer, M., Sattler, T., Geiger, A.: Monosdf: exploring monocular geometric cues for neural implicit surface reconstruction. Adv. Neural. Inf. Process. Syst. 35, 25018\u201325032 (2022)"},{"key":"24_CR87","unstructured":"Zhan, G., Zheng, C., Xie, W., Zisserman, A.: What does stable diffusion know about the 3d scene? arXiv preprint arXiv:2310.06836 (2023)"},{"key":"24_CR88","unstructured":"Zhang, J., et al.: A tale of two features: stable diffusion complements dino for zero-shot semantic correspondence. Adv. Neural Inf. Process. Syst. 36 (2024)"},{"key":"24_CR89","doi-asserted-by":"crossref","unstructured":"Zhang, L., Agrawala, M.: Adding conditional control to text-to-image diffusion models. arXiv preprint arXiv:2302.05543 (2023)","DOI":"10.1109\/ICCV51070.2023.00355"},{"key":"24_CR90","unstructured":"Zhang, Y., et al.: Prospect: expanded conditioning for the personalization of attribute-aware image generation. arXiv preprint arXiv:2305.16225 (2023)"},{"key":"24_CR91","doi-asserted-by":"crossref","unstructured":"Zhang, Y., et al.: Inversion-based style transfer with diffusion models. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 10146\u201310156 (2023)","DOI":"10.1109\/CVPR52729.2023.00978"},{"key":"24_CR92","doi-asserted-by":"crossref","unstructured":"Zhou, Z., Tulsiani, S.: Sparsefusion: distilling view-conditioned diffusion for 3d reconstruction. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 12588\u201312597 (2023)","DOI":"10.1109\/CVPR52729.2023.01211"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2024"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-73039-9_24","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,10,30]],"date-time":"2024-10-30T15:29:13Z","timestamp":1730302153000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-73039-9_24"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10,31]]},"ISBN":["9783031730382","9783031730399"],"references-count":92,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-73039-9_24","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,10,31]]},"assertion":[{"value":"31 October 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Milan","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 September 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4 October 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2024.ecva.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}