{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,27]],"date-time":"2026-03-27T18:01:05Z","timestamp":1774634465710,"version":"3.50.1"},"publisher-location":"Cham","reference-count":61,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031726514","type":"print"},{"value":"9783031726521","type":"electronic"}],"license":[{"start":{"date-parts":[[2024,10,30]],"date-time":"2024-10-30T00:00:00Z","timestamp":1730246400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,10,30]],"date-time":"2024-10-30T00:00:00Z","timestamp":1730246400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-72652-1_5","type":"book-chapter","created":{"date-parts":[[2024,10,29]],"date-time":"2024-10-29T08:29:02Z","timestamp":1730190542000},"page":"74-91","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":13,"title":["UniDream: Unifying Diffusion Priors for\u00a0Relightable Text-to-3D 
Generation"],"prefix":"10.1007","author":[{"given":"Zexiang","family":"Liu","sequence":"first","affiliation":[]},{"given":"Yangguang","family":"Li","sequence":"additional","affiliation":[]},{"given":"Youtian","family":"Lin","sequence":"additional","affiliation":[]},{"given":"Xin","family":"Yu","sequence":"additional","affiliation":[]},{"given":"Sida","family":"Peng","sequence":"additional","affiliation":[]},{"given":"Yan-Pei","family":"Cao","sequence":"additional","affiliation":[]},{"given":"Xiaojuan","family":"Qi","sequence":"additional","affiliation":[]},{"given":"Xiaoshui","family":"Huang","sequence":"additional","affiliation":[]},{"given":"Ding","family":"Liang","sequence":"additional","affiliation":[]},{"given":"Wanli","family":"Ouyang","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,10,30]]},"reference":[{"issue":"4","key":"5_CR1","doi-asserted-by":"publisher","first-page":"110","DOI":"10.1145\/2461912.2461978","volume":"32","author":"M Aittala","year":"2013","unstructured":"Aittala, M., Weyrich, T., Lehtinen, J.: Practical svbrdf capture in the frequency domain. ACM Trans. Graph. 32(4), 110\u20131 (2013)","journal-title":"ACM Trans. Graph."},{"key":"5_CR2","unstructured":"Burley, B., Studios, W.D.A.: Physically-based shading at disney. In: ACM Siggraph. vol.\u00a02012, pp.\u00a01\u20137 (2012)"},{"key":"5_CR3","doi-asserted-by":"publisher","unstructured":"Chen, F.L., et al.: Vlp: a survey on vision-language pre-training. Mach. Intell. Res. 20(1), 38\u201356 (2023) https:\/\/doi.org\/10.1007\/s11633-022-1369-5","DOI":"10.1007\/s11633-022-1369-5"},{"key":"5_CR4","doi-asserted-by":"crossref","unstructured":"Chen, R., Chen, Y., Jiao, N., Jia, K.: Fantasia3d: Disentangling geometry and appearance for high-quality text-to-3d content creation. 
arXiv preprint arXiv:2303.13873 (2023)","DOI":"10.1109\/ICCV51070.2023.02033"},{"key":"5_CR5","doi-asserted-by":"crossref","unstructured":"Chen, R., Chen, Y., Jiao, N., Jia, K.: Fantasia3d: Disentangling geometry and appearance for high-quality text-to-3d content creation. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV) (October 2023)","DOI":"10.1109\/ICCV51070.2023.02033"},{"key":"5_CR6","doi-asserted-by":"crossref","unstructured":"Cheng, Y.C., Lee, H.Y., Tulyakov, S., Schwing, A.G., Gui, L.Y.: Sdfusion: multimodal 3d shape completion, reconstruction, and generation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 4456\u20134465 (2023)","DOI":"10.1109\/CVPR52729.2023.00433"},{"key":"5_CR7","doi-asserted-by":"crossref","unstructured":"Deitke, M., et al.: Objaverse: a universe of annotated 3d objects. arXiv preprint arXiv:2212.08051 (2022)","DOI":"10.1109\/CVPR52729.2023.01263"},{"issue":"4","key":"5_CR8","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3306346.3323042","volume":"38","author":"D Gao","year":"2019","unstructured":"Gao, D., Li, X., Dong, Y., Peers, P., Xu, K., Tong, X.: Deep inverse rendering for high-resolution svbrdf estimation from an arbitrary number of images. ACM Trans. Graph. 38(4), 1\u2013134 (2019)","journal-title":"ACM Trans. Graph."},{"key":"5_CR9","unstructured":"Gupta, A., Xiong, W., Nie, Y., Jones, I., O\u011fuz, B.: 3dgen: Triplane latent diffusion for textured mesh generation. arXiv preprint arXiv:2303.05371 (2023)"},{"key":"5_CR10","doi-asserted-by":"crossref","unstructured":"He, X., et al.: Gvgen: Text-to-3d generation with volumetric representation. 
arXiv preprint arXiv:2403.12957 (2024)","DOI":"10.1007\/978-3-031-73242-3_26"},{"key":"5_CR11","doi-asserted-by":"crossref","unstructured":"Hessel, J., Holtzman, A., Forbes, M., Bras, R.L., Choi, Y.: Clipscore: A reference-free evaluation metric for image captioning (2022)","DOI":"10.18653\/v1\/2021.emnlp-main.595"},{"key":"5_CR12","unstructured":"Hong, Y., et al.: Lrm: Large reconstruction model for single image to 3d. arXiv preprint arXiv:2311.04400 (2023)"},{"key":"5_CR13","doi-asserted-by":"crossref","unstructured":"Hui, K.H., Li, R., Hu, J., Fu, C.W.: Neural wavelet-domain diffusion for 3d shape generation. In: SIGGRAPH Asia 2022 Conference Papers, pp.\u00a01\u20139 (2022)","DOI":"10.1145\/3550469.3555394"},{"key":"5_CR14","doi-asserted-by":"crossref","unstructured":"Jain, A., Mildenhall, B., Barron, J.T., Abbeel, P., Poole, B.: Zero-shot text-guided object generation with dream fields. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 867\u2013876 (2022)","DOI":"10.1109\/CVPR52688.2022.00094"},{"key":"5_CR15","unstructured":"Jun, H., Nichol, A.: Shap-e: Generating conditional 3d implicit functions. arXiv preprint arXiv:2305.02463 (2023)"},{"issue":"3","key":"5_CR16","first-page":"1","volume":"4","author":"B Karis","year":"2013","unstructured":"Karis, B., Games, E.: Real shading in unreal engine 4. Proc. Physically Based Shading Theory Pract. 4(3), 1 (2013)","journal-title":"Proc. Physically Based Shading Theory Pract."},{"key":"5_CR17","unstructured":"Li, J., et al.: Instant3d: Fast text-to-3d with sparse-view generation and large reconstruction model. arXiv preprint arXiv:2311.06214 (2023)"},{"key":"5_CR18","doi-asserted-by":"crossref","unstructured":"Lin, C.H., et al.: Magic3d: High-resolution text-to-3d content creation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 
300\u2013309 (2023)","DOI":"10.1109\/CVPR52729.2023.00037"},{"key":"5_CR19","doi-asserted-by":"crossref","unstructured":"Liu, F., Wu, D., Wei, Y., Rao, Y., Duan, Y.: Sherpa3d: boosting high-fidelity text-to-3d generation via coarse 3d prior. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 20763\u201320774 (2024)","DOI":"10.1109\/CVPR52733.2024.01962"},{"key":"5_CR20","doi-asserted-by":"crossref","unstructured":"Liu, R., Wu, R., Van\u00a0Hoorick, B., Tokmakov, P., Zakharov, S., Vondrick, C.: Zero-1-to-3: zero-shot one image to 3d object. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 9298\u20139309 (2023)","DOI":"10.1109\/ICCV51070.2023.00853"},{"key":"5_CR21","unstructured":"Liu, Y., et al.: Syncdreamer: Learning to generate multiview-consistent images from a single-view image. arXiv preprint arXiv:2309.03453 (2023)"},{"key":"5_CR22","doi-asserted-by":"crossref","unstructured":"Long, X., et\u00a0al.: Wonder3d: Single image to 3d using cross-domain diffusion. arXiv preprint arXiv:2310.15008 (2023)","DOI":"10.1109\/CVPR52733.2024.00951"},{"key":"5_CR23","unstructured":"Luo, T., Rockwell, C., Lee, H., Johnson, J.: Scalable 3d captioning with pretrained models (2023)"},{"key":"5_CR24","doi-asserted-by":"crossref","unstructured":"Metzer, G., Richardson, E., Patashnik, O., Giryes, R., Cohen-Or, D.: Latent-nerf for shape-guided generation of 3d shapes and textures. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 12663\u201312673 (2023)","DOI":"10.1109\/CVPR52729.2023.01218"},{"issue":"1","key":"5_CR25","doi-asserted-by":"publisher","first-page":"99","DOI":"10.1145\/3503250","volume":"65","author":"B Mildenhall","year":"2021","unstructured":"Mildenhall, B., Srinivasan, P.P., Tancik, M., Barron, J.T., Ramamoorthi, R., Ng, R.: Nerf: representing scenes as neural radiance fields for view synthesis. Commun. 
ACM 65(1), 99\u2013106 (2021)","journal-title":"Commun. ACM"},{"key":"5_CR26","doi-asserted-by":"crossref","unstructured":"Mohammad\u00a0Khalid, N., Xie, T., Belilovsky, E., Popa, T.: Clip-mesh: generating textured meshes from text using pretrained image-text models. In: SIGGRAPH Asia 2022 conference papers, pp.\u00a01\u20138 (2022)","DOI":"10.1145\/3550469.3555392"},{"key":"5_CR27","doi-asserted-by":"crossref","unstructured":"Munkberg, J., et al.: Extracting triangular 3d models, materials, and lighting from images. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 8280\u20138290 (2022)","DOI":"10.1109\/CVPR52688.2022.00810"},{"key":"5_CR28","doi-asserted-by":"crossref","unstructured":"Munkberg, J., et al.: Extracting Triangular 3D Models, Materials, and Lighting From Images. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 8280\u20138290 (June 2022)","DOI":"10.1109\/CVPR52688.2022.00810"},{"issue":"6","key":"5_CR29","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3272127.3275017","volume":"37","author":"G Nam","year":"2018","unstructured":"Nam, G., Lee, J.H., Gutierrez, D., Kim, M.H.: Practical svbrdf acquisition of 3d objects with unstructured flash photography. ACM Trans. Graph. (TOG) 37(6), 1\u201312 (2018)","journal-title":"ACM Trans. Graph. (TOG)"},{"key":"5_CR30","unstructured":"Nichol, A., Jun, H., Dhariwal, P., Mishkin, P., Chen, M.: Point-e: A system for generating 3d point clouds from complex prompts. arXiv preprint arXiv:2212.08751 (2022)"},{"issue":"7","key":"5_CR31","doi-asserted-by":"publisher","first-page":"767","DOI":"10.1364\/AO.4.000767","volume":"4","author":"FE Nicodemus","year":"1965","unstructured":"Nicodemus, F.E.: Directional reflectance and emissivity of an opaque surface. Appl. Opt. 4(7), 767\u2013775 (1965)","journal-title":"Appl. 
Opt."},{"key":"5_CR32","unstructured":"Oquab, M., et\u00a0al.: Dinov2: Learning robust visual features without supervision. arXiv preprint arXiv:2304.07193 (2023)"},{"key":"5_CR33","unstructured":"Park, D.H., Azadi, S., Liu, X., Darrell, T., Rohrbach, A.: Benchmark for compositional text-to-image synthesis. In: Thirty-fifth Conference on Neural Information Processing Systems Datasets and Benchmarks Track (Round 1) (2021)"},{"key":"5_CR34","unstructured":"Poole, B., Jain, A., Barron, J.T., Mildenhall, B.: Dreamfusion: Text-to-3d using 2d diffusion. arXiv preprint arXiv:2209.14988 (2022)"},{"key":"5_CR35","doi-asserted-by":"crossref","unstructured":"Qiu, L., et al.: Richdreamer: a generalizable normal-depth diffusion model for detail richness in text-to-3d. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 9914\u20139925 (2024)","DOI":"10.1109\/CVPR52733.2024.00946"},{"key":"5_CR36","unstructured":"Radford, A., et\u00a0al.: Learning transferable visual models from natural language supervision. In: International Conference on Machine Learning, pp. 8748\u20138763. PMLR (2021)"},{"key":"5_CR37","doi-asserted-by":"crossref","unstructured":"Rombach, R., Blattmann, A., Lorenz, D., Esser, P., Ommer, B.: High-resolution image synthesis with latent diffusion models. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 10684\u201310695 (2022)","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"5_CR38","first-page":"36479","volume":"35","author":"C Saharia","year":"2022","unstructured":"Saharia, C., et al.: Photorealistic text-to-image diffusion models with deep language understanding. Adv. Neural. Inf. Process. Syst. 35, 36479\u201336494 (2022)","journal-title":"Adv. Neural. Inf. Process. 
Syst."},{"key":"5_CR39","first-page":"6087","volume":"34","author":"T Shen","year":"2021","unstructured":"Shen, T., Gao, J., Yin, K., Liu, M.Y., Fidler, S.: Deep marching tetrahedra: a hybrid representation for high-resolution 3d shape synthesis. Adv. Neural. Inf. Process. Syst. 34, 6087\u20136101 (2021)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"5_CR40","unstructured":"Shi, Y., Wang, P., Ye, J., Mai, L., Li, K., Yang, X.: Mvdream: Multi-view diffusion for 3d generation. arXiv:2308.16512 (2023)"},{"key":"5_CR41","doi-asserted-by":"crossref","unstructured":"Srinivasan, P.P., Deng, B., Zhang, X., Tancik, M., Mildenhall, B., Barron, J.T.: Nerv: neural reflectance and visibility fields for relighting and view synthesis. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 7495\u20137504 (2021)","DOI":"10.1109\/CVPR46437.2021.00741"},{"key":"5_CR42","unstructured":"Tang, J., Ren, J., Zhou, H., Liu, Z., Zeng, G.: Dreamgaussian: Generative gaussian splatting for efficient 3d content creation. arXiv preprint arXiv:2309.16653 (2023)"},{"key":"5_CR43","unstructured":"Tochilkin, D., et al.: Triposr: Fast 3d object reconstruction from a single image. arXiv preprint arXiv:2403.02151 (2024)"},{"key":"5_CR44","doi-asserted-by":"publisher","unstructured":"Wang, X., et al.: Large-scale multi-modal pre-trained models: a comprehensive survey. Mach. Intell. Res. 20(4), 447\u2013482 (2023). https:\/\/doi.org\/10.1007\/s11633-022-1410-8","DOI":"10.1007\/s11633-022-1410-8"},{"key":"5_CR45","unstructured":"Wang, Z., et al.: Prolificdreamer: High-fidelity and diverse text-to-3d generation with variational score distillation. arXiv preprint arXiv:2305.16213 (2023)"},{"key":"5_CR46","doi-asserted-by":"crossref","unstructured":"Wang, Z., et al.: Crm: Single image to 3d textured mesh with convolutional reconstruction model. 
arXiv preprint arXiv:2403.05034 (2024)","DOI":"10.1007\/978-3-031-72751-1_4"},{"key":"5_CR47","unstructured":"Wei, X., et al.: Meshlrm: Large reconstruction model for high-quality mesh. arXiv preprint arXiv:2404.12385 (2024)"},{"key":"5_CR48","unstructured":"Wiig, T.: Blur latent noise (2023). https:\/\/gist.github.com\/trygvebw\/e51573d40841d22c11fc32df6863ef58"},{"key":"5_CR49","doi-asserted-by":"crossref","unstructured":"Wu, J.Z., et al.: Tune-a-video: one-shot tuning of image diffusion models for text-to-video generation. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 7623\u20137633 (2023)","DOI":"10.1109\/ICCV51070.2023.00701"},{"key":"5_CR50","unstructured":"Xu, J., Cheng, W., Gao, Y., Wang, X., Gao, S., Shan, Y.: Instantmesh: Efficient 3d mesh generation from a single image with sparse-view large reconstruction models. arXiv preprint arXiv:2404.07191 (2024)"},{"key":"5_CR51","doi-asserted-by":"crossref","unstructured":"Xu, J., et al.: Dream3d: zero-shot text-to-3d synthesis using 3d shape prior and text-to-image diffusion models. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 20908\u201320918 (2023)","DOI":"10.1109\/CVPR52729.2023.02003"},{"key":"5_CR52","unstructured":"Xu, X., Lyu, Z., Pan, X., Dai, B.: Matlaber: Material-aware text-to-3d via latent brdf auto-encoder. arXiv preprint arXiv:2308.09278 (2023)"},{"key":"5_CR53","doi-asserted-by":"crossref","unstructured":"Xu, Y., et al.: Grm: Large gaussian reconstruction model for efficient 3d reconstruction and generation. arXiv preprint arXiv:2403.14621 (2024)","DOI":"10.1007\/978-3-031-72633-0_1"},{"key":"5_CR54","doi-asserted-by":"crossref","unstructured":"Yu, C., Zhou, Q., Li, J., Zhang, Z., Wang, Z., Wang, F.: Points-to-3d: bridging the gap between sparse points and shape-controllable text-to-3d generation. In: Proceedings of the 31st ACM International Conference on Multimedia, pp. 
6841\u20136850 (2023)","DOI":"10.1145\/3581783.3612232"},{"key":"5_CR55","unstructured":"Yu, X., Guo, Y.C., Li, Y., Liang, D., Zhang, S.H., Qi, X.: Text-to-3d with classifier score distillation. arXiv preprint arXiv:2310.19415 (2023)"},{"key":"5_CR56","doi-asserted-by":"crossref","unstructured":"Zhang, K., et al.: Gs-lrm: Large reconstruction model for 3d gaussian splatting. arXiv preprint arXiv:2404.19702 (2024)","DOI":"10.1007\/978-3-031-72670-5_1"},{"issue":"6","key":"5_CR57","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3478513.3480500","volume":"40","author":"X Zhang","year":"2021","unstructured":"Zhang, X., Srinivasan, P.P., Deng, B., Debevec, P., Freeman, W.T., Barron, J.T.: Nerfactor: neural factorization of shape and reflectance under an unknown illumination. ACM Trans. Graph. (ToG) 40(6), 1\u201318 (2021)","journal-title":"ACM Trans. Graph. (ToG)"},{"key":"5_CR58","unstructured":"Zhao, M., et al.: Efficientdreamer: High-fidelity and robust 3d creation via orthogonal-view diffusion prior. arXiv preprint arXiv:2308.13223 (2023)"},{"key":"5_CR59","doi-asserted-by":"crossref","unstructured":"Zheng, X.Y., Pan, H., Wang, P.S., Tong, X., Liu, Y., Shum, H.Y.: Locally attentional sdf diffusion for controllable 3d shape generation. arXiv preprint arXiv:2305.04461 (2023)","DOI":"10.1145\/3592103"},{"key":"5_CR60","unstructured":"Zhuo, L., et\u00a0al.: Lumina-next: Making lumina-t2x stronger and faster with next-dit. arXiv preprint arXiv:2406.18583 (2024)"},{"key":"5_CR61","doi-asserted-by":"crossref","unstructured":"Zou, Z.X., et al.: Triplane meets gaussian splatting: fast and generalizable single-view 3d reconstruction with transformers. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 
10324\u201310335 (2024)","DOI":"10.1109\/CVPR52733.2024.00983"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2024"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-72652-1_5","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,11,30]],"date-time":"2024-11-30T11:59:59Z","timestamp":1732967999000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-72652-1_5"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10,30]]},"ISBN":["9783031726514","9783031726521"],"references-count":61,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-72652-1_5","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,10,30]]},"assertion":[{"value":"30 October 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Milan","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 September 2024","order":7,"name":"conference_start_date","label":"Conference Start 
Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4 October 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2024.ecva.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}