{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,2]],"date-time":"2026-06-02T09:29:45Z","timestamp":1780392585711,"version":"3.54.1"},"publisher-location":"Cham","reference-count":72,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031729065","type":"print"},{"value":"9783031729072","type":"electronic"}],"license":[{"start":{"date-parts":[[2024,10,31]],"date-time":"2024-10-31T00:00:00Z","timestamp":1730332800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,10,31]],"date-time":"2024-10-31T00:00:00Z","timestamp":1730332800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-72907-2_23","type":"book-chapter","created":{"date-parts":[[2024,10,30]],"date-time":"2024-10-30T15:22:17Z","timestamp":1730301737000},"page":"389-406","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":8,"title":["Make-Your-3D: Fast and\u00a0Consistent Subject-Driven 3D Content Generation"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0009-0007-9422-218X","authenticated-orcid":false,"given":"Fangfu","family":"Liu","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-0344-6664","authenticated-orcid":false,"given":"Hanyang","family":"Wang","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Weiliang","family":"Chen","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0466-191X","authenticated-orcid":false,"given":"Haowen","family":"Sun","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1190-6663","authenticated-orcid":false,"given":"Yueqi","family":"Duan","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"297","published-online":{"date-parts":[[2024,10,31]]},"reference":[{"key":"23_CR1","unstructured":"stable-diffusion-xl-base-1.0. https:\/\/huggingface.co\/stabilityai\/stable-diffusion-xl-base-1.0. Accessed 29 Aug 2023"},{"key":"23_CR2","doi-asserted-by":"crossref","unstructured":"Avrahami, O., et al.: The chosen one: consistent characters in text-to-image diffusion models. arXiv preprint arXiv:2311.10093 (2023)","DOI":"10.1145\/3641519.3657430"},{"key":"23_CR3","doi-asserted-by":"publisher","unstructured":"Cao, H., et al.: A survey on generative diffusion models. IEEE Trans. Knowl. Data Eng. 1\u201320 (2024). https:\/\/doi.org\/10.1109\/TKDE.2024.3361474","DOI":"10.1109\/TKDE.2024.3361474"},{"key":"23_CR4","unstructured":"Chang, A.X., et al.: Shapenet: an information-rich 3D model repository. arXiv preprint arXiv:1512.03012 (2015)"},{"key":"23_CR5","doi-asserted-by":"crossref","unstructured":"Chen, H., et al.: Single-stage diffusion nerf: a unified approach to 3D generation and reconstruction (2023)","DOI":"10.1109\/ICCV51070.2023.00229"},{"key":"23_CR6","doi-asserted-by":"crossref","unstructured":"Chen, R., Chen, Y., Jiao, N., Jia, K.: Fantasia3d: disentangling geometry and appearance for high-quality text-to-3D content creation (2023)","DOI":"10.1109\/ICCV51070.2023.02033"},{"key":"23_CR7","unstructured":"Chen, W., et al.: Subject-driven text-to-image generation via apprenticeship learning. In: Advances in Neural Information Processing Systems, vol. 36 (2024)"},{"key":"23_CR8","unstructured":"Deitke, M., et al.: Objaverse-xl: a universe of 10M+ 3D objects. In: Advances in Neural Information Processing Systems, vol. 36 (2024)"},{"key":"23_CR9","doi-asserted-by":"crossref","unstructured":"Deitke, M., et al.: Objaverse: a universe of annotated 3D objects. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 13142\u201313153 (2023)","DOI":"10.1109\/CVPR52729.2023.01263"},{"key":"23_CR10","doi-asserted-by":"crossref","unstructured":"Deng, C., et al.: Nerdi: single-view nerf synthesis with language-guided diffusion as general image priors (2022)","DOI":"10.1109\/CVPR52729.2023.01977"},{"key":"23_CR11","unstructured":"Du, C., Li, Y., Qiu, Z., Xu, C.: Stable diffusion is unstable. In: Advances in Neural Information Processing Systems, vol. 36 (2024)"},{"key":"23_CR12","doi-asserted-by":"crossref","unstructured":"Eftekhar, A., Sax, A., Malik, J., Zamir, A.: Omnidata: a scalable pipeline for making multi-task mid-level vision datasets from 3D scans. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 10786\u201310796 (2021)","DOI":"10.1109\/ICCV48922.2021.01061"},{"key":"23_CR13","unstructured":"Gal, R., et al.: An image is worth one word: personalizing text-to-image generation using textual inversion (2022)"},{"key":"23_CR14","unstructured":"Gupta, A., Xiong, W., Nie, Y., Jones, I., O\u011fuz, B.: 3DGen: triplane latent diffusion for textured mesh generation (2023)"},{"key":"23_CR15","unstructured":"Ho, J., Jain, A., Abbeel, P.: Denoising diffusion probabilistic models. In: Advances in Neural Information Processing Systems, vol. 33, pp. 6840\u20136851 (2020)"},{"key":"23_CR16","unstructured":"Ho, J., Jain, A., Abbeel, P.: Denoising diffusion probabilistic models (2020)"},{"key":"23_CR17","unstructured":"Ho, J., Salimans, T.: Classifier-free diffusion guidance. arXiv preprint arXiv:2207.12598 (2022)"},{"key":"23_CR18","doi-asserted-by":"crossref","unstructured":"Huang, N., Zhang, T., Yuan, Y., Chen, D., Zhang, S.: Customize-it-3D: high-quality 3D creation from a single image using subject-specific knowledge prior (2024)","DOI":"10.1109\/ICCV51070.2023.02086"},{"key":"23_CR19","doi-asserted-by":"crossref","unstructured":"Jiang, Y., et al.: Videobooth: diffusion-based video generation with image prompts (2023)","DOI":"10.1109\/CVPR52733.2024.00639"},{"key":"23_CR20","doi-asserted-by":"crossref","unstructured":"Kerbl, B., Kopanas, G., Leimk\u00fchler, T., Drettakis, G.: 3D gaussian splatting for real-time radiance field rendering. ACM Trans. Graph. 42(4) (2023)","DOI":"10.1145\/3592433"},{"key":"23_CR21","doi-asserted-by":"crossref","unstructured":"Kim, S.W., et al.: NeuralField-LDM: scene generation with hierarchical latent diffusion models (2023)","DOI":"10.1109\/CVPR52729.2023.00821"},{"key":"23_CR22","doi-asserted-by":"crossref","unstructured":"Kumari, N., Zhang, B., Zhang, R., Shechtman, E., Zhu, J.Y.: Multi-concept customization of text-to-image diffusion (2023)","DOI":"10.1109\/CVPR52729.2023.00192"},{"key":"23_CR23","unstructured":"Li, C., et al.: Generative AI meets 3D: a survey on text-to-3D in AIGC era. arXiv preprint arXiv:2305.06131 (2023)"},{"key":"23_CR24","unstructured":"Li, X., et al.: Advances in 3D generation: a survey (2024)"},{"key":"23_CR25","doi-asserted-by":"crossref","unstructured":"Lin, C.H., et al.: Magic3d: high-resolution text-to-3D content creation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 300\u2013309 (2023)","DOI":"10.1109\/CVPR52729.2023.00037"},{"key":"23_CR26","doi-asserted-by":"crossref","unstructured":"Liu, F., Wu, D., Wei, Y., Rao, Y., Duan, Y.: Sherpa3d: boosting high-fidelity text-to-3D generation via coarse 3D prior. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 20763\u201320774 (2024)","DOI":"10.1109\/CVPR52733.2024.01962"},{"key":"23_CR27","doi-asserted-by":"crossref","unstructured":"Liu, F., Zhang, C., Zheng, Y., Duan, Y.: Semantic ray: learning a generalizable semantic field with cross-reprojection attention. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 17386\u201317396 (2023)","DOI":"10.1109\/CVPR52729.2023.01668"},{"key":"23_CR28","unstructured":"Liu, J., et al.: A comprehensive survey on 3D content generation. arXiv preprint arXiv:2402.01166 (2024)"},{"key":"23_CR29","doi-asserted-by":"crossref","unstructured":"Long, X., et al.: Wonder3d: single image to 3D using cross-domain diffusion. arXiv preprint arXiv:2310.15008 (2023)","DOI":"10.1109\/CVPR52733.2024.00951"},{"key":"23_CR30","doi-asserted-by":"crossref","unstructured":"Lorraine, J., et al.: ATT3D: amortized text-to-3D object synthesis (2023)","DOI":"10.1109\/ICCV51070.2023.01645"},{"key":"23_CR31","doi-asserted-by":"crossref","unstructured":"Luo, S., Hu, W.: Diffusion probabilistic models for 3D point cloud generation (2021)","DOI":"10.1109\/CVPR46437.2021.00286"},{"key":"23_CR32","unstructured":"Ma, Z., et al.: Magic-me: identity-specific video customized diffusion. arXiv preprint arXiv:2402.09368 (2024)"},{"key":"23_CR33","doi-asserted-by":"crossref","unstructured":"Melas-Kyriazi, L., Rupprecht, C., Laina, I., Vedaldi, A.: Realfusion: 360$${\\deg }$$ reconstruction of any object from a single image (2023)","DOI":"10.1109\/CVPR52729.2023.00816"},{"issue":"1","key":"23_CR34","doi-asserted-by":"publisher","first-page":"99","DOI":"10.1145\/3503250","volume":"65","author":"B Mildenhall","year":"2021","unstructured":"Mildenhall, B., Srinivasan, P.P., Tancik, M., Barron, J.T., Ramamoorthi, R., Ng, R.: Nerf: representing scenes as neural radiance fields for view synthesis. Commun. ACM 65(1), 99\u2013106 (2021)","journal-title":"Commun. ACM"},{"issue":"4","key":"23_CR35","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3528223.3530127","volume":"41","author":"T M\u00fcller","year":"2022","unstructured":"M\u00fcller, T., Evans, A., Schied, C., Keller, A.: Instant neural graphics primitives with a multiresolution hash encoding. ACM Trans. Graph. (ToG) 41(4), 1\u201315 (2022)","journal-title":"ACM Trans. Graph. (ToG)"},{"key":"23_CR36","unstructured":"Nichol, A., Jun, H., Dhariwal, P., Mishkin, P., Chen, M.: Point-e: a system for generating 3D point clouds from complex prompts (2022)"},{"key":"23_CR37","unstructured":"Poole, B., Jain, A., Barron, J.T., Mildenhall, B.: Dreamfusion: text-to-3D using 2D diffusion (2022)"},{"key":"23_CR38","unstructured":"Qian, G., et al.: Magic123: one image to high-quality 3D object generation using both 2D and 3D diffusion priors (2023)"},{"key":"23_CR39","unstructured":"Radford, A., et al.: Learning transferable visual models from natural language supervision. In: International Conference on Machine Learning, pp. 8748\u20138763. PMLR (2021)"},{"key":"23_CR40","doi-asserted-by":"crossref","unstructured":"Raj, A., et al.: Dreambooth3d: subject-driven text-to-3D generation. arXiv preprint arXiv:2303.13508 (2023)","DOI":"10.1109\/ICCV51070.2023.00223"},{"key":"23_CR41","unstructured":"Reed, S., Akata, Z., Yan, X., Logeswaran, L., Schiele, B., Lee, H.: Generative adversarial text to image synthesis. In: International Conference on Machine Learning, pp. 1060\u20131069. PMLR (2016)"},{"key":"23_CR42","doi-asserted-by":"crossref","unstructured":"Ren, Y., et al.: Customize-a-video: one-shot motion customization of text-to-video diffusion models (2024)","DOI":"10.1007\/978-3-031-73024-5_20"},{"key":"23_CR43","doi-asserted-by":"crossref","unstructured":"Rombach, R., Blattmann, A., Lorenz, D., Esser, P., Ommer, B.: High-resolution image synthesis with latent diffusion models. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 10684\u201310695 (2022)","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"23_CR44","doi-asserted-by":"crossref","unstructured":"Ruiz, N., Li, Y., Jampani, V., Pritch, Y., Rubinstein, M., Aberman, K.: Dreambooth: fine tuning text-to-image diffusion models for subject-driven generation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 22500\u201322510 (2023)","DOI":"10.1109\/CVPR52729.2023.02155"},{"key":"23_CR45","doi-asserted-by":"crossref","unstructured":"Ruiz, N., et al.: Hyperdreambooth: hypernetworks for fast personalization of text-to-image models. arXiv preprint arXiv:2307.06949 (2023)","DOI":"10.1109\/CVPR52733.2024.00624"},{"key":"23_CR46","doi-asserted-by":"crossref","unstructured":"Saharia, C., et al.: Photorealistic text-to-image diffusion models with deep language understanding (2022)","DOI":"10.1145\/3528233.3530757"},{"key":"23_CR47","unstructured":"Shi, Y., Wang, P., Ye, J., Long, M., Li, K., Yang, X.: Mvdream: multi-view diffusion for 3D generation (2023)"},{"key":"23_CR48","doi-asserted-by":"crossref","unstructured":"Shue, J.R., Chan, E.R., Po, R., Ankner, Z., Wu, J., Wetzstein, G.: 3D neural field generation using triplane diffusion (2022)","DOI":"10.1109\/CVPR52729.2023.02000"},{"key":"23_CR49","unstructured":"Sohl-Dickstein, J., Weiss, E., Maheswaranathan, N., Ganguli, S.: Deep unsupervised learning using nonequilibrium thermodynamics. In: International Conference on Machine Learning, pp. 2256\u20132265. PMLR (2015)"},{"key":"23_CR50","unstructured":"Sun, J., et al.: Dreamcraft3d: hierarchical 3D generation with bootstrapped diffusion prior (2023)"},{"key":"23_CR51","doi-asserted-by":"crossref","unstructured":"Tang, J., Chen, Z., Chen, X., Wang, T., Zeng, G., Liu, Z.: LGM: large multi-view gaussian model for high-resolution 3D content creation (2024)","DOI":"10.1007\/978-3-031-73235-5_1"},{"key":"23_CR52","unstructured":"Tang, J., Ren, J., Zhou, H., Liu, Z., Zeng, G.: Dreamgaussian: generative gaussian splatting for efficient 3D content creation. arXiv preprint arXiv:2309.16653 (2023)"},{"key":"23_CR53","doi-asserted-by":"crossref","unstructured":"Tang, J., et al.: Delicate textured mesh recovery from nerf via adaptive surface refinement. arXiv preprint arXiv:2303.02091 (2023)","DOI":"10.1109\/ICCV51070.2023.01626"},{"key":"23_CR54","doi-asserted-by":"crossref","unstructured":"Tang, J., et al.: Make-it-3D: high-fidelity 3D creation from a single image with diffusion prior (2023)","DOI":"10.1109\/ICCV51070.2023.02086"},{"key":"23_CR55","unstructured":"Tang, S., Zhang, F., Chen, J., Wang, P., Furukawa, Y.: Mvdiffusion: enabling holistic multi-view image generation with correspondence-aware diffusion (2023)"},{"key":"23_CR56","unstructured":"Wang, P., Shi, Y.: Imagedream: image-prompt multi-view diffusion for 3D generation. arXiv preprint arXiv:2312.02201 (2023)"},{"key":"23_CR57","unstructured":"Wang, Z., et al.: Prolificdreamer: high-fidelity and diverse text-to-3D generation with variational score distillation (2023)"},{"key":"23_CR58","doi-asserted-by":"crossref","unstructured":"Wei, Y., et al.: Dreamvideo: composing your dream videos with customized subject and motion. arXiv preprint arXiv:2312.04433 (2023)","DOI":"10.1109\/CVPR52733.2024.00625"},{"key":"23_CR59","doi-asserted-by":"crossref","unstructured":"Woo, S., Park, B., Go, H., Kim, J.Y., Kim, C.: Harmonyview: harmonizing consistency and diversity in one-image-to-3D (2023)","DOI":"10.1109\/CVPR52733.2024.01006"},{"key":"23_CR60","doi-asserted-by":"crossref","unstructured":"Wu, J.Z., et al.: Tune-a-video: one-shot tuning of image diffusion models for text-to-video generation. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 7623\u20137633 (2023)","DOI":"10.1109\/ICCV51070.2023.00701"},{"key":"23_CR61","unstructured":"Wu, J., Gan, W., Chen, Z., Wan, S., Lin, H.: AI-generated content (AIGC): a survey. arXiv preprint arXiv:2304.06632 (2023)"},{"key":"23_CR62","doi-asserted-by":"crossref","unstructured":"Xing, J., et al.: Make-your-video: customized video generation using textual and structural guidance. arXiv preprint arXiv:2306.00943 (2023)","DOI":"10.1109\/TVCG.2024.3365804"},{"key":"23_CR63","doi-asserted-by":"crossref","unstructured":"Xing, Z., et al.: A survey on video diffusion models (2023)","DOI":"10.1145\/3696415"},{"key":"23_CR64","doi-asserted-by":"crossref","unstructured":"Xu, D., Jiang, Y., Wang, P., Fan, Z., Wang, Y., Wang, Z.: Neurallift-360: lifting an in-the-wild 2D photo to a 3D object with 360$${\\deg }$$ views (2023)","DOI":"10.1109\/CVPR52729.2023.00435"},{"issue":"4","key":"23_CR65","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3626235","volume":"56","author":"L Yang","year":"2023","unstructured":"Yang, L., et al.: Diffusion models: a comprehensive survey of methods and applications. ACM Comput. Surv. 56(4), 1\u201339 (2023)","journal-title":"ACM Comput. Surv."},{"key":"23_CR66","unstructured":"Ye, H., Zhang, J., Liu, S., Han, X., Yang, W.: IP-adapter: text compatible image prompt adapter for text-to-image diffusion models. arXiv preprint arXiv:2308.06721 (2023)"},{"key":"23_CR67","unstructured":"Zeng, Y., Lu, Y., Ji, X., Yao, Y., Zhu, H., Cao, X.: Avatarbooth: high-quality and customizable 3D human avatar generation. arXiv preprint arXiv:2306.09864 (2023)"},{"key":"23_CR68","doi-asserted-by":"crossref","unstructured":"Zhang, B., Tang, J., Niessner, M., Wonka, P.: 3dshape2vecset: a 3D shape representation for neural fields and generative diffusion models (2023)","DOI":"10.1145\/3592442"},{"key":"23_CR69","unstructured":"Zhang, C., Zhang, C., Zhang, M., Kweon, I.S.: Text-to-image diffusion model in generative AI: a survey. arXiv preprint arXiv:2303.07909 (2023)"},{"key":"23_CR70","unstructured":"Zhang, C., Zhang, C., Zhang, M., Kweon, I.S.: Text-to-image diffusion models in generative AI: a survey (2023)"},{"key":"23_CR71","doi-asserted-by":"crossref","unstructured":"Zhang, J., et al.: Repaint123: fast and high-quality one image to 3D generation with progressive controllable 2D repainting (2023)","DOI":"10.1007\/978-3-031-72698-9_18"},{"key":"23_CR72","unstructured":"Zhao, Z., et al.: Michelangelo: conditional 3D shape generation based on shape-image-text aligned latent representation (2023)"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2024"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-72907-2_23","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,11,30]],"date-time":"2024-11-30T14:19:23Z","timestamp":1732976363000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-72907-2_23"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10,31]]},"ISBN":["9783031729065","9783031729072"],"references-count":72,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-72907-2_23","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,10,31]]},"assertion":[{"value":"31 October 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Milan","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 September 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4 October 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2024.ecva.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}