{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,27]],"date-time":"2026-03-27T17:03:22Z","timestamp":1774631002825,"version":"3.50.1"},"publisher-location":"Cham","reference-count":114,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031729423","type":"print"},{"value":"9783031729430","type":"electronic"}],"license":[{"start":{"date-parts":[[2024,11,29]],"date-time":"2024-11-29T00:00:00Z","timestamp":1732838400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,11,29]],"date-time":"2024-11-29T00:00:00Z","timestamp":1732838400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-72943-0_22","type":"book-chapter","created":{"date-parts":[[2024,11,28]],"date-time":"2024-11-28T13:39:55Z","timestamp":1732801195000},"page":"381-401","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":17,"title":["Disentangled Clothed Avatar Generation from\u00a0Text Descriptions"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0009-0002-9683-8547","authenticated-orcid":false,"given":"Jionghao","family":"Wang","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2933-5667","authenticated-orcid":false,"given":"Yuan","family":"Liu","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0186-8269","authenticated-orcid":false,"given":"Zhiyang","family":"Dou","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0009-0003-0553-8125","authenticated-orcid":false,"given":"Zhengming","family":"Yu","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7282-0476","authenticated-orcid":false,"given":"Yongqing","family":"Liang","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3335-6623","authenticated-orcid":false,"given":"Cheng","family":"Lin","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8261-5337","authenticated-orcid":false,"given":"Rong","family":"Xie","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7124-5182","authenticated-orcid":false,"given":"Li","family":"Song","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0144-9489","authenticated-orcid":false,"given":"Xin","family":"Li","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2284-3952","authenticated-orcid":false,"given":"Wenping","family":"Wang","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,11,29]]},"reference":[{"key":"22_CR1","unstructured":"CLO3D (2023). https:\/\/www.clo3d.com\/"},{"key":"22_CR2","unstructured":"Marvelous designer (2023). https:\/\/www.marvelousdesigner.com"},{"issue":"4","key":"22_CR3","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3592458","volume":"42","author":"S Alexanderson","year":"2023","unstructured":"Alexanderson, S., Nagy, R., Beskow, J., Henter, G.E.: Listen, denoise, action! audio-driven motion synthesis with diffusion models. ACM Trans. Graph. (TOG) 42(4), 1\u201320 (2023)","journal-title":"ACM Trans. Graph. (TOG)"},{"key":"22_CR4","unstructured":"Anonymous: Avatarstudio: High-fidelity and animatable 3D avatar creation from text (2023)"},{"key":"22_CR5","unstructured":"Armandpour, M., Zheng, H., Sadeghian, A., Sadeghian, A., Zhou, M.: Re-imagine the negative prompt algorithm: transform 2D diffusion into 3D, alleviate Janus problem and beyond. arXiv preprint arXiv:2304.04968 (2023)"},{"key":"22_CR6","unstructured":"Bautista, M.A., et al.: Gaudi: a neural architect for immersive 3D scene generation. In: Advances in Neural Information Processing Systems, vol. 35, pp. 25102\u201325116 (2022)"},{"key":"22_CR7","unstructured":"Bergman, A.W., Kellnhofer, P., Wang, Y., Chan, E.R., Lindell, D.B., Wetzstein, G.: Generative neural articulated radiance fields. In: NeurIPS (2022)"},{"key":"22_CR8","doi-asserted-by":"crossref","unstructured":"Cao, T., Kreis, K., Fidler, S., Sharp, N., Yin, K.: Texfusion: synthesizing 3D textures with text-guided image diffusion models. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 4169\u20134181 (2023)","DOI":"10.1109\/ICCV51070.2023.00385"},{"key":"22_CR9","doi-asserted-by":"crossref","unstructured":"Cao, Y., Cao, Y.P., Han, K., Shan, Y., Wong, K.Y.K.: Dreamavatar: text-and-shape guided 3D human avatar generation via diffusion models. arXiv preprint arXiv:2304.00916 (2023)","DOI":"10.1109\/CVPR52733.2024.00097"},{"key":"22_CR10","doi-asserted-by":"crossref","unstructured":"Cao, Z., Simon, T., Wei, S.E., Sheikh, Y.: Realtime multi-person 2D pose estimation using part affinity fields. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 7291\u20137299 (2017)","DOI":"10.1109\/CVPR.2017.143"},{"key":"22_CR11","doi-asserted-by":"crossref","unstructured":"Chan, E.R., et\u00a0al.: Efficient geometry-aware 3D generative adversarial networks. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 16123\u201316133 (2022)","DOI":"10.1109\/CVPR52688.2022.01565"},{"key":"22_CR12","doi-asserted-by":"crossref","unstructured":"Chen, D.Z., Siddiqui, Y., Lee, H.Y., Tulyakov, S., Nie\u00dfner, M.: Text2tex: text-driven texture synthesis via diffusion models. arXiv preprint arXiv:2303.11396 (2023)","DOI":"10.1109\/ICCV51070.2023.01701"},{"key":"22_CR13","doi-asserted-by":"crossref","unstructured":"Chen, R., Chen, Y., Jiao, N., Jia, K.: Fantasia3D: disentangling geometry and appearance for high-quality text-to-3D content creation. arXiv preprint arXiv:2303.13873 (2023)","DOI":"10.1109\/ICCV51070.2023.02033"},{"key":"22_CR14","unstructured":"Chen, X., et al.: Learning variational motion prior for video-based motion capture. arXiv preprint arXiv:2210.15134 (2022)"},{"key":"22_CR15","doi-asserted-by":"crossref","unstructured":"Chen, X., et al.: gDNA: towards generative detailed neural avatars. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 20427\u201320437 (2022)","DOI":"10.1109\/CVPR52688.2022.01978"},{"key":"22_CR16","doi-asserted-by":"crossref","unstructured":"Chen, Y., et al.: IT3D: improved text-to-3D generation with explicit view synthesis. arXiv preprint arXiv:2308.11473 (2023)","DOI":"10.1609\/aaai.v38i2.27886"},{"key":"22_CR17","doi-asserted-by":"crossref","unstructured":"Chen, Z., et al.: L-tracing: fast light visibility estimation on neural surfaces by sphere tracing. In: Proceedings of the European Conference on Computer Vision (ECCV) (2022)","DOI":"10.1007\/978-3-031-19784-0_13"},{"key":"22_CR18","unstructured":"Cong, P., et al.: Laserhuman: language-guided scene-aware human motion generation in free environment. arXiv preprint arXiv:2403.13307 (2024)"},{"key":"22_CR19","doi-asserted-by":"crossref","unstructured":"Corona, E., Pumarola, A., Alenya, G., Pons-Moll, G., Moreno-Noguer, F.: SMPLicit: topology-aware generative model for clothed people. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 11875\u201311885 (2021)","DOI":"10.1109\/CVPR46437.2021.01170"},{"key":"22_CR20","doi-asserted-by":"crossref","unstructured":"Deitke, M., et\u00a0al.: Objaverse-XL: a universe of 10m+ 3D objects. arXiv preprint arXiv:2307.05663 (2023)","DOI":"10.1109\/CVPR52729.2023.01263"},{"key":"22_CR21","doi-asserted-by":"crossref","unstructured":"Deitke, M., et al.: Objaverse: a universe of annotated 3D objects. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 13142\u201313153 (2023)","DOI":"10.1109\/CVPR52729.2023.01263"},{"key":"22_CR22","unstructured":"Dhariwal, P., Nichol, A.: Diffusion models beat GANs on image synthesis. In: Advances in Neural Information Processing Systems, vol. 34, pp. 8780\u20138794 (2021)"},{"key":"22_CR23","doi-asserted-by":"crossref","unstructured":"Dou, Z., Chen, X., Fan, Q., Komura, T., Wang, W.: C$$\\cdot $$ASE: learning conditional adversarial skill embeddings for physics-based characters. arXiv preprint arXiv:2309.11351 (2023)","DOI":"10.1145\/3610548.3618205"},{"key":"22_CR24","doi-asserted-by":"crossref","unstructured":"Dou, Z., et al.: Tore: token reduction for efficient human mesh recovery with transformer. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 15143\u201315155 (2023)","DOI":"10.1109\/ICCV51070.2023.01390"},{"key":"22_CR25","doi-asserted-by":"crossref","unstructured":"Downs, L., et al.: Google scanned objects: a high-quality dataset of 3d scanned household items. In: 2022 International Conference on Robotics and Automation (ICRA), pp. 2553\u20132560. IEEE (2022)","DOI":"10.1109\/ICRA46639.2022.9811809"},{"key":"22_CR26","unstructured":"Feng, Y., Liu, W., Bolkart, T., Yang, J., Pollefeys, M., Black, M.J.: Learning disentangled avatars with hybrid 3D representations. arXiv preprint arXiv:2309.06441 (2023)"},{"key":"22_CR27","doi-asserted-by":"crossref","unstructured":"Feng, Y., Yang, J., Pollefeys, M., Black, M.J., Bolkart, T.: Capturing and animation of body and clothing from monocular video. In: SIGGRAPH Asia 2022 Conference Papers, pp.\u00a01\u20139 (2022)","DOI":"10.1145\/3550469.3555423"},{"key":"22_CR28","doi-asserted-by":"crossref","unstructured":"Grigorev, A., et al.: Stylepeople: a generative model of fullbody human avatars. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 5151\u20135160 (2021)","DOI":"10.1109\/CVPR46437.2021.00511"},{"key":"22_CR29","unstructured":"Guo, Y., et al.: Decorate3D: text-driven high-quality texture generation for mesh decoration in the wild. In: Thirty-Seventh Conference on Neural Information Processing Systems (2023)"},{"key":"22_CR30","doi-asserted-by":"crossref","unstructured":"Hessel, J., Holtzman, A., Forbes, M., Bras, R.L., Choi, Y.: Clipscore: a reference-free evaluation metric for image captioning. arXiv preprint arXiv:2104.08718 (2021)","DOI":"10.18653\/v1\/2021.emnlp-main.595"},{"key":"22_CR31","unstructured":"Heusel, M., Ramsauer, H., Unterthiner, T., Nessler, B., Hochreiter, S.: GANs trained by a two time-scale update rule converge to a local Nash equilibrium. In: Advances in Neural Information Processing Systems, vol. 30 (2017)"},{"key":"22_CR32","unstructured":"Ho, J., Jain, A., Abbeel, P.: Denoising diffusion probabilistic models. In: Advances in Neural Information Processing Systems, vol. 33, pp. 6840\u20136851 (2020)"},{"key":"22_CR33","unstructured":"Hong, F., Chen, Z., Lan, Y., Pan, L., Liu, Z.: EVA3D: compositional 3D human generation from 2D image collections. arXiv preprint arXiv:2210.04888 (2022)"},{"key":"22_CR34","doi-asserted-by":"crossref","unstructured":"Hong, F., Zhang, M., Pan, L., Cai, Z., Yang, L., Liu, Z.: Avatarclip: zero-shot text-driven generation and animation of 3D avatars. arXiv preprint arXiv:2205.08535 (2022)","DOI":"10.1145\/3528223.3530094"},{"key":"22_CR35","unstructured":"Hu, S., et al.: Humanliff: layer-wise 3D human generation with diffusion model. arXiv preprint arXiv:2308.09712 (2023)"},{"key":"22_CR36","doi-asserted-by":"crossref","unstructured":"Huang, S., Yang, Z., Li, L., Yang, Y., Jia, J.: Avatarfusion: zero-shot generation of clothing-decoupled 3D avatars using 2D diffusion. In: Proceedings of the 31st ACM International Conference on Multimedia, pp. 5734\u20135745 (2023)","DOI":"10.1145\/3581783.3612022"},{"key":"22_CR37","doi-asserted-by":"crossref","unstructured":"Huang, X., et al.: Humannorm: learning normal diffusion model for high-quality and realistic 3D human generation. arXiv preprint arXiv:2310.01406 (2023)","DOI":"10.1109\/CVPR52733.2024.00437"},{"key":"22_CR38","unstructured":"Huang, Y., et al.: Tech: text-guided reconstruction of lifelike clothed humans. arXiv preprint arXiv:2308.08545 (2023)"},{"key":"22_CR39","unstructured":"Huang, Y., Wang, J., Shi, Y., Qi, X., Zha, Z.J., Zhang, L.: Dreamtime: an improved optimization strategy for text-to-3D content creation. arXiv preprint arXiv:2306.12422 (2023)"},{"key":"22_CR40","unstructured":"Huang, Y., et al.: Dreamwaltz: make a scene with complex 3D animatable avatars. arXiv preprint arXiv:2305.12529 (2023)"},{"key":"22_CR41","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"18","DOI":"10.1007\/978-3-030-58565-5_2","volume-title":"Computer Vision \u2013 ECCV 2020","author":"B Jiang","year":"2020","unstructured":"Jiang, B., Zhang, J., Hong, Y., Luo, J., Liu, L., Bao, H.: BCNet: learning body and cloth shape from a single image. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.-M. (eds.) ECCV 2020. LNCS, vol. 12365, pp. 18\u201335. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58565-5_2"},{"key":"22_CR42","unstructured":"Jun, H., Nichol, A.: Shap-e: generating conditional 3D implicit functions. arXiv preprint arXiv:2305.02463 (2023)"},{"key":"22_CR43","doi-asserted-by":"crossref","unstructured":"Kanazawa, A., Tulsiani, S., Efros, A.A., Malik, J.: Learning category-specific mesh reconstruction from image collections. In: Proceedings of the European Conference on Computer Vision (ECCV), pp. 371\u2013386 (2018)","DOI":"10.1007\/978-3-030-01267-0_23"},{"key":"22_CR44","doi-asserted-by":"crossref","unstructured":"Karnewar, A., Vedaldi, A., Novotny, D., Mitra, N.J.: Holodiffusion: training a 3D diffusion model using 2D images. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 18423\u201318433 (2023)","DOI":"10.1109\/CVPR52729.2023.01767"},{"key":"22_CR45","doi-asserted-by":"crossref","unstructured":"Kim, B., et al.: Chupa: carving 3D clothed humans from skinned shape priors using 2D diffusion probabilistic models. arXiv preprint arXiv:2305.11870 (2023)","DOI":"10.1109\/ICCV51070.2023.01463"},{"key":"22_CR46","unstructured":"Kolotouros, N., Alldieck, T., Zanfir, A., Bazavan, E.G., Fieraru, M., Sminchisescu, C.: Dreamhuman: animatable 3D avatars from text. arXiv preprint arXiv:2306.09329 (2023)"},{"key":"22_CR47","unstructured":"Kynk\u00e4\u00e4nniemi, T., Karras, T., Aittala, M., Aila, T., Lehtinen, J.: The role of imagenet classes in fr$$\\backslash $$\u2019echet inception distance. arXiv preprint arXiv:2203.06026 (2022)"},{"key":"22_CR48","doi-asserted-by":"crossref","unstructured":"Li, J., Xu, C., Chen, Z., Bian, S., Yang, L., Lu, C.: Hybrik: a hybrid analytical-neural inverse kinematics solution for 3D human pose and shape estimation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 3383\u20133393 (2021)","DOI":"10.1109\/CVPR46437.2021.00339"},{"key":"22_CR49","unstructured":"Li, R., Yang, S., Ross, D.A., Kanazawa, A.: Learn to dance with AIST++: music conditioned 3D dance generation (2021)"},{"key":"22_CR50","doi-asserted-by":"crossref","unstructured":"Liao, T., et al.: TADA! text to animatable digital avatars. In: International Conference on 3D Vision (3DV) (2024)","DOI":"10.1109\/3DV62453.2024.00150"},{"key":"22_CR51","doi-asserted-by":"crossref","unstructured":"Lin, C.H., et al.: Magic3D: high-resolution text-to-3D content creation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 300\u2013309 (2023)","DOI":"10.1109\/CVPR52729.2023.00037"},{"key":"22_CR52","doi-asserted-by":"crossref","unstructured":"Liu, R., Wu, R., Van\u00a0Hoorick, B., Tokmakov, P., Zakharov, S., Vondrick, C.: Zero-1-to-3: zero-shot one image to 3D object. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 9298\u20139309 (2023)","DOI":"10.1109\/ICCV51070.2023.00853"},{"key":"22_CR53","doi-asserted-by":"crossref","unstructured":"Liu, X., et al.: HumanGaussian: text-driven 3D human generation with Gaussian splatting. arXiv preprint arXiv:2311.17061 (2023)","DOI":"10.1109\/CVPR52733.2024.00635"},{"key":"22_CR54","unstructured":"Liu, Y., et al.: Syncdreamer: generating multiview-consistent images from a single-view image. arXiv preprint arXiv:2309.03453 (2023)"},{"key":"22_CR55","unstructured":"Liu, Z., Feng, Y., Black, M.J., Nowrouzezahrai, D., Paull, L., Liu, W.: Meshdiffusion: score-based generative 3D mesh modeling. arXiv preprint arXiv:2303.08133 (2023)"},{"key":"22_CR56","doi-asserted-by":"crossref","unstructured":"Long, X., et\u00a0al.: Wonder3D: single image to 3D using cross-domain diffusion. arXiv preprint arXiv:2310.15008 (2023)","DOI":"10.1109\/CVPR52733.2024.00951"},{"key":"22_CR57","doi-asserted-by":"crossref","unstructured":"Loper, M., Mahmood, N., Romero, J., Pons-Moll, G., Black, M.J.: SMPL: a skinned multi-person linear model. ACM Trans. Graphics (Proc. SIGGRAPH Asia) 34(6), 248:1\u2013248:16 (2015)","DOI":"10.1145\/2816795.2818013"},{"key":"22_CR58","doi-asserted-by":"crossref","unstructured":"Mahmood, N., Ghorbani, N., Troje, N.F., Pons-Moll, G., Black, M.J.: AMASS: archive of motion capture as surface shapes. In: International Conference on Computer Vision, pp. 5442\u20135451 (2019)","DOI":"10.1109\/ICCV.2019.00554"},{"key":"22_CR59","doi-asserted-by":"crossref","unstructured":"Michel, O., Bar-On, R., Liu, R., Benaim, S., Hanocka, R.: Text2mesh: text-driven neural stylization for meshes. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 13492\u201313502 (2022)","DOI":"10.1109\/CVPR52688.2022.01313"},{"issue":"1","key":"22_CR60","doi-asserted-by":"publisher","first-page":"99","DOI":"10.1145\/3503250","volume":"65","author":"B Mildenhall","year":"2021","unstructured":"Mildenhall, B., Srinivasan, P.P., Tancik, M., Barron, J.T., Ramamoorthi, R., Ng, R.: NeRF: representing scenes as neural radiance fields for view synthesis. Commun. ACM 65(1), 99\u2013106 (2021)","journal-title":"Commun. ACM"},{"key":"22_CR61","unstructured":"Nichol, A., Jun, H., Dhariwal, P., Mishkin, P., Chen, M.: Point-e: a system for generating 3D point clouds from complex prompts. arXiv preprint arXiv:2212.08751 (2022)"},{"key":"22_CR62","doi-asserted-by":"crossref","unstructured":"Noguchi, A., Sun, X., Lin, S., Harada, T.: Neural articulated radiance field. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 5762\u20135772 (2021)","DOI":"10.1109\/ICCV48922.2021.00571"},{"key":"22_CR63","doi-asserted-by":"crossref","unstructured":"Noguchi, A., Sun, X., Lin, S., Harada, T.: Unsupervised learning of efficient geometry-aware neural articulated representations. arXiv:2204.08839 (2022)","DOI":"10.1007\/978-3-031-19790-1_36"},{"key":"22_CR64","doi-asserted-by":"crossref","unstructured":"Parmar, G., Zhang, R., Zhu, J.Y.: On aliased resizing and surprising subtleties in GAN evaluation. In: CVPR (2022)","DOI":"10.1109\/CVPR52688.2022.01112"},{"key":"22_CR65","doi-asserted-by":"crossref","unstructured":"Pavlakos, G., et al.: Expressive body capture: 3D hands, face, and body from a single image. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 10975\u201310985 (2019)","DOI":"10.1109\/CVPR.2019.01123"},{"key":"22_CR66","doi-asserted-by":"crossref","unstructured":"Peng, H.Y., Zhang, J.P., Guo, M.H., Cao, Y.P., Hu, S.M.: Charactergen: efficient 3D character generation from single images with multi-view pose canonicalization. arXiv preprint arXiv:2402.17214 (2024)","DOI":"10.1145\/3658217"},{"key":"22_CR67","unstructured":"Poole, B., Jain, A., Barron, J.T., Mildenhall, B.: Dreamfusion: text-to-3D using 2D diffusion. arXiv (2022)"},{"key":"22_CR68","unstructured":"Qian, G., et\u00a0al.: Magic123: one image to high-quality 3D object generation using both 2D and 3D diffusion priors. arXiv preprint arXiv:2306.17843 (2023)"},{"key":"22_CR69","doi-asserted-by":"crossref","unstructured":"Qian, Z., Wang, S., Mihajlovic, M., Geiger, A., Tang, S.: 3DGS-avatar: animatable avatars via deformable 3D Gaussian splatting. arXiv preprint arXiv:2312.09228 (2023)","DOI":"10.1109\/CVPR52733.2024.00480"},{"key":"22_CR70","unstructured":"Radford, A., et\u00a0al.: Learning transferable visual models from natural language supervision. In: International Conference on Machine Learning, pp. 8748\u20138763. PMLR (2021)"},{"key":"22_CR71","doi-asserted-by":"crossref","unstructured":"Reizenstein, J., Shapovalov, R., Henzler, P., Sbordone, L., Labatut, P., Novotny, D.: Common objects in 3D: large-scale learning and evaluation of real-life 3D category reconstruction. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 10901\u201310911 (2021)","DOI":"10.1109\/ICCV48922.2021.01072"},{"key":"22_CR72","doi-asserted-by":"crossref","unstructured":"Richardson, E., Metzer, G., Alaluf, Y., Giryes, R., Cohen-Or, D.: Texture: text-guided texturing of 3D shapes. arXiv preprint arXiv:2302.01721 (2023)","DOI":"10.1145\/3588432.3591503"},{"key":"22_CR73","doi-asserted-by":"crossref","unstructured":"Rombach, R., Blattmann, A., Lorenz, D., Esser, P., Ommer, B.: High-resolution image synthesis with latent diffusion models (2021)","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"22_CR74","unstructured":"Seo, H., Kim, H., Kim, G., Chun, S.Y.: Ditto-nerf: diffusion-based iterative text to omni-directional 3D model. arXiv preprint arXiv:2304.02827 (2023)"},{"key":"22_CR75","unstructured":"Seo, J., et al.: Let 2D diffusion model know 3D-consistency for robust text-to-3D generation. arXiv preprint arXiv:2303.07937 (2023)"},{"key":"22_CR76","unstructured":"Shen, T., Gao, J., Yin, K., Liu, M.Y., Fidler, S.: Deep marching tetrahedra: a hybrid representation for high-resolution 3D shape synthesis. In: Advances in Neural Information Processing Systems, vol. 34, pp. 6087\u20136101 (2021)"},{"issue":"1","key":"22_CR77","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3407659","volume":"40","author":"M Shi","year":"2020","unstructured":"Shi, M., et al.: Motionet: 3D human motion reconstruction from monocular video with skeleton consistency. ACM Trans. Graph. (TOG) 40(1), 1\u201315 (2020)","journal-title":"ACM Trans. Graph. (TOG)"},{"key":"22_CR78","unstructured":"Shi, Y., Wang, P., Ye, J., Long, M., Li, K., Yang, X.: MVDream: multi-view diffusion for 3D generation. arXiv preprint arXiv:2308.16512 (2023)"},{"key":"22_CR79","doi-asserted-by":"crossref","unstructured":"Sklyarova, V., Zakharov, E., Hilliges, O., Black, M.J., Thies, J.: Haar: text-conditioned generative model of 3D strand-based human hairstyles. arXiv (2023)","DOI":"10.1109\/CVPR52733.2024.00450"},{"key":"22_CR80","doi-asserted-by":"crossref","unstructured":"Svitov, D., Gudkov, D., Bashirov, R., Lempitsky, V.: Dinar: diffusion inpainting of neural textures for one-shot human avatars. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 7062\u20137072 (2023)","DOI":"10.1109\/ICCV51070.2023.00650"},{"key":"22_CR81","unstructured":"Tevet, G., Raab, S., Gordon, B., Shafir, Y., Cohen-Or, D., Bermano, A.H.: Human motion diffusion model. arXiv preprint arXiv:2312.02256 (2023)"},{"key":"22_CR82","doi-asserted-by":"crossref","unstructured":"Tsalicoglou, C., Manhardt, F., Tonioni, A., Niemeyer, M., Tombari, F.: Textmesh: generation of realistic 3D meshes from text prompts. arXiv preprint arXiv:2304.12439 (2023)","DOI":"10.1109\/3DV62453.2024.00154"},{"key":"22_CR83","doi-asserted-by":"crossref","unstructured":"Wan, W., Dou, Z., Komura, T., Wang, W., Jayaraman, D., Liu, L.: TLControl: trajectory and language control for human motion synthesis. arXiv preprint arXiv:2311.17135 (2023)","DOI":"10.1007\/978-3-031-72913-3_3"},{"key":"22_CR84","doi-asserted-by":"crossref","unstructured":"Wang, H., Du, X., Li, J., Yeh, R.A., Shakhnarovich, G.: Score Jacobian chaining: lifting pretrained 2D diffusion models for 3D generation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 12619\u201312629 (2023)","DOI":"10.1109\/CVPR52729.2023.01214"},{"key":"22_CR85","doi-asserted-by":"crossref","unstructured":"Wang, J., Chen, Z., Ling, J., Xie, R., Song, L.: 360-degree panorama generation from few unregistered NFOV images. In: Proceedings of the 31st ACM International Conference on Multimedia, pp. 6811\u20136821 (2023)","DOI":"10.1145\/3581783.3612508"},{"key":"22_CR86","unstructured":"Wang, P., Liu, L., Liu, Y., Theobalt, C., Komura, T., Wang, W.: NeuS: learning neural implicit surfaces by volume rendering for multi-view reconstruction. arXiv preprint arXiv:2106.10689 (2021)"},{"key":"22_CR87","doi-asserted-by":"crossref","unstructured":"Wang, T., et\u00a0al.: Rodin: a generative model for sculpting 3D digital avatars using diffusion. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 4563\u20134573 (2023)","DOI":"10.1109\/CVPR52729.2023.00443"},{"key":"22_CR88","doi-asserted-by":"crossref","unstructured":"Wang, W., et al.: Zolly: zoom focal length correctly for perspective-distorted human mesh reconstruction. arXiv preprint arXiv:2303.13796 (2023)","DOI":"10.1109\/ICCV51070.2023.00363"},{"key":"22_CR89","unstructured":"Wang, Y., et al.: Humancoser: layered 3D human generation via semantic-aware diffusion model. arXiv preprint arXiv:2312.05804 (2023)"},{"key":"22_CR90","unstructured":"Wang, Z., et al.: Prolificdreamer: high-fidelity and diverse text-to-3D generation with variational score distillation. arXiv preprint arXiv:2305.16213 (2023)"},{"key":"22_CR91","unstructured":"Watson, D., Chan, W., Martin-Brualla, R., Ho, J., Tagliasacchi, A., Norouzi, M.: Novel view synthesis with diffusion models. arXiv preprint arXiv:2210.04628 (2022)"},{"key":"22_CR92","unstructured":"Weng, Z., Wang, Z., Yeung, S.: Zeroavatar: Zero-shot 3D avatar generation from a single image. arXiv preprint arXiv:2305.16411 (2023)"},{"key":"22_CR93","doi-asserted-by":"crossref","unstructured":"Wu, J., et al.: HD-fusion: detailed text-to-3D generation leveraging multiple noise estimation. arXiv preprint arXiv:2307.16183 (2023)","DOI":"10.1109\/WACV57701.2024.00317"},{"key":"22_CR94","doi-asserted-by":"crossref","unstructured":"Wu, T., et al.: GPT-4v(ision) is a human-aligned evaluator for text-to-3D generation. arXiv preprint arXiv:2401.04092 (2024)","DOI":"10.1109\/CVPR52733.2024.02098"},{"key":"22_CR95","doi-asserted-by":"crossref","unstructured":"Xiu, Y., Yang, J., Cao, X., Tzionas, D., Black, M.J.: Econ: explicit clothed humans optimized via normal integration. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 512\u2013523 (2023)","DOI":"10.1109\/CVPR52729.2023.00057"},{"key":"22_CR96","doi-asserted-by":"crossref","unstructured":"Xu, Y., Yifan, W., Bergman, A.W., Chai, M., Zhou, B., Wetzstein, G.: Efficient 3D articulated human generation with layered surface volumes. arXiv preprint arXiv:2307.05462 (2023)","DOI":"10.1109\/3DV62453.2024.00047"},{"key":"22_CR97","unstructured":"Xu, Y., Yang, Z., Yang, Y.: Seeavatar: photorealistic text-to-3D avatar generation with constrained geometry and appearance. arXiv preprint arXiv:2312.08889 (2023)"},{"key":"22_CR98","doi-asserted-by":"crossref","unstructured":"Yang, Z., et al.: Synbody: synthetic dataset with layered human models for 3D human perception and modeling (2023)","DOI":"10.1109\/ICCV51070.2023.01855"},{"key":"22_CR99","unstructured":"Youwang, K., Oh, T.H.: Text-driven human avatar generation by neural re-parameterized texture optimization"},{"key":"22_CR100","doi-asserted-by":"crossref","unstructured":"Yu, C., Zhou, Q., Li, J., Zhang, Z., Wang, Z., Wang, F.: Points-to-3D: bridging the gap between sparse points and shape-controllable text-to-3D generation. In: Proceedings of the 31st ACM International Conference on Multimedia, pp. 6841\u20136850 (2023)","DOI":"10.1145\/3581783.3612232"},{"key":"22_CR101","doi-asserted-by":"crossref","unstructured":"Yu, J., Zhu, H., Jiang, L., Loy, C.C., Cai, W., Wu, W.: Painthuman: towards high-fidelity text-to-3D human texturing via denoised score distillation. arXiv preprint arXiv:2310.09458 (2023)","DOI":"10.1609\/aaai.v38i7.28504"},{"key":"22_CR102","doi-asserted-by":"crossref","unstructured":"Yu, X., et\u00a0al.: MVImgNet: a large-scale dataset of multi-view images. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 9150\u20139161 (2023)","DOI":"10.1109\/CVPR52729.2023.00883"},{"key":"22_CR103","doi-asserted-by":"crossref","unstructured":"Yu, Z., Cheng, W., Liu, X., Wu, W., Lin, K.Y.: Monohuman: animatable human neural field from monocular video. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 16943\u201316953 (2023)","DOI":"10.1109\/CVPR52729.2023.01625"},{"key":"22_CR104","unstructured":"Yu, Z., et\u00a0al.: Surf-D: high-quality surface generation for arbitrary topologies using diffusion models. arXiv preprint arXiv:2311.17050 (2023)"},{"key":"22_CR105","doi-asserted-by":"crossref","unstructured":"Zhang, H., Feng, Y., Kulits, P., Wen, Y., Thies, J., Black, M.J.: Text-guided generation and editing of compositional 3D avatars. arXiv preprint arXiv:2309.07125 (2023)","DOI":"10.1109\/3DV62453.2024.00151"},{"issue":"4","key":"22_CR106","first-page":"1","volume":"42","author":"H Zhang","year":"2023","unstructured":"Zhang, H., et al.: Learning physically simulated tennis skills from broadcast videos. ACM Trans. Graph. (TOG) 42(4), 1\u201314 (2023)","journal-title":"ACM Trans. Graph. (TOG)"},{"key":"22_CR107","doi-asserted-by":"crossref","unstructured":"Zhang, H., et al.: Avatarverse: high-quality & stable 3D avatar creation from text and pose. arXiv preprint arXiv:2308.03610 (2023)","DOI":"10.1609\/aaai.v38i7.28540"},{"key":"22_CR108","series-title":"LNCS","doi-asserted-by":"publisher","first-page":"668","DOI":"10.1007\/978-3-031-25066-8_39","volume-title":"ECCV 2022","author":"J Zhang","year":"2023","unstructured":"Zhang, J., et al.: AvatarGen: a 3D generative model for animatable human avatars. In: Karlinsky, L., Michaeli, T., Nishino, K. (eds.) ECCV 2022. LNCS, vol. 13803, pp. 668\u2013685. Springer, Cham (2023). https:\/\/doi.org\/10.1007\/978-3-031-25066-8_39"},{"key":"22_CR109","series-title":"LNCS","doi-asserted-by":"publisher","first-page":"339","DOI":"10.1007\/978-3-031-19784-0_20","volume-title":"ECCV 2022","author":"J Zhang","year":"2022","unstructured":"Zhang, J., et al.: 3D-aware semantic-guided generative model for human synthesis. In: Avidan, S., Brostow, G., Ciss\u00e9, M., Farinella, G.M., Hassner, T. (eds.) ECCV 2022. LNCS, vol. 13675, pp. 339\u2013356. Springer, Cham (2022). https:\/\/doi.org\/10.1007\/978-3-031-19784-0_20"},{"key":"22_CR110","doi-asserted-by":"crossref","unstructured":"Zhang, L., Rao, A., Agrawala, M.: Adding conditional control to text-to-image diffusion models. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 3836\u20133847 (2023)","DOI":"10.1109\/ICCV51070.2023.00355"},{"issue":"6","key":"22_CR111","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3478513.3480500","volume":"40","author":"X Zhang","year":"2021","unstructured":"Zhang, X., Srinivasan, P.P., Deng, B., Debevec, P., Freeman, W.T., Barron, J.T.: NeRFactor: neural factorization of shape and reflectance under an unknown illumination. ACM Trans. Graph. (ToG) 40(6), 1\u201318 (2021)","journal-title":"ACM Trans. Graph. (ToG)"},{"key":"22_CR112","doi-asserted-by":"crossref","unstructured":"Zhang, X., et al.: Getavatar: generative textured meshes for animatable human avatars. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 2273\u20132282 (2023)","DOI":"10.1109\/ICCV51070.2023.00216"},{"key":"22_CR113","doi-asserted-by":"crossref","unstructured":"Zhou, W., et al.: EMDM: efficient motion diffusion model for fast, high-quality motion generation. arXiv preprint arXiv:2309.11351 (2023)","DOI":"10.1007\/978-3-031-72627-9_2"},{"key":"22_CR114","unstructured":"Zhu, J., Zhuang, P.: HIFA: high-fidelity text-to-3D with advanced diffusion guidance. arXiv preprint arXiv:2305.18766 (2023)"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2024"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-72943-0_22","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,12,2]],"date-time":"2024-12-02T06:07:09Z","timestamp":1733119629000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-72943-0_22"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,11,29]]},"ISBN":["9783031729423","9783031729430"],"references-count":114,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-72943-0_22","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,11,29]]},"assertion":[{"value":"29 November 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Milan","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 September 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4 October 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2024.ecva.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}