{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,28]],"date-time":"2026-03-28T00:48:31Z","timestamp":1774658911425,"version":"3.50.1"},"reference-count":113,"publisher":"Association for Computing Machinery (ACM)","issue":"6","license":[{"start":{"date-parts":[[2024,11,19]],"date-time":"2024-11-19T00:00:00Z","timestamp":1731974400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"DOI":"10.13039\/100018694","name":"HORIZON EUROPE Marie Sklodowska-Curie Actions","doi-asserted-by":"publisher","award":["860768"],"award-info":[{"award-number":["860768"]}],"id":[{"id":"10.13039\/100018694","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":["ACM Trans. Graph."],"published-print":{"date-parts":[[2024,12,19]]},"abstract":"<jats:p>\n            Generating\n            <jats:bold>personalized<\/jats:bold>\n            3D avatars is crucial for AR\/VR. However, recent text-to-3D methods that generate avatars for celebrities or fictional characters, struggle with everyday people. Methods for faithful reconstruction typically require full-body images in controlled settings. What if users could just upload their personal \"OOTD\" (Outfit Of The Day) photo collection and get a faithful avatar in return? The challenge is that such casual photo collections contain diverse poses, challenging viewpoints, cropped views, and occlusion (albeit with a consistent outfit, accessories and hairstyle). We address this novel \"\n            <jats:bold>Album2Human<\/jats:bold>\n            \" task by developing\n            <jats:bold>PuzzleAvatar<\/jats:bold>\n            , a novel model that generates a faithful 3D avatar (in a canonical pose) from a personal OOTD album, bypassing the challenging estimation of body and camera pose. To this end, we fine-tune a foundational vision-language model (VLM) on such photos, encoding the appearance, identity, garments, hairstyles, and accessories of a person into separate learned tokens, instilling these cues into the VLM. In effect, we exploit the learned tokens as \"puzzle pieces\" from which we assemble a faithful, personalized 3D avatar. Importantly, we can customize avatars by simply inter-changing tokens. As a benchmark for this new task, we create a new dataset, called\n            <jats:bold>PuzzleIOI<\/jats:bold>\n            , with 41 subjects in a total of nearly 1k OOTD configurations, in challenging partial photos with paired ground-truth 3D bodies. Evaluation shows that PuzzleAvatar not only has high reconstruction accuracy, outperforming TeCH and MVDreamBooth, but also a unique scalability to album photos, and demonstrating strong robustness. Our code and data are publicly available for research purpose at\n            <jats:italic>puzzleavatar.is.tue.mpg.de<\/jats:italic>\n          <\/jats:p>","DOI":"10.1145\/3687771","type":"journal-article","created":{"date-parts":[[2024,11,19]],"date-time":"2024-11-19T15:46:04Z","timestamp":1732031164000},"page":"1-15","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":16,"title":["PuzzleAvatar: Assembling 3D Avatars from Personal Albums"],"prefix":"10.1145","volume":"43","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-0165-5909","authenticated-orcid":false,"given":"Yuliang","family":"Xiu","sequence":"first","affiliation":[{"name":"Max Planck Institute for Intelligent Systems, T\u00fcbingen, Germany"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8767-0848","authenticated-orcid":false,"given":"Yufei","family":"Ye","sequence":"additional","affiliation":[{"name":"Carnegie Mellon University, Pittsburgh, United States of America"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-4599-3206","authenticated-orcid":false,"given":"Zhen","family":"Liu","sequence":"additional","affiliation":[{"name":"Max Planck Institute for Intelligent Systems, T\u00fcbingen, Germany"},{"name":"Mila, Universit\u00e9 de Montr\u00e9al, T\u00fcbingen, Germany"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7963-2582","authenticated-orcid":false,"given":"Dimitris","family":"Tzionas","sequence":"additional","affiliation":[{"name":"University of Amsterdam, Amsterdam, Netherlands"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6077-4540","authenticated-orcid":false,"given":"Michael J.","family":"Black","sequence":"additional","affiliation":[{"name":"Max Planck Institute for Intelligent Systems, T\u00fcbingen, Germany"}]}],"member":"320","published-online":{"date-parts":[[2024,11,19]]},"reference":[{"key":"e_1_2_1_1_1","volume-title":"George Vogiatzis, Engin Tola, and Anders Bjorholm Dahl.","author":"Aan\u00e6s Henrik","year":"2016","unstructured":"Henrik Aan\u00e6s, Rasmus Ramsb\u00f8l Jensen, George Vogiatzis, Engin Tola, and Anders Bjorholm Dahl. 2016. Large-scale data for multiple-view stereopsis. International Journal of Computer Vision (IJCV) (2016)."},{"key":"e_1_2_1_2_1","volume-title":"Neural Jacobian Fields: Learning Intrinsic Mappings of Arbitrary Meshes. International Conference on Computer Graphics and Interactive Techniques (SIGGRAPH)","author":"Aigerman Noam","year":"2022","unstructured":"Noam Aigerman, Kunal Gupta, Vladimir G Kim, Siddhartha Chaudhuri, Jun Saito, and Thibault Groueix. 2022. Neural Jacobian Fields: Learning Intrinsic Mappings of Arbitrary Meshes. International Conference on Computer Graphics and Interactive Techniques (SIGGRAPH) (2022)."},{"key":"e_1_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1109\/3DV.2018.00022"},{"key":"e_1_2_1_4_1","doi-asserted-by":"crossref","unstructured":"Thiemo Alldieck Marcus A. Magnor Weipeng Xu Christian Theobalt and Gerard Pons-Moll. 2018b. Video Based Reconstruction of 3D People Models. In Computer Vision and Pattern Recognition (CVPR).","DOI":"10.1109\/CVPR.2018.00875"},{"key":"e_1_2_1_5_1","volume-title":"International Conference on Computer Vision (ICCV)","author":"Athanasiou Nikos","year":"2023","unstructured":"Nikos Athanasiou, Mathis Petrovich, Michael J. Black, and G\u00fcl Varol. 2023. SINC: Spatial Composition of 3D Human Motions for Simultaneous Action Generation. International Conference on Computer Vision (ICCV) (2023)."},{"key":"e_1_2_1_6_1","volume-title":"SIGGRAPH Asia 2023 Conference Papers (SA '23)","author":"Avrahami Omri","year":"2023","unstructured":"Omri Avrahami, Kfir Aberman, Ohad Fried, Daniel Cohen-Or, and Dani Lischinski. 2023. Break-A-Scene: Extracting Multiple Concepts from a Single Image. In SIGGRAPH Asia 2023 Conference Papers (SA '23)."},{"key":"e_1_2_1_7_1","volume-title":"Davison","author":"Bae Gwangbin","year":"2024","unstructured":"Gwangbin Bae and Andrew J. Davison. 2024. Rethinking Inductive Biases for Surface Normal Estimation. In Computer Vision and Pattern Recognition (CVPR)."},{"key":"e_1_2_1_8_1","volume-title":"Nope-nerf: Optimising neural radiance field with no pose prior. In Computer Vision and Pattern Recognition (CVPR).","author":"Bian Wenjing","year":"2023","unstructured":"Wenjing Bian, Zirui Wang, Kejie Li, Jia-Wang Bian, and Victor Adrian Prisacariu. 2023. Nope-nerf: Optimising neural radiance field with no pose prior. In Computer Vision and Pattern Recognition (CVPR)."},{"key":"e_1_2_1_9_1","unstructured":"Tim Brooks Bill Peebles Connor Holmes Will DePue Yufei Guo Li Jing David Schnurr Joe Taylor Troy Luhman Eric Luhman Clarence Ng Ricky Wang and Aditya Ramesh. 2024. Video generation models as world simulators. (2024)."},{"key":"e_1_2_1_10_1","volume-title":"European Conference on Computer Vision (ECCV).","author":"Cai Zhongang","year":"2022","unstructured":"Zhongang Cai, Daxuan Ren, Ailing Zeng, Zhengyu Lin, Tao Yu, Wenjia Wang, Xiangyu Fan, Yang Gao, Yifan Yu, Liang Pan, Fangzhou Hong, Mingyuan Zhang, Chen Change Loy, Lei Yang, and Ziwei Liu. 2022. HuMMan: Multi-modal 4D human dataset for versatile sensing and modeling. In European Conference on Computer Vision (ECCV)."},{"key":"e_1_2_1_11_1","volume-title":"DreamAvatar: Text-and-Shape Guided 3D Human Avatar Generation via Diffusion Models. Computer Vision and Pattern Recognition (CVPR)","author":"Cao Yukang","year":"2024","unstructured":"Yukang Cao, Yan-Pei Cao, Kai Han, Ying Shan, and Kwan-Yee K Wong. 2024. DreamAvatar: Text-and-Shape Guided 3D Human Avatar Generation via Diffusion Models. Computer Vision and Pattern Recognition (CVPR) (2024)."},{"key":"e_1_2_1_12_1","volume-title":"Personalized Restoration via Dual-Pivot Tuning. arXiv preprint arXiv:2312.17234","author":"Chari Pradyumna","year":"2023","unstructured":"Pradyumna Chari, Sizhuo Ma, Daniil Ostashev, Achuta Kadambi, Gurunandan Krishnan, Jian Wang, and Kfir Aberman. 2023. Personalized Restoration via Dual-Pivot Tuning. arXiv preprint arXiv:2312.17234 (2023)."},{"key":"e_1_2_1_13_1","volume-title":"Total Selfie: Generating Full-Body Selfies. In Computer Vision and Pattern Recognition (CVPR).","author":"Chen Bowei","year":"2024","unstructured":"Bowei Chen, Brian Curless, Ira Kemelmacher-Shlizerman, and Steve Seitz. 2024b. Total Selfie: Generating Full-Body Selfies. In Computer Vision and Pattern Recognition (CVPR)."},{"key":"e_1_2_1_14_1","volume-title":"Ultraman: Single Image 3D Human Reconstruction with Ultra Speed and Detail. arXiv preprint arXiv:2403.12028","author":"Chen Mingjin","year":"2024","unstructured":"Mingjin Chen, Junhao Chen, Xiaojun Ye, Huan-ang Gao, Xiaoxue Chen, Zhaoxin Fan, and Hao Zhao. 2024a. Ultraman: Single Image 3D Human Reconstruction with Ultra Speed and Detail. arXiv preprint arXiv:2403.12028 (2024)."},{"key":"e_1_2_1_15_1","volume-title":"International Conference on Computer Vision (ICCV).","author":"Chen Rui","year":"2023","unstructured":"Rui Chen, Yongwei Chen, Ningxin Jiao, and Kui Jia. 2023. Fantasia3D: Disentangling Geometry and Appearance for High-quality Text-to-3D Content Creation. In International Conference on Computer Vision (ICCV)."},{"key":"e_1_2_1_16_1","volume-title":"DNA-Rendering: A Diverse Neural Actor Repository for High-Fidelity Human-centric Rendering. In International Conference on Computer Vision (ICCV).","author":"Cheng Wei","year":"2023","unstructured":"Wei Cheng, Ruixiang Chen, Wanqi Yin, Siming Fan, Keyu Chen, Honglin He, Huiwen Luo, Zhongang Cai, Jingbo Wang, Yang Gao, Zhengming Yu, Zhengyu Lin, Daxuan Ren, Lei Yang, Ziwei Liu, Chen Change Loy, Chen Qian, Wayne Wu, Dahua Lin, Bo Dai, and Kwan-Yee Lin. 2023. DNA-Rendering: A Diverse Neural Actor Repository for High-Fidelity Human-centric Rendering. In International Conference on Computer Vision (ICCV)."},{"key":"e_1_2_1_17_1","volume-title":"Black","author":"Choutas Vasileios","year":"2022","unstructured":"Vasileios Choutas, Lea M\u00fcller, Chun-Hao P. Huang, Siyu Tang, Dimitrios Tzionas, and Michael J. Black. 2022. Accurate 3D Body Shape Regression using Metric and Semantic Attributes. In Computer Vision and Pattern Recognition (CVPR)."},{"key":"e_1_2_1_18_1","volume-title":"Objaverse: A universe of annotated 3d objects. In Computer Vision and Pattern Recognition (CVPR).","author":"Deitke Matt","year":"2023","unstructured":"Matt Deitke, Dustin Schwenk, Jordi Salvador, Luca Weihs, Oscar Michel, Eli VanderBilt, Ludwig Schmidt, Kiana Ehsani, Aniruddha Kembhavi, and Ali Farhadi. 2023. Objaverse: A universe of annotated 3d objects. In Computer Vision and Pattern Recognition (CVPR)."},{"key":"e_1_2_1_19_1","volume-title":"International Conference on 3D Vision (3DV).","author":"Feng Yao","unstructured":"Yao Feng, Vasileios Choutas, Timo Bolkart, Dimitrios Tzionas, and Michael J. Black. 2021. Collaborative Regression of Expressive Bodies using Moderation. In International Conference on 3D Vision (3DV)."},{"key":"e_1_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1145\/3550469.3555423"},{"key":"e_1_2_1_21_1","volume-title":"European Conference on Computer Vision (ECCV).","author":"Fu Xiao","year":"2024","unstructured":"Xiao Fu, Wei Yin, Mu Hu, Kaixuan Wang, Yuexin Ma, Ping Tan, Shaojie Shen, Dahua Lin, and Xiaoxiao Long. 2024b. GeoWizard: Unleashing the Diffusion Priors for 3D Geometry Estimation from a Single Image. In European Conference on Computer Vision (ECCV)."},{"key":"e_1_2_1_22_1","volume-title":"COLMAP-Free 3D Gaussian Splatting. Computer Vision and Pattern Recognition (CVPR)","author":"Fu Yang","year":"2024","unstructured":"Yang Fu, Sifei Liu, Amey Kulkarni, Jan Kautz, Alexei A Efros, and Xiaolong Wang. 2024a. COLMAP-Free 3D Gaussian Splatting. Computer Vision and Pattern Recognition (CVPR) (2024)."},{"key":"e_1_2_1_23_1","doi-asserted-by":"crossref","unstructured":"Gege Gao Weiyang Liu Anpei Chen Andreas Geiger and Bernhard Sch\u00f6lkopf. 2024b. GraphDreamer: Compositional 3D Scene Synthesis from Scene Graphs. In Computer Vision and Pattern Recognition (CVPR).","DOI":"10.1109\/CVPR52733.2024.02012"},{"key":"e_1_2_1_24_1","volume-title":"Conference on Neural Information Processing Systems (NeurIPS)","author":"Gao Jun","year":"2020","unstructured":"Jun Gao, Wenzheng Chen, Tommy Xiang, Alec Jacobson, Morgan McGuire, and Sanja Fidler. 2020. Learning deformable tetrahedral meshes for 3d reconstruction. Conference on Neural Information Processing Systems (NeurIPS) (2020)."},{"key":"e_1_2_1_25_1","doi-asserted-by":"crossref","unstructured":"Xiangjun Gao Xiaoyu Li Chaopeng Zhang Qi Zhang Yanpei Cao Ying Shan and Long Quan. 2024a. ConTex-Human: Free-View Rendering of Human from a Single Image with Texture-Consistent Synthesis. In Computer Vision and Pattern Recognition (CVPR).","DOI":"10.1109\/CVPR52733.2024.00961"},{"key":"e_1_2_1_26_1","volume-title":"International Conference on Computer Vision (ICCV).","author":"Gralnik Omer","year":"2023","unstructured":"Omer Gralnik, Guy Gafni, and Ariel Shamir. 2023. Semantify: Simplifying the Control of 3D Morphable Models Using CLIP. In International Conference on Computer Vision (ICCV)."},{"key":"e_1_2_1_27_1","volume-title":"TetSphere Splatting: Representing High-Quality Geometry with Lagrangian Volumetric Meshes. arXiv preprint arXiv:2405.20283","author":"Guo Minghao","year":"2024","unstructured":"Minghao Guo, Bohan Wang, Kaiming He, and Wojciech Matusik. 2024. TetSphere Splatting: Representing High-Quality Geometry with Lagrangian Volumetric Meshes. arXiv preprint arXiv:2405.20283 (2024)."},{"key":"e_1_2_1_28_1","volume-title":"Computer Vision and Pattern Recognition (CVPR)","author":"Habermann Marc","unstructured":"Marc Habermann, Weipeng Xu, Michael Zollhoefer, Gerard Pons-Moll, and Christian Theobalt. 2020. DeepCap: Monocular Human Performance Capture Using Weak Supervision. In Computer Vision and Pattern Recognition (CVPR). IEEE."},{"key":"e_1_2_1_29_1","volume-title":"Avatarclip: Zero-shot text-driven generation and animation of 3d avatars. Transactions on Graphics (TOG)","author":"Hong Fangzhou","year":"2022","unstructured":"Fangzhou Hong, Mingyuan Zhang, Liang Pan, Zhongang Cai, Lei Yang, and Ziwei Liu. 2022. Avatarclip: Zero-shot text-driven generation and animation of 3d avatars. Transactions on Graphics (TOG) (2022)."},{"key":"e_1_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1145\/3641519.3657428"},{"key":"e_1_2_1_31_1","volume-title":"HumanNorm: Learning normal diffusion model for high-quality and realistic 3d human generation. Computer Vision and Pattern Recognition (CVPR)","author":"Huang Xin","year":"2024","unstructured":"Xin Huang, Ruizhi Shao, Qi Zhang, Hongwen Zhang, Ying Feng, Yebin Liu, and Qing Wang. 2024a. HumanNorm: Learning normal diffusion model for high-quality and realistic 3d human generation. Computer Vision and Pattern Recognition (CVPR) (2024)."},{"key":"e_1_2_1_32_1","volume-title":"Conference on Neural Information Processing Systems (NeurIPS).","author":"Huang Yukun","year":"2023","unstructured":"Yukun Huang, Jianan Wang, Ailing Zeng, He Cao, Xianbiao Qi, Yukai Shi, Zheng-Jun Zha, and Lei Zhang. 2023a. DreamWaltz: Make a Scene with Complex 3D Animatable Avatars. In Conference on Neural Information Processing Systems (NeurIPS)."},{"key":"e_1_2_1_33_1","volume-title":"One-shot Implicit Animatable Avatars with Model-based Priors. In International Conference on Computer Vision (ICCV).","author":"Huang Yangyi","year":"2023","unstructured":"Yangyi Huang, Hongwei Yi, Weiyang Liu, Haofan Wang, Boxi Wu, Wenxiao Wang, Binbin Lin, Debing Zhang, and Deng Cai. 2023b. One-shot Implicit Animatable Avatars with Model-based Priors. In International Conference on Computer Vision (ICCV)."},{"key":"e_1_2_1_34_1","volume-title":"TeCH: Text-guided Reconstruction of Lifelike Clothed Humans. In International Conference on 3D Vision (3DV).","author":"Huang Yangyi","year":"2024","unstructured":"Yangyi Huang, Hongwei Yi, Yuliang Xiu, Tingting Liao, Jiaxiang Tang, Deng Cai, and Justus Thies. 2024b. TeCH: Text-guided Reconstruction of Lifelike Clothed Humans. In International Conference on 3D Vision (3DV)."},{"key":"e_1_2_1_35_1","volume-title":"ARCH: Animatable Reconstruction of Clothed Humans. In Computer Vision and Pattern Recognition (CVPR).","author":"Huang Zeng","year":"2020","unstructured":"Zeng Huang, Yuanlu Xu, Christoph Lassner, Hao Li, and Tony Tung. 2020. ARCH: Animatable Reconstruction of Clothed Humans. In Computer Vision and Pattern Recognition (CVPR)."},{"key":"e_1_2_1_36_1","volume-title":"HumanRF: High-Fidelity Neural Radiance Fields for Humans in Motion. Transactions on Graphics (TOG)","author":"I\u015f\u0131k Mustafa","year":"2023","unstructured":"Mustafa I\u015f\u0131k, Martin R\u00fcnz, Markos Georgopoulos, Taras Khakhulin, Jonathan Starck, Lourdes Agapito, and Matthias Nie\u00dfner. 2023. HumanRF: High-Fidelity Neural Radiance Fields for Humans in Motion. Transactions on Graphics (TOG) (2023)."},{"key":"e_1_2_1_37_1","doi-asserted-by":"crossref","unstructured":"Ajay Jain Ben Mildenhall Jonathan T. Barron Pieter Abbeel and Ben Poole. 2022. Zero-Shot Text-Guided Object Generation with Dream Fields. In Computer Vision and Pattern Recognition (CVPR).","DOI":"10.1109\/CVPR52688.2022.00094"},{"key":"e_1_2_1_38_1","volume-title":"Noise-free Score Distillation. In International Conference on Learning Representations (ICLR).","author":"Katzir Oren","year":"2024","unstructured":"Oren Katzir, Or Patashnik, Daniel Cohen-Or, and Dani Lischinski. 2024. Noise-free Score Distillation. In International Conference on Learning Representations (ICLR)."},{"key":"e_1_2_1_39_1","volume-title":"Rodrigo Caye Daudt, and Konrad Schindler","author":"Ke Bingxin","year":"2024","unstructured":"Bingxin Ke, Anton Obukhov, Shengyu Huang, Nando Metzger, Rodrigo Caye Daudt, and Konrad Schindler. 2024. Repurposing diffusion-based image generators for monocular depth estimation. Computer Vision and Pattern Recognition (CVPR) (2024)."},{"key":"e_1_2_1_40_1","volume-title":"GALA: Generating Animatable Layered Assets from a Single Scan.","author":"Kim Taeksoo","year":"2024","unstructured":"Taeksoo Kim, Byungjun Kim, Shunsuke Saito, and Hanbyul Joo. 2024. GALA: Generating Animatable Layered Assets from a Single Scan."},{"key":"e_1_2_1_41_1","volume-title":"International Conference on Computer Vision (ICCV).","author":"Kirillov Alexander","year":"2023","unstructured":"Alexander Kirillov, Eric Mintun, Nikhila Ravi, Hanzi Mao, Chloe Rolland, Laura Gustafson, Tete Xiao, Spencer Whitehead, Alexander C Berg, Wan-Yen Lo, et al. 2023. Segment anything. In International Conference on Computer Vision (ICCV)."},{"key":"e_1_2_1_42_1","doi-asserted-by":"crossref","unstructured":"Peter Kocsis Vincent Sitzmann and Matthias Nie\u00dfner. 2024. Intrinsic Image Diffusion for Single-view Material Estimation. In Computer Vision and Pattern Recognition (CVPR).","DOI":"10.1109\/CVPR52733.2024.00497"},{"key":"e_1_2_1_43_1","volume-title":"Conference on Neural Information Processing Systems (NeurIPS).","author":"Kolotouros Nikos","year":"2023","unstructured":"Nikos Kolotouros, Thiemo Alldieck, Andrei Zanfir, Eduard Gabriel Bazavan, Mihai Fieraru, and Cristian Sminchisescu. 2023. DreamHuman: Animatable 3D Avatars from Text. In Conference on Neural Information Processing Systems (NeurIPS)."},{"key":"e_1_2_1_44_1","volume-title":"Multi-Concept Customization of Text-to-Image Diffusion. Computer Vision and Pattern Recognition (CVPR)","author":"Kumari Nupur","year":"2023","unstructured":"Nupur Kumari, Bingliang Zhang, Richard Zhang, Eli Shechtman, and Jun-Yan Zhu. 2023. Multi-Concept Customization of Text-to-Image Diffusion. Computer Vision and Pattern Recognition (CVPR) (2023)."},{"key":"e_1_2_1_45_1","volume-title":"International Conference on Machine Learning (ICML). PMLR.","author":"Li Junnan","year":"2022","unstructured":"Junnan Li, Dongxu Li, Caiming Xiong, and Steven Hoi. 2022. Blip: Bootstrapping language-image pre-training for unified vision-language understanding and generation. In International Conference on Machine Learning (ICML). PMLR."},{"key":"e_1_2_1_46_1","volume-title":"European Conference on Computer Vision (ECCV).","author":"Li Ruilong","year":"2020","unstructured":"Ruilong Li, Yuliang Xiu, Shunsuke Saito, Zeng Huang, Kyle Olszewski, and Hao Li. 2020. Monocular real-time volumetric performance capture. In European Conference on Computer Vision (ECCV)."},{"key":"e_1_2_1_47_1","volume-title":"International Conference on Computer Vision (ICCV).","author":"Li Ruilong","year":"2021","unstructured":"Ruilong Li, Shan Yang, David A Ross, and Angjoo Kanazawa. 2021. Ai choreographer: Music conditioned 3d dance generation with aist+ +. In International Conference on Computer Vision (ICCV)."},{"key":"e_1_2_1_48_1","unstructured":"Zhen Li Mingdeng Cao Xintao Wang Zhongang Qi Ming-Ming Cheng and Ying Shan. 2024. PhotoMaker: Customizing Realistic Human Photos via Stacked ID Embedding. In Computer Vision and Pattern Recognition (CVPR)."},{"key":"e_1_2_1_49_1","volume-title":"Test-Time Robust Personalization for Federated Learning. In International Conference on Learning Representations (ICLR).","author":"Liangze Jiang","year":"2023","unstructured":"Jiang Liangze and Tao Lin. 2023. Test-Time Robust Personalization for Federated Learning. In International Conference on Learning Representations (ICLR)."},{"key":"e_1_2_1_50_1","volume-title":"International Conference on 3D Vision (3DV).","author":"Liao Tingting","unstructured":"Tingting Liao, Hongwei Yi, Yuliang Xiu, Jiaxiang Tang, Yangyi Huang, Justus Thies, and Michael J. Black. 2024. TADA! Text to Animatable Digital Avatars. In International Conference on 3D Vision (3DV)."},{"key":"e_1_2_1_51_1","volume-title":"International Conference on Computer Vision (ICCV).","author":"Lin Chen-Hsuan","year":"2021","unstructured":"Chen-Hsuan Lin, Wei-Chiu Ma, Antonio Torralba, and Simon Lucey. 2021. Barf: Bundle-adjusting neural radiance fields. In International Conference on Computer Vision (ICCV)."},{"key":"e_1_2_1_52_1","volume-title":"FastHuman: Reconstructing High-Quality Clothed Human in Minutes. In International Conference on 3D Vision, 3DV.","author":"Lin Lixiang","year":"2024","unstructured":"Lixiang Lin, Songyou Peng, Qijun Gan, and Jianke Zhu. 2024. FastHuman: Reconstructing High-Quality Clothed Human in Minutes. In International Conference on 3D Vision, 3DV."},{"key":"e_1_2_1_53_1","volume-title":"International Conference on Computer Vision (ICCV).","author":"Liu Ruoshi","year":"2023","unstructured":"Ruoshi Liu, Rundi Wu, Basile Van Hoorick, Pavel Tokmakov, Sergey Zakharov, and Carl Vondrick. 2023. Zero-1-to-3: Zero-shot One Image to 3D Object. In International Conference on Computer Vision (ICCV)."},{"key":"e_1_2_1_54_1","volume-title":"International Conference on Learning Representations (ICLR)","author":"Liu Weiyang","year":"2024","unstructured":"Weiyang Liu, Zeju Qiu, Yao Feng, Yuliang Xiu, Yuxuan Xue, Longhui Yu, Haiwen Feng, Zhen Liu, Juyeon Heo, Songyou Peng, Yandong Wen, Michael J. Black, Adrian Weller, and Bernhard Sch\u00f6lkopf. 2024b. Parameter-Efficient Orthogonal Finetuning via Butterfly Factorization. International Conference on Learning Representations (ICLR) (2024)."},{"key":"e_1_2_1_55_1","volume-title":"International Conference on Learning Representations (ICLR)","author":"Liu Zhen","year":"2024","unstructured":"Zhen Liu, Yao Feng, Yuliang Xiu, Weiyang Liu, Liam Paull, Michael J. Black, and Bernhard Sch\u00f6lkopf. 2024a. Ghost on The Shell: An Expressive Representation of General 3D Shapes. International Conference on Learning Representations (ICLR) (2024)."},{"key":"e_1_2_1_56_1","unstructured":"Simian Luo Yiqin Tan Longbo Huang Jian Li and Hang Zhao. 2023. Latent Consistency Models: Synthesizing High-Resolution Images with Few-Step Inference. arXiv:2310.04378"},{"key":"e_1_2_1_57_1","volume-title":"Black","author":"Ma Qianli","year":"2020","unstructured":"Qianli Ma, Jinlong Yang, Anurag Ranjan, Sergi Pujades, Gerard Pons-Moll, Siyu Tang, and Michael J. Black. 2020. Learning to Dress 3D People in Generative Clothing. In Computer Vision and Pattern Recognition (CVPR)."},{"key":"e_1_2_1_58_1","volume-title":"International Conference on Computer Vision (ICCV).","author":"Mahmood Naureen","unstructured":"Naureen Mahmood, Nima Ghorbani, Nikolaus F. Troje, Gerard Pons-Moll, and Michael J. Black. 2019. AMASS: Archive of Motion Capture as Surface Shapes. In International Conference on Computer Vision (ICCV)."},{"key":"e_1_2_1_59_1","volume-title":"Jonathan T Barron, Alexey Dosovitskiy, and Daniel Duckworth.","author":"Martin-Brualla Ricardo","year":"2021","unstructured":"Ricardo Martin-Brualla, Noha Radwan, Mehdi SM Sajjadi, Jonathan T Barron, Alexey Dosovitskiy, and Daniel Duckworth. 2021. Nerf in the wild: Neural radiance fields for unconstrained photo collections. In Computer Vision and Pattern Recognition (CVPR)."},{"key":"e_1_2_1_60_1","doi-asserted-by":"crossref","unstructured":"Andreas Meuleman Yu-Lun Liu Chen Gao Jia-Bin Huang Changil Kim Min H Kim and Johannes Kopf. 2023. Progressively optimized local radiance fields for robust view synthesis. In Computer Vision and Pattern Recognition (CVPR).","DOI":"10.1109\/CVPR52729.2023.01587"},{"key":"e_1_2_1_61_1","volume-title":"Nerf: Representing scenes as neural radiance fields for view synthesis. Commun. ACM","author":"Mildenhall Ben","year":"2021","unstructured":"Ben Mildenhall, Pratul P Srinivasan, Matthew Tancik, Jonathan T Barron, Ravi Ramamoorthi, and Ren Ng. 2021. Nerf: Representing scenes as neural radiance fields for view synthesis. Commun. ACM (2021)."},{"key":"e_1_2_1_62_1","unstructured":"OpenAI. 2023. GPT-4V(ision) system card."},{"key":"e_1_2_1_63_1","doi-asserted-by":"publisher","DOI":"10.1002\/cav.2101"},{"key":"e_1_2_1_64_1","volume-title":"Joachim Tesch, David Hoffmann, Shashank Tripathi, and Michael J. Black.","author":"Patel Priyanka","year":"2021","unstructured":"Priyanka Patel, Chun-Hao Paul Huang, Joachim Tesch, David Hoffmann, Shashank Tripathi, and Michael J. Black. 2021. AGORA: Avatars in Geography Optimized for Regression Analysis. In Computer Vision and Pattern Recognition (CVPR)."},{"key":"e_1_2_1_65_1","volume-title":"Dimitrios Tzionas, and Michael J Black.","author":"Pavlakos Georgios","year":"2019","unstructured":"Georgios Pavlakos, Vasileios Choutas, Nima Ghorbani, Timo Bolkart, Ahmed AA Osman, Dimitrios Tzionas, and Michael J Black. 2019. Expressive body capture: 3d hands, face, and body from a single image. In Computer Vision and Pattern Recognition (CVPR)."},{"key":"e_1_2_1_66_1","volume-title":"Implicit Neural Representations with Structured Latent Codes for Human Body Modeling. Transactions on Pattern Analysis and Machine Intelligence (TPAMI)","author":"Peng Sida","year":"2023","unstructured":"Sida Peng, Chen Geng, Yuanqing Zhang, Yinghao Xu, Qianqian Wang, Qing Shuai, Xiaowei Zhou, and Hujun Bao. 2023. Implicit Neural Representations with Structured Latent Codes for Human Body Modeling. Transactions on Pattern Analysis and Machine Intelligence (TPAMI) (2023)."},{"key":"e_1_2_1_67_1","volume-title":"International Conference on Learning Representations (ICLR).","author":"Poole Ben","year":"2023","unstructured":"Ben Poole, Ajay Jain, Jonathan T Barron, and Ben Mildenhall. 2023. DreamFusion: Text-to-3d using 2d diffusion. In International Conference on Learning Representations (ICLR)."},{"key":"e_1_2_1_68_1","volume-title":"Learning Transferable Visual Models From Natural Language Supervision. In International Conference on Machine Learning (ICML). PMLR.","author":"Radford Alec","year":"2021","unstructured":"Alec Radford, Jong Wook Kim, Chris Hallacy, Aditya Ramesh, Gabriel Goh, Sandhini Agarwal, Girish Sastry, Amanda Askell, Pamela Mishkin, Jack Clark, Gretchen Krueger, and Ilya Sutskever. 2021. Learning Transferable Visual Models From Natural Language Supervision. In International Conference on Machine Learning (ICML). PMLR."},{"key":"e_1_2_1_69_1","volume-title":"International Conference on Computer Vision (ICCV).","author":"Raj Amit","year":"2023","unstructured":"Amit Raj, Srinivas Kaza, Ben Poole, Michael Niemeyer, Ben Mildenhall, Nataniel Ruiz, Shiran Zada, Kfir Aberman, Michael Rubenstein, Jonathan Barron, Yuanzhen Li, and Varun Jampani. 2023. DreamBooth3D: Subject-Driven Text-to-3D Generation. In International Conference on Computer Vision (ICCV)."},{"key":"e_1_2_1_70_1","unstructured":"Tianhe Ren Shilong Liu Ailing Zeng Jing Lin Kunchang Li He Cao Jiayu Chen Xinyu Huang Yukang Chen Feng Yan Zhaoyang Zeng Hao Zhang Feng Li Jie Yang Hongyang Li Qing Jiang and Lei Zhang. 2024. Grounded SAM: Assembling Open-World Models for Diverse Visual Tasks. arXiv:2401.14159 [cs.CV]"},{"key":"e_1_2_1_71_1","doi-asserted-by":"crossref","unstructured":"Robin Rombach Andreas Blattmann Dominik Lorenz Patrick Esser and Bj\u00f6rn Ommer. 2022. High-resolution image synthesis with latent diffusion models. In Computer Vision and Pattern Recognition (CVPR).","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"e_1_2_1_72_1","volume-title":"RB-Modulation: Training-Free Personalization of Diffusion Models using Stochastic Optimal Control. arXiv preprint arXiv:2405.17401","author":"Rout Litu","year":"2024","unstructured":"Litu Rout, Yujia Chen, Nataniel Ruiz, Abhishek Kumar, Constantine Caramanis, Sanjay Shakkottai, and Wen-Sheng Chu. 2024. RB-Modulation: Training-Free Personalization of Diffusion Models using Stochastic Optimal Control. arXiv preprint arXiv:2405.17401 (2024)."},{"key":"e_1_2_1_73_1","doi-asserted-by":"crossref","unstructured":"Nataniel Ruiz Yuanzhen Li Varun Jampani Yael Pritch Michael Rubinstein and Kfir Aberman. 2023. DreamBooth: Fine tuning text-to-image diffusion models for subject-driven generation. In Computer Vision and Pattern Recognition (CVPR).","DOI":"10.1109\/CVPR52729.2023.02155"},{"key":"e_1_2_1_74_1","volume-title":"PIFu: Pixel-Aligned Implicit Function for High-Resolution Clothed Human Digitization. In International Conference on Computer Vision (ICCV).","author":"Saito Shunsuke","year":"2019","unstructured":"Shunsuke Saito, Zeng Huang, Ryota Natsume, Shigeo Morishima, Hao Li, and Angjoo Kanazawa. 2019. PIFu: Pixel-Aligned Implicit Function for High-Resolution Clothed Human Digitization. In International Conference on Computer Vision (ICCV)."},{"key":"e_1_2_1_75_1","doi-asserted-by":"crossref","unstructured":"Shunsuke Saito Tomas Simon Jason Saragih and Hanbyul Joo. 2020. PIFuHD: MultiLevel Pixel-Aligned Implicit Function for High-Resolution 3D Human Digitization. In Computer Vision and Pattern Recognition (CVPR).","DOI":"10.1109\/CVPR42600.2020.00016"},{"key":"e_1_2_1_76_1","doi-asserted-by":"crossref","unstructured":"Kaiyue Shen Chen Guo Manuel Kaufmann Juan Zarate Julien Valentin Jie Song and Otmar Hilliges. 2023. X-Avatar: Expressive Human Avatars. In Computer Vision and Pattern Recognition (CVPR).","DOI":"10.1109\/CVPR52729.2023.01622"},{"key":"e_1_2_1_77_1","volume-title":"Conference on Neural Information Processing Systems (NeurIPS)","author":"Shen Tianchang","year":"2021","unstructured":"Tianchang Shen, Jun Gao, Kangxue Yin, Ming-Yu Liu, and Sanja Fidler. 2021. Deep marching tetrahedra: a hybrid representation for high-resolution 3d shape synthesis. Conference on Neural Information Processing Systems (NeurIPS) (2021)."},{"key":"e_1_2_1_78_1","volume-title":"International Conference on Learning Representations (ICLR)","author":"Shi Yichun","year":"2024","unstructured":"Yichun Shi, Peng Wang, Jianglong Ye, Long Mai, Kejie Li, and Xiao Yang. 2024. MV-Dream: Multi-view Diffusion for 3D Generation. International Conference on Learning Representations (ICLR) (2024)."},{"key":"e_1_2_1_79_1","unstructured":"Sanghyun Son Matheus Gadelha Yang Zhou Zexiang Xu Ming C. Lin and Yi Zhou. 2024. DMesh: A Differentiable Representation for General Meshes. arXiv:2404.13445 [cs.CV]"},{"key":"e_1_2_1_80_1","volume-title":"International Conference on Machine Learning (ICML).","author":"Song Yang","year":"2023","unstructured":"Yang Song, Prafulla Dhariwal, Mark Chen, and Ilya Sutskever. 2023. Consistency models. In International Conference on Machine Learning (ICML)."},{"key":"e_1_2_1_81_1","doi-asserted-by":"publisher","DOI":"10.1145\/3528233.3530718"},{"key":"e_1_2_1_82_1","volume-title":"European Conference on Computer Vision (ECCV).","author":"Tang Jiaxiang","year":"2024","unstructured":"Jiaxiang Tang, Zhaoxi Chen, Xiaokang Chen, Tengfei Wang, Gang Zeng, and Ziwei Liu. 2024a. LGM: Large Multi-View Gaussian Model for High-Resolution 3D Content Creation. In European Conference on Computer Vision (ECCV)."},{"key":"e_1_2_1_83_1","doi-asserted-by":"publisher","DOI":"10.1145\/3658237"},{"key":"e_1_2_1_84_1","volume-title":"ConsiStory: Training-Free Consistent Text-to-Image Generation. In International Conference on Computer Graphics and Interactive Techniques (SIGGRAPH).","author":"Tewel Yoad","year":"2024","unstructured":"Yoad Tewel, Omri Kaduri, Rinon Gal, Yoni Kasten, Lior Wolf, Gal Chechik, and Yuval Atzmon. 2024. ConsiStory: Training-Free Consistent Text-to-Image Generation. In International Conference on Computer Graphics and Interactive Techniques (SIGGRAPH)."},{"key":"e_1_2_1_85_1","doi-asserted-by":"crossref","unstructured":"Daniel Vlasic Pieter Peers Ilya Baran Paul Debevec Jovan Popovi\u0107 Szymon Rusinkiewicz and Wojciech Matusik. 2009. Dynamic shape capture using multi-view photometric stereo. In ACM SIGGRAPH Asia 2009 Papers.","DOI":"10.1145\/1661412.1618520"},{"key":"e_1_2_1_86_1","doi-asserted-by":"crossref","unstructured":"Haochen Wang Xiaodan Du Jiahao Li Raymond A Yeh and Greg Shakhnarovich. 2023. Score Jacobian Chaining: Lifting Pretrained 2D Diffusion Models for 3D Generation. In Computer Vision and Pattern Recognition (CVPR).","DOI":"10.1109\/CVPR52729.2023.01214"},{"key":"e_1_2_1_87_1","volume-title":"European Conference on Computer Vision (ECCV).","author":"Wang Jionghao","year":"2024","unstructured":"Jionghao Wang, Yuan Liu, Zhiyang Dou, Zhengming Yu, Yongqing Liang, Xin Li, Wenping Wang, Rong Xie, and Li Song. 2024c. Disentangled Clothed Avatar Generation from Text Descriptions. In European Conference on Computer Vision (ECCV)."},{"key":"e_1_2_1_88_1","volume-title":"PF-LRM: Pose-Free Large Reconstruction Model for Joint Pose and Shape Prediction. In International Conference on Learning Representations (ICLR).","author":"Wang Peng","year":"2024","unstructured":"Peng Wang, Hao Tan, Sai Bi, Yinghao Xu, Fujun Luan, Kalyan Sunkavalli, Wenping Wang, Zexiang Xu, and Kai Zhang. 2024d. PF-LRM: Pose-Free Large Reconstruction Model for Joint Pose and Shape Prediction. In International Conference on Learning Representations (ICLR)."},{"key":"e_1_2_1_89_1","unstructured":"Qixun Wang Xu Bai Haofan Wang Zekui Qin and Anthony Chen. 2024a. InstantID: Zero-shot Identity-Preserving Generation in Seconds. arXiv preprint arXiv:2401.07519 (2024)."},{"key":"e_1_2_1_90_1","volume-title":"DUSt3R: Geometric 3D Vision Made Easy. Computer Vision and Pattern Recognition (CVPR)","author":"Wang Shuzhe","year":"2024","unstructured":"Shuzhe Wang, Vincent Leroy, Yohann Cabon, Boris Chidlovskii, and Revaud Jerome. 2024b. DUSt3R: Geometric 3D Vision Made Easy. Computer Vision and Pattern Recognition (CVPR) (2024)."},{"key":"e_1_2_1_91_1","volume-title":"Chen Change Loy, and Chao Dong","author":"Wang Xintao","year":"2022","unstructured":"Xintao Wang, Liangbin Xie, Ke Yu, Kelvin C.K. Chan, Chen Change Loy, and Chao Dong. 2022. BasicSR: Open Source Image and Video Restoration Toolbox."},{"key":"e_1_2_1_92_1","volume-title":"NeRF-: Neural radiance fields without known camera parameters. arXiv preprint arXiv:2102.07064","author":"Wang Zirui","year":"2021","unstructured":"Zirui Wang, Shangzhe Wu, Weidi Xie, Min Chen, and Victor Adrian Prisacariu. 2021. NeRF-: Neural radiance fields without known camera parameters. arXiv preprint arXiv:2102.07064 (2021)."},{"key":"e_1_2_1_93_1","doi-asserted-by":"crossref","unstructured":"Chung-Yi Weng Brian Curless Pratul P. Srinivasan Jonathan T. Barron and Ira Kemelmacher-Shlizerman. 2022. HumanNeRF: Free-Viewpoint Rendering of Moving People From Monocular Video. In Computer Vision and Pattern Recognition (CVPR).","DOI":"10.1109\/CVPR52688.2022.01573"},{"key":"e_1_2_1_94_1","unstructured":"Rundi Wu Ben Mildenhall Philipp Henzler Keunhong Park Ruiqi Gao Daniel Watson Pratul P. Srinivasan Dor Verbin Jonathan T. Barron Ben Poole and Aleksander Holynski. 2024. ReconFusion: 3D Reconstruction with Diffusion Priors. Computer Vision and Pattern Recognition (CVPR)."},{"key":"e_1_2_1_95_1","volume-title":"British Machine Vision Conference (BMVC).","author":"Xia Yitong","year":"2022","unstructured":"Yitong Xia, Hao Tang, Radu Timofte, and Luc Van Gool. 2022. Sinerf: Sinusoidal neural radiance fields for joint pose estimation and scene reconstruction. In British Machine Vision Conference (BMVC)."},{"key":"e_1_2_1_96_1","volume-title":"Conference on Neural Information Processing Systems (NeurIPS).","author":"Xie Enze","year":"2021","unstructured":"Enze Xie, Wenhai Wang, Zhiding Yu, Anima Anandkumar, Jose M Alvarez, and Ping Luo. 2021. SegFormer: Simple and efficient design for semantic segmentation with transformers. In Conference on Neural Information Processing Systems (NeurIPS)."},{"key":"e_1_2_1_97_1","doi-asserted-by":"crossref","unstructured":"Zhangyang Xiong Chenghong Li Kenkun Liu Hongjie Liao Jianqiao Hu Junyi Zhu Shuliang Ning Lingteng Qiu Chongjie Wang Shijie Wang et al. 2024. MVHumanNet: A Large-scale Dataset of Multi-view Daily Dressing Human Captures. In Computer Vision and Pattern Recognition (CVPR).","DOI":"10.1109\/CVPR52733.2024.01872"},{"key":"e_1_2_1_98_1","volume-title":"Black","author":"Xiu Yuliang","year":"2023","unstructured":"Yuliang Xiu, Jinlong Yang, Xu Cao, Dimitrios Tzionas, and Michael J. Black. 2023. ECON: Explicit Clothed humans Optimized via Normal integration. In Computer Vision and Pattern Recognition (CVPR)."},{"key":"e_1_2_1_99_1","volume-title":"Black","author":"Xiu Yuliang","year":"2022","unstructured":"Yuliang Xiu, Jinlong Yang, Dimitrios Tzionas, and Michael J. Black. 2022. ICON: Implicit Clothed humans Obtained from Normals. In Computer Vision and Pattern Recognition (CVPR)."},{"key":"e_1_2_1_100_1","doi-asserted-by":"crossref","unstructured":"Xihe Yang Xingyu Chen Daiheng Gao Shaohui Wang Xiaoguang Han and Baoyuan Wang. 2024. HAVE-FUN: Human Avatar Reconstruction from Few-Shot Unconstrained Images. In Computer Vision and Pattern Recognition (CVPR).","DOI":"10.1109\/CVPR52733.2024.00077"},{"key":"e_1_2_1_101_1","volume-title":"International Conference on Computer Vision (ICCV).","author":"Yang Xueting","year":"2023","unstructured":"Xueting Yang, Yihao Luo, Yuliang Xiu, Wei Wang, Hao Xu, and Zhaoxin Fan. 2023. D-IF: Uncertainty-aware Human Digitization via Implicit Distribution Field. In International Conference on Computer Vision (ICCV)."},{"key":"e_1_2_1_102_1","volume-title":"StableNormal: Reducing Diffusion Variance for Stable and Sharp Normal. ACM Transactions on Graphics (TOG)","author":"Ye Chongjie","year":"2024","unstructured":"Chongjie Ye, Lingteng Qiu, Xiaodong Gu, Qi Zuo, Yushuang Wu, Zilong Dong, Liefeng Bo, Yuliang Xiu, and Xiaoguang Han. 2024. StableNormal: Reducing Diffusion Variance for Stable and Sharp Normal. ACM Transactions on Graphics (TOG) (2024)."},{"key":"e_1_2_1_103_1","unstructured":"Tao Yu Zerong Zheng Kaiwen Guo Pengpeng Liu Qionghai Dai and Yebin Liu. 2021. In Computer Vision and Pattern Recognition (CVPR)."},{"key":"e_1_2_1_104_1","volume-title":"Koki Nagano, Jan Kautz, and Umar Iqbal.","author":"Yuan Ye","year":"2024","unstructured":"Ye Yuan, Xueting Li, Yangyi Huang, Shalini De Mello, Koki Nagano, Jan Kautz, and Umar Iqbal. 2024. GAvatar: Animatable 3D Gaussian Avatars with Implicit Mesh Learning. In Computer Vision and Pattern Recognition (CVPR)."},{"key":"e_1_2_1_105_1","volume-title":"AvatarBooth: High-Quality and Customizable 3D Human Avatar Generation. arXiv:2306.09864","author":"Zeng Yifei","year":"2023","unstructured":"Yifei Zeng, Yuanxun Lu, Xinya Ji, Yao Yao, Hao Zhu, and Xun Cao. 2023. AvatarBooth: High-Quality and Customizable 3D Human Avatar Generation. arXiv:2306.09864 (2023)."},{"key":"e_1_2_1_106_1","doi-asserted-by":"crossref","unstructured":"Chao Zhang Sergi Pujades Michael Black and Gerard Pons-Moll. 2017. Detailed accurate human shape estimation from clothed 3D scan sequences. In Computer Vision and Pattern Recognition (CVPR).","DOI":"10.1109\/CVPR.2017.582"},{"key":"e_1_2_1_107_1","doi-asserted-by":"crossref","unstructured":"Jingbo Zhang Xiaoyu Li Qi Zhang Yanpei Cao Ying Shan and Jing Liao. 2024a. HumanRef: Single Image to 3D Human Generation via Reference-Guided Diffusion. In Computer Vision and Pattern Recognition (CVPR).","DOI":"10.1109\/CVPR52733.2024.00181"},{"key":"e_1_2_1_108_1","volume-title":"International Conference on Learning Representations (ICLR).","author":"Zhang Jason Y","year":"2024","unstructured":"Jason Y Zhang, Amy Lin, Moneish Kumar, Tzu-Hsuan Yang, Deva Ramanan, and Shubham Tulsiani. 2024b. Cameras as Rays: Pose Estimation via Ray Diffusion. In International Conference on Learning Representations (ICLR)."},{"key":"e_1_2_1_109_1","volume-title":"International Conference on Computer Vision (ICCV).","author":"Zhang Lvmin","year":"2023","unstructured":"Lvmin Zhang and Maneesh Agrawala. 2023. Adding Conditional Control to Text-to-Image Diffusion Models. In International Conference on Computer Vision (ICCV)."},{"key":"e_1_2_1_110_1","volume-title":"PaMIR: Parametric Model-conditioned Implicit Representation for image-based human reconstruction. Transactions on Pattern Analysis and Machine Intelligence (TPAMI)","author":"Zheng Zerong","year":"2021","unstructured":"Zerong Zheng, Tao Yu, Yebin Liu, and Qionghai Dai. 2021. PaMIR: Parametric Model-conditioned Implicit Representation for image-based human reconstruction. Transactions on Pattern Analysis and Machine Intelligence (TPAMI) (2021)."},{"key":"e_1_2_1_111_1","volume-title":"International Conference on Computer Vision (ICCV).","author":"Zheng Zerong","year":"2019","unstructured":"Zerong Zheng, Tao Yu, Yixuan Wei, Qionghai Dai, and Yebin Liu. 2019. DeepHuman: 3D Human Reconstruction From a Single Image. In International Conference on Computer Vision (ICCV)."},{"key":"e_1_2_1_112_1","unstructured":"Wojciech Zielonka Timur Bagautdinov Shunsuke Saito Michael Zollh\u00f6fer Justus Thies and Javier Romero. 2023. Drivable 3D Gaussian Avatars. (2023). arXiv:2311.08581 [cs.CV]"},{"key":"e_1_2_1_113_1","volume-title":"Towards Metrical Reconstruction of Human Faces. In European Conference on Computer Vision (ECCV). https:\/\/api.semanticscholar.org\/CorpusID:248177832","author":"Zielonka Wojciech","year":"2022","unstructured":"Wojciech Zielonka, Timo Bolkart, and Justus Thies. 2022. Towards Metrical Reconstruction of Human Faces. In European Conference on Computer Vision (ECCV). https:\/\/api.semanticscholar.org\/CorpusID:248177832"}],"container-title":["ACM Transactions on Graphics"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3687771","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3687771","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T01:17:45Z","timestamp":1750295865000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3687771"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,11,19]]},"references-count":113,"journal-issue":{"issue":"6","published-print":{"date-parts":[[2024,12,19]]}},"alternative-id":["10.1145\/3687771"],"URL":"https:\/\/doi.org\/10.1145\/3687771","relation":{},"ISSN":["0730-0301","1557-7368"],"issn-type":[{"value":"0730-0301","type":"print"},{"value":"1557-7368","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,11,19]]},"assertion":[{"value":"2024-11-19","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}