{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,7]],"date-time":"2025-11-07T09:54:16Z","timestamp":1762509256871,"version":"3.44.0"},"reference-count":116,"publisher":"Springer Science and Business Media LLC","issue":"9","license":[{"start":{"date-parts":[[2025,5,16]],"date-time":"2025-05-16T00:00:00Z","timestamp":1747353600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,5,16]],"date-time":"2025-05-16T00:00:00Z","timestamp":1747353600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int J Comput Vis"],"published-print":{"date-parts":[[2025,9]]},"DOI":"10.1007\/s11263-025-02477-5","type":"journal-article","created":{"date-parts":[[2025,5,16]],"date-time":"2025-05-16T10:56:40Z","timestamp":1747393000000},"page":"5938-5957","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":3,"title":["HumanLiff: Layer-wise 3D Human Diffusion Model"],"prefix":"10.1007","volume":"133","author":[{"given":"Shoukang","family":"Hu","sequence":"first","affiliation":[]},{"given":"Fangzhou","family":"Hong","sequence":"additional","affiliation":[]},{"given":"Tao","family":"Hu","sequence":"additional","affiliation":[]},{"given":"Liang","family":"Pan","sequence":"additional","affiliation":[]},{"given":"Haiyi","family":"Mei","sequence":"additional","affiliation":[]},{"given":"Weiye","family":"Xiao","sequence":"additional","affiliation":[]},{"given":"Lei","family":"Yang","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4220-5958","authenticated-orcid":false,"given":"Ziwei","family":"Liu","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,5,16]]},"reference":[{"key":"2477_CR1","doi-asserted-by":"crossref","unstructured":"Aggarwal, A., Wang, J., Hogue, S., Ni, S., Budagavi, M., & Guo, X. (2022). Layered-garment net: Generating multiple implicit garment layers from a single image. In: Proceedings of the Asian Conference on Computer Vision, pp. 3000\u20133017","DOI":"10.1007\/978-3-031-26319-4_23"},{"issue":"6","key":"2477_CR2","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3478513.3480559","volume":"40","author":"B AlBahar","year":"2021","unstructured":"AlBahar, B., Lu, J., Yang, J., Shu, Z., Shechtman, E., & Huang, J.-B. (2021). Pose with style: Detail-preserving pose-guided image synthesis with conditional stylegan. ACM Transactions on Graphics (TOG), 40(6), 1\u201311.","journal-title":"ACM Transactions on Graphics (TOG)"},{"key":"2477_CR3","doi-asserted-by":"crossref","unstructured":"Anciukevi\u010dius, T., Xu, Z., Fisher, M., Henderson, P., Bilen, H., Mitra, N.J., & Guerrero, P. (2022). Renderdiffusion: Image diffusion for 3d reconstruction, inpainting and generation. arXiv preprint arXiv:2211.09869","DOI":"10.1109\/CVPR52729.2023.01213"},{"key":"2477_CR4","doi-asserted-by":"crossref","unstructured":"Andriluka, M., Pishchulin, L., Gehler, P., & Schiele, B. (2014). 2d human pose estimation: New benchmark and state of the art analysis. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 3686\u20133693","DOI":"10.1109\/CVPR.2014.471"},{"key":"2477_CR5","first-page":"19900","volume":"35","author":"A Bergman","year":"2022","unstructured":"Bergman, A., Kellnhofer, P., Yifan, W., Chan, E., Lindell, D., & Wetzstein, G. (2022). Generative neural articulated radiance fields. Advances in Neural Information Processing Systems, 35, 19900\u201319916.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"2477_CR6","doi-asserted-by":"crossref","unstructured":"Bhatnagar, B.L., Tiwari, G., Theobalt, C., & Pons-Moll, G. (2019). Multi-garment net: Learning to dress 3d people from images. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 5420\u20135430","DOI":"10.1109\/ICCV.2019.00552"},{"key":"2477_CR7","doi-asserted-by":"crossref","unstructured":"Bogo, F., Kanazawa, A., Lassner, C., Gehler, P., Romero, J., & Black, M.J. (2016). Keep it smpl: Automatic estimation of 3d human pose and shape from a single image. In: Computer Vision\u2013ECCV 2016: 14th European Conference, Amsterdam, The Netherlands, October 11-14, 2016, Proceedings, Part V 14, pp. 561\u2013578. Springer","DOI":"10.1007\/978-3-319-46454-1_34"},{"key":"2477_CR8","doi-asserted-by":"crossref","unstructured":"Chan, C., Ginosar, S., & Zhou, T., & Efros, A.A. (2019). Everybody dance now. IEEE\/CVF International Conference on Computer Vision (ICCV), 2019, 5932\u20135941.","DOI":"10.1109\/ICCV.2019.00603"},{"key":"2477_CR9","doi-asserted-by":"crossref","unstructured":"Chan, E.R., Lin, C.Z., Chan, M.A., Nagano, K., Pan, B., De\u00a0Mello, S., Gallo, O., Guibas, L.J., Tremblay, J., Khamis, S., et\u00a0al. (2022). Efficient geometry-aware 3d generative adversarial networks. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 16123\u201316133","DOI":"10.1109\/CVPR52688.2022.01565"},{"key":"2477_CR10","doi-asserted-by":"crossref","unstructured":"Chen, H., Gu, J., Chen, A., Tian, W., Tu, Z., Liu, L., & Su, H. (2023). Single-stage diffusion nerf: A unified approach to 3d generation and reconstruction. arXiv preprint arXiv:2304.06714","DOI":"10.1109\/ICCV51070.2023.00229"},{"issue":"1","key":"2477_CR11","first-page":"1","volume":"41","author":"X Chen","year":"2021","unstructured":"Chen, X., Pang, A., Yang, W., Wang, P., Xu, L., & Yu, J. (2021). Tightcap: 3d human shape capture with clothing tightness field. ACM Transactions on Graphics (TOG), 41(1), 1\u201317.","journal-title":"ACM Transactions on Graphics (TOG)"},{"key":"2477_CR12","doi-asserted-by":"crossref","unstructured":"Cheng, Y.-C., Lee, H.-Y., Tulyakov, S., Schwing, A., & Gui, L. (2022). Sdfusion: Multimodal 3d shape completion, reconstruction, and generation. arXiv preprint arXiv:2212.04493","DOI":"10.1109\/CVPR52729.2023.00433"},{"key":"2477_CR13","doi-asserted-by":"crossref","unstructured":"Deng, B., Lewis, J.P., Jeruzalski, T., Pons-Moll, G., Hinton, G., Norouzi, M., & Tagliasacchi, A. (2020). Nasa neural articulated shape approximation. In: European Conference on Computer Vision, pp. 612\u2013628. Springer","DOI":"10.1007\/978-3-030-58571-6_36"},{"key":"2477_CR14","first-page":"8780","volume":"34","author":"P Dhariwal","year":"2021","unstructured":"Dhariwal, P., & Nichol, A. (2021). Diffusion models beat gans on image synthesis. Advances in Neural Information Processing Systems, 34, 8780\u20138794.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"2477_CR15","doi-asserted-by":"crossref","unstructured":"Dong, Z., Chen, X., Yang, J., Black, M.J., Hilliges, O., & Geiger, A. (2023). Ag3d: Learning to generate 3d avatars from 2d image collections. arXiv preprint arXiv:2305.02312","DOI":"10.1109\/ICCV51070.2023.01370"},{"issue":"11","key":"2477_CR16","doi-asserted-by":"publisher","first-page":"793","DOI":"10.1119\/1.1937609","volume":"29","author":"RM Fano","year":"1961","unstructured":"Fano, R. M., & Hawkins, D. (1961). Transmission of information: A statistical theory of communications. American Journal of Physics, 29(11), 793\u2013794.","journal-title":"American Journal of Physics"},{"key":"2477_CR17","doi-asserted-by":"crossref","unstructured":"Feng, Y., Yang, J., Pollefeys, M., Black, M.J., & Bolkart, T. (2022). Capturing and animation of body and clothing from monocular video. SIGGRAPH Asia 2022 Conference Papers","DOI":"10.1145\/3550469.3555423"},{"key":"2477_CR18","doi-asserted-by":"crossref","unstructured":"Fridovich-Keil, S., Yu, A., Tancik, M., Chen, Q., Recht, B., & Kanazawa, A. (2022). Plenoxels: Radiance fields without neural networks. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 5501\u20135510","DOI":"10.1109\/CVPR52688.2022.00542"},{"key":"2477_CR19","first-page":"7713","volume":"2022","author":"A Fruhstuck","year":"2022","unstructured":"Fruhstuck, A., Singh, K. K., Shechtman, E., Mitra, N. J., Wonka, P., & Lu, J. (2022). Insetgan for full-body image generation. IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), 2022, 7713\u20137722.","journal-title":"IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR)"},{"key":"2477_CR20","doi-asserted-by":"crossref","unstructured":"Fu, J., Li, S., Jiang, Y., Lin, K.-Y., Qian, C., Loy, C.C., Wu, W., & Liu, Z. (2022). Stylegan-human: A data-centric odyssey of human generation. In: European Conference on Computer Vision","DOI":"10.1007\/978-3-031-19787-1_1"},{"key":"2477_CR21","doi-asserted-by":"crossref","unstructured":"Fu, J., Li, S., Jiang, Y., Lin, K.-Y., Qian, C., Loy, C.C., Wu, W., & Liu, Z. (2022). Stylegan-human: A data-centric odyssey of human generation. In: European Conference on Computer Vision, pp. 1\u201319. Springer","DOI":"10.1007\/978-3-031-19787-1_1"},{"key":"2477_CR22","doi-asserted-by":"crossref","unstructured":"Fu, J., Li, S., Jiang, Y., Lin, K.-Y., Wu, W., & Liu, Z. (2023). Unitedhuman: Harnessing multi-source data for high-resolution human generation. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 7301\u20137311","DOI":"10.1109\/ICCV51070.2023.00671"},{"key":"2477_CR23","unstructured":"Gong, J., Ji, S., Foo, L.G., Chen, K., Rahmani, H., & Liu, J. (2024). Laga: Layered 3d avatar generation and customization via gaussian splatting. arXiv preprint arXiv:2405.12663"},{"key":"2477_CR24","first-page":"5147","volume":"2021","author":"A Grigorev","year":"2021","unstructured":"Grigorev, A., Iskakov, K., Ianina, A., Bashirov, R., Zakharkin, I., Vakhitov, A., & Lempitsky, V. S. (2021). Stylepeople: A generative model of fullbody human avatars. IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), 2021, 5147\u20135156.","journal-title":"IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR)"},{"key":"2477_CR25","unstructured":"Gu, J., Liu, L., Wang, P., & Theobalt, C. (2021). Stylenerf: A style-based 3d-aware generator for high-resolution image synthesis. arXiv:abs\/2110.08985"},{"key":"2477_CR26","first-page":"11026","volume":"2021","author":"T He","year":"2021","unstructured":"He, T., Xu, Y., Saito, S., Soatto, S., & Tung, T. (2021). Arch++: Animation-ready clothed human reconstruction revisited. IEEE\/CVF International Conference on Computer Vision (ICCV), 2021, 11026\u201311036.","journal-title":"IEEE\/CVF International Conference on Computer Vision (ICCV)"},{"key":"2477_CR27","unstructured":"Heusel, M., Ramsauer, H., Unterthiner, T., Nessler, B., & Hochreiter, S. (2017). Gans trained by a two time-scale update rule converge to a local nash equilibrium. Advances in neural information processing systems 30"},{"key":"2477_CR28","first-page":"6840","volume":"33","author":"J Ho","year":"2020","unstructured":"Ho, J., Jain, A., & Abbeel, P. (2020). Denoising diffusion probabilistic models. Advances in Neural Information Processing Systems, 33, 6840\u20136851.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"2477_CR29","unstructured":"Hong, F., Chen, Z., Lan, Y., Pan, L., & Liu, Z. (2022). Eva3d: Compositional 3d human generation from 2d image collections. arXiv preprint arXiv:2210.04888"},{"issue":"4","key":"2477_CR30","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3528223.3530094","volume":"41","author":"F Hong","year":"2022","unstructured":"Hong, F., Zhang, M., Pan, L., Cai, Z., Yang, L., & Liu, Z. (2022). Avatarclip: Zero-shot text-driven generation and animation of 3d avatars. ACM Transactions on Graphics (TOG), 41(4), 1\u201319.","journal-title":"ACM Transactions on Graphics (TOG)"},{"key":"2477_CR31","unstructured":"Hong, Y., Peng, B., Xiao, H., Liu, L., & Zhang, J. (2021). Headnerf: A real-time nerf-based parametric head model. arXiv:abs\/2112.05637"},{"key":"2477_CR32","doi-asserted-by":"crossref","unstructured":"Hu, S., Hong, F., Pan, L., Mei, H., Yang, L., & Liu, Z. (2023). Sherf: Generalizable human nerf from a single image. arXiv preprint","DOI":"10.1109\/ICCV51070.2023.00858"},{"key":"2477_CR33","doi-asserted-by":"crossref","unstructured":"Hu, T., Sarkar, K., Liu, L., Zwicker, M., & Theobalt, C. (2021). Egorenderer: Rendering human avatars from egocentric camera images. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV), pp. 14528\u201314538","DOI":"10.1109\/ICCV48922.2021.01426"},{"key":"2477_CR34","doi-asserted-by":"crossref","unstructured":"Hu, T., Yu, T., Zheng, Z., Zhang, H., Liu, Y., & Zwicker, M. (2021). Hvtr: Hybrid volumetric-textural rendering for human avatars. 2022 International Conference on 3D Vision (3DV), 197\u2013208","DOI":"10.1109\/3DV57658.2022.00032"},{"issue":"9","key":"2477_CR35","doi-asserted-by":"publisher","first-page":"1098","DOI":"10.1109\/JRPROC.1952.273898","volume":"40","author":"DA Huffman","year":"1952","unstructured":"Huffman, D. A. (1952). A method for the construction of minimum-redundancy codes. Proceedings of the IRE, 40(9), 1098\u20131101.","journal-title":"Proceedings of the IRE"},{"key":"2477_CR36","doi-asserted-by":"crossref","unstructured":"Hui, K.-H., Li, R., Hu, J., & Fu, C.-W. (2022). Neural wavelet-domain diffusion for 3d shape generation. In: SIGGRAPH Asia 2022 Conference Papers, pp. 1\u20139","DOI":"10.1145\/3550469.3555394"},{"key":"2477_CR37","doi-asserted-by":"crossref","unstructured":"Jiang, Y., Yang, S., Koh, T.L., Wu, W., Loy, C.C., & Liu, Z. (2023). Text2performer: Text-driven human video generation. arXiv preprint arXiv:2304.08483","DOI":"10.1109\/ICCV51070.2023.02079"},{"issue":"4","key":"2477_CR38","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3528223.3530104","volume":"41","author":"Y Jiang","year":"2022","unstructured":"Jiang, Y., Yang, S., Qiu, H., Wu, W., Loy, C. C., & Liu, Z. (2022). Text2human: Text-driven controllable human image generation. ACM Transactions on Graphics (TOG), 41(4), 1\u201311.","journal-title":"ACM Transactions on Graphics (TOG)"},{"issue":"3","key":"2477_CR39","doi-asserted-by":"publisher","first-page":"165","DOI":"10.1145\/964965.808594","volume":"18","author":"JT Kajiya","year":"1984","unstructured":"Kajiya, J. T., & Von Herzen, B. P. (1984). Ray tracing volume densities. ACM SIGGRAPH computer graphics, 18(3), 165\u2013174.","journal-title":"ACM SIGGRAPH computer graphics"},{"key":"2477_CR40","doi-asserted-by":"crossref","unstructured":"Kajiya, J.T., & Herzen, B.V. (1984). Ray tracing volume densities. Proceedings of the 11th annual conference on Computer graphics and interactive techniques","DOI":"10.1145\/800031.808594"},{"key":"2477_CR41","doi-asserted-by":"crossref","unstructured":"Karras, J., Holynski, A., Wang, T.-C., & Kemelmacher-Shlizerman, I. (2023). Dreampose: Fashion image-to-video synthesis via stable diffusion. arXiv preprint arXiv:2304.06025","DOI":"10.1109\/ICCV51070.2023.02073"},{"key":"2477_CR42","first-page":"8107","volume":"2020","author":"T Karras","year":"2019","unstructured":"Karras, T., Laine, S., Aittala, M., Hellsten, J., Lehtinen, J., & Aila, T. (2019). Analyzing and improving the image quality of stylegan. IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), 2020, 8107\u20138116.","journal-title":"IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR)"},{"key":"2477_CR43","unstructured":"Kingma, D.P., & Ba, J. (2014). Adam: A method for stochastic optimization. arXiv preprint arXiv:1412.6980"},{"key":"2477_CR44","doi-asserted-by":"crossref","unstructured":"Kingma, D.P., Welling, M., et\u00a0al. (2019). An introduction to variational autoencoders. Foundations and Trends\u00ae in Machine Learning 12(4), 307\u2013392","DOI":"10.1561\/2200000056"},{"key":"2477_CR45","unstructured":"Kratzwald, B., Huang, Z., Paudel, D.P., & Van\u00a0Gool, L. (2017). Towards an Understanding of Our World by GANing Videos in the Wild. arXiv:1711.11453. https:\/\/arxiv.org\/abs\/1711.11453"},{"issue":"4","key":"2477_CR46","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3450626.3459884","volume":"40","author":"KM Lewis","year":"2021","unstructured":"Lewis, K. M., Varadharajan, S., & Kemelmacher-Shlizerman, I. (2021). Tryongan: Body-aware try-on via layered interpolation. ACM Transactions on Graphics (TOG), 40(4), 1\u201310.","journal-title":"ACM Transactions on Graphics (TOG)"},{"key":"2477_CR47","doi-asserted-by":"crossref","unstructured":"Li, M., Duan, Y., Zhou, J., & Lu, J. (2022). Diffusion-sdf: Text-to-shape via voxelized diffusion. arXiv preprint arXiv:2212.03293","DOI":"10.1109\/CVPR52729.2023.01216"},{"issue":"6","key":"2477_CR48","first-page":"1","volume":"40","author":"L Liu","year":"2021","unstructured":"Liu, L., Habermann, M., Rudnev, V., Sarkar, K., Gu, J., & Theobalt, C. (2021). Neural actor: Neural free-view synthesis of human actors with pose control. ACM Transactions on Graphics (TOG), 40(6), 1\u201316.","journal-title":"ACM Transactions on Graphics (TOG)"},{"key":"2477_CR49","doi-asserted-by":"crossref","unstructured":"Liu, L., Xu, W., Zollhoefer, M., Kim, H., Bernard, F., Habermann, M., Wang, W., & Theobalt, C. (2019). Neural rendering and reenactment of human actor videos. ACM Transactions on Graphics (TOG)","DOI":"10.1145\/3333002"},{"key":"2477_CR50","doi-asserted-by":"crossref","unstructured":"Liu, R., Wu, R., Van\u00a0Hoorick, B., Tokmakov, P., Zakharov, S., & Vondrick, C. (2023). Zero-1-to-3: Zero-shot one image to 3d object. arXiv preprint arXiv:2303.11328","DOI":"10.1109\/ICCV51070.2023.00853"},{"key":"2477_CR51","unstructured":"Liu, Z., Feng, Y., Black, M.J., Nowrouzezahrai, D., Paull, L., & Liu, W. (2023). Meshdiffusion: Score-based generative 3d mesh modeling. In: International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=0cpM2ApF9p6"},{"key":"2477_CR52","doi-asserted-by":"crossref","unstructured":"Loper, M., Mahmood, N., Romero, J., Pons-Moll, G., & Black, M. J. (2015). SMPL: A skinned multi-person linear model. ACM Trans. Graphics (Proc. SIGGRAPH Asia), 34(6), 248\u2013124816.","DOI":"10.1145\/2816795.2818013"},{"key":"2477_CR53","doi-asserted-by":"crossref","unstructured":"Luo, S., & Hu, W. (2021). Diffusion probabilistic models for 3d point cloud generation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 2837\u20132845","DOI":"10.1109\/CVPR46437.2021.00286"},{"key":"2477_CR54","doi-asserted-by":"crossref","unstructured":"M\u00fcller, N., Siddiqui, Y., Porzi, L., Bul\u00f2, S.R., Kontschieder, P., & Nie\u00dfner, M. (2022). Diffrf: Rendering-guided 3d radiance field diffusion. arXiv preprint arXiv:2212.01206","DOI":"10.1109\/CVPR52729.2023.00421"},{"key":"2477_CR55","first-page":"6468","volume":"2020","author":"Q Ma","year":"2019","unstructured":"Ma, Q., Yang, J., Ranjan, A., Pujades, S., Pons-Moll, G., Tang, S., & Black, M. J. (2019). Learning to dress 3d people in generative clothing. IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), 2020, 6468\u20136477.","journal-title":"IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR)"},{"issue":"2","key":"2477_CR56","doi-asserted-by":"publisher","first-page":"99","DOI":"10.1109\/2945.468400","volume":"1","author":"N Max","year":"1995","unstructured":"Max, N. (1995). Optical models for direct volume rendering. IEEE Transactions on Visualization and Computer Graphics, 1(2), 99\u2013108.","journal-title":"IEEE Transactions on Visualization and Computer Graphics"},{"key":"2477_CR57","doi-asserted-by":"crossref","unstructured":"Men, Y., Mao, Y., Jiang, Y., Ma, W.-Y., & Lian, Z. (2020). Controllable person image synthesis with attribute-decomposed gan. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 5084\u20135093","DOI":"10.1109\/CVPR42600.2020.00513"},{"key":"2477_CR58","doi-asserted-by":"crossref","unstructured":"Mihajlovic, M., Zhang, Y., Black, M.J., & Tang, S. (2021). Leap: Learning articulated occupancy of people. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 10461\u201310471","DOI":"10.1109\/CVPR46437.2021.01032"},{"key":"2477_CR59","doi-asserted-by":"crossref","unstructured":"Mildenhall, B., Srinivasan, P.P., Tancik, M., Barron, J.T., Ramamoorthi, R., & Ng, R. (2020). Nerf: Representing scenes as neural radiance fields for view synthesis. In: European Conference on Computer Vision, pp. 405\u2013421. Springer","DOI":"10.1007\/978-3-030-58452-8_24"},{"key":"2477_CR60","unstructured":"Nam, G., Khlifi, M., Rodriguez, A., Tono, A., Zhou, L., & Guerrero, P. (2022). 3d-ldm: Neural implicit 3d shape generation with latent diffusion models. arXiv preprint arXiv:2212.00842"},{"key":"2477_CR61","doi-asserted-by":"crossref","unstructured":"Neverova, N., G\u00fcler, R.A., & Kokkinos, I. (2018). Dense pose transfer. European Conference on Computer Vision (ECCV)","DOI":"10.1007\/978-3-030-01219-9_8"},{"key":"2477_CR62","unstructured":"Nichol, A.Q., & Dhariwal, P. (2021). Improved denoising diffusion probabilistic models. In: International Conference on Machine Learning, pp. 8162\u20138171. PMLR"},{"key":"2477_CR63","first-page":"11448","volume":"2021","author":"M Niemeyer","year":"2021","unstructured":"Niemeyer, M., & Geiger, A. (2021). Giraffe: Representing scenes as compositional generative neural feature fields. IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), 2021, 11448\u201311459.","journal-title":"IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR)"},{"key":"2477_CR64","doi-asserted-by":"crossref","unstructured":"Noguchi, A., Sun, X., Lin, S., & Harada, T. (2021). Neural articulated radiance field. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 5762\u20135772","DOI":"10.1109\/ICCV48922.2021.00571"},{"key":"2477_CR65","doi-asserted-by":"crossref","unstructured":"Noguchi, A., Sun, X., Lin, S., & Harada, T. (2022). Unsupervised learning of efficient geometry-aware neural articulated representations. In: Computer Vision\u2013ECCV 2022: 17th European Conference, Tel Aviv, Israel, October 23\u201327, 2022, Proceedings, Part XVII, pp. 597\u2013614. Springer","DOI":"10.1007\/978-3-031-19790-1_36"},{"key":"2477_CR66","unstructured":"Ntavelis, E., Siarohin, A., Olszewski, K., Wang, C., Van\u00a0Gool, L., & Tulyakov, S. (2023). Autodecoding latent 3d diffusion models. arXiv preprint arXiv:2307.05445"},{"key":"2477_CR67","doi-asserted-by":"crossref","unstructured":"Or-El, R., Luo, X., Shan, M., Shechtman, E., Park, J.J., & Kemelmacher-Shlizerman, I. (2021). Stylesdf: High-resolution 3d-consistent image and geometry generation. arXiv:abs\/2112.11427","DOI":"10.1109\/CVPR52688.2022.01314"},{"key":"2477_CR68","doi-asserted-by":"crossref","unstructured":"Pang, A., Chen, X., Luo, H., Wu, M., Yu, J., & Xu, L. (2021). Few-shot neural human performance rendering from sparse rgbd videos. arXiv preprint arXiv:2107.06505","DOI":"10.24963\/ijcai.2021\/130"},{"key":"2477_CR69","doi-asserted-by":"crossref","unstructured":"Patel, C., Liao, Z., & Pons-Moll, G. (2020). Tailornet: Predicting clothing in 3d as a function of human pose, shape and garment style. In: IEEE Conference on Computer Vision and Pattern Recognition (CVPR). IEEE","DOI":"10.1109\/CVPR42600.2020.00739"},{"key":"2477_CR70","first-page":"9050","volume":"2021","author":"S Peng","year":"2021","unstructured":"Peng, S., Zhang, Y., Xu, Y., Wang, Q., Shuai, Q., Bao, H., & Zhou, X. (2021). Neural body: Implicit neural representations with structured latent codes for novel view synthesis of dynamic humans. IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), 2021, 9050\u20139059.","journal-title":"IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR)"},{"key":"2477_CR71","doi-asserted-by":"publisher","first-page":"73","DOI":"10.1145\/3072959.3073711","volume":"36","author":"G Pons-Moll","year":"2017","unstructured":"Pons-Moll, G., Pujades, S., Hu, S., & Black, M. J. (2017). Clothcap: seamless 4d clothing capture and retargeting. ACM Trans. Graph., 36, 73\u201317315.","journal-title":"ACM Trans. Graph."},{"key":"2477_CR72","unstructured":"Poole, B., Jain, A., Barron, J.T., & Mildenhall, B. (2022). Dreamfusion: Text-to-3d using 2d diffusion. arXiv preprint arXiv:2209.14988"},{"key":"2477_CR73","doi-asserted-by":"crossref","unstructured":"Pumarola, A., Agudo, A., Sanfeliu, A., & Moreno-Noguer, F. (2018). Unsupervised person image synthesis in arbitrary poses. In: The IEEE Conference on Computer Vision and Pattern Recognition (CVPR)","DOI":"10.1109\/CVPR.2018.00899"},{"key":"2477_CR74","first-page":"3721","volume":"2021","author":"A Raj","year":"2021","unstructured":"Raj, A., Tanke, J., Hays, J., Vo, M., Stoll, C., & Lassner, C. (2021). Anr: Articulated neural rendering for virtual avatars. IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), 2021, 3721\u20133730.","journal-title":"IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR)"},{"issue":"6","key":"2477_CR75","doi-asserted-by":"publisher","first-page":"1137","DOI":"10.1109\/TPAMI.2016.2577031","volume":"39","author":"S Ren","year":"2016","unstructured":"Ren, S., He, K., Girshick, R., & Sun, J. (2016). Faster r-cnn: Towards real-time object detection with region proposal networks. IEEE transactions on pattern analysis and machine intelligence, 39(6), 1137\u20131149.","journal-title":"IEEE transactions on pattern analysis and machine intelligence"},{"key":"2477_CR76","doi-asserted-by":"crossref","unstructured":"Ronneberger, O., Fischer, P., & Brox, T. (2015). U-net: Convolutional networks for biomedical image segmentation. In: Medical Image Computing and Computer-Assisted Intervention\u2013MICCAI 2015: 18th International Conference, Munich, Germany, October 5-9, 2015, Proceedings, Part III 18, pp. 234\u2013241. Springer","DOI":"10.1007\/978-3-319-24574-4_28"},{"key":"2477_CR77","doi-asserted-by":"crossref","unstructured":"Saharia, C., Chan, W., Chang, H., Lee, C., Ho, J., Salimans, T., Fleet, D., & Norouzi, M. (2022). Palette: Image-to-image diffusion models. In: ACM SIGGRAPH 2022 Conference Proceedings, pp. 1\u201310","DOI":"10.1145\/3528233.3530757"},{"key":"2477_CR78","first-page":"2304","volume":"2019","author":"S Saito","year":"2019","unstructured":"Saito, S., Huang, Z., Natsume, R., Morishima, S., Kanazawa, A., & Li, H. (2019). Pifu: Pixel-aligned implicit function for high-resolution clothed human digitization. IEEE\/CVF International Conference on Computer Vision (ICCV), 2019, 2304\u20132314.","journal-title":"IEEE\/CVF International Conference on Computer Vision (ICCV)"},{"key":"2477_CR79","first-page":"81","volume":"2020","author":"S Saito","year":"2020","unstructured":"Saito, S., Simon, T., Saragih, J. M., & Joo, H. (2020). Pifuhd: Multi-level pixel-aligned implicit function for high-resolution 3d human digitization. IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), 2020, 81\u201390.","journal-title":"IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR)"},{"key":"2477_CR80","first-page":"2885","volume":"2021","author":"S Saito","year":"2021","unstructured":"Saito, S., Yang, J., Ma, Q., & Black, M. J. (2021). Scanimate: Weakly supervised learning of skinned clothed avatar networks. IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), 2021, 2885\u20132896.","journal-title":"IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR)"},{"issue":"3","key":"2477_CR81","doi-asserted-by":"publisher","first-page":"8","DOI":"10.4236\/jcc.2019.73002","volume":"7","author":"U Sara","year":"2019","unstructured":"Sara, U., Akter, M., & Uddin, M. S. (2019). Image quality assessment through fsim, ssim, mse and psnr-a comparative study. Journal of Computer and Communications, 7(3), 8\u201318.","journal-title":"Journal of Computer and Communications"},{"key":"2477_CR82","doi-asserted-by":"crossref","unstructured":"Sarkar, K., Liu, L., Golyanik, V., & Theobalt, C. (2021). Humangan: A generative model of human images. In: 2021 International Conference on 3D Vision (3DV), pp. 258\u2013267. IEEE","DOI":"10.1109\/3DV53792.2021.00036"},{"key":"2477_CR83","doi-asserted-by":"crossref","unstructured":"Sarkar, K., Mehta, D., Xu, W., Golyanik, V., & Theobalt, C. (2020). Neural re-rendering of humans from a single image. In: European Conference on Computer Vision (ECCV)","DOI":"10.1007\/978-3-030-58621-8_35"},{"issue":"3","key":"2477_CR84","doi-asserted-by":"publisher","first-page":"379","DOI":"10.1002\/j.1538-7305.1948.tb01338.x","volume":"27","author":"CE Shannon","year":"1948","unstructured":"Shannon, C. E. (1948). A mathematical theory of communication. The Bell system technical journal, 27(3), 379\u2013423.","journal-title":"The Bell system technical journal"},{"issue":"1","key":"2477_CR85","doi-asserted-by":"publisher","first-page":"10","DOI":"10.1109\/JRPROC.1949.232969","volume":"37","author":"CE Shannon","year":"1949","unstructured":"Shannon, C. E. (1949). Communication in the presence of noise. Proceedings of the IRE, 37(1), 10\u201321.","journal-title":"Proceedings of the IRE"},{"key":"2477_CR86","doi-asserted-by":"crossref","unstructured":"Shue, J.R., Chan, E.R., Po, R., Ankner, Z., Wu, J., & Wetzstein, G. (2022). 3d neural field generation using triplane diffusion. arXiv preprint arXiv:2211.16677","DOI":"10.1109\/CVPR52729.2023.02000"},{"key":"2477_CR87","volume-title":"2018","author":"A Siarohin","year":"2018","unstructured":"Siarohin, A., Sangineto, E., Lathuiliere, S., & Sebe, N. (2018). 2018. Deformable GANs for pose-based human image generation. In: CVPR."},{"key":"2477_CR88","unstructured":"Sohl-Dickstein, J., Weiss, E., Maheswaranathan, N., & Ganguli, S. (2015). Deep unsupervised learning using nonequilibrium thermodynamics. In: International Conference on Machine Learning, pp. 2256\u20132265. PMLR"},{"key":"2477_CR89","unstructured":"Song, J., Meng, C., & Ermon, S. (2020). Denoising diffusion implicit models. arXiv preprint arXiv:2010.02502"},{"key":"2477_CR90","first-page":"2","volume":"3","author":"Y Song","year":"2019","unstructured":"Song, Y., & Ermon, S. (2019). Generative modeling by estimating gradients of the data distribution. Advances in neural information processing systems, 3, 2.","journal-title":"Advances in neural information processing systems"},{"key":"2477_CR91","first-page":"12438","volume":"33","author":"Y Song","year":"2020","unstructured":"Song, Y., & Ermon, S. (2020). Improved techniques for training score-based generative models. Advances in neural information processing systems, 33, 12438\u201312448.","journal-title":"Advances in neural information processing systems"},{"key":"2477_CR92","unstructured":"Song, Y., Sohl-Dickstein, J., Kingma, D.P., Kumar, A., Ermon, S., & Poole, B. (2020). Score-based generative modeling through stochastic differential equations. arXiv preprint arXiv:2011.13456"},{"key":"2477_CR93","unstructured":"Su, S.-Y., Yu, F., Zollhoefer, M., & Rhodin, H. (2021). A-nerf: Articulated neural radiance fields for learning human shape, appearance, and pose."},{"key":"2477_CR94","doi-asserted-by":"crossref","unstructured":"Sun, C., Sun, M., & Chen, H.-T. (2022). Direct voxel grid optimization: Super-fast convergence for radiance fields reconstruction. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 5459\u20135469","DOI":"10.1109\/CVPR52688.2022.00538"},{"key":"2477_CR95","doi-asserted-by":"crossref","unstructured":"Sun, G., Chen, X., Chen, Y., Pang, A., Lin, P., Jiang, Y., Xu, L., Yu, J., & Wang, J. (2021). Neural free-viewpoint performance rendering under complex human-object interactions. In: Proceedings of the 29th ACM International Conference on Multimedia, pp. 4651\u20134660","DOI":"10.1145\/3474085.3475442"},{"key":"2477_CR96","doi-asserted-by":"crossref","unstructured":"Sun, K., Xiao, B., Liu, D., & Wang, J. (2019). Deep high-resolution representation learning for human pose estimation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 5693\u20135703","DOI":"10.1109\/CVPR.2019.00584"},{"key":"2477_CR97","unstructured":"Thomas, M., & Joy, A. T. (2006). Elements of Information Theory. ??? Wiley-Interscience."},{"key":"2477_CR98","doi-asserted-by":"crossref","unstructured":"Tiwari, G., Sarafianos, N., Tung, T., & Pons-Moll, G. (2021). Neural-gif: Neural generalized implicit functions for animating people in clothing. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 11708\u201311718","DOI":"10.1109\/ICCV48922.2021.01150"},{"key":"2477_CR99","first-page":"4","volume":"3","author":"S Wang","year":"2021","unstructured":"Wang, S., Mihajlovic, M., Ma, Q., Geiger, A., & Tang, S. (2021). Metaavatar: Learning animatable clothed human models from few depth images. Advances in Neural Information Processing Systems, 3, 4.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"2477_CR100","doi-asserted-by":"crossref","unstructured":"Wang, T., Zhang, B., Zhang, T., Gu, S., Bao, J., Baltrusaitis, T., Shen, J., Chen, D., Wen, F., Chen, Q., et\u00a0al. (2022). Rodin: A generative model for sculpting 3d digital avatars using diffusion. arXiv preprint arXiv:2212.06135","DOI":"10.1109\/CVPR52729.2023.00443"},{"key":"2477_CR101","unstructured":"Watson, D., Chan, W., Martin-Brualla, R., Ho, J., Tagliasacchi, A., & Norouzi, M. (2022). Novel view synthesis with diffusion models. arXiv preprint arXiv:2210.04628"},{"key":"2477_CR102","doi-asserted-by":"crossref","unstructured":"Weng, C.-Y., Curless, B., Srinivasan, P.P., Barron, J.T., & Kemelmacher-Shlizerman, I. (2022). Humannerf: Free-viewpoint rendering of moving people from monocular video. arXiv:abs\/2201.04127","DOI":"10.1109\/CVPR52688.2022.01573"},{"issue":"1","key":"2477_CR103","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1109\/TIT.1976.1055508","volume":"22","author":"A Wyner","year":"1976","unstructured":"Wyner, A., & Ziv, J. (1976). The rate-distortion function for source coding with side information at the decoder. IEEE Transactions on information Theory, 22(1), 1\u201310.","journal-title":"IEEE Transactions on information Theory"},{"key":"2477_CR104","doi-asserted-by":"crossref","unstructured":"Xiu, Y., Yang, J., Tzionas, D., & Black, M.J. (2022). Icon: Implicit clothed humans obtained from normals. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 13296\u201313306","DOI":"10.1109\/CVPR52688.2022.01294"},{"key":"2477_CR105","first-page":"2","volume":"3","author":"J Xu","year":"2019","unstructured":"Xu, J., Sun, X., Zhang, Z., Zhao, G., & Lin, J. (2019). Understanding and improving layer normalization. Advances in Neural Information Processing Systems, 3, 2.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"2477_CR106","unstructured":"Yang, G., Kundu, A., Guibas, L.J., Barron, J.T., & Poole, B. (2023). Learning a diffusion prior for nerfs. arXiv preprint arXiv:2304.14473"},{"key":"2477_CR107","first-page":"7287","volume":"2018","author":"T Yu","year":"2018","unstructured":"Yu, T., Zheng, Z., Guo, K., Zhao, J., Dai, Q., Li, H., Pons-Moll, G., & Liu, Y. (2018). Doublefusion: Real-time capture of human performances with inner body shapes from a single depth sensor. IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 2018, 7287\u20137296.","journal-title":"IEEE\/CVF Conference on Computer Vision and Pattern Recognition"},{"key":"2477_CR108","doi-asserted-by":"crossref","unstructured":"Yu, T., Zheng, Z., Zhong, Y., Zhao, J., Dai, Q., Pons-Moll, G., & Liu, Y. (2019). Simulcap\u202f: Single-view human performance capture with cloth simulation. IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), 2019, 5499\u20135509.","DOI":"10.1109\/CVPR.2019.00565"},{"key":"2477_CR109","doi-asserted-by":"crossref","unstructured":"Zhang, J., Jiang, Z., Yang, D., Xu, H., Shi, Y., Song, G., Xu, Z., Wang, X., & Feng, J. (2023). Avatargen: a 3d generative model for animatable human avatars. In: Computer Vision\u2013ECCV 2022 Workshops: Tel Aviv, Israel, October 23\u201327, 2022, Proceedings, Part III, pp. 668\u2013685. Springer","DOI":"10.1007\/978-3-031-25066-8_39"},{"key":"2477_CR110","doi-asserted-by":"crossref","unstructured":"Zhang, L., & Agrawala, M. (2023). Adding conditional control to text-to-image diffusion models. arXiv preprint arXiv:2302.05543","DOI":"10.1109\/ICCV51070.2023.00355"},{"key":"2477_CR111","doi-asserted-by":"crossref","unstructured":"Zhang, R., Isola, P., Efros, A.A., Shechtman, E., & Wang, O. (2018). The unreasonable effectiveness of deep features as a perceptual metric. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 586\u2013595","DOI":"10.1109\/CVPR.2018.00068"},{"key":"2477_CR112","doi-asserted-by":"crossref","unstructured":"Zheng, Z., Yu, T., Liu, Y., & Dai, Q. (2021). Pamir: Parametric model-conditioned implicit representation for image-based human reconstruction. IEEE transactions on pattern analysis and machine intelligence PP","DOI":"10.1109\/TPAMI.2021.3050505"},{"key":"2477_CR113","unstructured":"Zhitao, Y., Zhongang, C., Haiyi, M., Shuai, L., Zhaoxi, C., Weiye, X., Yukun, W., Zhongfei, Q., Chen, W., Bo, D., Wayne, W., Chen, Q., Dahua, L., Ziwei, L., & Lei, Y. (2023). SynBody: Synthetic Dataset with Layered Human Models for 3D Human Perception and Modeling"},{"key":"2477_CR114","doi-asserted-by":"crossref","unstructured":"Zhou, L., Du, Y., & Wu, J. (2021). 3d shape generation and completion through point-voxel diffusion. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 5826\u20135835","DOI":"10.1109\/ICCV48922.2021.00577"},{"key":"2477_CR115","doi-asserted-by":"crossref","unstructured":"Zhu, L., Yang, D., Zhu, T., Reda, F., Chan, W., Saharia, C., Norouzi, M., & Kemelmacher-Shlizerman, I. (2023). Tryondiffusion: A tale of two unets. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 4606\u20134615","DOI":"10.1109\/CVPR52729.2023.00447"},{"key":"2477_CR116","doi-asserted-by":"crossref","unstructured":"Zhu, Z., Huang, T., Shi, B., Yu, M., Wang, B., & Bai, X. (2019). Progressive pose attention transfer for person image generation. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 2347\u20132356","DOI":"10.1109\/CVPR.2019.00245"}],"container-title":["International Journal of Computer Vision"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11263-025-02477-5.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11263-025-02477-5\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11263-025-02477-5.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,9]],"date-time":"2025-09-09T08:00:59Z","timestamp":1757404859000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11263-025-02477-5"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,5,16]]},"references-count":116,"journal-issue":{"issue":"9","published-print":{"date-parts":[[2025,9]]}},"alternative-id":["2477"],"URL":"https:\/\/doi.org\/10.1007\/s11263-025-02477-5","relation":{},"ISSN":["0920-5691","1573-1405"],"issn-type":[{"type":"print","value":"0920-5691"},{"type":"electronic","value":"1573-1405"}],"subject":[],"published":{"date-parts":[[2025,5,16]]},"assertion":[{"value":"1 July 2024","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"3 May 2025","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"16 May 2025","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}