{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,10]],"date-time":"2026-05-10T07:48:49Z","timestamp":1778399329201,"version":"3.51.4"},"publisher-location":"Cham","reference-count":91,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031729423","type":"print"},{"value":"9783031729430","type":"electronic"}],"license":[{"start":{"date-parts":[[2024,11,29]],"date-time":"2024-11-29T00:00:00Z","timestamp":1732838400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,11,29]],"date-time":"2024-11-29T00:00:00Z","timestamp":1732838400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-72943-0_27","type":"book-chapter","created":{"date-parts":[[2024,11,28]],"date-time":"2024-11-28T13:41:39Z","timestamp":1732801299000},"page":"471-490","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":9,"title":["VQ-HPS: Human Pose and\u00a0Shape Estimation in\u00a0a\u00a0Vector-Quantized Latent Space"],"prefix":"10.1007","author":[{"given":"Gu\u00e9nol\u00e9","family":"Fiche","sequence":"first","affiliation":[]},{"given":"Simon","family":"Leglaive","sequence":"additional","affiliation":[]},{"given":"Xavier","family":"Alameda-Pineda","sequence":"additional","affiliation":[]},{"given":"Antonio","family":"Agudo","sequence":"additional","affiliation":[]},{"given":"Francesc","family":"Moreno-Noguer","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,11,29]]},"reference":[{"key":"27_CR1","doi-asserted-by":"crossref","unstructured":"Andriluka, M., Pishchulin, 
L., Gehler, P., Schiele, B.: 2D human pose estimation: new benchmark and state of the art analysis. In: IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 3686\u20133693 (2014)","DOI":"10.1109\/CVPR.2014.471"},{"key":"27_CR2","doi-asserted-by":"crossref","unstructured":"Anguelov, D., Srinivasan, P., Koller, D., Thrun, S., Rodgers, J., Davis, J.: Scape: shape completion and animation of people. ACM Trans. Graph. (TOG) 24(3) (2005)","DOI":"10.1145\/1073204.1073207"},{"key":"27_CR3","doi-asserted-by":"crossref","unstructured":"Armando, M., et al.: Cross-view and cross-pose completion for 3D human understanding. arXiv preprint arXiv:2311.09104 (2023)","DOI":"10.1109\/CVPR52733.2024.00150"},{"key":"27_CR4","first-page":"20496","volume":"33","author":"B Biggs","year":"2020","unstructured":"Biggs, B., Novotny, D., Ehrhardt, S., Joo, H., Graham, B., Vedaldi, A.: 3D multi-bodies: fitting sets of plausible 3D human models to ambiguous image data. Adv. Neural Inf. Process. Syst. (NIPS) 33, 20496\u201320507 (2020)","journal-title":"Adv. Neural Inf. Process. Syst. (NIPS)"},{"key":"27_CR5","doi-asserted-by":"crossref","unstructured":"Black, M.J., Patel, P., Tesch, J., Yang, J.: BEDLAM: a synthetic dataset of bodies exhibiting detailed lifelike animated motion. In: IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 8726\u20138737 (2023)","DOI":"10.1109\/CVPR52729.2023.00843"},{"key":"27_CR6","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"561","DOI":"10.1007\/978-3-319-46454-1_34","volume-title":"Computer Vision \u2013 ECCV 2016","author":"F Bogo","year":"2016","unstructured":"Bogo, F., Kanazawa, A., Lassner, C., Gehler, P., Romero, J., Black, M.J.: Keep it SMPL: automatic estimation of 3D human pose and shape from a single image. In: Leibe, B., Matas, J., Sebe, N., Welling, M. (eds.) ECCV 2016. LNCS, vol. 9909, pp. 561\u2013578. Springer, Cham (2016). 
https:\/\/doi.org\/10.1007\/978-3-319-46454-1_34"},{"key":"27_CR7","series-title":"LNCS","doi-asserted-by":"publisher","first-page":"557","DOI":"10.1007\/978-3-031-20071-7_33","volume-title":"European Conference on Computer Vision (ECCV)","author":"Z Cai","year":"2022","unstructured":"Cai, Z., et al.: Humman: multi-modal 4D human dataset for versatile sensing and modeling. In: Avidan, S., Brostow, G., Ciss\u00e9, M., Farinella, G.M., Hassner, T. (eds.) ECCV 2022. LNCS, vol. 13667, pp. 557\u2013577. Springer, Cham (2022). https:\/\/doi.org\/10.1007\/978-3-031-20071-7_33"},{"key":"27_CR8","unstructured":"Cai, Z., et\u00a0al.: Smpler-x: scaling up expressive human pose and shape estimation. In: Advances in Neural Information Processing Systems (NIPS), vol. 36 (2024)"},{"key":"27_CR9","unstructured":"Cai, Z., et al.: Playing for 3D human recovery. arXiv preprint arXiv:2110.07588 (2021)"},{"key":"27_CR10","series-title":"LNCS","doi-asserted-by":"publisher","first-page":"342","DOI":"10.1007\/978-3-031-19769-7_20","volume-title":"European Conference on Computer Vision (ECCV)","author":"J Cho","year":"2022","unstructured":"Cho, J., Youwang, K., Oh, T.H.: Cross-attention of disentangled modalities for 3D human mesh recovery with transformers. In: Avidan, S., Brostow, G., Ciss\u00e9, M., Farinella, G.M., Hassner, T. (eds.) ECCV 2022. LNCS, vol. 13661, pp. 342\u2013359. Springer, Cham (2022). https:\/\/doi.org\/10.1007\/978-3-031-19769-7_20"},{"key":"27_CR11","doi-asserted-by":"crossref","unstructured":"Choi, H., Moon, G., Chang, J.Y., Lee, K.M.: Beyond static features for temporally consistent 3D human pose and shape from a video. In: IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 
1964\u20131973 (2021)","DOI":"10.1109\/CVPR46437.2021.00200"},{"key":"27_CR12","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"769","DOI":"10.1007\/978-3-030-58571-6_45","volume-title":"Computer Vision \u2013 ECCV 2020","author":"H Choi","year":"2020","unstructured":"Choi, H., Moon, G., Lee, K.M.: Pose2Mesh: graph convolutional network for 3D human pose and mesh recovery from a 2D human pose. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.-M. (eds.) ECCV 2020. LNCS, vol. 12352, pp. 769\u2013787. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58571-6_45"},{"key":"27_CR13","doi-asserted-by":"crossref","unstructured":"Cihan\u00a0Camgoz, N., Hadfield, S., Koller, O., Bowden, R.: Subunets: end-to-end hand shape and continuous sign language recognition. In: IEEE\/CVF International Conference on Computer Vision (ICCV), pp. 3056\u20133065 (2017)","DOI":"10.1109\/ICCV.2017.332"},{"key":"27_CR14","doi-asserted-by":"crossref","unstructured":"Cohen, I., Li, H.: Inference of human postures by classification of 3D human body shape. In: IEEE International Workshop on Analysis and Modeling of Faces and Gestures, pp. 74\u201381 (2003)","DOI":"10.1109\/AMFG.2003.1240827"},{"key":"27_CR15","series-title":"LNCS","doi-asserted-by":"publisher","first-page":"146","DOI":"10.1007\/978-3-031-20086-1_9","volume-title":"European Conference on Computer Vision (ECCV)","author":"E Corona","year":"2022","unstructured":"Corona, E., Pons-Moll, G., Aleny\u00e0, G., Moreno-Noguer, F.: Learned vertex descent: a new direction for 3D human model fitting. In: Avidan, S., Brostow, G., Ciss\u00e9, M., Farinella, G.M., Hassner, T. (eds.) ECCV 2022. LNCS, vol. 13662, pp. 146\u2013165. Springer, Cham (2022). 
https:\/\/doi.org\/10.1007\/978-3-031-20086-1_9"},{"key":"27_CR16","series-title":"LNCS","doi-asserted-by":"publisher","first-page":"346","DOI":"10.1007\/978-3-031-20068-7_20","volume-title":"European Conference on Computer Vision (ECCV)","author":"G Delmas","year":"2022","unstructured":"Delmas, G., Weinzaepfel, P., Lucas, T., Moreno-Noguer, F., Rogez, G.: Posescript: 3D human poses from natural language. In: Avidan, S., Brostow, G., Ciss\u00e9, M., Farinella, G.M., Hassner, T. (eds.) ECCV 2022. LNCS, vol. 13666, pp. 346\u2013362. Springer, Cham (2022). https:\/\/doi.org\/10.1007\/978-3-031-20068-7_20"},{"key":"27_CR17","unstructured":"Dosovitskiy, A., et al.: An image is worth 16x16 words: transformers for image recognition at scale. In: International Conference on Learning Representations (ICLR) (2021)"},{"key":"27_CR18","doi-asserted-by":"crossref","unstructured":"Dou, Z., et al.: Tore: token reduction for efficient human mesh recovery with transformer. In: IEEE\/CVF International Conference on Computer Vision (ICCV), pp. 15143\u201315155 (2023)","DOI":"10.1109\/ICCV51070.2023.01390"},{"key":"27_CR19","doi-asserted-by":"crossref","unstructured":"Dwivedi, S.K., Athanasiou, N., Kocabas, M., Black, M.J.: Learning to regress bodies from images using differentiable semantic rendering. In: IEEE\/CVF International Conference on Computer Vision (ICCV), pp. 11250\u201311259 (2021)","DOI":"10.1109\/ICCV48922.2021.01106"},{"key":"27_CR20","doi-asserted-by":"crossref","unstructured":"Dwivedi, S.K., Sun, Y., Patel, P., Feng, Y., Black, M.J.: TokenHMR: advancing human mesh recovery with a tokenized pose representation. In: IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR) (2024)","DOI":"10.1109\/CVPR52733.2024.00132"},{"key":"27_CR21","doi-asserted-by":"crossref","unstructured":"Einfalt, M., Zecha, D., Lienhart, R.: Activity-conditioned continuous human pose estimation for performance analysis of athletes using the example of swimming. 
In: IEEE\/CVF Winter Conference on Applications of Computer Vision (WACV), pp. 446\u2013455. IEEE (2018)","DOI":"10.1109\/WACV.2018.00055"},{"key":"27_CR22","doi-asserted-by":"crossref","unstructured":"Fan, T., Alwala, K.V., Xiang, D., Xu, W., Murphey, T., Mukadam, M.: Revitalizing optimization for 3D human pose and shape estimation: a sparse constrained formulation. In: IEEE\/CVF International Conference on Computer Vision (ICCV), pp. 11457\u201311466 (2021)","DOI":"10.1109\/ICCV48922.2021.01126"},{"key":"27_CR23","doi-asserted-by":"crossref","unstructured":"Fang, Q., Chen, K., Fan, Y., Shuai, Q., Li, J., Zhang, W.: Learning analytical posterior probability for human mesh recovery. In: IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 8781\u20138791 (2023)","DOI":"10.1109\/CVPR52729.2023.00848"},{"key":"27_CR24","doi-asserted-by":"crossref","unstructured":"Feng, A., Shin, S., Yoon, Y.: A tool for extracting 3D avatar-ready gesture animations from monocular videos. In: ACM SIGGRAPH Conference on Motion, Interaction and Games (ACM MIG), pp.\u00a01\u20137 (2022)","DOI":"10.1145\/3561975.3562953"},{"key":"27_CR25","doi-asserted-by":"crossref","unstructured":"Feng, Y., Lin, J., Dwivedi, S.K., Sun, Y., Patel, P., Black, M.J.: Posegpt: chatting about 3D human pose. arXiv preprint arXiv:2311.18836 (2023)","DOI":"10.1109\/CVPR52733.2024.00204"},{"key":"27_CR26","doi-asserted-by":"crossref","unstructured":"Fiche, G., Sevestre, V., Gonzalez-Barral, C., Leglaive, S.: Swimxyz: a large-scale dataset of synthetic swimming motions and videos. In: ACM SIGGRAPH Conference on Motion, Interaction and Games (MIG) (2023)","DOI":"10.1145\/3623264.3624440"},{"key":"27_CR27","doi-asserted-by":"crossref","unstructured":"Goel, S., Pavlakos, G., Rajasegaran, J., Kanazawa, A., Malik, J.: Humans in 4D: reconstructing and tracking humans with transformers. In: IEEE\/CVF International Conference on Computer Vision (ICCV), pp. 
14783\u201314794 (2023)","DOI":"10.1109\/ICCV51070.2023.01358"},{"key":"27_CR28","doi-asserted-by":"crossref","unstructured":"Guler, R.A., Kokkinos, I.: Holopose: holistic 3D human reconstruction in-the-wild. In: IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 10884\u201310894 (2019)","DOI":"10.1109\/CVPR.2019.01114"},{"key":"27_CR29","doi-asserted-by":"crossref","unstructured":"Guo, G., Fu, Y., Dyer, C.R., Huang, T.S.: Head pose estimation: classification or regression? In: International Conference on Pattern Recognition (ICPR), pp.\u00a01\u20134. IEEE (2008)","DOI":"10.1109\/ICPR.2008.4761081"},{"key":"27_CR30","doi-asserted-by":"crossref","unstructured":"Guo, W., Bie, X., Alameda-Pineda, X., Moreno-Noguer, F.: Multi-person extreme motion prediction. In: IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 13053\u201313064 (2022)","DOI":"10.1109\/CVPR52688.2022.01271"},{"key":"27_CR31","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. In: IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 770\u2013778 (2016)","DOI":"10.1109\/CVPR.2016.90"},{"key":"27_CR32","doi-asserted-by":"crossref","unstructured":"Ionescu, C., Papava, D., Olaru, V., Sminchisescu, C.: Human3.6m: large scale datasets and predictive methods for 3D human sensing in natural environments. IEEE Trans. Pattern Anal. Mach. Intell. (PAMI) 36(7), 1325\u20131339 (2013)","DOI":"10.1109\/TPAMI.2013.248"},{"key":"27_CR33","doi-asserted-by":"crossref","unstructured":"Joo, H., Neverova, N., Vedaldi, A.: Exemplar fine-tuning for 3D human model fitting towards in-the-wild 3D human pose estimation. In: International Conference on 3D Vision (3DV), pp. 42\u201352. 
IEEE (2021)","DOI":"10.1109\/3DV53792.2021.00015"},{"key":"27_CR34","doi-asserted-by":"crossref","unstructured":"Kanazawa, A., Black, M.J., Jacobs, D.W., Malik, J.: End-to-end recovery of human shape and pose. In: IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 7122\u20137131 (2018)","DOI":"10.1109\/CVPR.2018.00744"},{"key":"27_CR35","doi-asserted-by":"crossref","unstructured":"Kanazawa, A., Zhang, J.Y., Felsen, P., Malik, J.: Learning 3D human dynamics from video. In: IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 5614\u20135623 (2019)","DOI":"10.1109\/CVPR.2019.00576"},{"key":"27_CR36","doi-asserted-by":"crossref","unstructured":"Kaufmann, M., et al.: EMDB: the electromagnetic database of global 3D human pose and shape in the wild. In: IEEE\/CVF International Conference on Computer Vision (ICCV), pp. 14632\u201314643 (2023)","DOI":"10.1109\/ICCV51070.2023.01345"},{"key":"27_CR37","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"852","DOI":"10.1007\/978-3-642-33783-3_61","volume-title":"Computer Vision \u2013 ECCV 2012","author":"C Keskin","year":"2012","unstructured":"Keskin, C., K\u0131ra\u00e7, F., Kara, Y.E., Akarun, L.: Hand pose estimation and hand shape classification using multi-layered randomized decision forests. In: Fitzgibbon, A., Lazebnik, S., Perona, P., Sato, Y., Schmid, C. (eds.) ECCV 2012. LNCS, vol. 7577, pp. 852\u2013863. Springer, Heidelberg (2012). https:\/\/doi.org\/10.1007\/978-3-642-33783-3_61"},{"key":"27_CR38","doi-asserted-by":"crossref","unstructured":"Kocabas, M., Athanasiou, N., Black, M.J.: Vibe: video inference for human body pose and shape estimation. In: IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 
5253\u20135263 (2020)","DOI":"10.1109\/CVPR42600.2020.00530"},{"key":"27_CR39","doi-asserted-by":"crossref","unstructured":"Kocabas, M., Huang, C.H.P., Hilliges, O., Black, M.J.: PARE: part attention regressor for 3D human body estimation. In: IEEE\/CVF International Conference on Computer Vision (ICCV), pp. 11127\u201311137 (2021)","DOI":"10.1109\/ICCV48922.2021.01094"},{"key":"27_CR40","doi-asserted-by":"crossref","unstructured":"Kolotouros, N., Pavlakos, G., Black, M.J., Daniilidis, K.: Learning to reconstruct 3D human pose and shape via model-fitting in the loop. In: IEEE\/CVF International Conference on Computer Vision (ICCV), pp. 2252\u20132261 (2019)","DOI":"10.1109\/ICCV.2019.00234"},{"key":"27_CR41","doi-asserted-by":"crossref","unstructured":"Kolotouros, N., Pavlakos, G., Daniilidis, K.: Convolutional mesh regression for single-image human shape reconstruction. In: IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 4501\u20134510 (2019)","DOI":"10.1109\/CVPR.2019.00463"},{"key":"27_CR42","doi-asserted-by":"crossref","unstructured":"Kolotouros, N., Pavlakos, G., Jayaraman, D., Daniilidis, K.: Probabilistic modeling for human mesh recovery. In: IEEE\/CVF International Conference on Computer Vision (ICCV), pp. 11605\u201311614 (2021)","DOI":"10.1109\/ICCV48922.2021.01140"},{"key":"27_CR43","doi-asserted-by":"crossref","unstructured":"Lassner, C., Romero, J., Kiefel, M., Bogo, F., Black, M.J., Gehler, P.V.: Unite the people: Closing the loop between 3D and 2D human representations. In: IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 
6050\u20136059 (2017)","DOI":"10.1109\/CVPR.2017.500"},{"key":"27_CR44","series-title":"LNCS","doi-asserted-by":"publisher","first-page":"89","DOI":"10.1007\/978-3-031-20068-7_6","volume-title":"European Conference on Computer Vision (ECCV)","author":"Y Li","year":"2022","unstructured":"Li, Y., et al.: SimCC: a simple coordinate classification perspective for human pose estimation. In: Avidan, S., Brostow, G., Ciss\u00e9, M., Farinella, G.M., Hassner, T. (eds.) ECCV 2022. LNCS, vol. 13666, pp. 89\u2013106. Springer, Cham (2022). https:\/\/doi.org\/10.1007\/978-3-031-20068-7_6"},{"key":"27_CR45","series-title":"LNCS","doi-asserted-by":"publisher","first-page":"590","DOI":"10.1007\/978-3-031-20065-6_34","volume-title":"European Conference on Computer Vision (ECCV)","author":"Z Li","year":"2022","unstructured":"Li, Z., Liu, J., Zhang, Z., Xu, S., Yan, Y.: CLIFF: carrying location information in full frames into human pose and shape estimation. In: Avidan, S., Brostow, G., Ciss\u00e9, M., Farinella, G.M., Hassner, T. (eds.) ECCV 2022. LNCS, vol. 13665, pp. 590\u2013606. Springer, Cham (2022). https:\/\/doi.org\/10.1007\/978-3-031-20065-6_34"},{"key":"27_CR46","doi-asserted-by":"crossref","unstructured":"Lin, J., Zeng, A., Wang, H., Zhang, L., Li, Y.: One-stage 3D whole-body mesh recovery with component aware transformer. In: IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 21159\u201321168 (2023)","DOI":"10.1109\/CVPR52729.2023.02027"},{"key":"27_CR47","doi-asserted-by":"crossref","unstructured":"Lin, K., Wang, L., Liu, Z.: End-to-end human pose and mesh reconstruction with transformers. In: IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 1954\u20131963 (2021)","DOI":"10.1109\/CVPR46437.2021.00199"},{"key":"27_CR48","doi-asserted-by":"crossref","unstructured":"Lin, K., Wang, L., Liu, Z.: Mesh graphormer. In: IEEE\/CVF International Conference on Computer Vision (ICCV), pp. 
12939\u201312948 (2021)","DOI":"10.1109\/ICCV48922.2021.01270"},{"key":"27_CR49","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"740","DOI":"10.1007\/978-3-319-10602-1_48","volume-title":"Computer Vision \u2013 ECCV 2014","author":"T-Y Lin","year":"2014","unstructured":"Lin, T.-Y., et al.: Microsoft COCO: common objects in context. In: Fleet, D., Pajdla, T., Schiele, B., Tuytelaars, T. (eds.) ECCV 2014. LNCS, vol. 8693, pp. 740\u2013755. Springer, Cham (2014). https:\/\/doi.org\/10.1007\/978-3-319-10602-1_48"},{"key":"27_CR50","doi-asserted-by":"crossref","unstructured":"Loper, M., Mahmood, N., Romero, J., Pons-Moll, G., Black, M.J.: SMPL: a skinned multi-person linear model. ACM Trans. Graph. (TOG) 34(6) (2015)","DOI":"10.1145\/2816795.2818013"},{"key":"27_CR51","series-title":"LNCS","doi-asserted-by":"publisher","first-page":"417","DOI":"10.1007\/978-3-031-20068-7_24","volume-title":"European Conference on Computer Vision (ECCV)","author":"T Lucas","year":"2022","unstructured":"Lucas, T., Baradel, F., Weinzaepfel, P., Rogez, G.: PoseGPT: quantization-based 3D human motion generation and forecasting. In: Avidan, S., Brostow, G., Ciss\u00e9, M., Farinella, G.M., Hassner, T. (eds.) ECCV 2022. LNCS, vol. 13666, pp. 417\u2013435. Springer, Cham (2022). https:\/\/doi.org\/10.1007\/978-3-031-20068-7_24"},{"key":"27_CR52","doi-asserted-by":"crossref","unstructured":"Luo, Z., Golestaneh, S.A., Kitani, K.M.: 3D human motion estimation via motion compression and refinement. In: Asian Conference on Computer Vision (ACCV) (2020)","DOI":"10.1007\/978-3-030-69541-5_20"},{"key":"27_CR53","doi-asserted-by":"crossref","unstructured":"Ma, X., Su, J., Wang, C., Zhu, W., Wang, Y.: 3D human mesh estimation from virtual markers. In: IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 
534\u2013543 (2023)","DOI":"10.1109\/CVPR52729.2023.00059"},{"key":"27_CR54","doi-asserted-by":"crossref","unstructured":"Mahmood, N., Ghorbani, N., Troje, N.F., Pons-Moll, G., Black, M.J.: Amass: archive of motion capture as surface shapes. In: IEEE\/CVF International Conference on Computer Vision (ICCV), pp. 5442\u20135451 (2019)","DOI":"10.1109\/ICCV.2019.00554"},{"key":"27_CR55","doi-asserted-by":"crossref","unstructured":"von Marcard, T., Henschel, R., Black, M., Rosenhahn, B., Pons-Moll, G.: Recovering accurate 3D human pose in the wild using IMUs and a moving camera. In: European Conference on Computer Vision (ECCV), pp. 601\u2013617. Springer, Cham (2018)","DOI":"10.1007\/978-3-030-01249-6_37"},{"key":"27_CR56","doi-asserted-by":"crossref","unstructured":"Mehta, D., et al.: Monocular 3D human pose estimation in the wild using improved CNN supervision. In: International Conference on 3D Vision (3DV), pp. 506\u2013516. IEEE (2017)","DOI":"10.1109\/3DV.2017.00064"},{"key":"27_CR57","doi-asserted-by":"crossref","unstructured":"Moon, G., Choi, H., Lee, K.M.: Neuralannot: neural annotator for 3D human mesh training sets. In: IEEE\/CVF Conference on Computer Vision and Pattern Recognition Workshops (CVPRW), pp. 2299\u20132307 (2022)","DOI":"10.1109\/CVPRW56347.2022.00256"},{"key":"27_CR58","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"752","DOI":"10.1007\/978-3-030-58571-6_44","volume-title":"Computer Vision \u2013 ECCV 2020","author":"G Moon","year":"2020","unstructured":"Moon, G., Lee, K.M.: I2L-MeshNet: image-to-lixel prediction network for accurate 3D human pose and mesh estimation from a single RGB image. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.-M. (eds.) ECCV 2020. LNCS, vol. 12352, pp. 752\u2013768. Springer, Cham (2020). 
https:\/\/doi.org\/10.1007\/978-3-030-58571-6_44"},{"key":"27_CR59","series-title":"LNCS","doi-asserted-by":"publisher","first-page":"568","DOI":"10.1007\/978-3-031-20086-1_33","volume-title":"European Conference on Computer Vision (ECCV)","author":"AAA Osman","year":"2022","unstructured":"Osman, A.A.A., Bolkart, T., Tzionas, D., Black, M.J.: SUPR: a sparse unified part-based human representation. In: Avidan, S., Brostow, G., Ciss\u00e9, M., Farinella, G.M., Hassner, T. (eds.) ECCV 2022. LNCS, vol. 13662, pp. 568\u2013585. Springer, Cham (2022). https:\/\/doi.org\/10.1007\/978-3-031-20086-1_33"},{"key":"27_CR60","doi-asserted-by":"crossref","unstructured":"Patel, P., Huang, C.H.P., Tesch, J., Hoffmann, D.T., Tripathi, S., Black, M.J.: AGORA: avatars in geography optimized for regression analysis. In: IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 13468\u201313478 (2021)","DOI":"10.1109\/CVPR46437.2021.01326"},{"key":"27_CR61","doi-asserted-by":"crossref","unstructured":"Pavlakos, G., et al.: Expressive body capture: 3D hands, face, and body from a single image. In: IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 10975\u201310985 (2019)","DOI":"10.1109\/CVPR.2019.01123"},{"key":"27_CR62","doi-asserted-by":"crossref","unstructured":"Rempe, D., Birdal, T., Hertzmann, A., Yang, J., Sridhar, S., Guibas, L.J.: Humor: 3D human motion model for robust pose estimation. In: IEEE\/CVF International Conference on Computer Vision (ICCV), pp. 11488\u201311499 (2021)","DOI":"10.1109\/ICCV48922.2021.01129"},{"key":"27_CR63","unstructured":"Rogez, G., Schmid, C.: Mocap-guided data augmentation for 3D pose estimation in the wild. In: Advances in Neural Information Processing Systems (NIPS), vol. 29, pp. 
3108\u20133116 (2016)"},{"issue":"5","key":"27_CR64","first-page":"1146","volume":"42","author":"G Rogez","year":"2019","unstructured":"Rogez, G., Weinzaepfel, P., Schmid, C.: LCR-Net++: multi-person 2D and 3D pose detection in natural images. IEEE Trans. Pattern Anal. Mach. Intell. (PAMI) 42(5), 1146\u20131161 (2019)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell. (PAMI)"},{"key":"27_CR65","doi-asserted-by":"publisher","first-page":"211","DOI":"10.1007\/s11263-015-0816-y","volume":"115","author":"O Russakovsky","year":"2015","unstructured":"Russakovsky, O., et al.: Imagenet large scale visual recognition challenge. Int. J. Comput. Vision (IJCV) 115, 211\u2013252 (2015)","journal-title":"Int. J. Comput. Vision (IJCV)"},{"key":"27_CR66","doi-asserted-by":"crossref","unstructured":"Sarakon, P., Charoenpong, T., Charoensiriwath, S.: Face shape classification from 3D human data by using SVM. In: Biomedical Engineering International Conference, pp.\u00a01\u20135. IEEE (2014)","DOI":"10.1109\/BMEiCON.2014.7017382"},{"key":"27_CR67","unstructured":"Sengupta, A., Budvytis, I., Cipolla, R.: Synthetic training for accurate 3D human pose and shape estimation in the wild. In: British Machine Vision Conference (BMVC) (2020)"},{"key":"27_CR68","doi-asserted-by":"crossref","unstructured":"Sengupta, A., Budvytis, I., Cipolla, R.: Probabilistic 3D human shape and pose estimation from multiple unconstrained images in the wild. In: IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 16094\u201316104 (2021)","DOI":"10.1109\/CVPR46437.2021.01583"},{"key":"27_CR69","doi-asserted-by":"crossref","unstructured":"Sengupta, A., Budvytis, I., Cipolla, R.: Humaniflow: ancestor-conditioned normalising flows on SO(3) manifolds for human pose and shape distribution estimation. In: IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 
4779\u20134789 (2023)","DOI":"10.1109\/CVPR52729.2023.00463"},{"key":"27_CR70","doi-asserted-by":"crossref","unstructured":"Shi, M., Starke, S., Ye, Y., Komura, T., Won, J.: Phasemp: robust 3D pose estimation via phase-conditioned human motion prior. In: IEEE\/CVF International Conference on Computer Vision (ICCV), pp. 14725\u201314737 (2023)","DOI":"10.1109\/ICCV51070.2023.01353"},{"key":"27_CR71","doi-asserted-by":"crossref","unstructured":"Siyao, L., et al.: Bailando: 3D dance generation by actor-critic GPT with choreographic memory. In: IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 11050\u201311059 (2022)","DOI":"10.1109\/CVPR52688.2022.01077"},{"key":"27_CR72","doi-asserted-by":"crossref","unstructured":"Starke, S., Mason, I., Komura, T.: Deepphase: periodic autoencoders for learning motion phase manifolds. ACM Trans. Graph. (TOG) 41(4) (2022)","DOI":"10.1145\/3528223.3530178"},{"key":"27_CR73","doi-asserted-by":"crossref","unstructured":"Sun, Y., Bao, Q., Liu, W., Fu, Y., Black, M.J., Mei, T.: Monocular, one-stage, regression of multiple 3D people. In: IEEE\/CVF International Conference on Computer Vision (ICCV), pp. 11179\u201311188 (2021)","DOI":"10.1109\/ICCV48922.2021.01099"},{"key":"27_CR74","series-title":"LNCS","doi-asserted-by":"publisher","first-page":"572","DOI":"10.1007\/978-3-031-20065-6_33","volume-title":"European Conference on Computer Vision (ECCV)","author":"G Tiwari","year":"2022","unstructured":"Tiwari, G., Antic, D., Lenssen, J.E., Sarafianos, N., Tung, T., Pons-Moll, G.: Pose-NDF: modeling human pose manifolds with neural distance fields. In: Avidan, S., Brostow, G., Ciss\u00e9, M., Farinella, G.M., Hassner, T. (eds.) ECCV 2022. LNCS, vol. 13665, pp. 572\u2013589. Springer, Cham (2022). https:\/\/doi.org\/10.1007\/978-3-031-20065-6_33"},{"key":"27_CR75","unstructured":"Van Den Oord, A., Vinyals, O., et al.: Neural discrete representation learning. 
In: Advances in Neural Information Processing Systems (NIPS), vol. 30, pp. 6306\u20136315 (2017)"},{"key":"27_CR76","doi-asserted-by":"crossref","unstructured":"Varol, G., et al.: Learning from synthetic humans. In: IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 109\u2013117 (2017)","DOI":"10.1109\/CVPR.2017.492"},{"key":"27_CR77","unstructured":"Vaswani, A., et al.: Attention is all you need. In: Advances in Neural Information Processing Systems (NIPS), vol. 30, pp. 5998\u20136008 (2017)"},{"key":"27_CR78","doi-asserted-by":"crossref","unstructured":"Wang, D., Zhang, S.: 3D human mesh recovery with sequentially global rotation estimation. In: IEEE\/CVF International Conference on Computer Vision (ICCV), pp. 14953\u201314962 (2023)","DOI":"10.1109\/ICCV51070.2023.01373"},{"key":"27_CR79","doi-asserted-by":"crossref","unstructured":"Wang, J., Qiu, K., Peng, H., Fu, J., Zhu, J.: AI coach: deep human pose estimation and analysis for personalized athletic training assistance. In: ACM International Conference on Multimedia (ACM MM), pp. 374\u2013382 (2019)","DOI":"10.1145\/3343031.3350910"},{"key":"27_CR80","doi-asserted-by":"crossref","unstructured":"Wang, W., et al.: Zolly: zoom focal length correctly for perspective-distorted human mesh reconstruction. In: IEEE\/CVF International Conference on Computer Vision (ICCV), pp. 3925\u20133935 (2023)","DOI":"10.1109\/ICCV51070.2023.00363"},{"key":"27_CR81","doi-asserted-by":"crossref","unstructured":"Xu, H., Bazavan, E.G., Zanfir, A., Freeman, W.T., Sukthankar, R., Sminchisescu, C.: GHUM & GHUML: generative 3D human shape and articulated pose models. In: IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 
6184\u20136193 (2020)","DOI":"10.1109\/CVPR42600.2020.00622"},{"key":"27_CR82","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"284","DOI":"10.1007\/978-3-030-58545-7_17","volume-title":"Computer Vision \u2013 ECCV 2020","author":"X Xu","year":"2020","unstructured":"Xu, X., Chen, H., Moreno-Noguer, F., Jeni, L.A., De la Torre, F.: 3D human shape and pose from a single low-resolution image with self-supervised learning. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.-M. (eds.) ECCV 2020. LNCS, vol. 12354, pp. 284\u2013300. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58545-7_17"},{"issue":"9","key":"27_CR83","first-page":"4490","volume":"44","author":"X Xu","year":"2021","unstructured":"Xu, X., Chen, H., Moreno-Noguer, F., Jeni, L.A., De la Torre, F.: 3D human pose, shape and texture from low-resolution images and videos. IEEE Trans. Pattern Anal. Mach. Intell. (PAMI) 44(9), 4490\u20134504 (2021)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell. (PAMI)"},{"key":"27_CR84","doi-asserted-by":"crossref","unstructured":"Yang, S., et al.: Qpgesture: quantization-based and phase-guided motion matching for natural speech-driven gesture generation. In: IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 2321\u20132330 (2023)","DOI":"10.1109\/CVPR52729.2023.00230"},{"key":"27_CR85","doi-asserted-by":"crossref","unstructured":"Zanfir, A., Marinoiu, E., Sminchisescu, C.: Monocular 3D pose and shape estimation of multiple people in natural scenes-the importance of multiple scene constraints. In: IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 2148\u20132157 (2018)","DOI":"10.1109\/CVPR.2018.00229"},{"key":"27_CR86","doi-asserted-by":"crossref","unstructured":"Zhang, H., et al.: Learning physically simulated tennis skills from broadcast videos. ACM Trans. Graph. 
(TOG) 42(4) (2023)","DOI":"10.1145\/3592408"},{"issue":"5","key":"27_CR87","first-page":"2610","volume":"44","author":"H Zhang","year":"2020","unstructured":"Zhang, H., Cao, J., Lu, G., Ouyang, W., Sun, Z.: Learning 3D human shape and pose from dense body parts. IEEE Trans. Pattern Anal. Mach. Intell. (PAMI) 44(5), 2610\u20132627 (2020)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell. (PAMI)"},{"key":"27_CR88","doi-asserted-by":"crossref","unstructured":"Zhang, H., et al.: Pymaf: 3D human pose and shape regression with pyramidal mesh alignment feedback loop. In: IEEE\/CVF International Conference on Computer Vision (ICCV), pp. 11446\u201311456 (2021)","DOI":"10.1109\/ICCV48922.2021.01125"},{"key":"27_CR89","doi-asserted-by":"crossref","unstructured":"Zhang, J., et al.: Generating human motion from textual descriptions with discrete representations. In: IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 14730\u201314740 (2023)","DOI":"10.1109\/CVPR52729.2023.01415"},{"key":"27_CR90","doi-asserted-by":"crossref","unstructured":"Zheng, C., Liu, X., Qi, G.J., Chen, C.: Potter: pooling attention transformer for efficient human mesh recovery. In: IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 1611\u20131620 (2023)","DOI":"10.1109\/CVPR52729.2023.00161"},{"key":"27_CR91","unstructured":"Zhou, Y., et al.: Fully convolutional mesh autoencoder using efficient spatially varying kernels. In: Advances in Neural Information Processing Systems (NIPS), vol. 33, pp. 
9251\u20139262 (2020)"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2024"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-72943-0_27","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,11,28]],"date-time":"2024-11-28T14:23:32Z","timestamp":1732803812000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-72943-0_27"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,11,29]]},"ISBN":["9783031729423","9783031729430"],"references-count":91,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-72943-0_27","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,11,29]]},"assertion":[{"value":"29 November 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Milan","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 September 2024","order":7,"name":"conference_start_date","label":"Conference Start 
Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4 October 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2024.ecva.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}