{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,2]],"date-time":"2026-06-02T09:25:27Z","timestamp":1780392327724,"version":"3.54.1"},"publisher-location":"Cham","reference-count":118,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031726392","type":"print"},{"value":"9783031726408","type":"electronic"}],"license":[{"start":{"date-parts":[[2024,10,29]],"date-time":"2024-10-29T00:00:00Z","timestamp":1730160000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,10,29]],"date-time":"2024-10-29T00:00:00Z","timestamp":1730160000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-72640-8_24","type":"book-chapter","created":{"date-parts":[[2024,10,28]],"date-time":"2024-10-28T09:34:20Z","timestamp":1730108060000},"page":"426-446","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":9,"title":["COIN: Control-Inpainting Diffusion Prior for\u00a0Human and\u00a0Camera Motion Estimation"],"prefix":"10.1007","author":[{"given":"Jiefeng","family":"Li","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Ye","family":"Yuan","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Davis","family":"Rempe","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Haotian","family":"Zhang","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Pavlo","family":"Molchanov","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Cewu","family":"Lu","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Jan","family":"Kautz","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Umar","family":"Iqbal","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"297","published-online":{"date-parts":[[2024,10,29]]},"reference":[{"key":"24_CR1","doi-asserted-by":"crossref","unstructured":"Akhter, I., Black, M.J.: Pose-conditioned joint angle limits for 3D human pose reconstruction. In: CVPR (2015)","DOI":"10.1109\/CVPR.2015.7298751"},{"key":"24_CR2","doi-asserted-by":"crossref","unstructured":"Aksan, E., Kaufmann, M., Hilliges, O.: Structured prediction helps 3D human motion modelling. In: ICCV (2019)","DOI":"10.1109\/ICCV.2019.00724"},{"key":"24_CR3","doi-asserted-by":"crossref","unstructured":"Barsoum, E., Kender, J., Liu, Z.: HP-GAN: probabilistic 3D human motion prediction via GAN. In: CVPR Workshops (2018)","DOI":"10.1109\/CVPRW.2018.00191"},{"key":"24_CR4","doi-asserted-by":"crossref","unstructured":"Bogo, F., Kanazawa, A., Lassner, C., Gehler, P., Romero, J., Black, M.J.: Keep it SMPL: automatic estimation of 3D human pose and shape from a single image. In: ECCV (2016)","DOI":"10.1007\/978-3-319-46454-1_34"},{"key":"24_CR5","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"387","DOI":"10.1007\/978-3-030-58452-8_23","volume-title":"Computer Vision \u2013 ECCV 2020","author":"Z Cao","year":"2020","unstructured":"Cao, Z., Gao, H., Mangalam, K., Cai, Q.-Z., Vo, M., Malik, J.: Long-term human motion prediction with scene context. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.-M. (eds.) ECCV 2020. LNCS, vol. 12346, pp. 387\u2013404. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58452-8_23"},{"key":"24_CR6","doi-asserted-by":"crossref","unstructured":"Choi, H., Moon, G., Lee, K.M.: Pose2Mesh: graph convolutional network for 3D human pose and mesh recovery from a 2D human pose. In: ECCV (2020)","DOI":"10.1007\/978-3-030-58571-6_45"},{"key":"24_CR7","doi-asserted-by":"crossref","unstructured":"Choi, H., Moon, G., Lee, K.M.: Beyond static features for temporally consistent 3D human pose and shape from a video. In: CVPR (2021)","DOI":"10.1109\/CVPR46437.2021.00200"},{"key":"24_CR8","doi-asserted-by":"crossref","unstructured":"Choutas, V., Pavlakos, G., Bolkart, T., Tzionas, D., Black, M.J.: Monocular expressive body regression through body-driven attention. In: ECCV (2020)","DOI":"10.1007\/978-3-030-58607-2_2"},{"key":"24_CR9","unstructured":"Contributors, M.: OpenMMLab pose estimation toolbox and benchmark. https:\/\/github.com\/open-mmlab\/mmpose (2020)"},{"key":"24_CR10","doi-asserted-by":"crossref","unstructured":"Dabral, R., Shimada, S., Jain, A., Theobalt, C., Golyanik, V.: Gravity-aware monocular 3D human-object reconstruction. In: ICCV (2021)","DOI":"10.1109\/ICCV48922.2021.01214"},{"key":"24_CR11","doi-asserted-by":"crossref","unstructured":"Fabbri, M., Lanzi, F., Calderara, S., Alletto, S., Cucchiara, R.: Compressed volumetric heatmaps for multi-person 3D pose estimation. In: CVPR (June 2020)","DOI":"10.1109\/CVPR42600.2020.00723"},{"key":"24_CR12","doi-asserted-by":"crossref","unstructured":"Fragkiadaki, K., Levine, S., Felsen, P., Malik, J.: Recurrent network models for human dynamics. In: ICCV (2015)","DOI":"10.1109\/ICCV.2015.494"},{"key":"24_CR13","doi-asserted-by":"crossref","unstructured":"G\u00e4rtner, E., Andriluka, M., Xu, H., Sminchisescu, C.: Trajectory optimization for physics-based reconstruction of 3D human pose from monocular video. In: CVPR (2022)","DOI":"10.1109\/CVPR52688.2022.01276"},{"key":"24_CR14","doi-asserted-by":"crossref","unstructured":"Goel, S., Pavlakos, G., Rajasegaran, J., Kanazawa, A., Malik, J.: Humans in 4D: reconstructing and tracking humans with transformers. In: ICCV (2023)","DOI":"10.1109\/ICCV51070.2023.01358"},{"key":"24_CR15","doi-asserted-by":"crossref","unstructured":"Gopalakrishnan, A., Mali, A., Kifer, D., Giles, L., Ororbia, A.G.: A neural temporal model for human motion prediction. In: CVPR (2019)","DOI":"10.1109\/CVPR.2019.01239"},{"key":"24_CR16","doi-asserted-by":"crossref","unstructured":"Guler, R.A., Kokkinos, I.: HoloPose: holistic 3D human reconstruction in-the-wild. In: CVPR (2019)","DOI":"10.1109\/CVPR.2019.01114"},{"key":"24_CR17","doi-asserted-by":"crossref","unstructured":"Guzov, V., Mir, A., Sattler, T., Pons-Moll, G.: Human POSEitioning system (HPS): 3D human pose estimation and self-localization in large scenes from body-mounted sensors. In: CVPR (2021)","DOI":"10.1109\/CVPR46437.2021.00430"},{"issue":"4","key":"24_CR18","doi-asserted-by":"publisher","first-page":"60","DOI":"10.1145\/3386569.3392480","volume":"39","author":"FG Harvey","year":"2020","unstructured":"Harvey, F.G., Yurick, M., Nowrouzezahrai, D., Pal, C.: Robust motion in-betweening. ACM Trans. Graph. (TOG) 39(4), 60\u20131 (2020)","journal-title":"ACM Trans. Graph. (TOG)"},{"key":"24_CR19","doi-asserted-by":"crossref","unstructured":"Hassan, M., et al.: Stochastic scene-aware motion prediction. In: ICCV (2021)","DOI":"10.1109\/ICCV48922.2021.01118"},{"key":"24_CR20","doi-asserted-by":"crossref","unstructured":"Hassan, M., Choutas, V., Tzionas, D., Black, M.J.: Resolving 3D human pose ambiguities with 3D scene constraints. In: ICCV, pp. 2282\u20132292 (2019)","DOI":"10.1109\/ICCV.2019.00237"},{"key":"24_CR21","unstructured":"He, C., Saito, J., Zachary, J., Rushmeier, H., Zhou, Y.: NeMF: neural motion fields for kinematic animation. In: NeurIPS (2022)"},{"key":"24_CR22","doi-asserted-by":"crossref","unstructured":"Henning, D.F., Laidlow, T., Leutenegger, S.: Bodyslam: Joint camera localisation, mapping, and human motion tracking. In: ECCV (2022)","DOI":"10.1007\/978-3-031-19842-7_38"},{"key":"24_CR23","doi-asserted-by":"crossref","unstructured":"Hernandez, A., Gall, J., Moreno-Noguer, F.: Human motion prediction via spatio-temporal inpainting. In: CVPR (2019)","DOI":"10.1109\/ICCV.2019.00723"},{"key":"24_CR24","unstructured":"Ho, J., Jain, A., Abbeel, P.: Denoising diffusion probabilistic models. In: NeurIPs (2020)"},{"key":"24_CR25","doi-asserted-by":"crossref","unstructured":"Huang, C.H.P., et al.: Capturing and inferring dense full-body human-scene contact. In: CVPR (2022)","DOI":"10.1109\/CVPR52688.2022.01292"},{"key":"24_CR26","doi-asserted-by":"crossref","unstructured":"Huang, S., et al.: Diffusion-based generation, optimization, and planning in 3D scenes. In: CVPR (2023)","DOI":"10.1109\/CVPR52729.2023.01607"},{"key":"24_CR27","doi-asserted-by":"crossref","unstructured":"Iqbal, U., Molchanov, P., Kautz, J.: Weakly-supervised 3D human pose learning via multi-view images in the wild. In: CVPR (2020)","DOI":"10.1109\/CVPR42600.2020.00529"},{"key":"24_CR28","doi-asserted-by":"crossref","unstructured":"Iqbal, U., Xie, K., Guo, Y., Kautz, J., Molchanov, P.: KAMA: 3D keypoint aware body mesh articulation. In: 3DV (2021)","DOI":"10.1109\/3DV53792.2021.00078"},{"key":"24_CR29","doi-asserted-by":"crossref","unstructured":"Isogawa, M., Yuan, Y., O\u2019Toole, M., Kitani, K.M.: Optical non-line-of-sight physics-based 3D human pose estimation. In: CVPR (2020)","DOI":"10.1109\/CVPR42600.2020.00704"},{"key":"24_CR30","doi-asserted-by":"crossref","unstructured":"Jain, A., Zamir, A.R., Savarese, S., Saxena, A.: Structural-RNN: deep learning on spatio-temporal graphs. In: CVPR (2016)","DOI":"10.1109\/CVPR.2016.573"},{"key":"24_CR31","doi-asserted-by":"crossref","unstructured":"Jiang, W., Kolotouros, N., Pavlakos, G., Zhou, X., Daniilidis, K.: Coherent reconstruction of multiple humans from a single image. In: CVPR (2020)","DOI":"10.1109\/CVPR42600.2020.00562"},{"key":"24_CR32","doi-asserted-by":"crossref","unstructured":"Joo, H., Neverova, N., Vedaldi, A.: Exemplar fine-tuning for 3D human pose fitting towards in-the-wild 3D human pose estimation. In: 3DV (2021)","DOI":"10.1109\/3DV53792.2021.00015"},{"key":"24_CR33","doi-asserted-by":"crossref","unstructured":"Kanazawa, A., Black, M.J., Jacobs, D.W., Malik, J.: End-to-end recovery of human shape and pose. In: CVPR (2018)","DOI":"10.1109\/CVPR.2018.00744"},{"key":"24_CR34","doi-asserted-by":"crossref","unstructured":"Kanazawa, A., Zhang, J.Y., Felsen, P., Malik, J.: Learning 3D human dynamics from video. In: CVPR (2019)","DOI":"10.1109\/CVPR.2019.00576"},{"key":"24_CR35","doi-asserted-by":"crossref","unstructured":"Karunratanakul, K., Preechakul, K., Aksan, E., Beeler, T., Suwajanakorn, S., Tang, S.: Optimizing diffusion noise can serve as universal motion priors. arXiv preprint arXiv:2312.11994 (2023)","DOI":"10.1109\/CVPR52733.2024.00133"},{"key":"24_CR36","doi-asserted-by":"crossref","unstructured":"Kaufmann, M., Aksan, E., Song, J., Pece, F., Ziegler, R., Hilliges, O.: Convolutional autoencoders for human motion infilling. In: 3DV (2020)","DOI":"10.1109\/3DV50981.2020.00102"},{"key":"24_CR37","doi-asserted-by":"crossref","unstructured":"Kaufmann, M., et al.: EMDB: the Electromagnetic Database of Global 3D Human Pose and Shape in the Wild. In: ICCV (2023)","DOI":"10.1109\/ICCV51070.2023.01345"},{"key":"24_CR38","doi-asserted-by":"crossref","unstructured":"Khurana, T., Dave, A., Ramanan, D.: Detecting invisible people. In: ICCV, pp. 3174\u20133184 (2021)","DOI":"10.1109\/ICCV48922.2021.00316"},{"key":"24_CR39","doi-asserted-by":"crossref","unstructured":"Kocabas, M., Athanasiou, N., Black, M.J.: VIBE: video inference for human body pose and shape estimation. In: CVPR (2020)","DOI":"10.1109\/CVPR42600.2020.00530"},{"key":"24_CR40","doi-asserted-by":"crossref","unstructured":"Kocabas, M., Huang, C.H.P., Tesch, J., M\u00fcller, L., Hilliges, O., Black, M.J.: SPEC: seeing people in the wild with an estimated camera. In: ICCV (2021)","DOI":"10.1109\/ICCV48922.2021.01085"},{"key":"24_CR41","doi-asserted-by":"crossref","unstructured":"Kocabas, M., et al.: PACE: human and motion estimation from in-the-wild videos. In: 3DV (2024)","DOI":"10.1109\/3DV62453.2024.00103"},{"key":"24_CR42","doi-asserted-by":"crossref","unstructured":"Kolotouros, N., Pavlakos, G., Black, M.J., Daniilidis, K.: Learning to reconstruct 3D human pose and shape via model-fitting in the loop. In: ICCV (2019)","DOI":"10.1109\/ICCV.2019.00234"},{"key":"24_CR43","doi-asserted-by":"crossref","unstructured":"Kolotouros, N., Pavlakos, G., Daniilidis, K.: Convolutional mesh regression for single-image human shape reconstruction. In: CVPR (2019)","DOI":"10.1109\/CVPR.2019.00463"},{"key":"24_CR44","doi-asserted-by":"crossref","unstructured":"Kolotouros, N., Pavlakos, G., Jayaraman, D., Daniilidis, K.: Probabilistic modeling for human mesh recovery. In: ICCV (2021)","DOI":"10.1109\/ICCV48922.2021.01140"},{"key":"24_CR45","doi-asserted-by":"crossref","unstructured":"Kundu, J.N., Rakesh, M., Jampani, V., Venkatesh, R.M., Babu1, R.V.: Appearance consensus driven self-supervised human mesh recovery. In: ECCV (2020)","DOI":"10.1007\/978-3-030-58452-8_46"},{"key":"24_CR46","doi-asserted-by":"crossref","unstructured":"Lassner, C., Romero, J., Kiefel, M., Bogo, F., Black, M.J., Gehler, P.V.: Unite the people: closing the loop between 3D and 2D human representations. In: CVPR (2017)","DOI":"10.1109\/CVPR.2017.500"},{"key":"24_CR47","doi-asserted-by":"crossref","unstructured":"Li, J., Bian, S., Xu, C., Liu, G., Yu, G., Lu, C.: D &d: learning human dynamics from dynamic camera. In: ECCV (2022)","DOI":"10.1007\/978-3-031-20065-6_28"},{"key":"24_CR48","doi-asserted-by":"crossref","unstructured":"Li, J., Xu, C., Chen, Z., Bian, S., Yang, L., Lu, C.: HybrIK: a hybrid analytical-neural inverse kinematics solution for 3D human pose and shape estimation. In: CVPR (2021)","DOI":"10.1109\/CVPR46437.2021.00339"},{"key":"24_CR49","doi-asserted-by":"crossref","unstructured":"Li, Z., Liu, J., Zhang, Z., Xu, S., Yan, Y.: CLIFF: carrying location information in full frames into human pose and shape estimation. In: ECCV (2022)","DOI":"10.1007\/978-3-031-20065-6_34"},{"key":"24_CR50","unstructured":"Li, Z., Zhou, Y., Xiao, S., He, C., Huang, Z., Li, H.: Auto-conditioned recurrent networks for extended complex human motion synthesis. arXiv preprint arXiv:1707.05363 (2017)"},{"key":"24_CR51","doi-asserted-by":"crossref","unstructured":"Lin, K., Wang, L., Liu, Z.: End-to-end human pose and mesh reconstruction with transformers. In: CVPR (2021)","DOI":"10.1109\/CVPR46437.2021.00199"},{"key":"24_CR52","doi-asserted-by":"crossref","unstructured":"Liu, M., Yang, D., Zhang, Y., Cui, Z., Rehg, J.M., Tang, S.: 4D human body capture from egocentric video via 3D scene grounding. In: 3DV (2021)","DOI":"10.1109\/3DV53792.2021.00101"},{"key":"24_CR53","doi-asserted-by":"crossref","unstructured":"Loper, M., Mahmood, N., Romero, J., Pons-Moll, G., Black, M.J.: SMPL: a skinned multi-person linear model. SIGGRAPH Asia 34(6), 248:1\u2013248:16 (2015)","DOI":"10.1145\/2816795.2818013"},{"key":"24_CR54","doi-asserted-by":"crossref","unstructured":"Luo, Z., Golestaneh, S.A., Kitani, K.M.: 3D human motion estimation via motion compression and refinement. In: ACCV (2020)","DOI":"10.1007\/978-3-030-69541-5_20"},{"key":"24_CR55","unstructured":"Luo, Z., Hachiuma, R., Yuan, Y., Kitani, K.: Dynamics-regulated kinematic policy for egocentric pose estimation. NeurIPS 34 (2021)"},{"key":"24_CR56","doi-asserted-by":"crossref","unstructured":"Mahmood, N., Ghorbani, N., Troje, N.F., Pons-Moll, G., Black, M.J.: AMASS: archive of motion capture as surface shapes. In: ICCV (2019)","DOI":"10.1109\/ICCV.2019.00554"},{"key":"24_CR57","doi-asserted-by":"crossref","unstructured":"von Marcard, T., Henschel, R., Black, M., Rosenhahn, B., Pons-Moll, G.: Recovering accurate 3D human pose in the wild using IMUs and a moving camera. In: ECCV (2018)","DOI":"10.1007\/978-3-030-01249-6_37"},{"key":"24_CR58","doi-asserted-by":"crossref","unstructured":"Martinez, J., Black, M.J., Romero, J.: On human motion prediction using recurrent neural networks. In: CVPR (2017)","DOI":"10.1109\/CVPR.2017.497"},{"key":"24_CR59","doi-asserted-by":"crossref","unstructured":"Mehta, D., et al.: Monocular 3D human pose estimation in the wild using improved CNN supervision. In: 3DV (2017)","DOI":"10.1109\/3DV.2017.00064"},{"key":"24_CR60","doi-asserted-by":"crossref","unstructured":"Mehta, D., et al.: XNect: real-time multi-person 3D motion capture with a single RGB camera. In: SIGGRAPH (2020)","DOI":"10.1145\/3386569.3392410"},{"key":"24_CR61","doi-asserted-by":"crossref","unstructured":"Mehta, D., et al.: VNect: Real-time 3D human pose estimation with a single RGB camera. In: SIGGRAPH (2017)","DOI":"10.1145\/3072959.3073596"},{"key":"24_CR62","doi-asserted-by":"crossref","unstructured":"Moon, G., Chang, J.Y., Lee, K.M.: Camera distance-aware top-down approach for 3D multi-person pose estimation from a single RGB image. In: ICCV (2019)","DOI":"10.1109\/ICCV.2019.01023"},{"key":"24_CR63","doi-asserted-by":"crossref","unstructured":"Moon, G., Lee, K.M.: I2L-MeshNet: image-to-Lixel prediction network for accurate 3D human pose and mesh estimation from a single RGB image. In: ECCV (2020)","DOI":"10.1007\/978-3-030-58571-6_44"},{"key":"24_CR64","doi-asserted-by":"crossref","unstructured":"M\u00fcller, L., Osman, A.A.A., Tang, S., Huang, C.H.P., Black, M.J.: On self contact and human pose. In: CVPR (2021)","DOI":"10.1109\/CVPR46437.2021.00986"},{"key":"24_CR65","doi-asserted-by":"crossref","unstructured":"M\u00fcller, L., Ye, V., Pavlakos, G., Black, M., Kanazawa, A.: Generative Proxemics: a prior for 3D social interaction from images. arXiv preprint arXiv:2306.09337 (2023)","DOI":"10.1109\/CVPR52733.2024.00925"},{"key":"24_CR66","unstructured":"OpenSfM - a structure from motion library. https:\/\/github.com\/mapillary\/OpenSfM (2021). https:\/\/github.com\/mapillary\/OpenSfM"},{"key":"24_CR67","doi-asserted-by":"crossref","unstructured":"Pavlakos, G., et al.: Expressive Body Capture: 3D hands, face, and body from a single image. In: CVPR (2019)","DOI":"10.1109\/CVPR.2019.01123"},{"key":"24_CR68","doi-asserted-by":"crossref","unstructured":"Pavlakos, G., Kolotouros, N., Daniilidis, K.: TexturePose: supervising human mesh estimation with texture consistency. In: ICCV (2019)","DOI":"10.1109\/ICCV.2019.00089"},{"key":"24_CR69","doi-asserted-by":"crossref","unstructured":"Pavlakos, G., Weber, E., Tancik, M., Kanazawa, A.: The one where they reconstructed 3D humans and environments in TV shows. In: ECCV (2022)","DOI":"10.1007\/978-3-031-19836-6_41"},{"key":"24_CR70","doi-asserted-by":"crossref","unstructured":"Pavlakos, G., Zhu, L., Zhou, X., Daniilidis, K.: Learning to estimate 3D human pose and shape from a single color image. In: CVPR (2018)","DOI":"10.1109\/CVPR.2018.00055"},{"key":"24_CR71","unstructured":"Pavllo, D., Grangier, D., Auli, M.: QuaterNet: a quaternion-based recurrent model for human motion. In: BMVC (2018)"},{"key":"24_CR72","unstructured":"Payer, C., Neff, T., Bischof, H., Urschler, M., Stern, D.: Simultaneous multi-person detection and single-person pose estimation with a single heatmap regression network. In: ICCV PoseTrack Workshop (2017)"},{"key":"24_CR73","doi-asserted-by":"crossref","unstructured":"Petrovich, M., Black, M.J., Varol, G.: Action-conditioned 3D human motion synthesis with transformer VAE. In: ICCV (2021)","DOI":"10.1109\/ICCV48922.2021.01080"},{"key":"24_CR74","unstructured":"Poole, B., Jain, A., Barron, J.T., Mildenhall, B.: DreamFusion: Text-to-3D using 2D diffusion. In: ICLR (2023)"},{"key":"24_CR75","doi-asserted-by":"crossref","unstructured":"Reddy, N.D., Guigues, L., Pischulini, L., Eledath, J., Narasimhan, S.: TesseTrack: end-to-end learnable multi-person articulated 3D pose tracking. In: CVPR (2021)","DOI":"10.1109\/CVPR46437.2021.01494"},{"key":"24_CR76","doi-asserted-by":"crossref","unstructured":"Rempe, D., Birdal, T., Hertzmann, A., Yang, J., Sridhar, S., Guibas, L.J.: HuMoR: 3D human motion model for robust pose estimation. In: ICCV (2021)","DOI":"10.1109\/ICCV48922.2021.01129"},{"key":"24_CR77","doi-asserted-by":"crossref","unstructured":"Rogez, G., Weinzaepfel, P., Schmid, C.: LCR-Net: localization-classification-regression for human pose. In: CVPR (2017)","DOI":"10.1109\/CVPR.2017.134"},{"key":"24_CR78","doi-asserted-by":"crossref","unstructured":"Rong, Y., Liu, Z., Li, C., Cao, K., Change\u00a0Loy, C.: Delving deep into hybrid annotations for 3D human recovery in the wild. In: ICCV (2019)","DOI":"10.1109\/ICCV.2019.00544"},{"key":"24_CR79","doi-asserted-by":"crossref","unstructured":"S\u00e1r\u00e1ndi, I., Hermans, A., Leibe, B.: Learning 3D human pose estimation from dozens of datasets using a geometry-aware autoencoder to bridge between skeleton formats. In: WACV (2023)","DOI":"10.1109\/WACV56688.2023.00297"},{"key":"24_CR80","doi-asserted-by":"crossref","unstructured":"Shimada, S., Golyanik, V., Xu, W., Theobalt, C.: PhysCap: physically plausible monocular 3D motion capture in real time. In: SIGGRAPH (2020)","DOI":"10.1145\/3414685.3417877"},{"key":"24_CR81","doi-asserted-by":"crossref","unstructured":"Shin, S., Kim, J., Halilaj, E., Black, M.J.: WHAM: reconstructing world-grounded humans with accurate 3D motion. In: CVPR (2024)","DOI":"10.1109\/CVPR52733.2024.00202"},{"key":"24_CR82","unstructured":"Sohl-Dickstein, J., Weiss, E., Maheswaranathan, N., Ganguli, S.: Deep unsupervised learning using nonequilibrium thermodynamics. In: ICML (2015)"},{"key":"24_CR83","unstructured":"Song, J., Meng, C., Ermon, S.: Denoising diffusion implicit models. In: ICLR (2021)"},{"key":"24_CR84","doi-asserted-by":"crossref","unstructured":"Song, J., Chen, X., Hilliges, O.: Human body model fitting by learned gradient descent. In: ECCV (2020)","DOI":"10.1007\/978-3-030-58565-5_44"},{"key":"24_CR85","doi-asserted-by":"crossref","unstructured":"Sun, Y., Bao, Q., Liu, W., Fu, Y., Black, M.J., Mei, T.: Monocular, one-stage, regression of multiple 3D people. In: ICCV (2021)","DOI":"10.1109\/ICCV48922.2021.01099"},{"key":"24_CR86","doi-asserted-by":"crossref","unstructured":"Sun, Y., Bao, Q., Liu, W., Mei, T., Black, M.J.: TRACE: 5D temporal regression of avatars with dynamic cameras in 3D environments. In: CVPR (2023)","DOI":"10.1109\/CVPR52729.2023.00855"},{"key":"24_CR87","doi-asserted-by":"crossref","unstructured":"Sun, Y., Liu, W., Bao, Q., Fu, Y., Mei, T., Black, M.J.: Putting people in their place: Monocular regression of 3D people in depth. In: CVPR (2022)","DOI":"10.1109\/CVPR52688.2022.01289"},{"key":"24_CR88","doi-asserted-by":"crossref","unstructured":"Sun, Y., Ye, Y., Liu, W., Gao, W., Fu, Y., , Mei, T.: Human mesh recovery from monocular images via a skeleton-disentangled representation. In: ICCV (2019)","DOI":"10.1109\/ICCV.2019.00545"},{"key":"24_CR89","unstructured":"Teed, Z., Deng, J.: DROID-SLAM: deep Visual SLAM for Monocular, Stereo, and RGB-D Cameras. In: NeurIPs (2021)"},{"key":"24_CR90","doi-asserted-by":"crossref","unstructured":"Teed, Z., Lipson, L., Deng, J.: Deep patch visual odometry. In: NeurIPS (2023)","DOI":"10.1007\/978-3-031-72627-9_24"},{"key":"24_CR91","unstructured":"Tevet, G., Raab, S., Gordon, B., Shafir, Y., Cohen-Or, D., Bermano, A.H.: Human motion diffusion model. In: ICLR 2023 (2022)"},{"key":"24_CR92","unstructured":"Villegas, R., Yang, J., Zou, Y., Sohn, S., Lin, X., Lee, H.: Learning to generate long-term future via hierarchical prediction. In: ICML (2017)"},{"key":"24_CR93","doi-asserted-by":"crossref","unstructured":"Weng, Z., Yeung, S.: Holistic 3D human and scene mesh estimation from single view images. In: CVPR (2021)","DOI":"10.1109\/CVPR46437.2021.00040"},{"key":"24_CR94","doi-asserted-by":"crossref","unstructured":"Xiang, D., Joo, H., Sheikh, Y.: Monocular total capture: Posing face, body and hands in the wild. In: CVPR (2019)","DOI":"10.1109\/CVPR.2019.01122"},{"key":"24_CR95","doi-asserted-by":"crossref","unstructured":"Xie, K., Wang, T., Iqbal, U., Guo, Y., Fidler, S., Shkurti, F.: Physics-based human motion estimation and synthesis from videos. In: ICCV (2021)","DOI":"10.1109\/ICCV48922.2021.01133"},{"key":"24_CR96","unstructured":"Xie, Y., Jampani, V., Zhong, L., Sun, D., Jiang, H.: OmniControl: control any joint at any time for human motion generation. In: ICLR (2024)"},{"key":"24_CR97","doi-asserted-by":"crossref","unstructured":"Xu, Y., Zhu, S.C., Tung, T.: DenseRaC: joint 3D pose and shape estimation by dense render-and-compare. In: ICCV (2019)","DOI":"10.1109\/ICCV.2019.00785"},{"key":"24_CR98","doi-asserted-by":"crossref","unstructured":"Yan, X., et al.: MT-VAE: learning motion transformations to generate multimodal human dynamics. In: ECCV (2018)","DOI":"10.1007\/978-3-030-01228-1_17"},{"key":"24_CR99","doi-asserted-by":"crossref","unstructured":"Ye, V., Pavlakos, G., Malik, J., Kanazawa, A.: Decoupling human and camera motion from videos in the wild. In: CVPR (2023)","DOI":"10.1109\/CVPR52729.2023.02033"},{"key":"24_CR100","doi-asserted-by":"crossref","unstructured":"Yuan, Y., Iqbal, U., Molchanov, P., Kitani, K., Kautz, J.: GLAMR: global occlusion-aware human mesh recovery with dynamic cameras. In: CVPR (2022)","DOI":"10.1109\/CVPR52688.2022.01076"},{"key":"24_CR101","unstructured":"Yuan, Y., Kitani, K.: Diverse trajectory forecasting with determinantal point processes. In: ICLR 2020 (2019)"},{"key":"24_CR102","doi-asserted-by":"crossref","unstructured":"Yuan, Y., Kitani, K.: DLow: diversifying latent flows for diverse human motion prediction. In: ECCV (2020)","DOI":"10.1007\/978-3-030-58545-7_20"},{"key":"24_CR103","unstructured":"Yuan, Y., Kitani, K.: Residual force control for agile human behavior imitation and extended motion synthesis. In: NeurIPS (2020)"},{"key":"24_CR104","doi-asserted-by":"crossref","unstructured":"Yuan, Y., Song, J., Iqbal, U., Vahdat, A., Kautz, J.: PhysDiff: physics-guided human motion diffusion model. In: ICCV (2023)","DOI":"10.1109\/ICCV51070.2023.01467"},{"key":"24_CR105","doi-asserted-by":"crossref","unstructured":"Yuan, Y., Wei, S.E., Simon, T., Kitani, K., Saragih, J.: SimPoE: simulated character control for 3D human pose estimation. In: CVPR (2021)","DOI":"10.1109\/CVPR46437.2021.00708"},{"key":"24_CR106","doi-asserted-by":"crossref","unstructured":"Zanfir, A., Bazavan, E.G., Xu, H., Freeman, W.T., Sukthankar, R., Sminchisescu, C.: Weakly supervised 3D human pose and shape reconstruction with normalizing flows. In: ECCV (2020)","DOI":"10.1007\/978-3-030-58539-6_28"},{"key":"24_CR107","doi-asserted-by":"crossref","unstructured":"Zanfir, A., Marinoiu, E., Sminchisescu, C.: Monocular 3D pose and shape estimation of multiple people in natural scenes the importance of multiple scene constraints. In: CVPR (2018)","DOI":"10.1109\/CVPR.2018.00229"},{"key":"24_CR108","unstructured":"Zanfir, A., Marinoiu, E., Zanfir, M., Popa, A.I., Sminchisescu, C.: Deep network for the integrated 3D sensing of multiple people in natural images. In: NeurIPS (2018)"},{"key":"24_CR109","doi-asserted-by":"crossref","unstructured":"Zanfir, M., Zanfir, A., Bazavan, E.G., Freeman, W.T., Sukthankar, R., Sminchisescu, C.: THUNDR: transformer-based 3D human reconstruction with markers. In: ICCV (2021)","DOI":"10.1109\/ICCV48922.2021.01273"},{"key":"24_CR110","doi-asserted-by":"crossref","unstructured":"Zhang, H., et al.: PyMAF: 3D human pose and shape regression with pyramidal mesh alignment feedback loop. In: ICCV (2021)","DOI":"10.1109\/ICCV48922.2021.01125"},{"key":"24_CR111","doi-asserted-by":"crossref","unstructured":"Zhang, J., Yu, D., Liew, J.H., Nie, X., Feng, J.: Body meshes as points. In: CVPR (2021)","DOI":"10.1109\/CVPR46437.2021.00061"},{"key":"24_CR112","doi-asserted-by":"crossref","unstructured":"Zhang, L., Rao, A., Agrawala, M.: Adding conditional control to text-to-image diffusion models (2023)","DOI":"10.1109\/ICCV51070.2023.00355"},{"key":"24_CR113","unstructured":"Zhang, M., et al.: MotionDiffuse: text-driven human motion generation with diffusion model. arXiv preprint arXiv:2208.15001 (2022)"},{"key":"24_CR114","doi-asserted-by":"crossref","unstructured":"Zhang, S., et al.: RoHM: robust human motion reconstruction via diffusion. In: CVPR (2024)","DOI":"10.1109\/CVPR52733.2024.01384"},{"key":"24_CR115","doi-asserted-by":"crossref","unstructured":"Zhang, S., Zhang, Y., Bogo, F., Pollefeys, M., Tang, S.: Learning motion priors for 4D human body capture in 3D scenes. In: ICCV (2021)","DOI":"10.1109\/ICCV48922.2021.01115"},{"key":"24_CR116","doi-asserted-by":"crossref","unstructured":"Zhang, T., Huang, B., Wang, Y.: Object-occluded human shape and pose estimation from a single color image. In: CVPR (2020)","DOI":"10.1109\/CVPR42600.2020.00740"},{"key":"24_CR117","doi-asserted-by":"crossref","unstructured":"Zhen, J., Fang, Q., Sun, J., Liu, W., Jiang, W., Bao, H., Zhou, X.: SMAP: single-shot multi-person absolute 3D pose estimation. In: ECCV (2020)","DOI":"10.1007\/978-3-030-58555-6_33"},{"key":"24_CR118","doi-asserted-by":"crossref","unstructured":"Zhou, Y., Habermann, M., Habibie, I., Tewari, A., Theobalt, C., Xu, F.: Monocular real-time full body capture with inter-part correlations. In: CVPR (2021)","DOI":"10.1109\/CVPR46437.2021.00478"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2024"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-72640-8_24","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,10,28]],"date-time":"2024-10-28T09:50:13Z","timestamp":1730109013000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-72640-8_24"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10,29]]},"ISBN":["9783031726392","9783031726408"],"references-count":118,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-72640-8_24","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,10,29]]},"assertion":[{"value":"29 October 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Milan","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 September 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4 October 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2024.ecva.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}