{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,10]],"date-time":"2026-01-10T19:36:51Z","timestamp":1768073811533,"version":"3.49.0"},"publisher-location":"Cham","reference-count":48,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031727535","type":"print"},{"value":"9783031727542","type":"electronic"}],"license":[{"start":{"date-parts":[[2024,10,31]],"date-time":"2024-10-31T00:00:00Z","timestamp":1730332800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,10,31]],"date-time":"2024-10-31T00:00:00Z","timestamp":1730332800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-72754-2_2","type":"book-chapter","created":{"date-parts":[[2024,10,30]],"date-time":"2024-10-30T14:57:07Z","timestamp":1730300227000},"page":"20-37","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":5,"title":["WHAC: World-Grounded Humans and\u00a0Cameras"],"prefix":"10.1007","author":[{"given":"Wanqi","family":"Yin","sequence":"first","affiliation":[]},{"given":"Zhongang","family":"Cai","sequence":"additional","affiliation":[]},{"given":"Ruisi","family":"Wang","sequence":"additional","affiliation":[]},{"given":"Fanzhou","family":"Wang","sequence":"additional","affiliation":[]},{"given":"Chen","family":"Wei","sequence":"additional","affiliation":[]},{"given":"Haiyi","family":"Mei","sequence":"additional","affiliation":[]},{"given":"Weiye","family":"Xiao","sequence":"additional","affiliation":[]},{"given":"Zhitao","family":"Yang","sequence":"additional","affiliation":[]},{"given":"Qingping","family":"Sun","sequence":"additional","affiliation":[]},{"given":"Atsushi","family":"Yamashita","sequence":"additional","affiliation":[]},{"given":"Ziwei","family":"Liu","sequence":"additional","affiliation":[]},{"given":"Lei","family":"Yang","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,10,31]]},"reference":[{"key":"2_CR1","unstructured":"Bazavan, E.G., Zanfir, A., Zanfir, M., Freeman, W.T., Sukthankar, R., Sminchisescu, C.: Hspace: synthetic parametric humans animated in complex environments. arXiv preprint arXiv:2112.12867 (2021)"},{"key":"2_CR2","doi-asserted-by":"crossref","unstructured":"Black, M.J., Patel, P., Tesch, J., Yang, J.: Bedlam: a synthetic dataset of bodies exhibiting detailed lifelike animated motion. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 8726\u20138737 (2023)","DOI":"10.1109\/CVPR52729.2023.00843"},{"key":"2_CR3","doi-asserted-by":"crossref","unstructured":"Cai, Z., et\u00a0al.: Digital life project: autonomous 3d characters with social intelligence. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 582\u2013592 (2024)","DOI":"10.1109\/CVPR52733.2024.00062"},{"key":"2_CR4","doi-asserted-by":"publisher","unstructured":"Cai, Z., et\u00a0al.: Humman: multi-modal 4d human dataset for versatile sensing and modeling. In: Proceedings of the European Conference on Computer Vision, pp. 557\u2013577. Springer (2022). https:\/\/doi.org\/10.1007\/978-3-031-20071-7_33","DOI":"10.1007\/978-3-031-20071-7_33"},{"key":"2_CR5","unstructured":"Cai, Z., et al.: Smpler-x: scaling up expressive human pose and shape estimation. In: Oh, A., Neumann, T., Globerson, A., Saenko, K., Hardt, M., Levine, S. (eds.) Advances in Neural Information Processing Systems. vol.\u00a036, pp. 11454\u201311468. Curran Associates, Inc. (2023)"},{"key":"2_CR6","unstructured":"Cai, Z., et al.: Playing for 3d human recovery. arXiv preprint arXiv:2110.07588 (2021)"},{"key":"2_CR7","doi-asserted-by":"crossref","unstructured":"Cheng, W., et\u00a0al.: Dna-rendering: a diverse neural actor repository for high-fidelity human-centric rendering. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 19982\u201319993 (2023)","DOI":"10.1109\/ICCV51070.2023.01829"},{"key":"2_CR8","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"20","DOI":"10.1007\/978-3-030-58607-2_2","volume-title":"Computer Vision \u2013 ECCV 2020","author":"V Choutas","year":"2020","unstructured":"Choutas, V., Pavlakos, G., Bolkart, T., Tzionas, D., Black, M.J.: Monocular expressive body regression through body-driven attention. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.-M. (eds.) ECCV 2020. LNCS, vol. 12355, pp. 20\u201340. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58607-2_2"},{"key":"2_CR9","unstructured":"Contributors, X.: Openxrlab synthetic data rendering toolbox (2023). https:\/\/github.com\/openxrlab\/xrfeitoria"},{"key":"2_CR10","doi-asserted-by":"crossref","unstructured":"Feng, Y., Choutas, V., Bolkart, T., Tzionas, D., Black, M.J.: Collaborative regression of expressive bodies using moderation. In: Proceedings of the International Conference on 3D Vision, pp. 792\u2013804. IEEE (2021)","DOI":"10.1109\/3DV53792.2021.00088"},{"key":"2_CR11","doi-asserted-by":"crossref","unstructured":"Guzov, V., Mir, A., Sattler, T., Pons-Moll, G.: Human positioning system (hps): 3d human pose estimation and self-localization in large scenes from body-mounted sensors. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 4318\u20134329 (2021)","DOI":"10.1109\/CVPR46437.2021.00430"},{"key":"2_CR12","doi-asserted-by":"crossref","unstructured":"Hasler, N., Rosenhahn, B., Thormahlen, T., Wand, M., Gall, J., Seidel, H.P.: Markerless motion capture with unsynchronized moving cameras. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 224\u2013231. IEEE (2009)","DOI":"10.1109\/CVPR.2009.5206859"},{"key":"2_CR13","doi-asserted-by":"crossref","unstructured":"Huang, B., Shu, Y., Zhang, T., Wang, Y.: Dynamic multi-person mesh recovery from uncalibrated multi-view cameras. In: Proceedings of the International Conference on 3D Vision, pp. 710\u2013720. IEEE (2021)","DOI":"10.1109\/3DV53792.2021.00080"},{"key":"2_CR14","doi-asserted-by":"crossref","unstructured":"Huang, C.H.P., et al.: Capturing and inferring dense full-body human-scene contact. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 13274\u201313285 (2022)","DOI":"10.1109\/CVPR52688.2022.01292"},{"key":"2_CR15","doi-asserted-by":"crossref","unstructured":"Johnson, S., Everingham, M.: Clustered pose and nonlinear appearance models for human pose estimation. In: Proceedings of the British Machine Vision Conference, pp. 1\u201311. British Machine Vision Association (2010)","DOI":"10.5244\/C.24.12"},{"key":"2_CR16","doi-asserted-by":"crossref","unstructured":"Joo, H., et al.: Panoptic studio: a massively multiview system for social motion capture. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 3334\u20133342 (2015)","DOI":"10.1109\/ICCV.2015.381"},{"key":"2_CR17","doi-asserted-by":"crossref","unstructured":"Kaufmann, M., et al.: Emdb: the electromagnetic database of global 3d human pose and shape in the wild. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 14632\u201314643 (2023)","DOI":"10.1109\/ICCV51070.2023.01345"},{"key":"2_CR18","doi-asserted-by":"crossref","unstructured":"Kissos, I., Fritz, L., Goldman, M., Meir, O., Oks, E., Kliger, M.: Beyond weak perspective for monocular 3d human pose estimation. In: Proceedings of the European Conference on Computer Vision, pp. 541\u2013554. Springer (2020)","DOI":"10.1007\/978-3-030-66096-3_37"},{"key":"2_CR19","doi-asserted-by":"crossref","unstructured":"Kocabas, M., et al.: Pace: human and camera motion estimation from in-the-wild videos. In: Proceedings of the International Conference on 3D Vision, pp. 397\u2013408. IEEE (2024)","DOI":"10.1109\/3DV62453.2024.00103"},{"key":"2_CR20","unstructured":"Li, J., Bian, S., Xu, C., Chen, Z., Yang, L., Lu, C.: Hybrik-x: hybrid analytical-neural inverse kinematics for whole-body mesh recovery. arXiv preprint arXiv:2304.05690 (2023)"},{"key":"2_CR21","doi-asserted-by":"publisher","unstructured":"Li, J., Bian, S., Xu, C., Liu, G., Yu, G., Lu, C.: D &d: learning human dynamics from dynamic camera. In: Proceedings of the European Conference on Computer Vision, pp. 479\u2013496. Springer (2022). https:\/\/doi.org\/10.1007\/978-3-031-20065-6_28","DOI":"10.1007\/978-3-031-20065-6_28"},{"key":"2_CR22","doi-asserted-by":"crossref","unstructured":"Lin, J., Zeng, A., Wang, H., Zhang, L., Li, Y.: One-stage 3d whole-body mesh recovery with component aware transformer. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 21159\u201321168 (2023)","DOI":"10.1109\/CVPR52729.2023.02027"},{"key":"2_CR23","doi-asserted-by":"crossref","unstructured":"Liu, M., Yang, D., Zhang, Y., Cui, Z., Rehg, J.M., Tang, S.: 4d human body capture from egocentric video via 3d scene grounding. In: Proceedings of the International Conference on 3D Vision, pp. 930\u2013939. IEEE (2021)","DOI":"10.1109\/3DV53792.2021.00101"},{"key":"2_CR24","doi-asserted-by":"crossref","unstructured":"Luvizon, D.C., Habermann, M., Golyanik, V., Kortylewski, A., Theobalt, C.: Scene-aware 3d multi-human motion capture from a single camera. In: Computer Graphics Forum, vol.\u00a042, pp. 371\u2013383. Wiley Online Library (2023)","DOI":"10.1111\/cgf.14768"},{"key":"2_CR25","doi-asserted-by":"crossref","unstructured":"Mahmood, N., Ghorbani, N., Troje, N.F., Pons-Moll, G., Black, M.J.: Amass: archive of motion capture as surface shapes. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 5442\u20135451 (2019)","DOI":"10.1109\/ICCV.2019.00554"},{"key":"2_CR26","doi-asserted-by":"crossref","unstructured":"von Marcard, T., Henschel, R., Black, M.J., Rosenhahn, B., Pons-Moll, G.: Recovering accurate 3d human pose in the wild using imus and a moving camera. In: Proceedings of the European Conference on Computer Vision, pp. 601\u2013617 (2018)","DOI":"10.1007\/978-3-030-01249-6_37"},{"key":"2_CR27","doi-asserted-by":"crossref","unstructured":"Moon, G., Choi, H., Lee, K.M.: Accurate 3d hand pose estimation for whole-body 3d human mesh estimation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 2308\u20132317 (2022)","DOI":"10.1109\/CVPRW56347.2022.00257"},{"key":"2_CR28","unstructured":"Pang, H.E., et al.: Towards robust and expressive whole-body human pose and shape estimation. In: Oh, A., Neumann, T., Globerson, A., Saenko, K., Hardt, M., Levine, S. (eds.) Advances in Neural Information Processing Systems, vol.\u00a036, pp. 17330\u201317344. Curran Associates, Inc. (2023)"},{"key":"2_CR29","doi-asserted-by":"crossref","unstructured":"Pavlakos, G., et al.: Expressive body capture: 3d hands, face, and body from a single image. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 10975\u201310985 (2019)","DOI":"10.1109\/CVPR.2019.01123"},{"key":"2_CR30","doi-asserted-by":"crossref","unstructured":"Peng, S., et al.: Neural body: implicit neural representations with structured latent codes for novel view synthesis of dynamic humans. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 9054\u20139063 (2021)","DOI":"10.1109\/CVPR46437.2021.00894"},{"key":"2_CR31","doi-asserted-by":"crossref","unstructured":"Rao, A., et al.: Dynamic storyboard generation in an engine-based virtual environment for video production. In: ACM SIGGRAPH 2023 Posters, pp.\u00a01\u20132 (2023)","DOI":"10.1145\/3588028.3603647"},{"key":"2_CR32","doi-asserted-by":"crossref","unstructured":"Rempe, D., Birdal, T., Hertzmann, A., Yang, J., Sridhar, S., Guibas, L.J.: Humor: 3d human motion model for robust pose estimation. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 11488\u201311499 (2021)","DOI":"10.1109\/ICCV48922.2021.01129"},{"key":"2_CR33","doi-asserted-by":"crossref","unstructured":"Rong, Y., Shiratori, T., Joo, H.: Frankmocap: a monocular 3d whole-body pose estimation system via regression and integration. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 1749\u20131759 (2021)","DOI":"10.1109\/ICCVW54120.2021.00201"},{"key":"2_CR34","doi-asserted-by":"crossref","unstructured":"Schonberger, J.L., Frahm, J.M.: Structure-from-motion revisited. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 4104\u20134113 (2016)","DOI":"10.1109\/CVPR.2016.445"},{"key":"2_CR35","doi-asserted-by":"crossref","unstructured":"Shin, S., Kim, J., Halilaj, E., Black, M.J.: Wham: reconstructing world-grounded humans with accurate 3d motion. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 2070\u20132080 (2024)","DOI":"10.1109\/CVPR52733.2024.00202"},{"key":"2_CR36","unstructured":"Siyao, L., et al.: Duolando: follower gpt with off-policy reinforcement learning for dance accompaniment. In: Proceedings of the Twelfth International Conference on Learning Representations (2023)"},{"key":"2_CR37","doi-asserted-by":"crossref","unstructured":"Sun, Y., Bao, Q., Liu, W., Mei, T., Black, M.J.: Trace: 5d temporal regression of avatars with dynamic cameras in 3d environments. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 8856\u20138866 (2023)","DOI":"10.1109\/CVPR52729.2023.00855"},{"key":"2_CR38","first-page":"16558","volume":"34","author":"Z Teed","year":"2021","unstructured":"Teed, Z., Deng, J.: Droid-slam: deep visual slam for monocular, stereo, and rgb-d cameras. Adv. Neural. Inf. Process. Syst. 34, 16558\u201316569 (2021)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"2_CR39","doi-asserted-by":"crossref","unstructured":"Teed, Z., Lipson, L., Deng, J.: Deep patch visual odometry. Advances in Neural Information Processing Systems 36 (2024)","DOI":"10.1007\/978-3-031-72627-9_24"},{"issue":"04","key":"2_CR40","doi-asserted-by":"publisher","first-page":"376","DOI":"10.1109\/34.88573","volume":"13","author":"S Umeyama","year":"1991","unstructured":"Umeyama, S.: Least-squares estimation of transformation parameters between two point patterns. IEEE Trans. Pattern Anal. Mach. Intell. 13(04), 376\u2013380 (1991)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"2_CR41","doi-asserted-by":"crossref","unstructured":"Yang, Z., et al.: Synbody: synthetic dataset with layered human models for 3d human perception and modeling. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 20282\u201320292 (October 2023)","DOI":"10.1109\/ICCV51070.2023.01855"},{"key":"2_CR42","doi-asserted-by":"crossref","unstructured":"Ye, V., Pavlakos, G., Malik, J., Kanazawa, A.: Decoupling human and camera motion from videos in the wild. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 21222\u201321232 (2023)","DOI":"10.1109\/CVPR52729.2023.02033"},{"issue":"6","key":"2_CR43","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3478513.3480504","volume":"40","author":"R Yu","year":"2021","unstructured":"Yu, R., Park, H., Lee, J.: Human dynamics from monocular video with dynamic camera movements. ACM Trans. Graph. 40(6), 1\u201314 (2021)","journal-title":"ACM Trans. Graph."},{"key":"2_CR44","doi-asserted-by":"crossref","unstructured":"Yuan, Y., Iqbal, U., Molchanov, P., Kitani, K., Kautz, J.: Glamr: Global occlusion-aware human mesh recovery with dynamic cameras. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 11038\u201311049 (2022)","DOI":"10.1109\/CVPR52688.2022.01076"},{"key":"2_CR45","doi-asserted-by":"crossref","unstructured":"Zhang, H., Tian, Y., Zhang, Y., Li, M., An, L., Sun, Z., Liu, Y.: Pymaf-x: towards well-aligned full-body model regression from monocular images. IEEE Trans. Pattern Anal. Mach. Intell. (2023)","DOI":"10.1109\/TPAMI.2023.3271691"},{"key":"2_CR46","doi-asserted-by":"crossref","unstructured":"Zhang, S., et al.: Egobody: human body shape and motion of interacting people from head-mounted devices. In: Proceedings of the European Conference on Computer Vision, pp. 180\u2013200. Springer (2022)","DOI":"10.1007\/978-3-031-20068-7_11"},{"key":"2_CR47","doi-asserted-by":"crossref","unstructured":"Zhang, Y., An, L., Yu, T., Li, X., Li, K., Liu, Y.: 4d association graph for realtime multi-person motion capture using multiple video cameras. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 1324\u20131333 (2020)","DOI":"10.1109\/CVPR42600.2020.00140"},{"key":"2_CR48","doi-asserted-by":"crossref","unstructured":"Zhou, Y., Habermann, M., Habibie, I., Tewari, A., Theobalt, C., Xu, F.: Monocular real-time full body capture with inter-part correlations. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 4811\u20134822 (2021)","DOI":"10.1109\/CVPR46437.2021.00478"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2024"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-72754-2_2","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,10,30]],"date-time":"2024-10-30T14:59:00Z","timestamp":1730300340000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-72754-2_2"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10,31]]},"ISBN":["9783031727535","9783031727542"],"references-count":48,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-72754-2_2","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,10,31]]},"assertion":[{"value":"31 October 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Milan","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 September 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4 October 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2024.ecva.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}