{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,9]],"date-time":"2026-04-09T06:26:28Z","timestamp":1775715988748,"version":"3.50.1"},"publisher-location":"Cham","reference-count":104,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031200670","type":"print"},{"value":"9783031200687","type":"electronic"}],"license":[{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2022]]},"DOI":"10.1007\/978-3-031-20068-7_11","type":"book-chapter","created":{"date-parts":[[2022,11,10]],"date-time":"2022-11-10T08:06:38Z","timestamp":1668067598000},"page":"180-200","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":74,"title":["EgoBody: Human Body Shape and\u00a0Motion of\u00a0Interacting People from\u00a0Head-Mounted Devices"],"prefix":"10.1007","author":[{"given":"Siwei","family":"Zhang","sequence":"first","affiliation":[]},{"given":"Qianli","family":"Ma","sequence":"additional","affiliation":[]},{"given":"Yan","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"Zhiyin","family":"Qian","sequence":"additional","affiliation":[]},{"given":"Taein","family":"Kwon","sequence":"additional","affiliation":[]},{"given":"Marc","family":"Pollefeys","sequence":"additional","affiliation":[]},{"given":"Federica","family":"Bogo","sequence":"additional","affiliation":[]},{"given":"Siyu","family":"Tang","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2022,11,11]]},"reference":[{"key":"11_CR1","unstructured":"Azure Kinect. https:\/\/docs.microsoft.com\/en-us\/azure\/kinect-dk\/"},{"key":"11_CR2","unstructured":"LAAN Labs 3D Scanner app. https:\/\/apps.apple.com\/us\/app\/3d-scanner-app\/id1419913995"},{"key":"11_CR3","unstructured":"Microsoft Hololens2. https:\/\/www.microsoft.com\/en-us\/hololens"},{"key":"11_CR4","unstructured":"SMPL model transfer. https:\/\/github.com\/vchoutas\/smplx\/tree\/master\/transfer_mode"},{"issue":"1","key":"11_CR5","doi-asserted-by":"publisher","first-page":"44","DOI":"10.1109\/TPAMI.2006.21","volume":"28","author":"A Agarwal","year":"2005","unstructured":"Agarwal, A., Triggs, B.: Recovering 3d human pose from monocular images. IEEE Trans. Pattern Anal. Mach. Intell. 28(1), 44\u201358 (2005)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"11_CR6","doi-asserted-by":"publisher","first-page":"104","DOI":"10.1016\/j.cviu.2018.05.001","volume":"171","author":"M Aghaei","year":"2018","unstructured":"Aghaei, M., Dimiccoli, M., Ferrer, C.C., Radeva, P.: Towards social pattern characterization in egocentric photo-streams. Comput. Vis. Image Underst. 171, 104\u2013117 (2018)","journal-title":"Comput. Vis. Image Underst."},{"key":"11_CR7","doi-asserted-by":"crossref","unstructured":"Aghaei, M., Dimiccoli, M., Radeva, P.: With whom do i interact? Detecting social interactions in egocentric photo-streams. In: 2016 23rd International Conference on Pattern Recognition (ICPR), pp. 2959\u20132964. IEEE (2016)","DOI":"10.1109\/ICPR.2016.7900087"},{"key":"11_CR8","unstructured":"A. Nisbet, R.: The Social Bond: An Introduction to the Study of Society (1970)"},{"key":"11_CR9","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"15","DOI":"10.1007\/978-3-540-88688-4_2","volume-title":"Computer Vision \u2013 ECCV 2008","author":"AO B\u0103lan","year":"2008","unstructured":"B\u0103lan, A.O., Black, M.J.: The naked truth: estimating body shape under clothing. In: Forsyth, D., Torr, P., Zisserman, A. (eds.) ECCV 2008. LNCS, vol. 5303, pp. 15\u201329. Springer, Heidelberg (2008). https:\/\/doi.org\/10.1007\/978-3-540-88688-4_2"},{"key":"11_CR10","doi-asserted-by":"crossref","unstructured":"Bambach, S., Lee, S., Crandall, D.J., Yu, C.: Lending a hand: detecting hands and recognizing activities in complex egocentric interactions. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 1949\u20131957 (2015)","DOI":"10.1109\/ICCV.2015.226"},{"issue":"2","key":"11_CR11","doi-asserted-by":"publisher","first-page":"239","DOI":"10.1109\/34.121791","volume":"14","author":"P Besl","year":"1992","unstructured":"Besl, P., McKay, N.D.: A method for registration of 3-d shapes. IEEE Trans. Pattern Anal. Mach. Intell. 14(2), 239\u2013256 (1992)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"11_CR12","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"561","DOI":"10.1007\/978-3-319-46454-1_34","volume-title":"Computer Vision \u2013 ECCV 2016","author":"F Bogo","year":"2016","unstructured":"Bogo, F., Kanazawa, A., Lassner, C., Gehler, P., Romero, J., Black, M.J.: Keep it SMPL: automatic estimation of 3d human pose and shape from a single image. In: Leibe, B., Matas, J., Sebe, N., Welling, M. (eds.) ECCV 2016. LNCS, vol. 9909, pp. 561\u2013578. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-46454-1_34"},{"key":"11_CR13","unstructured":"Cao, Z., Hidalgo Martinez, G., Simon, T., Wei, S., Sheikh, Y.A.: Openpose: Realtime multi-person 2d pose estimation using part affinity fields. In: IEEE Transactions on Pattern Analysis and Machine Intelligence (2019)"},{"key":"11_CR14","doi-asserted-by":"crossref","unstructured":"Choi, H., Moon, G., Chang, J.Y., Lee, K.M.: Beyond static features for temporally consistent 3d human pose and shape from a video. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 1964\u20131973 (2021)","DOI":"10.1109\/CVPR46437.2021.00200"},{"key":"11_CR15","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"769","DOI":"10.1007\/978-3-030-58571-6_45","volume-title":"Computer Vision \u2013 ECCV 2020","author":"H Choi","year":"2020","unstructured":"Choi, H., Moon, G., Lee, K.M.: Pose2Mesh: graph convolutional network for 3d human pose and mesh recovery from a 2d human pose. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.-M. (eds.) ECCV 2020. LNCS, vol. 12352, pp. 769\u2013787. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58571-6_45"},{"key":"11_CR16","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"753","DOI":"10.1007\/978-3-030-01225-0_44","volume-title":"Computer Vision \u2013 ECCV 2018","author":"D Doughty","year":"2018","unstructured":"Doughty, D., et al.: Scaling egocentric vision: the dataset. In: Ferrari, V., Hebert, M., Sminchisescu, C., Weiss, Y. (eds.) ECCV 2018. LNCS, vol. 11208, pp. 753\u2013771. Springer, Cham (2018). https:\/\/doi.org\/10.1007\/978-3-030-01225-0_44"},{"issue":"1","key":"11_CR17","doi-asserted-by":"publisher","first-page":"33","DOI":"10.1007\/s11263-021-01531-2","volume":"130","author":"D Damen","year":"2022","unstructured":"Damen, D., et al.: Rescaling egocentric vision: collection, pipeline and challenges for epic-kitchens-100. Int. J. Comput. Vision 130(1), 33\u201355 (2022)","journal-title":"Int. J. Comput. Vision"},{"issue":"12","key":"11_CR18","doi-asserted-by":"publisher","first-page":"2907","DOI":"10.1016\/j.jstrokecerebrovasdis.2016.08.004","volume":"25","author":"A Dhand","year":"2016","unstructured":"Dhand, A., Dalton, A.E., Luke, D.A., Gage, B.F., Lee, J.M.: Accuracy of wearable cameras to track social interactions in stroke survivors. J. Stroke Cerebrovasc. Dis. 25(12), 2907\u20132910 (2016)","journal-title":"J. Stroke Cerebrovasc. Dis."},{"key":"11_CR19","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"210","DOI":"10.1007\/978-3-030-58536-5_13","volume-title":"Computer Vision \u2013 ECCV 2020","author":"J Dong","year":"2020","unstructured":"Dong, J., Shuai, Q., Zhang, Y., Liu, X., Zhou, X., Bao, H.: Motion capture from internet videos. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.-M. (eds.) ECCV 2020. LNCS, vol. 12347, pp. 210\u2013227. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58536-5_13"},{"key":"11_CR20","doi-asserted-by":"crossref","unstructured":"Fang, Q., Shuai, Q., Dong, J., Bao, H., Zhou, X.: Reconstructing 3d human pose by watching humans in the mirror. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 12814\u201312823 (2021)","DOI":"10.1109\/CVPR46437.2021.01262"},{"key":"11_CR21","doi-asserted-by":"crossref","unstructured":"Fathi, A., Hodgins, J.K., Rehg, J.M.: Social interactions: a first-person perspective. In: 2012 IEEE Conference on Computer Vision and Pattern Recognition, pp. 1226\u20131233. IEEE (2012)","DOI":"10.1109\/CVPR.2012.6247805"},{"key":"11_CR22","doi-asserted-by":"crossref","unstructured":"Fathi, A., Farhadi, A., Rehg, J.M.: Understanding egocentric activities. In: 2011 International Conference on Computer Visio, pp. 407\u2013414. IEEE (2011)","DOI":"10.1109\/ICCV.2011.6126269"},{"key":"11_CR23","doi-asserted-by":"crossref","unstructured":"Fieraru, M., Zanfir, M., Oneata, E., Popa, A.I., Olaru, V., Sminchisescu, C.: Three-dimensional reconstruction of human interactions. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 7214\u20137223 (2020)","DOI":"10.1109\/CVPR42600.2020.00724"},{"issue":"1\u20132","key":"11_CR24","doi-asserted-by":"publisher","first-page":"75","DOI":"10.1007\/s11263-008-0173-1","volume":"87","author":"J Gall","year":"2010","unstructured":"Gall, J., Rosenhahn, B., Brox, T., Seidel, H.P.: Optimization and filtering for human motion capture. Int. J. Comput. Vision 87(1\u20132), 75 (2010)","journal-title":"Int. J. Comput. Vision"},{"issue":"1","key":"11_CR25","doi-asserted-by":"publisher","first-page":"33","DOI":"10.1007\/BF02291478","volume":"40","author":"JC Gower","year":"1975","unstructured":"Gower, J.C.: Generalized Procrustes analysis. Psychometrika 40(1), 33\u201351 (1975)","journal-title":"Psychometrika"},{"key":"11_CR26","doi-asserted-by":"crossref","unstructured":"Grauman, K., Shakhnarovich, G., Darrell, T.: Inferring 3d structure with a statistical image-based shape model. In: ICCV, vol. 3, p. 641 (2003)","DOI":"10.1109\/ICCV.2003.1238408"},{"key":"11_CR27","unstructured":"Grauman, K., et al.: Ego4D: Around the world in 3000 hours of egocentric video. arXiv preprint arXiv:2110.07058 (2021)"},{"key":"11_CR28","doi-asserted-by":"crossref","unstructured":"Guler, R.A., Kokkinos, I.: Holopose: Holistic 3d human reconstruction in-the-wild. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 10884\u201310894 (2019)","DOI":"10.1109\/CVPR.2019.01114"},{"key":"11_CR29","doi-asserted-by":"crossref","unstructured":"Guzov, V., Mir, A., Sattler, T., Pons-Moll, G.: Human positioning system (HPS): 3d human pose estimation and self-localization in large scenes from body-mounted sensors. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 4318\u20134329 (2021)","DOI":"10.1109\/CVPR46437.2021.00430"},{"key":"11_CR30","doi-asserted-by":"crossref","unstructured":"Hassan, M., Choutas, V., Tzionas, D., Black, M.J.: Resolving 3d human pose ambiguities with 3d scene constraints. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 2282\u20132292 (2019)","DOI":"10.1109\/ICCV.2019.00237"},{"key":"11_CR31","doi-asserted-by":"crossref","unstructured":"He, K., Gkioxari, G., Doll\u00e1r, P., Girshick, R.: Mask R-CNN. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 2961\u20132969 (2017)","DOI":"10.1109\/ICCV.2017.322"},{"key":"11_CR32","doi-asserted-by":"crossref","unstructured":"Huang, Y., Bogo, F., Lassner, C., Kanazawa, A., Gehler, P.V., Romero, J., Akhter, I., Black, M.J.: Towards accurate marker-less human shape and pose estimation over time. In: 2017 International Conference on 3D Vision (3DV), pp. 421\u2013430. IEEE (2017)","DOI":"10.1109\/3DV.2017.00055"},{"key":"11_CR33","doi-asserted-by":"crossref","unstructured":"Ionescu, C., Papava, D., Olaru, V., Sminchisescu, C.: Human3. 6m: large scale datasets and predictive methods for 3d human sensing in natural environments. IEEE Trans. Pattern Anal. Mach. Intell. 36(7), 1325\u20131339 (2013)","DOI":"10.1109\/TPAMI.2013.248"},{"key":"11_CR34","doi-asserted-by":"crossref","unstructured":"Jiang, H., Grauman, K.: Seeing invisible poses: estimating 3d body pose from egocentric video. In: 2017 IEEE Conference on Computer Vision and Pattern Recognition (CVPR), pp. 3501\u20133509. IEEE (2017)","DOI":"10.1109\/CVPR.2017.373"},{"key":"11_CR35","doi-asserted-by":"crossref","unstructured":"Joo, H., Neverova, N., Vedaldi, A.: Exemplar fine-tuning for 3d human pose fitting towards in-the-wild 3d human pose estimation (2021)","DOI":"10.1109\/3DV53792.2021.00015"},{"key":"11_CR36","doi-asserted-by":"crossref","unstructured":"Joo, H., Simon, T., Cikara, M., Sheikh, Y.: Towards social artificial intelligence: nonverbal social signal prediction in a triadic interaction. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 10873\u201310883 (2019)","DOI":"10.1109\/CVPR.2019.01113"},{"issue":"1","key":"11_CR37","doi-asserted-by":"publisher","first-page":"190","DOI":"10.1109\/TPAMI.2017.2782743","volume":"41","author":"H Joo","year":"2017","unstructured":"Joo, H., et al.: Panoptic studio: a massively multiview system for social interaction capture. IEEE Trans. Pattern Anal. Mach. Intell. 41(1), 190\u2013204 (2017)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"11_CR38","doi-asserted-by":"crossref","unstructured":"Joo, H., Simon, T., Sheikh, Y.: Total capture: a 3d deformation model for tracking faces, hands, and bodies. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 8320\u20138329 (2018)","DOI":"10.1109\/CVPR.2018.00868"},{"key":"11_CR39","doi-asserted-by":"crossref","unstructured":"Kanazawa, A., Black, M.J., Jacobs, D.W., Malik, J.: End-to-end recovery of human shape and pose. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 7122\u20137131 (2018)","DOI":"10.1109\/CVPR.2018.00744"},{"key":"11_CR40","doi-asserted-by":"crossref","unstructured":"Kanazawa, A., Zhang, J.Y., Felsen, P., Malik, J.: Learning 3d human dynamics from video. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 5614\u20135623 (2019)","DOI":"10.1109\/CVPR.2019.00576"},{"key":"11_CR41","unstructured":"Kay, W., et al.: The kinetics human action video dataset. arXiv preprint arXiv:1705.06950 (2017)"},{"key":"11_CR42","doi-asserted-by":"crossref","unstructured":"Kazakos, E., Nagrani, A., Zisserman, A., Damen, D.: Epic-fusion: audio-visual temporal binding for egocentric action recognition. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 5492\u20135501 (2019)","DOI":"10.1109\/ICCV.2019.00559"},{"key":"11_CR43","doi-asserted-by":"crossref","unstructured":"Kitani, K.M., Okabe, T., Sato, Y., Sugimoto, A.: Fast unsupervised ego-action learning for first-person sports videos. In: CVPR 2011, pp. 3241\u20133248. IEEE (2011)","DOI":"10.1109\/CVPR.2011.5995406"},{"key":"11_CR44","doi-asserted-by":"crossref","unstructured":"Kocabas, M., Athanasiou, N., Black, M.J.: Vibe: video inference for human body pose and shape estimation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 5253\u20135263 (2020)","DOI":"10.1109\/CVPR42600.2020.00530"},{"key":"11_CR45","doi-asserted-by":"crossref","unstructured":"Kocabas, M., Huang, C.H.P., Hilliges, O., Black, M.J.: PARE: part attention regressor for 3D human body estimation. In: Proceedings International Conference on Computer Vision (ICCV), pp. 11127\u201311137. IEEE, October 2021","DOI":"10.1109\/ICCV48922.2021.01094"},{"key":"11_CR46","doi-asserted-by":"crossref","unstructured":"Kocabas, M., Huang, C.H.P., Tesch, J., M\u00fcller, L., Hilliges, O., Black, M.J.: SPEC: Seeing people in the wild with an estimated camera. In: Proceedings of International Conference on Computer Vision (ICCV), pp. 11035\u201311045, October 2021","DOI":"10.1109\/ICCV48922.2021.01085"},{"key":"11_CR47","doi-asserted-by":"crossref","unstructured":"Kolotouros, N., Pavlakos, G., Black, M.J., Daniilidis, K.: Learning to reconstruct 3d human pose and shape via model-fitting in the loop. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 2252\u20132261 (2019)","DOI":"10.1109\/ICCV.2019.00234"},{"key":"11_CR48","doi-asserted-by":"crossref","unstructured":"Kolotouros, N., Pavlakos, G., Daniilidis, K.: Convolutional mesh regression for single-image human shape reconstruction. In: CVPR (2019)","DOI":"10.1109\/CVPR.2019.00463"},{"key":"11_CR49","doi-asserted-by":"crossref","unstructured":"Kolotouros, N., Pavlakos, G., Jayaraman, D., Daniilidis, K.: Probabilistic modeling for human mesh recovery. In: ICCV (2021)","DOI":"10.1109\/ICCV48922.2021.01140"},{"key":"11_CR50","doi-asserted-by":"crossref","unstructured":"Kwon, T., Tekin, B., Stuhmer, J., Bogo, F., Pollefeys, M.: H2O: two hands manipulating objects for first person interaction recognition. In: International Conference on Computer Vision (ICCV) (2021)","DOI":"10.1109\/ICCV48922.2021.00998"},{"key":"11_CR51","unstructured":"Lab, C.G.: CMU Graphics Lab Motion Capture Database (2000). https:\/\/mocap.cs.cmu.edu\/"},{"key":"11_CR52","doi-asserted-by":"crossref","unstructured":"Lee, Y.J., Ghosh, J., Grauman, K.: Discovering important people and objects for egocentric video summarization. In: 2012 IEEE Conference on Computer Vision and Pattern Recognition, pp. 1346\u20131353. IEEE (2012)","DOI":"10.1109\/CVPR.2012.6247820"},{"key":"11_CR53","doi-asserted-by":"crossref","unstructured":"Li, H., Cai, Y., Zheng, W.S.: Deep dual relation modeling for egocentric interaction recognition. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 7932\u20137941 (2019)","DOI":"10.1109\/CVPR.2019.00812"},{"key":"11_CR54","doi-asserted-by":"crossref","unstructured":"Li, J., Xu, C., Chen, Z., Bian, S., Yang, L., Lu, C.: Hybrik: a hybrid analytical-neural inverse kinematics solution for 3d human pose and shape estimation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 3383\u20133393 (2021)","DOI":"10.1109\/CVPR46437.2021.00339"},{"key":"11_CR55","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"639","DOI":"10.1007\/978-3-030-01228-1_38","volume-title":"Computer Vision \u2013 ECCV 2018","author":"Y Li","year":"2018","unstructured":"Li, Y., Liu, M., Rehg, J.M.: In the eye of beholder: joint learning of gaze and actions in first person video. In: Ferrari, V., Hebert, M., Sminchisescu, C., Weiss, Y. (eds.) ECCV 2018. LNCS, vol. 11209, pp. 639\u2013655. Springer, Cham (2018). https:\/\/doi.org\/10.1007\/978-3-030-01228-1_38"},{"key":"11_CR56","doi-asserted-by":"crossref","unstructured":"Lin, K., Wang, L., Liu, Z.: End-to-end human pose and mesh reconstruction with transformers. In: CVPR (2021)","DOI":"10.1109\/CVPR46437.2021.00199"},{"key":"11_CR57","doi-asserted-by":"crossref","unstructured":"Liu, M., Yang, D., Zhang, Y., Cui, Z., Rehg, J.M., Tang, S.: 4D human body capture from egocentric video via 3D scene grounding. In: 2021 International Conference on 3D Vision (3DV) (2021)","DOI":"10.1109\/3DV53792.2021.00101"},{"issue":"6","key":"11_CR58","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/2816795.2818013","volume":"34","author":"M Loper","year":"2015","unstructured":"Loper, M., Mahmood, N., Romero, J., Pons-Moll, G., Black, M.J.: SMPL: a skinned multi-person linear model. ACM Trans. Graph. (TOG) 34(6), 1\u201316 (2015)","journal-title":"ACM Trans. Graph. (TOG)"},{"key":"11_CR59","doi-asserted-by":"crossref","unstructured":"Luo, Z., Golestaneh, S.A., Kitani, K.M.: 3d human motion estimation via motion compression and refinement. In: Proceedings of the Asian Conference on Computer Vision (2020)","DOI":"10.1007\/978-3-030-69541-5_20"},{"key":"11_CR60","unstructured":"Luo, Z., Hachiuma, R., Yuan, Y., Iwase, S., Kitani, K.M.: Kinematics-guided reinforcement learning for object-aware 3d ego-pose estimation. arXiv preprint arXiv:2011.04837 (2020)"},{"key":"11_CR61","doi-asserted-by":"crossref","unstructured":"Mahmood, N., Ghorbani, N., Troje, N.F., Pons-Moll, G., Black, M.J.: Amass: archive of motion capture as surface shapes. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision. pp. 5442\u20135451 (2019)","DOI":"10.1109\/ICCV.2019.00554"},{"key":"11_CR62","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"614","DOI":"10.1007\/978-3-030-01249-6_37","volume-title":"Computer Vision \u2013 ECCV 2018","author":"T von Marcard","year":"2018","unstructured":"von Marcard, T., Henschel, R., Black, M.J., Rosenhahn, B., Pons-Moll, G.: Recovering accurate 3d human pose in the wild using IMUs and a moving camera. In: Ferrari, V., Hebert, M., Sminchisescu, C., Weiss, Y. (eds.) ECCV 2018. LNCS, vol. 11214, pp. 614\u2013631. Springer, Cham (2018). https:\/\/doi.org\/10.1007\/978-3-030-01249-6_37"},{"issue":"8","key":"11_CR63","doi-asserted-by":"publisher","first-page":"1533","DOI":"10.1109\/TPAMI.2016.2522398","volume":"38","author":"T von Marcard","year":"2016","unstructured":"von Marcard, T., Pons-Moll, G., Rosenhahn, B.: Human pose estimation from video and IMUs. Trans. Pattern Anal. Mach. Intell. 38(8), 1533\u20131547 (2016)","journal-title":"Trans. Pattern Anal. Mach. Intell."},{"key":"11_CR64","doi-asserted-by":"crossref","unstructured":"Mehta, D., et al.: Monocular 3d human pose estimation in the wild using improved CNN supervision. In: 2017 International Conference on 3D Vision (3DV), pp. 506\u2013516. IEEE (2017)","DOI":"10.1109\/3DV.2017.00064"},{"key":"11_CR65","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"752","DOI":"10.1007\/978-3-030-58571-6_44","volume-title":"Computer Vision \u2013 ECCV 2020","author":"G Moon","year":"2020","unstructured":"Moon, G., Lee, K.M.: I2L-MeshNet: image-to-lixel prediction network for accurate 3d human pose and mesh estimation from a single RGB image. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.-M. (eds.) ECCV 2020. LNCS, vol. 12352, pp. 752\u2013768. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58571-6_44"},{"key":"11_CR66","doi-asserted-by":"crossref","unstructured":"Narayan, S., Kankanhalli, M.S., Ramakrishnan, K.R.: Action and interaction recognition in first-person videos. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition Workshops, pp. 512\u2013518 (2014)","DOI":"10.1109\/CVPRW.2014.82"},{"key":"11_CR67","doi-asserted-by":"crossref","unstructured":"Ng, E., Xiang, D., Joo, H., Grauman, K.: You2me: Inferring body pose in egocentric video via first and second person interactions. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 9890\u20139900 (2020)","DOI":"10.1109\/CVPR42600.2020.00991"},{"key":"11_CR68","doi-asserted-by":"crossref","unstructured":"Northcutt, C., Zha, S., Lovegrove, S., Newcombe, R.: EgoCom: a multi-person multi-modal egocentric communications dataset. In: IEEE Transactions on Pattern Analysis and Machine Intelligence (2020)","DOI":"10.1109\/TPAMI.2020.3025105"},{"key":"11_CR69","doi-asserted-by":"crossref","unstructured":"Ogaki, K., Kitani, K.M., Sugano, Y., Sato, Y.: Coupling eye-motion and ego-motion features for first-person activity recognition. In: 2012 IEEE Computer Society Conference on Computer Vision and Pattern Recognition Workshops, pp. 1\u20137. IEEE (2012)","DOI":"10.1109\/CVPRW.2012.6239188"},{"key":"11_CR70","doi-asserted-by":"crossref","unstructured":"Omran, M., Lassner, C., Pons-Moll, G., Gehler, P., Schiele, B.: Neural body fitting: unifying deep learning and model based human pose and shape estimation. In: 2018 international conference on 3D vision (3DV), pp. 484\u2013494. IEEE (2018)","DOI":"10.1109\/3DV.2018.00062"},{"key":"11_CR71","doi-asserted-by":"crossref","unstructured":"Patel, P., Huang, C.H.P., Tesch, J., Hoffmann, D.T., Tripathi, S., Black, M.J.: AGORA: avatars in geography optimized for regression analysis. In: Proceedings IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), June 2021","DOI":"10.1109\/CVPR46437.2021.01326"},{"key":"11_CR72","doi-asserted-by":"crossref","unstructured":"Pavlakos, G., et al.: Expressive body capture: 3d hands, face, and body from a single image. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 10975\u201310985 (2019)","DOI":"10.1109\/CVPR.2019.01123"},{"key":"11_CR73","doi-asserted-by":"crossref","unstructured":"Pech-Pacheco, J.L., Crist\u00f3bal, G., Chamorro-Martinez, J., Fern\u00e1ndez-Valdivia, J.: Diatom autofocusing in bright field microscopy: a comparative study. In: Proceedings 15th International Conference on Pattern Recognition. ICPR-2000, vol. 3, pp. 314\u2013317. IEEE (2000)","DOI":"10.1109\/ICPR.2000.903548"},{"key":"11_CR74","doi-asserted-by":"crossref","unstructured":"Pirsiavash, H., Ramanan, D.: Detecting activities of daily living in first-person camera views. In: 2012 IEEE Conference on Computer Vision and Pattern Recognition, pp. 2847\u20132854. IEEE (2012)","DOI":"10.1109\/CVPR.2012.6248010"},{"key":"11_CR75","doi-asserted-by":"crossref","unstructured":"Rong, Y., Shiratori, T., Joo, H.: FrankMocap: a monocular 3d whole-body pose estimation system via regression and integration. In: IEEE International Conference on Computer Vision Workshops (2021)","DOI":"10.1109\/ICCVW54120.2021.00201"},{"key":"11_CR76","doi-asserted-by":"crossref","unstructured":"Ryoo, M.S., Matthies, L.: First-person activity recognition: What are they doing to me? In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 2730\u20132737 (2013)","DOI":"10.1109\/CVPR.2013.352"},{"key":"11_CR77","doi-asserted-by":"crossref","unstructured":"Saini, N., et al.: MarkerLess outdoor human motion capture using multiple autonomous micro aerial vehicles. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 823\u2013832 (2019)","DOI":"10.1109\/ICCV.2019.00091"},{"key":"11_CR78","doi-asserted-by":"crossref","unstructured":"Shiratori, T., Park, H.S., Sigal, L., Sheikh, Y., Hodgins, J.K.: Motion capture from body-mounted cameras. In: ACM SIGGRAPH 2011 Papers, pp. 1\u201310 (2011)","DOI":"10.1145\/2010324.1964926"},{"key":"11_CR79","doi-asserted-by":"crossref","unstructured":"Sigurdsson, G.A., Gupta, A., Schmid, C., Farhadi, A., Alahari, K.: Actor and observer: joint modeling of first and third-person videos. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 7396\u20137404 (2018)","DOI":"10.1109\/CVPR.2018.00772"},{"key":"11_CR80","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"744","DOI":"10.1007\/978-3-030-58565-5_44","volume-title":"Computer Vision \u2013 ECCV 2020","author":"J Song","year":"2020","unstructured":"Song, J., Chen, X., Hilliges, O.: Human body model fitting by learned gradient descent. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.-M. (eds.) ECCV 2020. LNCS, vol. 12365, pp. 744\u2013760. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58565-5_44"},{"key":"11_CR81","doi-asserted-by":"crossref","unstructured":"Sun, Y., Ye, Y., Liu, W., Gao, W., Fu, Y., Mei, T.: Human mesh recovery from monocular images via a skeleton-disentangled representation. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 5349\u20135358 (2019)","DOI":"10.1109\/ICCV.2019.00545"},{"key":"11_CR82","doi-asserted-by":"crossref","unstructured":"Tan, J.K.V., Budvytis, I., Cipolla, R.: Indirect deep structured learning for 3d human body shape and pose prediction (2017)","DOI":"10.5244\/C.31.15"},{"key":"11_CR83","doi-asserted-by":"crossref","unstructured":"Tome, D., et al.: SelfPose: 3d egocentric pose estimation from a headset mounted camera. arXiv preprint arXiv:2011.01519 (2020)","DOI":"10.1109\/TPAMI.2020.3029700"},{"key":"11_CR84","doi-asserted-by":"crossref","unstructured":"Tome, D., Peluse, P., Agapito, L., Badino, H.: XR-EgoPose: EgoCentric 3d human pose from an HMD camera. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 7728\u20137738 (2019)","DOI":"10.1109\/ICCV.2019.00782"},{"key":"11_CR85","doi-asserted-by":"crossref","unstructured":"Trumble, M., Gilbert, A., Malleson, C., Hilton, A., Collomosse, J.: Total capture: 3d human pose estimation fusing video and inertial sensors. In: 2017 British Machine Vision Conference (BMVC) (2017)","DOI":"10.5244\/C.31.14"},{"key":"11_CR86","unstructured":"Tung, H.Y., Tung, H.W., Yumer, E., Fragkiadaki, K.: Self-supervised learning of motion capture. In: Advances in Neural Information Processing Systems, pp. 5236\u20135246 (2017)"},{"key":"11_CR87","unstructured":"Ungureanu, D., et al.: HoloLens 2 Research Mode as a Tool for Computer Vision Research. arXiv:2008.11239 (2020)"},{"key":"11_CR88","doi-asserted-by":"crossref","unstructured":"Wandt, B., Rudolph, M., Zell, P., Rhodin, H., Rosenhahn, B.: CanonPose: self-supervised monocular 3d human pose estimation in the wild. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 13294\u201313304 (2021)","DOI":"10.1109\/CVPR46437.2021.01309"},{"issue":"5","key":"11_CR89","doi-asserted-by":"publisher","first-page":"1856","DOI":"10.1109\/TVCG.2017.2693151","volume":"24","author":"Y Wang","year":"2017","unstructured":"Wang, Y., Liu, Y., Tong, X., Dai, Q., Tan, P.: Outdoor markerless motion capture with sparse handheld video cameras. IEEE Trans. Visual Comput. Graph. 24(5), 1856\u20131866 (2017)","journal-title":"IEEE Trans. Visual Comput. Graph."},{"key":"11_CR90","doi-asserted-by":"crossref","unstructured":"Weng, Z., Yeung, S.: Holistic 3d human and scene mesh estimation from single view images. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 334\u2013343 (2021)","DOI":"10.1109\/CVPR46437.2021.00040"},{"key":"11_CR91","doi-asserted-by":"crossref","unstructured":"Xiang, D., Joo, H., Sheikh, Y.: Monocular total capture: posing face, body, and hands in the wild. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (2019)","DOI":"10.1109\/CVPR.2019.01122"},{"issue":"5","key":"11_CR92","doi-asserted-by":"publisher","first-page":"2093","DOI":"10.1109\/TVCG.2019.2898650","volume":"25","author":"W Xu","year":"2019","unstructured":"Xu, W., et al.: Mo2Cap2: real-time mobile 3D motion capture with a cap-mounted fisheye camera. IEEE Trans. Visual Comput. Graph. 25(5), 2093\u20132101 (2019)","journal-title":"IEEE Trans. Visual Comput. Graph."},{"key":"11_CR93","doi-asserted-by":"crossref","unstructured":"Xu, Y., Zhu, S.C., Tung, T.: DenseRaC: joint 3d pose and shape estimation by dense render-and-compare. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 7760\u20137770 (2019)","DOI":"10.1109\/ICCV.2019.00785"},{"key":"11_CR94","doi-asserted-by":"crossref","unstructured":"Yang, J.A., Lee, C.H., Yang, S.W., Somayazulu, V.S., Chen, Y.K., Chien, S.Y.: Wearable social camera: egocentric video summarization for social interaction. In: 2016 IEEE International Conference on Multimedia & Expo Workshops (ICMEW), pp. 1\u20136. IEEE (2016)","DOI":"10.1109\/ICMEW.2016.7574681"},{"key":"11_CR95","doi-asserted-by":"crossref","unstructured":"Yonetani, R., Kitani, K.M., Sato, Y.: Recognizing micro-actions and reactions from paired egocentric videos. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 2629\u20132638 (2016)","DOI":"10.1109\/CVPR.2016.288"},{"key":"11_CR96","doi-asserted-by":"crossref","unstructured":"Yu, Z., et al.: HUMBI: a large multiview dataset of human body expressions. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 2990\u20133000 (2020)","DOI":"10.1109\/CVPR42600.2020.00306"},{"key":"11_CR97","doi-asserted-by":"crossref","unstructured":"Yuan, Y., Kitani, K.: Ego-pose estimation and forecasting as real-time PD control. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 10082\u201310092 (2019)","DOI":"10.1109\/ICCV.2019.01018"},{"key":"11_CR98","doi-asserted-by":"crossref","unstructured":"Yuan, Y., Wei, S.E., Simon, T., Kitani, K., Saragih, J.: SimPoe: simulated character control for 3d human pose estimation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 7159\u20137169 (2021)","DOI":"10.1109\/CVPR46437.2021.00708"},{"key":"11_CR99","doi-asserted-by":"crossref","unstructured":"Zanfir, A., Bazavan, E.G., Xu, H., Freeman, B., Sukthankar, R., Sminchisescu, C.: Weakly supervised 3d human pose and shape reconstruction with normalizing flows. arXiv preprint arXiv:2003.10350 (2020)","DOI":"10.1007\/978-3-030-58539-6_28"},{"key":"11_CR100","doi-asserted-by":"crossref","unstructured":"Zhang, J., Yu, D., Liew, J.H., Nie, X., Feng, J.: Body meshes as points. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 546\u2013556 (2021)","DOI":"10.1109\/CVPR46437.2021.00061"},{"key":"11_CR101","doi-asserted-by":"crossref","unstructured":"Zhang, S., Zhang, Y., Bogo, F., Marc, P., Tang, S.: Learning motion priors for 4d human body capture in 3d scenes. In: International Conference on Computer Vision (ICCV), October 2021","DOI":"10.1109\/ICCV48922.2021.01115"},{"key":"11_CR102","doi-asserted-by":"crossref","unstructured":"Zhang, Y., An, L., Yu, T., Li, X., Li, K., Liu, Y.: 4d association graph for realtime multi-person motion capture using multiple video cameras. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 1324\u20131333 (2020)","DOI":"10.1109\/CVPR42600.2020.00140"},{"key":"11_CR103","doi-asserted-by":"crossref","unstructured":"Zhang, Z., Crandall, D., Proulx, M., Talathi, S., Sharma, A.: Can gaze inform egocentric action recognition? In: 2022 Symposium on Eye Tracking Research and Applications, pp. 1\u20137 (2022)","DOI":"10.1145\/3517031.3529628"},{"key":"11_CR104","doi-asserted-by":"crossref","unstructured":"Zhou, Y., Habermann, M., Habibie, I., Tewari, A., Theobalt, C., Xu, F.: Monocular real-time full body capture with inter-part correlations. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 4811\u20134822 (2021)","DOI":"10.1109\/CVPR46437.2021.00478"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2022"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-20068-7_11","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,10,7]],"date-time":"2024-10-07T21:51:03Z","timestamp":1728337863000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-20068-7_11"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022]]},"ISBN":["9783031200670","9783031200687"],"references-count":104,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-20068-7_11","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2022]]},"assertion":[{"value":"11 November 2022","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Tel Aviv","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Israel","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2022","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"23 October 2022","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"27 October 2022","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"17","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2022","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2022.ecva.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"CMT","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"5804","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"1645","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"28% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3.21","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3.91","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}