{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,1]],"date-time":"2026-05-01T17:47:32Z","timestamp":1777657652121,"version":"3.51.4"},"publisher-location":"Cham","reference-count":68,"publisher":"Springer International Publishing","isbn-type":[{"value":"9783031167874","type":"print"},{"value":"9783031167881","type":"electronic"}],"license":[{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2022]]},"DOI":"10.1007\/978-3-031-16788-1_18","type":"book-chapter","created":{"date-parts":[[2022,9,22]],"date-time":"2022-09-22T20:35:56Z","timestamp":1663878956000},"page":"281-299","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":43,"title":["InterCap: Joint Markerless 3D Tracking of\u00a0Humans and\u00a0Objects in\u00a0Interaction"],"prefix":"10.1007","author":[{"given":"Yinghao","family":"Huang","sequence":"first","affiliation":[]},{"given":"Omid","family":"Taheri","sequence":"additional","affiliation":[]},{"given":"Michael J.","family":"Black","sequence":"additional","affiliation":[]},{"given":"Dimitrios","family":"Tzionas","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2022,9,20]]},"reference":[{"key":"18_CR1","doi-asserted-by":"crossref","unstructured":"Alldieck, T., Magnor, M., Xu, W., Theobalt, C., Pons-Moll, G.: Video based reconstruction of 3D people models. In: Computer Vision and Pattern Recognition (CVPR), pp. 8387\u20138397 (2018)","DOI":"10.1109\/CVPR.2018.00875"},{"issue":"3","key":"18_CR2","doi-asserted-by":"publisher","first-page":"408","DOI":"10.1145\/1073204.1073207","volume":"24","author":"D Anguelov","year":"2005","unstructured":"Anguelov, D., Srinivasan, P., Koller, D., Thrun, S., Rodgers, J., Davis, J.: SCAPE: shape completion and animation of people. Trans. Graph. (TOG) 24(3), 408\u2013416 (2005)","journal-title":"Trans. Graph. (TOG)"},{"key":"18_CR3","doi-asserted-by":"crossref","unstructured":"Bhatnagar, B.L., Xie, X., Petrov, I.A., Sminchisescu, C., Theobalt, C., Pons-Moll, G.: BEHAVE: Dataset and method for tracking human object interactions. In: Computer Vision and Pattern Recognition (CVPR), pp. 15935\u201315946 (2022)","DOI":"10.1109\/CVPR52688.2022.01547"},{"key":"18_CR4","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"561","DOI":"10.1007\/978-3-319-46454-1_34","volume-title":"Computer Vision \u2013 ECCV 2016","author":"F Bogo","year":"2016","unstructured":"Bogo, F., Kanazawa, A., Lassner, C., Gehler, P., Romero, J., Black, M.J.: Keep It SMPL: automatic estimation of 3D human pose and shape from a single image. In: Leibe, B., Matas, J., Sebe, N., Welling, M. (eds.) ECCV 2016. LNCS, vol. 9909, pp. 561\u2013578. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-46454-1_34"},{"key":"18_CR5","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"387","DOI":"10.1007\/978-3-030-58452-8_23","volume-title":"Computer Vision \u2013 ECCV 2020","author":"Z Cao","year":"2020","unstructured":"Cao, Z., Gao, H., Mangalam, K., Cai, Q.-Z., Vo, M., Malik, J.: Long-term human motion prediction with scene context. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.-M. (eds.) ECCV 2020. LNCS, vol. 12346, pp. 387\u2013404. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58452-8_23"},{"issue":"1","key":"18_CR6","doi-asserted-by":"publisher","first-page":"172","DOI":"10.1109\/TPAMI.2019.2929257","volume":"43","author":"Z Cao","year":"2019","unstructured":"Cao, Z., Hidalgo, G., Simon, T., Wei, S.E., Sheikh, Y.: OpenPose: realtime multi-person 2D pose estimation using part affinity fields. Trans. Pattern Anal. Mach. Intell. (TPAMI) 43(1), 172\u2013186 (2019)","journal-title":"Trans. Pattern Anal. Mach. Intell. (TPAMI)"},{"key":"18_CR7","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"20","DOI":"10.1007\/978-3-030-58607-2_2","volume-title":"Computer Vision \u2013 ECCV 2020","author":"V Choutas","year":"2020","unstructured":"Choutas, V., Pavlakos, G., Bolkart, T., Tzionas, D., Black, M.J.: Monocular expressive body regression through body-driven attention. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.-M. (eds.) ECCV 2020. LNCS, vol. 12355, pp. 20\u201340. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58607-2_2"},{"issue":"3","key":"18_CR8","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/1360612.1360697","volume":"27","author":"E De Aguiar","year":"2008","unstructured":"De Aguiar, E., Stoll, C., Theobalt, C., Ahmed, N., Seidel, H.P., Thrun, S.: Performance capture from sparse multi-view video. Trans. Graph. (TOG) 27(3), 1\u201310 (2008)","journal-title":"Trans. Graph. (TOG)"},{"issue":"8","key":"18_CR9","first-page":"1","volume":"14","author":"J Dong","year":"2021","unstructured":"Dong, J., Fang, Q., Jiang, W., Yang, Y., Huang, Q., Bao, H., Zhou, X.: Fast and robust multi-person 3D pose estimation and tracking from multiple views. Trans. Pattern Anal. Mach. Intell. (TPAMI) 14(8), 1\u201312 (2021)","journal-title":"Trans. Pattern Anal. Mach. Intell. (TPAMI)"},{"key":"18_CR10","doi-asserted-by":"crossref","unstructured":"Dong, J., Jiang, W., Huang, Q., Bao, H., Zhou, X.: Fast and robust multi-person 3D pose estimation from multiple views. In: Computer Vision and Pattern Recognition (CVPR), pp. 7792\u20137801 (2019)","DOI":"10.1109\/CVPR.2019.00798"},{"key":"18_CR11","doi-asserted-by":"crossref","unstructured":"Dong, Z., Song, J., Chen, X., Guo, C., Hilliges, O.: Shape-aware multi-person pose estimation from multi-view images. In: International Conference on Computer Vision (ICCV), pp. 11158\u201311168 (2021)","DOI":"10.1109\/ICCV48922.2021.01097"},{"key":"18_CR12","unstructured":"Geman, S., McClure, D.E.: Statistical methods for tomographic image reconstruction. In: Proceedings of the 46th Session of the International Statistical Institute, Bulletin of the ISI, vol. 52 (1987)"},{"key":"18_CR13","doi-asserted-by":"crossref","unstructured":"Hamer, H., Schindler, K., Koller-Meier, E., Van Gool, L.: Tracking a hand manipulating an object. In: International Conference on Computer Vision (ICCV), pp. 1475\u20131482 (2009)","DOI":"10.1109\/ICCV.2009.5459282"},{"key":"18_CR14","doi-asserted-by":"crossref","unstructured":"Hampali, S., Rad, M., Oberweger, M., Lepetit, V.: HOnnotate: a method for 3D annotation of hand and object poses. In: Computer Vision and Pattern Recognition (CVPR), pp. 3193\u20133203 (2020)","DOI":"10.1109\/CVPR42600.2020.00326"},{"key":"18_CR15","doi-asserted-by":"crossref","unstructured":"Hassan, M., Choutas, V., Tzionas, D., Black, M.J.: Resolving 3D human pose ambiguities with 3D scene constrains. In: International Conference on Computer Vision (ICCV), pp. 2282\u20132292 (2019)","DOI":"10.1109\/ICCV.2019.00237"},{"key":"18_CR16","doi-asserted-by":"crossref","unstructured":"Hassan, M., Ghosh, P., Tesch, J., Tzionas, D., Black, M.J.: Populating 3D scenes by learning human-scene interaction. In: Computer Vision and Pattern Recognition (CVPR), pp. 14708\u201314718 (2021)","DOI":"10.1109\/CVPR46437.2021.01447"},{"key":"18_CR17","doi-asserted-by":"crossref","unstructured":"Hasson, Y., Tekin, B., Bogo, F., Laptev, I., Pollefeys, M., Schmid, C.: Leveraging photometric consistency over time for sparsely supervised hand-object reconstruction. In: Computer Vision and Pattern Recognition (CVPR), pp. 568\u2013577 (2020)","DOI":"10.1109\/CVPR42600.2020.00065"},{"key":"18_CR18","doi-asserted-by":"crossref","unstructured":"Hasson, Y., Varol, G., Tzionas, D., Kalevatykh, I., Black, M.J., Laptev, I., Schmid, C.: Learning joint reconstruction of hands and manipulated objects. In: Computer Vision and Pattern Recognition (CVPR), pp. 11807\u201311816 (2019)","DOI":"10.1109\/CVPR.2019.01208"},{"key":"18_CR19","doi-asserted-by":"crossref","unstructured":"He, Y., Yan, R., Fragkiadaki, K., Yu, S.I.: Epipolar transformers. In: Computer Vision and Pattern Recognition (CVPR), pp. 7776\u20137785 (2020)","DOI":"10.1109\/CVPR42600.2020.00780"},{"key":"18_CR20","doi-asserted-by":"crossref","unstructured":"Hu, Y.T., Chen, H.S., Hui, K., Huang, J.B., Schwing, A.G.: SAIL-VOS: semantic amodal instance level video object segmentation - a synthetic dataset and baselines. In: Computer Vision and Pattern Recognition (CVPR), pp. 3105\u20133115 (2019)","DOI":"10.1109\/CVPR.2019.00322"},{"key":"18_CR21","doi-asserted-by":"crossref","unstructured":"Huang, C.H.P., et al.: Capturing and inferring dense full-body human-scene contact. In: Computer Vision and Pattern Recognition (CVPR), pp. 13274\u201313285 (2022)","DOI":"10.1109\/CVPR52688.2022.01292"},{"key":"18_CR22","doi-asserted-by":"crossref","unstructured":"Huang, Y., et al.: Towards accurate marker-less human shape and pose estimation over time. In: International Conference on 3D Vision (3DV), pp. 421\u2013430 (2017)","DOI":"10.1109\/3DV.2017.00055"},{"key":"18_CR23","doi-asserted-by":"crossref","unstructured":"Ionescu, C., Papava, D., Olaru, V., Sminchisescu, C.: Human3.6M: large scale datasets and predictive methods for 3D human sensing in natural environments. Trans. Pattern Anal. Mach. Intell. (TPAMI) 36(7), 1325\u20131339 (2014)","DOI":"10.1109\/TPAMI.2013.248"},{"key":"18_CR24","doi-asserted-by":"crossref","unstructured":"Iskakov, K., Burkov, E., Lempitsky, V., Malkov, Y.: Learnable triangulation of human pose. In: International Conference on Computer Vision (ICCV), pp. 7717\u20137726 (2019)","DOI":"10.1109\/ICCV.2019.00781"},{"key":"18_CR25","doi-asserted-by":"crossref","unstructured":"Kanazawa, A., Black, M.J., Jacobs, D.W., Malik, J.: End-to-end recovery of human shape and pose. In: Computer Vision and Pattern Recognition (CVPR), pp. 7122\u20137131 (2018)","DOI":"10.1109\/CVPR.2018.00744"},{"key":"18_CR26","doi-asserted-by":"crossref","unstructured":"Karunratanakul, K., Yang, J., Zhang, Y., Black, M.J., Muandet, K., Tang, S.: Grasping field: learning implicit representations for human grasps. In: International Conference on 3D Vision (3DV), pp. 333\u2013344 (2020)","DOI":"10.1109\/3DV50981.2020.00043"},{"key":"18_CR27","doi-asserted-by":"crossref","unstructured":"Kato, H., Ushiku, Y., Harada, T.: Neural 3D mesh renderer. In: Computer Vision and Pattern Recognition (CVPR), pp. 3907\u20133916 (2018)","DOI":"10.1109\/CVPR.2018.00411"},{"key":"18_CR28","doi-asserted-by":"crossref","unstructured":"Kirillov, A., Wu, Y., He, K., Girshick, R.: PointRend: image segmentation as rendering. In: Computer Vision and Pattern Recognition (CVPR), pp. 9799\u20139808 (2020)","DOI":"10.1109\/CVPR42600.2020.00982"},{"key":"18_CR29","doi-asserted-by":"crossref","unstructured":"Kocabas, M., Athanasiou, N., Black, M.J.: VIBE: video inference for human body pose and shape estimation. In: Computer Vision and Pattern Recognition (CVPR), pp. 5252\u20135262 (2020)","DOI":"10.1109\/CVPR42600.2020.00530"},{"key":"18_CR30","doi-asserted-by":"crossref","unstructured":"Li, X., Liu, S., Kim, K., Wang, X., Yang, M., Kautz, J.: Putting humans in a scene: learning affordance in 3D indoor environments. In: Computer Vision and Pattern Recognition (CVPR), pp. 12368\u201312376 (2019)","DOI":"10.1109\/CVPR.2019.01265"},{"key":"18_CR31","doi-asserted-by":"crossref","unstructured":"Liu, Y., Stoll, C., Gall, J., Seidel, H.P., Theobalt, C.: Markerless motion capture of interacting characters using multi-view image segmentation. In: Computer Vision and Pattern Recognition (CVPR), pp. 1249\u20131256 (2011)","DOI":"10.1109\/CVPR.2011.5995424"},{"issue":"6","key":"18_CR32","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/2661229.2661273","volume":"33","author":"M Loper","year":"2014","unstructured":"Loper, M., Mahmood, N., Black, M.J.: MoSh: motion and shape capture from sparse markers. Trans. Graph. (TOG) 33(6), 1\u201313 (2014)","journal-title":"Trans. Graph. (TOG)"},{"key":"18_CR33","doi-asserted-by":"crossref","unstructured":"Loper, M., Mahmood, N., Romero, J., Pons-Moll, G., Black, M.J.: SMPL: a skinned multi-person linear model. Trans. Graph. (TOG) 34(6), 248:1\u2013248:16 (2015)","DOI":"10.1145\/2816795.2818013"},{"key":"18_CR34","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"154","DOI":"10.1007\/978-3-319-10584-0_11","volume-title":"Computer Vision \u2013 ECCV 2014","author":"MM Loper","year":"2014","unstructured":"Loper, M.M., Black, M.J.: OpenDR: an approximate differentiable renderer. In: Fleet, D., Pajdla, T., Schiele, B., Tuytelaars, T. (eds.) ECCV 2014. LNCS, vol. 8695, pp. 154\u2013169. Springer, Cham (2014). https:\/\/doi.org\/10.1007\/978-3-319-10584-0_11"},{"key":"18_CR35","doi-asserted-by":"crossref","unstructured":"Mahmood, N., Ghorbani, N., F. Troje, N., Pons-Moll, G., Black, M.J.: AMASS: archive of motion capture as surface shapes. In: International Conference on Computer Vision (ICCV), pp. 5441\u20135450 (2019)","DOI":"10.1109\/ICCV.2019.00554"},{"key":"18_CR36","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"614","DOI":"10.1007\/978-3-030-01249-6_37","volume-title":"Computer Vision \u2013 ECCV 2018","author":"T von Marcard","year":"2018","unstructured":"von Marcard, T., Henschel, R., Black, M.J., Rosenhahn, B., Pons-Moll, G.: Recovering accurate 3D human pose in the wild using IMUs and a moving camera. In: Ferrari, V., Hebert, M., Sminchisescu, C., Weiss, Y. (eds.) ECCV 2018. LNCS, vol. 11214, pp. 614\u2013631. Springer, Cham (2018). https:\/\/doi.org\/10.1007\/978-3-030-01249-6_37"},{"key":"18_CR37","doi-asserted-by":"crossref","unstructured":"Mehta, D., et al.: VNect: real-time 3D human pose estimation with a single RGB camera. Trans. Graph. (TOG) 36(4), 44:1\u201344:14 (2017)","DOI":"10.1145\/3072959.3073596"},{"key":"18_CR38","unstructured":"Microsoft: Azure Kinect SDK (K4A) (2022). https:\/\/github.com\/microsoft\/Azure-Kinect-Sensor-SDK"},{"key":"18_CR39","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"483","DOI":"10.1007\/978-3-319-46484-8_29","volume-title":"Computer Vision \u2013 ECCV 2016","author":"A Newell","year":"2016","unstructured":"Newell, A., Yang, K., Deng, J.: Stacked hourglass networks for human pose estimation. In: Leibe, B., Matas, J., Sebe, N., Welling, M. (eds.) ECCV 2016. LNCS, vol. 9912, pp. 483\u2013499. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-46484-8_29"},{"key":"18_CR40","doi-asserted-by":"crossref","unstructured":"Oikonomidis, I., Kyriazis, N., Argyros, A.A.: Full DOF tracking of a hand interacting with an object by modeling occlusions and physical constraints. In: International Conference on Computer Vision (ICCV), pp. 2088\u20132095 (2011)","DOI":"10.1109\/ICCV.2011.6126483"},{"key":"18_CR41","doi-asserted-by":"crossref","unstructured":"Omran, M., Lassner, C., Pons-Moll, G., Gehler, P., Schiele, B.: Neural body fitting: unifying deep learning and model based human pose and shape estimation. In: International Conference on 3D Vision (3DV), pp. 484\u2013494 (2018)","DOI":"10.1109\/3DV.2018.00062"},{"key":"18_CR42","doi-asserted-by":"crossref","unstructured":"Osman, A.A.A., Bolkart, T., Tzionas, D., Black, M.J.: SUPR: a sparse unified part-based human body model. In: European Conference on Computer Vision (ECCV) (2022)","DOI":"10.1007\/978-3-031-20086-1_33"},{"key":"18_CR43","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"598","DOI":"10.1007\/978-3-030-58539-6_36","volume-title":"Computer Vision \u2013 ECCV 2020","author":"AAA Osman","year":"2020","unstructured":"Osman, A.A.A., Bolkart, T., Black, M.J.: STAR: sparse trained articulated human body regressor. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.-M. (eds.) ECCV 2020. LNCS, vol. 12351, pp. 598\u2013613. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58539-6_36"},{"key":"18_CR44","doi-asserted-by":"crossref","unstructured":"Pavlakos, G., et al.: Expressive body capture: 3D hands, face, and body from a single image. In: Computer Vision and Pattern Recognition (CVPR), pp. 10975\u201310985 (2019)","DOI":"10.1109\/CVPR.2019.01123"},{"key":"18_CR45","doi-asserted-by":"crossref","unstructured":"Pons-Moll, G., Baak, A., Helten, T., M\u00fcller, M., Seidel, H.P., Rosenhahn, B.: Multisensor-fusion for 3D full-body human motion capture. In: Computer Vision and Pattern Recognition (CVPR), pp. 663\u2013670 (2010)","DOI":"10.1109\/CVPR.2010.5540153"},{"key":"18_CR46","doi-asserted-by":"crossref","unstructured":"Qiu, H., Wang, C., Wang, J., Wang, N., Zeng, W.: Cross view fusion for 3D human pose estimation. In: International Conference on Computer Vision (ICCV), pp. 4341\u20134350 (2019)","DOI":"10.1109\/ICCV.2019.00444"},{"key":"18_CR47","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"509","DOI":"10.1007\/978-3-319-46454-1_31","volume-title":"Computer Vision \u2013 ECCV 2016","author":"H Rhodin","year":"2016","unstructured":"Rhodin, H., Robertini, N., Casas, D., Richardt, C., Seidel, H.-P., Theobalt, C.: General automatic human shape and motion capture using volumetric contour cues. In: Leibe, B., Matas, J., Sebe, N., Welling, M. (eds.) ECCV 2016. LNCS, vol. 9909, pp. 509\u2013526. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-46454-1_31"},{"key":"18_CR48","doi-asserted-by":"crossref","unstructured":"Rogez, G., III, J.S.S., Ramanan, D.: Understanding everyday hands in action from RGB-D images. In: International Conference on Computer Vision (ICCV), pp. 3889\u20133897 (2015)","DOI":"10.1109\/ICCV.2015.443"},{"key":"18_CR49","doi-asserted-by":"crossref","unstructured":"Romero, J., Kjellstr\u00f6m, H., Kragic, D.: Hands in action: Real-time 3D reconstruction of hands in interaction with objects. In: International Conference on Robotics and Automation (ICRA), pp. 458\u2013463 (2010)","DOI":"10.1109\/ROBOT.2010.5509753"},{"key":"18_CR50","doi-asserted-by":"crossref","unstructured":"Romero, J., Tzionas, D., Black, M.J.: Embodied hands: Modeling and capturing hands and bodies together. Trans. Graph. (TOG) 36(6), 245:1\u2013245:17 (2017)","DOI":"10.1145\/3130800.3130883"},{"key":"18_CR51","doi-asserted-by":"crossref","unstructured":"Savva, M., Chang, A.X., Hanrahan, P., Fisher, M., Nie\u00dfner, M.: PiGraphs: Learning interaction snapshots from observations. Trans. Graph. (TOG) 35(4), 139:1\u2013139:12 (2016)","DOI":"10.1145\/2897824.2925867"},{"issue":"1\u20132","key":"18_CR52","doi-asserted-by":"publisher","first-page":"4","DOI":"10.1007\/s11263-009-0273-6","volume":"87","author":"L Sigal","year":"2010","unstructured":"Sigal, L., Balan, A., Black, M.J.: HumanEva: Synchronized video and motion capture dataset and baseline algorithm for evaluation of articulated human motion. Int. J. Comput. Vision (IJCV) 87(1\u20132), 4\u201327 (2010)","journal-title":"Int. J. Comput. Vision (IJCV)"},{"key":"18_CR53","doi-asserted-by":"crossref","unstructured":"Sun, J., Wang, Z., Zhang, S., He, X., Zhao, H., Zhang, G., Zhou, X.: OnePose: one-shot object pose estimation without CAD models. In: CVPR, pp. 6825\u20136834 (2022)","DOI":"10.1109\/CVPR52688.2022.00670"},{"key":"18_CR54","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"581","DOI":"10.1007\/978-3-030-58548-8_34","volume-title":"Computer Vision \u2013 ECCV 2020","author":"O Taheri","year":"2020","unstructured":"Taheri, O., Ghorbani, N., Black, M.J., Tzionas, D.: GRAB: a dataset of whole-body human grasping of objects. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.-M. (eds.) ECCV 2020. LNCS, vol. 12349, pp. 581\u2013600. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58548-8_34"},{"key":"18_CR55","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"197","DOI":"10.1007\/978-3-030-58452-8_12","volume-title":"Computer Vision \u2013 ECCV 2020","author":"H Tu","year":"2020","unstructured":"Tu, H., Wang, C., Zeng, W.: VoxelPose: towards multi-camera 3d human pose estimation in wild environment. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.-M. (eds.) ECCV 2020. LNCS, vol. 12346, pp. 197\u2013212. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58452-8_12"},{"issue":"2","key":"18_CR56","doi-asserted-by":"publisher","first-page":"172","DOI":"10.1007\/s11263-016-0895-4","volume":"118","author":"D Tzionas","year":"2016","unstructured":"Tzionas, D., Ballan, L., Srikantha, A., Aponte, P., Pollefeys, M., Gall, J.: Capturing hands in action using discriminative salient points and physics simulation. Int. J. Comput. Vis. (IJCV) 118(2), 172\u2013193 (2016)","journal-title":"Int. J. Comput. Vis. (IJCV)"},{"issue":"6","key":"18_CR57","doi-asserted-by":"publisher","first-page":"1510","DOI":"10.1109\/TPAMI.2017.2712608","volume":"40","author":"G Varol","year":"2017","unstructured":"Varol, G., Laptev, I., Schmid, C.: Long-term temporal convolutions for action recognition. Trans. Pattern Anal. Mach. Intell. (TPAMI) 40(6), 1510\u20131517 (2017)","journal-title":"Trans. Pattern Anal. Mach. Intell. (TPAMI)"},{"key":"18_CR58","doi-asserted-by":"crossref","unstructured":"Wei, S.E., Ramakrishna, V., Kanade, T., Sheikh, Y.: Convolutional pose machines. In: Computer Vision and Pattern Recognition (CVPR), pp. 4724\u20134732 (2016)","DOI":"10.1109\/CVPR.2016.511"},{"key":"18_CR59","doi-asserted-by":"crossref","unstructured":"Xu, H., Bazavan, E.G., Zanfir, A., Freeman, W.T., Sukthankar, R., Sminchisescu, C.: GHUM & GHUML: generative 3D human shape and articulated pose models. In: Computer Vision and Pattern Recognition (CVPR), pp. 6183\u20136192 (2020)","DOI":"10.1109\/CVPR42600.2020.00622"},{"issue":"2","key":"18_CR60","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3181973","volume":"37","author":"W Xu","year":"2018","unstructured":"Xu, W., Chatterjee, A., Zollh\u00f6fer, M., Rhodin, H., Mehta, D., Seidel, H.P., Theobalt, C.: MonoPerfCap: human performance capture from monocular video. Trans. Graph. (TOG) 37(2), 1\u201315 (2018)","journal-title":"Trans. Graph. (TOG)"},{"key":"18_CR61","doi-asserted-by":"crossref","unstructured":"Yao, B., Fei-Fei, L.: Modeling mutual context of object and human pose in human-object interaction activities. In: Computer Vision and Pattern Recognition (CVPR), pp. 17\u201324 (2010)","DOI":"10.1109\/CVPR.2010.5540235"},{"key":"18_CR62","doi-asserted-by":"crossref","unstructured":"Yi, H., et al.: Human-aware object placement for visual environment reconstruction. In: Computer Vision and Pattern Recognition (CVPR), pp. 3959\u20133970 (2022)","DOI":"10.1109\/CVPR52688.2022.00393"},{"key":"18_CR63","doi-asserted-by":"crossref","unstructured":"Zhang, J.Y., Pepose, S., Joo, H., Ramanan, D., Malik, J., Kanazawa, A.: Perceiving 3D human-object spatial arrangements from a single image in the wild. In: European Conference on Computer Vision (ECCV) (2020)","DOI":"10.1007\/978-3-030-58610-2_3"},{"key":"18_CR64","doi-asserted-by":"crossref","unstructured":"Zhang, S., Zhang, Y., Bogo, F., Pollefeys, M., Tang, S.: Learning motion priors for 4D human body capture in 3D scenes. In: Computer Vision and Pattern Recognition (CVPR), pp. 11323\u201311333 (2021)","DOI":"10.1109\/ICCV48922.2021.01115"},{"key":"18_CR65","doi-asserted-by":"crossref","unstructured":"Zhang, Y., Hassan, M., Neumann, H., Black, M.J., Tang, S.: Generating 3D people in scenes without people. In: Computer Vision and Pattern Recognition (CVPR), pp. 6193\u20136203 (2020)","DOI":"10.1109\/CVPR42600.2020.00623"},{"key":"18_CR66","doi-asserted-by":"crossref","unstructured":"Zhang, Y., An, L., Yu, T., Li, X., Li, K., Liu, Y.: 4D association graph for realtime multi-person motion capture using multiple video cameras. In: Computer Vision and Pattern Recognition (CVPR), pp. 1321\u20131330 (2020)","DOI":"10.1109\/CVPR42600.2020.00140"},{"key":"18_CR67","doi-asserted-by":"crossref","unstructured":"Zhang, Y., Li, Z., An, L., Li, M., Yu, T., Liu, Y.: Light-weight multi-person total capture using sparse multi-view cameras. In: International Conference on Computer Vision (ICCV), pp. 5560\u20135569 (2021)","DOI":"10.1109\/ICCV48922.2021.00551"},{"issue":"2","key":"18_CR68","doi-asserted-by":"publisher","first-page":"625","DOI":"10.1111\/cgf.13386","volume":"37","author":"M Zollh\u00f6fer","year":"2018","unstructured":"Zollh\u00f6fer, M.: State of the art on 3D reconstruction with RGB-D cameras. Comput. Graph. Forum (CGF) 37(2), 625\u2013652 (2018)","journal-title":"Comput. Graph. Forum (CGF)"}],"container-title":["Lecture Notes in Computer Science","Pattern Recognition"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-16788-1_18","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,2,20]],"date-time":"2023-02-20T03:50:28Z","timestamp":1676865028000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-16788-1_18"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022]]},"ISBN":["9783031167874","9783031167881"],"references-count":68,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-16788-1_18","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2022]]},"assertion":[{"value":"20 September 2022","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"Disclosure:.","order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of Interest"}},{"value":"DAGM GCPR","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"DAGM German Conference on Pattern Recognition","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Konstanz","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Germany","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2022","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"27 September 2022","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"30 September 2022","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"44","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"dagm2022","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/gcpr-vmv-2022.uni-konstanz.de\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"CMT","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"78","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"37","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"47% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"2.6","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"No","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}