{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,7,12]],"date-time":"2026-07-12T02:32:40Z","timestamp":1783823560688,"version":"3.55.0"},"publisher-location":"Cham","reference-count":50,"publisher":"Springer International Publishing","isbn-type":[{"value":"9783030012489","type":"print"},{"value":"9783030012496","type":"electronic"}],"license":[{"start":{"date-parts":[[2018,1,1]],"date-time":"2018-01-01T00:00:00Z","timestamp":1514764800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2018,1,1]],"date-time":"2018-01-01T00:00:00Z","timestamp":1514764800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2018]]},"DOI":"10.1007\/978-3-030-01249-6_37","type":"book-chapter","created":{"date-parts":[[2018,10,5]],"date-time":"2018-10-05T15:35:46Z","timestamp":1538753746000},"page":"614-631","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":754,"title":["Recovering Accurate 3D Human Pose in the Wild Using IMUs and a Moving Camera"],"prefix":"10.1007","author":[{"given":"Timo","family":"von Marcard","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Roberto","family":"Henschel","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Michael J.","family":"Black","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Bodo","family":"Rosenhahn","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Gerard","family":"Pons-Moll","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"297","published-online":{"date-parts":[[2018,10,6]]},"reference":[{"key":"37_CR1","doi-asserted-by":"crossref","unstructured":"Andriluka, M., Roth, S., Schiele, B.: Monocular 3D pose estimation and tracking by detection. In: The IEEE Conference on Computer Vision and Pattern Recognition (CVPR), pp. 623\u2013630 (2010)","DOI":"10.1109\/CVPR.2010.5540156"},{"key":"37_CR2","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"561","DOI":"10.1007\/978-3-319-46454-1_34","volume-title":"Computer Vision \u2013 ECCV 2016","author":"F Bogo","year":"2016","unstructured":"Bogo, F., Kanazawa, A., Lassner, C., Gehler, P., Romero, J., Black, M.J.: Keep it SMPL: automatic estimation of 3D human pose and shape from a single image. In: Leibe, B., Matas, J., Sebe, N., Welling, M. (eds.) ECCV 2016. LNCS, vol. 9909, pp. 561\u2013578. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-46454-1_34"},{"issue":"Oct","key":"37_CR3","first-page":"2879","volume":"12","author":"AD Bull","year":"2011","unstructured":"Bull, A.D.: Convergence rates of efficient global optimization algorithms. J. Mach. Learn. Res. 12(Oct), 2879\u20132904 (2011)","journal-title":"J. Mach. Learn. Res."},{"key":"37_CR4","doi-asserted-by":"crossref","unstructured":"Cao, Z., Simon, T., Wei, S.E., Sheikh, Y.: Realtime multi-person 2D pose estimation using part affinity fields. In: The IEEE Conference on Computer Vision and Pattern Recognition (CVPR) (2017)","DOI":"10.1109\/CVPR.2017.143"},{"key":"37_CR5","unstructured":"Gurobi Optimization Inc.: Gurobi Optimizer Reference Manual (2016)"},{"key":"37_CR6","doi-asserted-by":"crossref","unstructured":"Helten, T., Baak, A., Bharaj, G., Muller, M., Seidel, H.P., Theobalt, C.: Personalization and evaluation of a real-time depth-based full body tracker. In: 3D Vision (3DV) (2013)","DOI":"10.1109\/3DV.2013.44"},{"key":"37_CR7","doi-asserted-by":"crossref","unstructured":"Henschel, R., Leal-Taix\u00e9, L., Cremers, D., Rosenhahn, B.: Fusion of head and full-body detectors for multi-object tracking. In: Computer Vision and Pattern Recognition Workshops (CVPRW) (2018)","DOI":"10.1109\/CVPRW.2018.00192"},{"issue":"7","key":"37_CR8","doi-asserted-by":"publisher","first-page":"1325","DOI":"10.1109\/TPAMI.2013.248","volume":"36","author":"C Ionescu","year":"2014","unstructured":"Ionescu, C., Papava, D., Olaru, V., Sminchisescu, C.: Human3.6M: large scale datasets and predictive methods for 3D human sensing in natural environments. IEEE Trans. Pattern Anal. Mach. Intell. (TPAMI) 36(7), 1325\u20131339 (2014)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell. (TPAMI)"},{"key":"37_CR9","doi-asserted-by":"crossref","unstructured":"Jahangiri, E., Yuille, A.L.: Generating multiple diverse hypotheses for human 3D pose consistent with 2D joint detections. In: IEEE International Conference on Computer Vision (ICCV) Workshops (PeopleCap) (2017)","DOI":"10.1109\/ICCVW.2017.100"},{"key":"37_CR10","doi-asserted-by":"crossref","unstructured":"Kanazawa, A., Black, M.J., Jacobs, D.W., Malik, J.: End-to-end recovery of human shape and pose. In: The IEEE Conference on Computer Vision and Pattern Recognition (CVPR) (2018)","DOI":"10.1109\/CVPR.2018.00744"},{"key":"37_CR11","doi-asserted-by":"crossref","unstructured":"Lassner, C., Romero, J., Kiefel, M., Bogo, F., Black, M.J., Gehler, P.V.: Unite the people: closing the loop between 3D and 2D human representations. In: The IEEE Conference on Computer Vision and Pattern Recognition (CVPR), vol. 2 (2017)","DOI":"10.1109\/CVPR.2017.500"},{"key":"37_CR12","doi-asserted-by":"crossref","unstructured":"Levinkov, E., et al.: Joint graph decomposition & node labeling: problem, algorithms, applications. In: CVPR, vol. 7. IEEE (2017)","DOI":"10.1109\/CVPR.2017.206"},{"key":"37_CR13","doi-asserted-by":"crossref","unstructured":"Li, S., Zhang, W., Chan, A.B.: Maximum-margin structured learning with deep networks for 3D human pose estimation. In: IEEE International Conference on Computer Vision (ICCV), pp. 2848\u20132856 (2015)","DOI":"10.1109\/ICCV.2015.326"},{"issue":"6","key":"37_CR14","doi-asserted-by":"publisher","first-page":"248:1","DOI":"10.1145\/2816795.2818013","volume":"34","author":"M Loper","year":"2015","unstructured":"Loper, M., Mahmood, N., Romero, J., Pons-Moll, G., Black, M.J.: SMPL: a skinned multi-person linear model. ACM Trans. Graph. 34(6), 248:1\u2013248:16 (2015)","journal-title":"ACM Trans. Graph."},{"issue":"6","key":"37_CR15","first-page":"220:1","volume":"33","author":"MM Loper","year":"2014","unstructured":"Loper, M.M., Mahmood, N., Black, M.J.: MoSh: motion and shape capture from sparse markers. ACM Trans. Graph. (Proc. SIGGRAPH Asia) 33(6), 220:1\u2013220:13 (2014)","journal-title":"ACM Trans. Graph. (Proc. SIGGRAPH Asia)"},{"key":"37_CR16","doi-asserted-by":"crossref","unstructured":"Malleson, C., Volino, M., Gilbert, A., Trumble, M., Collomosse, J., Hilton, A.: Real-time full-body motion capture from video and IMUs. In: 2017 Fifth International Conference on 3D Vision (3DV) (2017)","DOI":"10.1109\/3DV.2017.00058"},{"issue":"8","key":"37_CR17","doi-asserted-by":"publisher","first-page":"1533","DOI":"10.1109\/TPAMI.2016.2522398","volume":"38","author":"T von Marcard","year":"2016","unstructured":"von Marcard, T., Pons-Moll, G., Rosenhahn, B.: Human pose estimation from video and IMUs. IEEE Trans. Pattern Anal. Mach. Intell. (TPAMI) 38(8), 1533\u20131547 (2016)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell. (TPAMI)"},{"key":"37_CR18","doi-asserted-by":"crossref","unstructured":"Martinez, J., Hossain, R., Romero, J., Little, J.J.: A simple yet effective baseline for 3D human pose estimation. In: IEEE International Conference on Computer Vision (ICCV) (2017)","DOI":"10.1109\/ICCV.2017.288"},{"key":"37_CR19","doi-asserted-by":"crossref","unstructured":"Mehta, D., et al.: Monocular 3D human pose estimation in the wild using improved CNN supervision. In: 3D Vision (3DV). IEEE (2017)","DOI":"10.1109\/3DV.2017.00064"},{"key":"37_CR20","doi-asserted-by":"crossref","unstructured":"Mehta, D., et al.: Single-shot multi-person 3D body pose estimation from monocular RGB input. arXiv preprint arXiv:1712.03453 (2017)","DOI":"10.1109\/3DV.2018.00024"},{"issue":"4","key":"37_CR21","doi-asserted-by":"publisher","first-page":"44","DOI":"10.1145\/3072959.3073596","volume":"36","author":"D Mehta","year":"2017","unstructured":"Mehta, D., et al.: VNect: real-time 3D human pose estimation with a single RGB camera. ACM Trans. Graph. (TOG) 36(4), 44 (2017)","journal-title":"ACM Trans. Graph. (TOG)"},{"key":"37_CR22","doi-asserted-by":"crossref","unstructured":"Pavlakos, G., Zhu, L., Zhou, X., Daniilidis, K.: Learning to estimate 3D human pose and shape from a single color image. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (2018)","DOI":"10.1109\/CVPR.2018.00055"},{"key":"37_CR23","doi-asserted-by":"crossref","unstructured":"Pishchulin, L., et al.: DeepCut: joint subset partition and labeling for multi person pose estimation. In: The IEEE Conference on Computer Vision and Pattern Recognition (CVPR) (2016)","DOI":"10.1109\/CVPR.2016.533"},{"key":"37_CR24","doi-asserted-by":"crossref","unstructured":"Pons-Moll, G., et al.: Outdoor human motion capture using inverse kinematics and von mises-fisher sampling. In: Proceedings of the 2011 International Conference on Computer Vision (ICCV), pp. 1243\u20131250 (2011)","DOI":"10.1109\/ICCV.2011.6126375"},{"key":"37_CR25","doi-asserted-by":"crossref","unstructured":"Pons-Moll, G., Baak, A., Helten, T., M\u00fcller, M., Seidel, H.P., Rosenhahn, B.: Multisensor-fusion for 3D full-body human motion capture. In: The IEEE Conference on Computer Vision and Pattern Recognition (CVPR), pp. 663\u2013670 (2010)","DOI":"10.1109\/CVPR.2010.5540153"},{"key":"37_CR26","doi-asserted-by":"crossref","unstructured":"Pons-Moll, G., Fleet, D.J., Rosenhahn, B.: Posebits for monocular human pose estimation. In: IEEE Conference on Computer Vision and Pattern Recognition (CVPR), pp. 2337\u20132344 (2014)","DOI":"10.1109\/CVPR.2014.300"},{"issue":"4","key":"37_CR27","doi-asserted-by":"publisher","first-page":"73","DOI":"10.1145\/3072959.3073711","volume":"36","author":"G Pons-Moll","year":"2017","unstructured":"Pons-Moll, G., Pujades, S., Hu, S., Black, M.: ClothCap: seamless 4D clothing capture and retargeting. ACM Trans. Graph. (Proc. SIGGRAPH) 36(4), 73 (2017)","journal-title":"ACM Trans. Graph. (Proc. SIGGRAPH)"},{"key":"37_CR28","doi-asserted-by":"crossref","unstructured":"Popa, A.I., Zanfir, M., Sminchisescu, C.: Deep multitask architecture for integrated 2D and 3D human sensing. In: The IEEE Conference on Computer Vision and Pattern Recognition (CVPR) (2017)","DOI":"10.1109\/CVPR.2017.501"},{"key":"37_CR29","doi-asserted-by":"crossref","unstructured":"Rhodin, H., et al.: Learning monocular 3D human pose estimation from multi-view images. In: CVPR (2018)","DOI":"10.1109\/CVPR.2018.00880"},{"key":"37_CR30","unstructured":"Roetenberg, D., Luinge, H., Slycke, P.: Moven: full 6DOF human motion tracking using miniature inertial sensors. Xsen Technologies, December 2007"},{"key":"37_CR31","doi-asserted-by":"crossref","unstructured":"Rogez, G., Weinzaepfel, P., Schmid, C.: LCR-Net++: multi-person 2D and 3D pose detection in natural images. arXiv preprint arXiv:1803.00455 (2018)","DOI":"10.1109\/TPAMI.2019.2892985"},{"issue":"1\u20132","key":"37_CR32","doi-asserted-by":"publisher","first-page":"4","DOI":"10.1007\/s11263-009-0273-6","volume":"87","author":"L Sigal","year":"2010","unstructured":"Sigal, L., Balan, A.O., Black, M.J.: Humaneva: synchronized video and motion capture dataset and baseline algorithm for evaluation of articulated human motion. Int. J. Comput. Vis. (IJCV) 87(1\u20132), 4 (2010)","journal-title":"Int. J. Comput. Vis. (IJCV)"},{"key":"37_CR33","doi-asserted-by":"crossref","unstructured":"Simo-Serra, E., Quattoni, A., Torras, C., Moreno-Noguer, F.: A joint model for 2D and 3D pose estimation from a single image. In: Conference on Computer Vision and Pattern Recognition (CVPR), pp. 3634\u20133641 (2013)","DOI":"10.1109\/CVPR.2013.466"},{"key":"37_CR34","doi-asserted-by":"crossref","unstructured":"Simo-Serra, E., Ramisa, A., Aleny\u00e0, G., Torras, C., Moreno-Noguer, F.: Single image 3D human pose estimation from noisy observations. In: The IEEE Conference on Computer Vision and Pattern Recognition (CVPR), pp. 2673\u20132680 (2012)","DOI":"10.1109\/CVPR.2012.6247988"},{"key":"37_CR35","unstructured":"Sminchisescu, C., Triggs, B.: Kinematic jump processes for monocular 3D human tracking. In: The IEEE Conference on Computer Vision and Pattern Recognition (CVPR) (2003)"},{"key":"37_CR36","doi-asserted-by":"crossref","unstructured":"Sun, X., Shang, J., Liang, S., Wei, Y.: Compositional human pose regression. arXiv preprint arXiv:1704.00159 (2017)","DOI":"10.1109\/ICCV.2017.284"},{"key":"37_CR37","doi-asserted-by":"crossref","unstructured":"Tang, S., Andres, B., Andriluka, M., Schiele, B.: Subgraph decomposition for multi-target tracking. In: The IEEE Conference on Computer Vision and Pattern Recognition (CVPR), pp. 5033\u20135041 (2015)","DOI":"10.1109\/CVPR.2015.7299138"},{"key":"37_CR38","doi-asserted-by":"crossref","unstructured":"Tome, D., Russell, C., Agapito, L.: Lifting from the deep: convolutional 3D pose estimation from a single image. In: The IEEE Conference on Computer Vision and Pattern Recognition (CVPR) (2017)","DOI":"10.1109\/CVPR.2017.603"},{"key":"37_CR39","doi-asserted-by":"crossref","unstructured":"Trumble, M., Gilbert, A., Malleson, C., Hilton, A., Collomosse, J.: Total capture: 3D human pose estimation fusing video and inertial sensors. In: Proceedings of 28th British Machine Vision Conference, pp. 1\u201313 (2017)","DOI":"10.5244\/C.31.14"},{"key":"37_CR40","unstructured":"Tung, H.Y., Tung, H.W., Yumer, E., Fragkiadaki, K.: Self-supervised learning of motion capture. In: NIPS (2017)"},{"issue":"3","key":"37_CR41","doi-asserted-by":"publisher","first-page":"35","DOI":"10.1145\/1276377.1276421","volume":"26","author":"D Vlasic","year":"2007","unstructured":"Vlasic, D., et al.: Practical motion capture in everyday surroundings. ACM Trans. Graph. (TOG) 26(3), 35 (2007)","journal-title":"ACM Trans. Graph. (TOG)"},{"key":"37_CR42","doi-asserted-by":"crossref","unstructured":"von Marcard, T., Rosenhahn, B., Black, M., Pons-Moll, G.: Sparse inertial poser: automatic 3D human pose estimation from sparse IMUs. In: Computer Graphics Forum, Proceedings of the 38th Annual Conference of the European Association for Computer Graphics (Eurographics), vol. 36, no. 2, pp. 349\u2013360 (2017)","DOI":"10.1111\/cgf.13131"},{"issue":"8","key":"37_CR43","doi-asserted-by":"publisher","first-page":"1505","DOI":"10.1109\/TPAMI.2016.2553028","volume":"38","author":"B Wandt","year":"2016","unstructured":"Wandt, B., Ackermann, H., Rosenhahn, B.: 3D reconstruction of human motion from monocular image sequences. Trans. Pattern Anal. Mach. Intell. (TPAMI) 38(8), 1505\u20131516 (2016)","journal-title":"Trans. Pattern Anal. Mach. Intell. (TPAMI)"},{"key":"37_CR44","doi-asserted-by":"crossref","unstructured":"Wang, C., Wang, Y., Lin, Z., Yuille, A.L., Gao, W.: Robust estimation of 3D human poses from a single image. In: IEEE Conference on Computer Vision and Pattern Recognition (CVPR), pp. 2361\u20132368 (2014)","DOI":"10.1109\/CVPR.2014.303"},{"key":"37_CR45","doi-asserted-by":"crossref","unstructured":"Zell, P., Wandt, B., Rosenhahn, B.: Joint 3D human motion capture and physical analysis from monocular videos. In: The IEEE Conference on Computer Vision and Pattern Recognition Workshops (CVPRW) (2017)","DOI":"10.1109\/CVPRW.2017.9"},{"key":"37_CR46","doi-asserted-by":"crossref","unstructured":"Zhang, C., Pujades, S., Black, M., Pons-Moll, G.: Detailed, accurate, human shape estimation from clothed 3D scan sequences. In: IEEE Conference on Computer Vision and Pattern Recognition (CVPR) (2017)","DOI":"10.1109\/CVPR.2017.582"},{"key":"37_CR47","doi-asserted-by":"crossref","unstructured":"Zheng, Z., et al.: HybridFusion: real-time performance capture using a single depth sensor and sparse IMUs. In: European Conference on Computer Vision (ECCV) (2018)","DOI":"10.1007\/978-3-030-01240-3_24"},{"key":"37_CR48","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"62","DOI":"10.1007\/978-3-319-10599-4_5","volume-title":"Computer Vision \u2013 ECCV 2014","author":"F Zhou","year":"2014","unstructured":"Zhou, F., De la Torre, F.: Spatio-temporal matching for human detection in video. In: Fleet, D., Pajdla, T., Schiele, B., Tuytelaars, T. (eds.) ECCV 2014. LNCS, vol. 8694, pp. 62\u201377. Springer, Cham (2014). https:\/\/doi.org\/10.1007\/978-3-319-10599-4_5"},{"key":"37_CR49","doi-asserted-by":"crossref","unstructured":"Zhou, X., Leonardos, S., Hu, X., Daniilidis, K.: 3D shape estimation from 2D landmarks: a convex relaxation approach. In: IEEE Conference on Computer Vision and Pattern Recognition (CVPR), pp. 4447\u20134455 (2015)","DOI":"10.1109\/CVPR.2015.7299074"},{"key":"37_CR50","doi-asserted-by":"crossref","unstructured":"Zhou, X., Huang, Q., Sun, X., Xue, X., Wei, Y.: Towards 3D human pose estimation in the wild: a weakly-supervised approach. In: The IEEE Conference on Computer Vision and Pattern Recognition (CVPR), pp. 398\u2013407 (2017)","DOI":"10.1109\/ICCV.2017.51"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2018"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-01249-6_37","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,10,5]],"date-time":"2022-10-05T00:58:17Z","timestamp":1664931497000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-030-01249-6_37"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018]]},"ISBN":["9783030012489","9783030012496"],"references-count":50,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-01249-6_37","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2018]]},"assertion":[{"value":"6 October 2018","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Munich","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Germany","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2018","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"8 September 2018","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"14 September 2018","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"15","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2018","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2018.org\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"This content has been made available to all.","name":"free","label":"Free to read"}]}}