{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,25]],"date-time":"2026-01-25T03:08:11Z","timestamp":1769310491752,"version":"3.49.0"},"publisher-location":"Cham","reference-count":47,"publisher":"Springer International Publishing","isbn-type":[{"value":"9783319464923","type":"print"},{"value":"9783319464930","type":"electronic"}],"license":[{"start":{"date-parts":[[2016,1,1]],"date-time":"2016-01-01T00:00:00Z","timestamp":1451606400000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2016,1,1]],"date-time":"2016-01-01T00:00:00Z","timestamp":1451606400000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2016]]},"DOI":"10.1007\/978-3-319-46493-0_2","type":"book-chapter","created":{"date-parts":[[2016,9,16]],"date-time":"2016-09-16T14:59:53Z","timestamp":1474037993000},"page":"20-36","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":53,"title":["Marker-Less 3D Human Motion Capture with Monocular Image Sequence and Height-Maps"],"prefix":"10.1007","author":[{"given":"Yu","family":"Du","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yongkang","family":"Wong","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yonghao","family":"Liu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Feilin","family":"Han","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yilin","family":"Gui","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zhen","family":"Wang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Mohan","family":"Kankanhalli","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Weidong","family":"Geng","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2016,9,17]]},"reference":[{"key":"2_CR1","unstructured":"United Nations, Department of Economic, Social Affairs, Population Division: World population ageing 2013 (2013). ST\/SEA\/SER.A\/348"},{"key":"2_CR2","unstructured":"Chen, X., Yuille, A.L.: Articulated pose estimation by a graphical model with image dependent pairwise relations. In: NIPS, pp. 1736\u20131744 (2014)"},{"key":"2_CR3","doi-asserted-by":"crossref","unstructured":"Wandt, B., Ackermann, H., Rosenhahn, B.: 3D human motion capture from monocular image sequences. In: CVPR Workshops, pp. 1\u20138 (2015)","DOI":"10.1109\/CVPRW.2015.7301286"},{"key":"2_CR4","doi-asserted-by":"crossref","unstructured":"Andriluka, M., Roth, S., Schiele, B.: Monocular 3D pose estimation and tracking by detection. In: CVPR, pp. 623\u2013630 (2010)","DOI":"10.1109\/CVPR.2010.5540156"},{"key":"2_CR5","doi-asserted-by":"crossref","unstructured":"Wang, C., Wang, Y., Lin, Z., Yuille, A.L., Gao, W.: Robust estimation of 3D human poses from a single image. In: CVPR, pp. 2369\u20132376 (2014)","DOI":"10.1109\/CVPR.2014.303"},{"issue":"1","key":"2_CR6","doi-asserted-by":"publisher","first-page":"103","DOI":"10.1007\/s11263-011-0451-1","volume":"96","author":"M Hofmann","year":"2012","unstructured":"Hofmann, M., Gavrila, D.M.: Multi-view 3D human pose estimation in complex environment. Int. J. Comput. Vis. 96(1), 103\u2013124 (2012)","journal-title":"Int. J. Comput. Vis."},{"key":"2_CR7","doi-asserted-by":"crossref","unstructured":"Hasler, N., Rosenhahn, B., Thorm\u00e4hlen, T., Wand, M., Gall, J., Seidel, H.: Markerless motion capture with unsynchronized moving cameras. In: CVPR, pp. 224\u2013231 (2009)","DOI":"10.1109\/CVPR.2009.5206859"},{"key":"2_CR8","doi-asserted-by":"crossref","unstructured":"Elhayek, A., de Aguiar, E., Jain, A., Tompson, J., Pishchulin, L., Andriluka, M., Bregler, C., Schiele, B., Theobalt, C.: Efficient ConvNet-based marker-less motion capture in general scenes with a low number of cameras. In: CVPR, pp. 3810\u20133818 (2015)","DOI":"10.1109\/CVPR.2015.7299005"},{"key":"2_CR9","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"573","DOI":"10.1007\/978-3-642-33765-9_41","volume-title":"Computer Vision \u2013 ECCV 2012","author":"V Ramakrishna","year":"2012","unstructured":"Ramakrishna, V., Kanade, T., Sheikh, Y.: Reconstructing 3D human pose from 2D image landmarks. In: Fitzgibbon, A., Lazebnik, S., Perona, P., Sato, Y., Schmid, C. (eds.) ECCV 2012. LNCS, vol. 7575, pp. 573\u2013586. Springer, Heidelberg (2012). doi:\n                      10.1007\/978-3-642-33765-9_41"},{"key":"2_CR10","doi-asserted-by":"crossref","unstructured":"Simo-Serra, E., Ramisa, A., Aleny\u00e0, G., Torras, C., Moreno-Noguer, F.: Single image 3D human pose estimation from noisy observations. In: CVPR, pp. 2673\u20132680 (2012)","DOI":"10.1109\/CVPR.2012.6247988"},{"key":"2_CR11","doi-asserted-by":"crossref","unstructured":"Sridhar, S., Oulasvirta, A., Theobalt, C.: Interactive markerless articulated hand motion tracking using RGB and depth data. In: ICCV, pp. 2456\u20132463 (2013)","DOI":"10.1109\/ICCV.2013.305"},{"key":"2_CR12","doi-asserted-by":"crossref","unstructured":"Gupta, S., Arbelaez, P., Girshick, R., Malik, J.: Aligning 3D models to RGB-D images of cluttered scenes. In: CVPR, pp. 4731\u20134740 (2015)","DOI":"10.1109\/CVPR.2015.7299105"},{"issue":"1","key":"2_CR13","doi-asserted-by":"publisher","first-page":"67","DOI":"10.1109\/T-C.1973.223602","volume":"22","author":"MA Fischler","year":"1973","unstructured":"Fischler, M.A., Elschlager, R.A.: The representation and matching of pictorial structures. IEEE Trans. Comput. 22(1), 67\u201392 (1973)","journal-title":"IEEE Trans. Comput."},{"issue":"1","key":"2_CR14","doi-asserted-by":"publisher","first-page":"55","DOI":"10.1023\/B:VISI.0000042934.15159.49","volume":"61","author":"PF Felzenszwalb","year":"2005","unstructured":"Felzenszwalb, P.F., Huttenlocher, D.P.: Pictorial structures for object recognition. Int. J. Comput. Vis. 61(1), 55\u201379 (2005)","journal-title":"Int. J. Comput. Vis."},{"key":"2_CR15","doi-asserted-by":"crossref","unstructured":"Andriluka, M., Roth, S., Schiele, B.: Pictorial structures revisited: people detection and articulated pose estimation. In: CVPR, pp. 1014\u20131021 (2009)","DOI":"10.1109\/CVPR.2009.5206754"},{"issue":"12","key":"2_CR16","doi-asserted-by":"publisher","first-page":"2878","DOI":"10.1109\/TPAMI.2012.261","volume":"35","author":"Y Yang","year":"2013","unstructured":"Yang, Y., Ramanan, D.: Articulated human detection with flexible mixtures of parts. IEEE Trans. Pattern Anal. Mach. Intell. 35(12), 2878\u20132890 (2013)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"issue":"1","key":"2_CR17","doi-asserted-by":"publisher","first-page":"116","DOI":"10.1145\/2398356.2398381","volume":"56","author":"J Shotton","year":"2013","unstructured":"Shotton, J., Sharp, T., Kipman, A., Fitzgibbon, A., Finocchio, M., Blake, A., Cook, M., Moore, R.: Real-time human pose recognition in parts from single depth images. Commun. ACM 56(1), 116\u2013124 (2013)","journal-title":"Commun. ACM"},{"key":"2_CR18","doi-asserted-by":"crossref","unstructured":"Zhang, D., Shah, M.: Human pose estimation in videos. In: ICCV, pp. 2012\u20132020 (2015)","DOI":"10.1109\/ICCV.2015.233"},{"key":"2_CR19","doi-asserted-by":"crossref","unstructured":"Yasin, H., Iqbal, U., Kr\u00fcger, B., Weber, A., Gall, J.: A dual-source approach for 3D pose estimation from a single image. In: CVPR, pp. 4948\u20134956 (2016)","DOI":"10.1109\/CVPR.2016.535"},{"key":"2_CR20","doi-asserted-by":"crossref","unstructured":"Ionescu, C., Carreira, J., Sminchisescu, C.: Iterated second-order label sensitive pooling for 3D human pose estimation. In: CVPR, pp. 1661\u20131668 (2014)","DOI":"10.1109\/CVPR.2014.215"},{"key":"2_CR21","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"332","DOI":"10.1007\/978-3-319-16808-1_23","volume-title":"Computer Vision \u2013 ACCV 2014","author":"S Li","year":"2015","unstructured":"Li, S., Chan, A.B.: 3D human pose estimation from monocular images with deep convolutional neural network. In: Cremers, D., Reid, I., Saito, H., Yang, M.-H. (eds.) ACCV 2014. LNCS, vol. 9004, pp. 332\u2013347. Springer, Heidelberg (2015). doi:\n                      10.1007\/978-3-319-16808-1_23"},{"key":"2_CR22","doi-asserted-by":"crossref","unstructured":"Simo-Serra, E., Quattoni, A., Torras, C., Moreno-Noguer, F.: A joint model for 2D and 3D pose estimation from a single image. In: CVPR, pp. 3634\u20133641 (2013)","DOI":"10.1109\/CVPR.2013.466"},{"key":"2_CR23","doi-asserted-by":"crossref","unstructured":"Akhter, I., Black, M.J.: Pose-conditioned joint angle limits for 3D human pose reconstruction. In: CVPR, pp. 1446\u20131455 (2015)","DOI":"10.1109\/CVPR.2015.7298751"},{"issue":"8","key":"2_CR24","doi-asserted-by":"publisher","first-page":"1492","DOI":"10.1109\/TPAMI.2016.2526002","volume":"38","author":"F Zhou","year":"2016","unstructured":"Zhou, F., la Torre, F.D.: Spatio-temporal matching for human pose estimation in video. IEEE Trans. Pattern Anal. Mach. Intell. 38(8), 1492\u20131504 (2016)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"2_CR25","doi-asserted-by":"crossref","unstructured":"Toshev, A., Szegedy, C.: DeepPose: human pose estimation via deep neural networks. In: CVPR, pp. 1653\u20131660 (2014)","DOI":"10.1109\/CVPR.2014.214"},{"key":"2_CR26","unstructured":"Tompson, J., Jain, A., LeCun, Y., Bregler, C.: Joint training of a convolutional network and a graphical model for human pose estimation. In: NIPS, pp. 1799\u20131807 (2014)"},{"key":"2_CR27","doi-asserted-by":"crossref","unstructured":"Tompson, J., Goroshin, R., Jain, A., LeCun, Y., Bregler, C.: Efficient object localization using convolutional networks. In: CVPR, pp. 648\u2013656 (2015)","DOI":"10.1109\/CVPR.2015.7298664"},{"key":"2_CR28","doi-asserted-by":"crossref","unstructured":"Li, S., Zhang, W., Chan, A.B.: Maximum-margin structured learning with deep networks for 3D human pose estimation. In: ICCV, pp. 2848\u20132856 (2015)","DOI":"10.1109\/ICCV.2015.326"},{"key":"2_CR29","doi-asserted-by":"crossref","unstructured":"Tekin, B., Rozantsev, A., Lepetit, V., Fua, P.: Direct prediction of 3D body poses from motion compensated sequences. In: CVPR, pp. 991\u20131000 (2016)","DOI":"10.1109\/CVPR.2016.113"},{"key":"2_CR30","doi-asserted-by":"crossref","unstructured":"Kostrikov, I.: Depth sweep regression forests for estimating 3D human pose from images. In: BMVC, pp. 1\u201313 (2014)","DOI":"10.5244\/C.28.80"},{"issue":"12","key":"2_CR31","doi-asserted-by":"publisher","first-page":"5659","DOI":"10.1109\/TIP.2015.2487860","volume":"24","author":"C Hong","year":"2015","unstructured":"Hong, C., Yu, J., Wan, J., Tao, D., Wang, M.: Multimodal deep autoencoder for human pose recovery. IEEE Trans. Image Process. 24(12), 5659\u20135670 (2015)","journal-title":"IEEE Trans. Image Process."},{"key":"2_CR32","doi-asserted-by":"crossref","unstructured":"Zhou, X., Zhu, M., Leonardos, S., Derpanis, K., Daniilidis, K.: Sparseness meets deepness: 3D human pose estimation from monocular video. In: CVPR, pp. 4966\u20134975 (2016)","DOI":"10.1109\/CVPR.2016.537"},{"key":"2_CR33","series-title":"Lecture Notes in Electrical Engineering","doi-asserted-by":"publisher","first-page":"389","DOI":"10.1007\/978-94-007-2911-7_36","volume-title":"Proceedings of the International Conference on IT Convergence and Security 2011","author":"S-W Park","year":"2012","unstructured":"Park, S.-W., Kim, T.-E., Choi, J.-S.: Robust estimation of heights of moving people using a single camera. In: Kim, K.J., Ahn, S.J. (eds.) Proceedings of the International Conference on IT Convergence and Security 2011. LNEE, vol. 120, pp. 389\u2013405. Springer, Heidelberg (2012). doi:\n                      10.1007\/978-94-007-2911-7_36"},{"key":"2_CR34","doi-asserted-by":"crossref","unstructured":"Johnson, S., Everingham, M.: Clustered pose and nonlinear appearance models for human pose estimation. In: BMVC, pp. 1\u201311 (2010)","DOI":"10.5244\/C.24.12"},{"key":"2_CR35","doi-asserted-by":"crossref","unstructured":"Benbakreti, S., Benyettou, M.: Gait recognition based on leg motion and contour of silhouette. In: ICITeS, pp. 1\u20135 (2012)","DOI":"10.1109\/ICITeS.2012.6216626"},{"key":"2_CR36","unstructured":"Srivastava, N., Salakhutdinov, R.R.: Multimodal learning with deep boltzmann machines. In: NIPS, pp. 2222\u20132230 (2012)"},{"key":"2_CR37","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"297","DOI":"10.1007\/978-3-319-10584-0_20","volume-title":"Computer Vision \u2013 ECCV 2014","author":"B Hariharan","year":"2014","unstructured":"Hariharan, B., Arbel\u00e1ez, P., Girshick, R., Malik, J.: Simultaneous detection and segmentation. In: Fleet, D., Pajdla, T., Schiele, B., Tuytelaars, T. (eds.) ECCV 2014. LNCS, vol. 8695, pp. 297\u2013312. Springer, Heidelberg (2014). doi:\n                      10.1007\/978-3-319-10584-0_20"},{"key":"2_CR38","unstructured":"Simonyan, K., Zisserman, A.: Two-stream convolutional networks for action recognition in videos. In: NIPS, pp. 568\u2013576 (2014)"},{"key":"2_CR39","doi-asserted-by":"crossref","unstructured":"Eitel, A., Springenberg, J.T., Spinello, L., Riedmiller, M.A., Burgard, W.: Multimodal deep learning for robust RGB-D object recognition. In: IEEE\/RSJ International Conference on Intelligent Robots and Systems, September 2015, pp. 681\u2013687 (2015)","DOI":"10.1109\/IROS.2015.7353446"},{"key":"2_CR40","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"345","DOI":"10.1007\/978-3-319-10584-0_23","volume-title":"Computer Vision \u2013 ECCV 2014","author":"S Gupta","year":"2014","unstructured":"Gupta, S., Girshick, R., Arbel\u00e1ez, P., Malik, J.: Learning rich features from RGB-D images for object detection and segmentation. In: Fleet, D., Pajdla, T., Schiele, B., Tuytelaars, T. (eds.) ECCV 2014. LNCS, vol. 8695, pp. 345\u2013360. Springer, Heidelberg (2014). doi:\n                      10.1007\/978-3-319-10584-0_23"},{"key":"2_CR41","unstructured":"Li, W., Wong, Y., Liu, A.A., Li, Y., Su, Y.T., Kankanhalli, M.: Multi-camera action dataset (MCAD): a dataset for studying non-overlapped cross-camera action recognition. CoRR abs\/1607.06408 (2016)"},{"key":"2_CR42","doi-asserted-by":"publisher","first-page":"105","DOI":"10.1007\/BFb0067700","volume-title":"Numerical Analysis","author":"JJ Mor\u00e9","year":"1978","unstructured":"Mor\u00e9, J.J.: The levenberg-marquardt algorithm: implementation and theory. In: Watson, G.A. (ed.) Numerical Analysis, pp. 105\u2013116. Springer, Heidelberg (1978)"},{"issue":"2","key":"2_CR43","doi-asserted-by":"publisher","first-page":"155","DOI":"10.1007\/s11263-008-0152-6","volume":"81","author":"V Lepetit","year":"2009","unstructured":"Lepetit, V., Moreno-Noguer, F., Fua, P.: EPnP: an accurate $$ O(n)$$ solution to the PnP problem. Int. J. Comput. Vis. 81(2), 155\u2013166 (2009)","journal-title":"Int. J. Comput. Vis."},{"issue":"7","key":"2_CR44","doi-asserted-by":"publisher","first-page":"1325","DOI":"10.1109\/TPAMI.2013.248","volume":"36","author":"C Ionescu","year":"2014","unstructured":"Ionescu, C., Papava, D., Olaru, V., Sminchisescu, C.: Human3.6M: large scale datasets and predictive methods for 3D human sensing in natural environments. IEEE Trans. Pattern Anal. Mach. Intell. 36(7), 1325\u20131339 (2014)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"issue":"1\u20132","key":"2_CR45","doi-asserted-by":"publisher","first-page":"4","DOI":"10.1007\/s11263-009-0273-6","volume":"87","author":"L Sigal","year":"2010","unstructured":"Sigal, L., Balan, A., Black, M.: HumanEva: synchronized video and motion capture dataset and baseline algorithm for evaluation of articulated human motion. Int. J. Comput. Vis. 87(1\u20132), 4\u201327 (2010)","journal-title":"Int. J. Comput. Vis."},{"key":"2_CR46","unstructured":"Fan, X., Zheng, K., Lin, Y., Wang, S.: Combining local appearance and holistic view: dual-source deep neural networks for human pose estimation. In: CVPR, pp. 1347\u20131355 (2015)"},{"key":"2_CR47","unstructured":"Carnegie Mellon University Motion Capture Database, \n                      http:\/\/mocap.cs.cmu.edu"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2016"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-319-46493-0_2","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2020,10,10]],"date-time":"2020-10-10T01:13:30Z","timestamp":1602292410000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-319-46493-0_2"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2016]]},"ISBN":["9783319464923","9783319464930"],"references-count":47,"URL":"https:\/\/doi.org\/10.1007\/978-3-319-46493-0_2","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2016]]},"assertion":[{"value":"17 September 2016","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Amsterdam","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"The Netherlands","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2016","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"8 October 2016","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"16 October 2016","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"14","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2016","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/www.eccv2016.org\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"This content has been made available to all.","name":"free","label":"Free to read"}]}}