{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,13]],"date-time":"2026-02-13T07:13:10Z","timestamp":1770966790365,"version":"3.50.1"},"publisher-location":"Cham","reference-count":55,"publisher":"Springer International Publishing","isbn-type":[{"value":"9783319168074","type":"print"},{"value":"9783319168081","type":"electronic"}],"license":[{"start":{"date-parts":[[2015,1,1]],"date-time":"2015-01-01T00:00:00Z","timestamp":1420070400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2015,1,1]],"date-time":"2015-01-01T00:00:00Z","timestamp":1420070400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2015]]},"DOI":"10.1007\/978-3-319-16808-1_21","type":"book-chapter","created":{"date-parts":[[2015,4,15]],"date-time":"2015-04-15T09:14:46Z","timestamp":1429089286000},"page":"302-315","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":56,"title":["MoDeep: A Deep Learning Framework Using Motion Features for Human Pose Estimation"],"prefix":"10.1007","author":[{"given":"Arjun","family":"Jain","sequence":"first","affiliation":[]},{"given":"Jonathan","family":"Tompson","sequence":"additional","affiliation":[]},{"given":"Yann","family":"LeCun","sequence":"additional","affiliation":[]},{"given":"Christoph","family":"Bregler","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2015,4,16]]},"reference":[{"key":"21_CR1","doi-asserted-by":"crossref","unstructured":"Sapp, B., Taskar, B.: Modec: multimodal decomposable models for human pose estimation. In: CVPR (2013)","DOI":"10.1109\/CVPR.2013.471"},{"key":"21_CR2","unstructured":"Dalal, N., Triggs, B.: Histograms of oriented gradients for human detection. In: CVPR (2005)"},{"key":"21_CR3","doi-asserted-by":"publisher","first-page":"201","DOI":"10.3758\/BF03212378","volume":"14","author":"G Johansson","year":"1973","unstructured":"Johansson, G.: Visual perception of biological motion and a model for its analysis. Percept. Psychophys. 14, 201\u2013211 (1973)","journal-title":"Percept. Psychophys."},{"key":"21_CR4","doi-asserted-by":"crossref","unstructured":"Ferrari, V., Marin-Jimenez, M., Zisserman, A.: Progressive search space reduction for human pose estimation. In: CVPR (2008)","DOI":"10.1109\/CVPR.2008.4587468"},{"key":"21_CR5","unstructured":"Weiss, D., Sapp, B., Taskar, B.: Sidestepping intractable inference with structured ensemble cascades. In: NIPS (2010)"},{"key":"21_CR6","doi-asserted-by":"crossref","unstructured":"Eichner, M., Ferrari, V.: Better appearance models for pictorial structures. In: BMVC (2009)","DOI":"10.5244\/C.23.3"},{"key":"21_CR7","doi-asserted-by":"crossref","unstructured":"Yang, Y., Ramanan, D.: Articulated pose estimation with flexible mixtures-of-parts. In: CVPR (2011)","DOI":"10.1109\/CVPR.2011.5995741"},{"key":"21_CR8","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"406","DOI":"10.1007\/978-3-642-15552-9_30","volume-title":"Computer Vision \u2013 ECCV 2010","author":"B Sapp","year":"2010","unstructured":"Sapp, B., Toshev, A., Taskar, B.: Cascaded models for articulated pose estimation. In: Daniilidis, K., Maragos, P., Paragios, N. (eds.) ECCV 2010, Part II. LNCS, vol. 6312, pp. 406\u2013420. Springer, Heidelberg (2010)"},{"key":"21_CR9","doi-asserted-by":"publisher","first-page":"5","DOI":"10.1016\/0262-8856(83)90003-3","volume":"1","author":"D Hogg","year":"1983","unstructured":"Hogg, D.: Model-based vision: a program to see a walking person. Image Vis. Comput. 1, 5\u201320 (1983)","journal-title":"Image Vis. Comput."},{"key":"21_CR10","unstructured":"Rehg, J.M., Kanade, T.: Model-based tracking of self-occluding articulated objects. In: Computer Vision (1995)"},{"key":"21_CR11","doi-asserted-by":"crossref","unstructured":"Kakadiaris, I.A., Metaxas, D.: Model-based estimation of 3d human motion with occlusion based on active multi-viewpoint selection. In: CVPR (1996)","DOI":"10.1109\/CVPR.1996.517057"},{"key":"21_CR12","doi-asserted-by":"publisher","first-page":"780","DOI":"10.1109\/34.598236","volume":"19","author":"CR Wren","year":"1997","unstructured":"Wren, C.R., Azarbayejani, A., Darrell, T., Pentland, A.P.: Pfinder: Real-time tracking of the human body. IEEE Trans. Pattern Anal. Mach. Intell. 19, 780\u2013785 (1997)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"21_CR13","unstructured":"Bregler, C., Malik, J.: Tracking people with twists and exponential maps. In: CVPR (1998)"},{"key":"21_CR14","unstructured":"Deutscher, J., Blake, A., Reid, I.: Articulated body motion capture by annealed particle filtering. In: CVPR (2000)"},{"key":"21_CR15","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"702","DOI":"10.1007\/3-540-45053-X_45","volume-title":"Computer Vision - ECCV 2000","author":"H Sidenbladh","year":"2000","unstructured":"Sidenbladh, H., Black, M.J., Fleet, D.J.: Stochastic tracking of 3D human figures using 2D image motion. In: Vernon, D. (ed.) ECCV 2000. LNCS, vol. 1843, pp. 702\u2013718. Springer, Heidelberg (2000)"},{"key":"21_CR16","unstructured":"Sminchisescu, C., Triggs, B.: Covariance scaled sampling for monocular 3d body tracking. In: CVPR (2001)"},{"key":"21_CR17","doi-asserted-by":"publisher","first-page":"4","DOI":"10.1007\/s11263-009-0273-6","volume":"87","author":"L Sigal","year":"2010","unstructured":"Sigal, L., Balan, A., Black, M.J.: HumanEva: synchronized video and motion capture dataset and baseline algorithm for evaluation of articulated human motion. Int. J. Comput. Vis. 87, 4\u201327 (2010)","journal-title":"Int. J. Comput. Vis."},{"key":"21_CR18","doi-asserted-by":"crossref","unstructured":"Anguelov, D., Srinivasan, P., Koller, D., Thrun, S., Rodgers, J., Davis, J.: Scape: shape completion and animation of people. In: TOG (2005)","DOI":"10.1145\/1186822.1073207"},{"key":"21_CR19","doi-asserted-by":"publisher","first-page":"4","DOI":"10.1016\/j.cviu.2006.10.016","volume":"108","author":"R Poppe","year":"2007","unstructured":"Poppe, R.: Vision-based human motion analysis: an overview. Compu. Vis. Image Underst. 108, 4\u201318 (2007)","journal-title":"Compu. Vis. Image Underst."},{"key":"21_CR20","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/1360612.1360697","volume":"27","author":"E De Aguiar","year":"2008","unstructured":"De Aguiar, E., Stoll, C., Theobalt, C., Ahmed, N., Seidel, H.P., Thrun, S.: Performance capture from sparse multi-view video. ACM Trans. Graph. 27, 1\u20139 (2008)","journal-title":"ACM Trans. Graph."},{"key":"21_CR21","doi-asserted-by":"crossref","unstructured":"Jain, A., Thorm\u00e4hlen, T., Seidel, H.P., Theobalt, C.: Moviereshape: tracking and reshaping of humans in videos. In: TOG (2010)","DOI":"10.1145\/1882262.1866174"},{"key":"21_CR22","doi-asserted-by":"crossref","unstructured":"Stoll, C., Hasler, N., Gall, J., Seidel, H., Theobalt, C.: Fast articulated motion tracking using a sums of gaussians body model. In: ICCV (2011)","DOI":"10.1109\/ICCV.2011.6126338"},{"key":"21_CR23","unstructured":"Freeman, W.T., Roth, M.: Orientation histograms for hand gesture recognition. In: International Workshop on Automatic Face and Gesture Recognition (1995)"},{"key":"21_CR24","doi-asserted-by":"publisher","first-page":"91","DOI":"10.1023\/B:VISI.0000029664.99615.94","volume":"60","author":"DG Lowe","year":"2004","unstructured":"Lowe, D.G.: Distinctive image features from scale-invariant keypoints. Int. J. Comput. Vis. 60, 91\u2013110 (2004)","journal-title":"Int. J. Comput. Vis."},{"key":"21_CR25","doi-asserted-by":"publisher","first-page":"107","DOI":"10.1007\/s11263-005-1838-7","volume":"64","author":"I Laptev","year":"2005","unstructured":"Laptev, I.: On space-time interest points. Int. J. Comput. Vis. 64, 107\u2013123 (2005)","journal-title":"Int. J. Comput. Vis."},{"key":"21_CR26","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"428","DOI":"10.1007\/11744047_33","volume-title":"Computer Vision \u2013 ECCV 2006","author":"N Dalal","year":"2006","unstructured":"Dalal, N., Triggs, B., Schmid, C.: Human detection using oriented histograms of flow and appearance. In: Leonardis, A., Bischof, H., Pinz, A. (eds.) ECCV 2006. LNCS, vol. 3952, pp. 428\u2013441. Springer, Heidelberg (2006)"},{"key":"21_CR27","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"666","DOI":"10.1007\/3-540-47977-5_44","volume-title":"Computer Vision \u2013 ECCV 2002","author":"G Mori","year":"2002","unstructured":"Mori, G., Malik, J.: Estimating human body configurations using shape context matching. In: Heyden, A., Sparr, G., Nielsen, M., Johansen, P. (eds.) ECCV 2002. LNCS, vol. 2352, pp. 666\u2013680. Springer, Heidelberg (2002)"},{"key":"21_CR28","doi-asserted-by":"publisher","first-page":"44","DOI":"10.1109\/TPAMI.2006.21","volume":"28","author":"A Agarwal","year":"2006","unstructured":"Agarwal, A., Triggs, B., Rhone-Alpes, I., Montbonnot, F.: Recovering 3D human pose from monocular images. IEEE Trans. Pattern Anal. Mach. Intell. 28, 44\u201358 (2006)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"21_CR29","doi-asserted-by":"crossref","unstructured":"Grauman, K., Shakhnarovich, G., Darrell, T.: Inferring 3d structure with a statistical image-based shape model. In: ICCV (2003)","DOI":"10.1109\/ICCV.2003.1238408"},{"key":"21_CR30","doi-asserted-by":"crossref","unstructured":"Shakhnarovich, G., Viola, P., Darrell, T.: Fast pose estimation with parameter-sensitive hashing. In: ICCV (2003)","DOI":"10.1109\/ICCV.2003.1238424"},{"key":"21_CR31","unstructured":"Ramanan, D., Forsyth, D., Zisserman, A.: Strike a pose: Tracking people by finding stylized poses. In: CVPR (2005)"},{"key":"21_CR32","doi-asserted-by":"crossref","unstructured":"Buehler, P., Zisserman, A., Everingham, M.: Learning sign language by watching TV (using weakly aligned subtitles) (2009)","DOI":"10.1109\/CVPR.2009.5206523"},{"key":"21_CR33","doi-asserted-by":"publisher","first-page":"67","DOI":"10.1109\/T-C.1973.223602","volume":"22","author":"MA Fischler","year":"1973","unstructured":"Fischler, M.A., Elschlager, R.: The representation and matching of pictorial structures. IEEE Trans. Comput. 22, 67\u201392 (1973)","journal-title":"IEEE Trans. Comput."},{"key":"21_CR34","doi-asserted-by":"crossref","unstructured":"Felzenszwalb, P., McAllester, D., Ramanan, D.: A discriminatively trained, multiscale, deformable part model. In: CVPR (2008)","DOI":"10.1109\/CVPR.2008.4587597"},{"key":"21_CR35","doi-asserted-by":"crossref","unstructured":"Andriluka, M., Roth, S., Schiele, B.: Pictorial structures revisited: people detection and articulated pose estimation. In: CVPR (2009)","DOI":"10.1109\/CVPR.2009.5206754"},{"key":"21_CR36","doi-asserted-by":"crossref","unstructured":"Dantone, M., Gall, J., Leistner, C., Gool., L.V.: Human pose estimation using body parts dependent joint regressors. In: CVPR (2013)","DOI":"10.1109\/CVPR.2013.391"},{"key":"21_CR37","doi-asserted-by":"crossref","unstructured":"Johnson, S., Everingham, M.: Learning effective human pose estimation from inaccurate annotation. In: CVPR (2011)","DOI":"10.1109\/CVPR.2011.5995318"},{"key":"21_CR38","doi-asserted-by":"crossref","unstructured":"Pishchulin, L., Andriluka, M., Gehler, P., Schiele, B.: Poselet conditioned pictorial structures. In: CVPR (2013)","DOI":"10.1109\/CVPR.2013.82"},{"key":"21_CR39","doi-asserted-by":"crossref","unstructured":"Bourdev, L., Malik, J.: Poselets: body part detectors trained using 3d human pose annotations. In: ICCV (2009)","DOI":"10.1109\/ICCV.2009.5459303"},{"key":"21_CR40","doi-asserted-by":"crossref","unstructured":"Gkioxari, G., Arbelaez, P., Bourdev, L., Malik, J.: Articulated pose estimation using discriminative armlet classifiers. In: CVPR (2013)","DOI":"10.1109\/CVPR.2013.429"},{"key":"21_CR41","doi-asserted-by":"crossref","unstructured":"Shotton, J., Sharp, T., Kipman, A., Fitzgibbon, A., Finocchio, M., Blake, A., Cook, M., Moore, R.: Real-time human pose recognition in parts from single depth images. ACM (2013)","DOI":"10.1007\/978-3-642-28661-2_5"},{"key":"21_CR42","unstructured":"Zeiler, M., R., F.: Visualizing and understanding convolutional neural networks. In: arXiv preprint arXiv:1311.2901. (2013)"},{"key":"21_CR43","doi-asserted-by":"crossref","unstructured":"Razavian, A.S., Azizpour, H., Sullivan, J., Carlsson, S.: Cnn features off-the-shelf: an astounding baseline for recognition (2014)","DOI":"10.1109\/CVPRW.2014.131"},{"key":"21_CR44","doi-asserted-by":"crossref","unstructured":"Yaniv Taigman, Ming Yang, M.R., Wolf, L.: Deepface: closing the gap to human-level performance in face verification. In: CVPR (2014)","DOI":"10.1109\/CVPR.2014.220"},{"key":"21_CR45","doi-asserted-by":"crossref","unstructured":"Deng, L., Abdel-Hamid, O., Yu, D.: A deep convolutional neural network using heterogeneous pooling for trading acoustic invariance with phonetic confusion. In: ICASSP (2013)","DOI":"10.1109\/ICASSP.2013.6638952"},{"key":"21_CR46","doi-asserted-by":"crossref","unstructured":"Sermanet, P., Kavukcuoglu, K., Chintala, S., LeCun, Y.: Pedestrian detection with unsupervised multi-stage feature learning. In: CVPR (2013)","DOI":"10.1109\/CVPR.2013.465"},{"key":"21_CR47","doi-asserted-by":"crossref","unstructured":"Weinzaepfel, P., Revaud, J., Harchaoui, Z., Schmid, C.: Deepflow: large displacement optical flow with deep matching. In: ICCV (2013)","DOI":"10.1109\/ICCV.2013.175"},{"key":"21_CR48","doi-asserted-by":"crossref","unstructured":"Toshev, A., Szegedy, C.: Deeppose: Human pose estimation via deep neural networks. In: CVPR (2014)","DOI":"10.1109\/CVPR.2014.214"},{"key":"21_CR49","unstructured":"Jain, A., Tompson, J., Andriluka, M., Taylor, G., Bregler, C.: Learning human pose estimation features with convolutional networks. In: ICLR (2014)"},{"key":"21_CR50","doi-asserted-by":"crossref","unstructured":"Tompson, J., Stein, M., LeCun, Y., Perlin, K.: Real-time continuous pose recovery of human hands using convolutional networks. In: TOG (2014)","DOI":"10.1145\/2629500"},{"key":"21_CR51","doi-asserted-by":"crossref","unstructured":"Johnson, S., Everingham, M.: Clustered pose and nonlinear appearance models for human pose estimation. In: BMVC (2010)","DOI":"10.5244\/C.24.12"},{"key":"21_CR52","unstructured":"Collobert, R., Kavukcuoglu, K., Farabet, C.: Torch7: a matlab-like environment for machine learning. In: BigLearn, NIPS Workshop (2011)"},{"key":"21_CR53","doi-asserted-by":"crossref","unstructured":"Giusti, A., Ciresan, D.C., Masci, J., Gambardella, L.M., Schmidhuber, J.: Fast image scanning with deep max-pooling convolutional neural networks. In: CoRR (2013)","DOI":"10.1109\/ICIP.2013.6738831"},{"key":"21_CR54","unstructured":"Sermanet, P., Eigen, D., Zhang, X., Mathieu, M., Fergus, R., LeCun, Y.: Overfeat: Integrated recognition, localization and detection using convolutional networks. In: ICLR (2014)"},{"key":"21_CR55","unstructured":"Sutskever, I., Martens, J., Dahl, G., Hinton, G.: On the importance of initialization and momentum in deep learning. In: ICML (2013)"}],"container-title":["Lecture Notes in Computer Science","Computer Vision -- ACCV 2014"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-319-16808-1_21","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,1,20]],"date-time":"2023-01-20T16:14:17Z","timestamp":1674231257000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-319-16808-1_21"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2015]]},"ISBN":["9783319168074","9783319168081"],"references-count":55,"URL":"https:\/\/doi.org\/10.1007\/978-3-319-16808-1_21","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2015]]},"assertion":[{"value":"16 April 2015","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}}]}}