{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,27]],"date-time":"2026-05-27T20:28:16Z","timestamp":1779913696739,"version":"3.53.1"},"reference-count":58,"publisher":"Springer Science and Business Media LLC","issue":"3","license":[{"start":{"date-parts":[[2017,2,23]],"date-time":"2017-02-23T00:00:00Z","timestamp":1487808000000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int J Comput Vis"],"published-print":{"date-parts":[[2017,7]]},"DOI":"10.1007\/s11263-017-0998-6","type":"journal-article","created":{"date-parts":[[2017,2,22]],"date-time":"2017-02-22T21:41:52Z","timestamp":1487799712000},"page":"454-478","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":102,"title":["Lie-X: Depth Image Based Articulated Object Pose Estimation, Tracking, and Action Recognition on Lie Groups"],"prefix":"10.1007","volume":"123","author":[{"given":"Chi","family":"Xu","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Lakshmi Narasimhan","family":"Govindarajan","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Yu","family":"Zhang","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3261-3533","authenticated-orcid":false,"given":"Li","family":"Cheng","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"297","published-online":{"date-parts":[[2017,2,23]]},"reference":[{"key":"998_CR1","doi-asserted-by":"crossref","unstructured":"Agarwal, A., & Triggs, B. (2006). Recovering 3D human pose from monocular images. IEEE Transanction on PAMI 28(1), 44\u201358.","DOI":"10.1109\/TPAMI.2006.21"},{"key":"998_CR2","doi-asserted-by":"crossref","unstructured":"Ali, K., Fleuret, F., Hasler, D., & Fua, P. (2009). Joint pose estimator and feature learning for object detection. In ICCV.","DOI":"10.1109\/ICCV.2009.5459304"},{"key":"998_CR3","unstructured":"Altafini, C. (2000). Nonlinear control in year 2000, chap. The De Casteljau algorithm on SE(3) (pp. 1\u201312). Springer, Berlin."},{"key":"998_CR4","doi-asserted-by":"crossref","unstructured":"Andriluka, M., Roth, S., & Schiele, B. (2008). People-tracking-by-detection and people-detection-by-tracking. In CVPR.","DOI":"10.1109\/CVPR.2008.4587583"},{"key":"998_CR5","unstructured":"Andrychowicz, M., Denil, M., Gomez, S., Hoffman, M., Pfau, D., Schaul, T., Shillingford, B., & de Freitas, N. (2016). Learning to learn by gradient descent by gradient descent (pp. 1\u201350)."},{"key":"998_CR6","volume-title":"Mathematical methods of classical mechanics","author":"VI Arnol\u2019d","year":"2013","unstructured":"Arnol\u2019d, V. I. (2013). Mathematical methods of classical mechanics. Berlin: Springer."},{"key":"998_CR7","doi-asserted-by":"crossref","unstructured":"Ballan, L., Taneja, A., Gall, J., Gool, L.V., & Pollefeys, M. (2012). Motion capture of hands in action using discriminative salient points. In ECCV.","DOI":"10.1007\/978-3-642-33783-3_46"},{"key":"998_CR8","unstructured":"Barsoum, E. (2016). Articulated hand pose estimation review. arXiv:1604.06195 ."},{"key":"998_CR9","doi-asserted-by":"crossref","unstructured":"Bookstein, F. (1977). The study of shape transformation after D\u2019Arcy Thompson. Mathematical Biosciences, 34(3\u20134), 177\u2013219.","DOI":"10.1016\/0025-5564(77)90101-8"},{"key":"998_CR10","doi-asserted-by":"crossref","unstructured":"Bourdev, L., & Malik, J. (2009). Poselets: Body part detectors trained using 3D human pose annotations. In ICCV.","DOI":"10.1109\/ICCV.2009.5459303"},{"key":"998_CR11","doi-asserted-by":"crossref","unstructured":"Branson, K., & Belongie, S. (2005). Tracking multiple mouse contours (without too many samples). In CVPR.","DOI":"10.1109\/CVPR.2005.349"},{"issue":"1","key":"998_CR12","doi-asserted-by":"crossref","first-page":"5","DOI":"10.1023\/A:1010933404324","volume":"45","author":"L Breiman","year":"2001","unstructured":"Breiman, L. (2001). Random forests. Machine Learning, 45(1), 5\u201332.","journal-title":"Machine Learning"},{"key":"998_CR13","doi-asserted-by":"crossref","unstructured":"Burges, C., Shaked, T., Renshaw, E., Lazier, A., Deeds, M., Hamilton, N., & Hullender, G. (2005). Learning to rank using gradient descent. In ICML.","DOI":"10.1145\/1102351.1102363"},{"issue":"15","key":"998_CR14","doi-asserted-by":"crossref","first-page":"1995","DOI":"10.1016\/j.patrec.2013.02.006","volume":"34","author":"L Chen","year":"2013","unstructured":"Chen, L., Wei, H., & Ferryman, J. (2013). A survey on model based approaches for 2D and 3D visual human pose recovery. PRL, 34(15), 1995\u20132006.","journal-title":"PRL"},{"key":"998_CR15","doi-asserted-by":"crossref","unstructured":"Dollar, P., Rabaud, V., Cottrell, G., & Belongie, S. (2005). Behavior recognition via sparse spatio-temporal features. In IEEE Workshop on PETS.","DOI":"10.1109\/VSPETS.2005.1570899"},{"key":"998_CR16","doi-asserted-by":"crossref","unstructured":"Dollar, P., Welinder, P., & Perona, P. (2010). Cascaded pose regression. In CVPR.","DOI":"10.1109\/CVPR.2010.5540094"},{"issue":"1","key":"998_CR17","doi-asserted-by":"crossref","first-page":"55","DOI":"10.1023\/B:VISI.0000042934.15159.49","volume":"61","author":"P Felzenszwalb","year":"2005","unstructured":"Felzenszwalb, P., & Huttenlocher, D. (2005). Pictorial structures for object recognition. International Journal of Computer Vision, 61(1), 55\u201379.","journal-title":"International Journal of Computer Vision"},{"key":"998_CR18","first-page":"2549","volume":"9","author":"F Fleuret","year":"2008","unstructured":"Fleuret, F., & Geman, D. (2008). Stationary features and cat detection. JMLR, 9, 2549\u20132578.","journal-title":"JMLR"},{"issue":"11","key":"998_CR19","doi-asserted-by":"crossref","first-page":"2188","DOI":"10.1109\/TPAMI.2011.70","volume":"33","author":"J Gall","year":"2011","unstructured":"Gall, J., Yao, A., Razavi, N., van Gool, L., & Lempitsky, V. (2011). Hough forests for object detection, tracking, and action recognition. IEEE Transactions on PAMI, 33(11), 2188\u20132202.","journal-title":"IEEE Transactions on PAMI"},{"key":"998_CR20","unstructured":"Hinterstoisser, S., Lepetit, V., Ilic, S., Fua, P., & Navab, N. (2010). Dominant orientation templates for real-time detection of textureless objects. In CVPR."},{"key":"998_CR21","unstructured":"Hough, P. (1959). Machine analysis of bubble chamber pictures. In Proceedings of International Conference on High Energy Accelerators and Instrumentation."},{"key":"998_CR22","volume-title":"Stochastic analysis on manifolds","author":"EP Hsu","year":"2002","unstructured":"Hsu, E. P. (2002). Stochastic analysis on manifolds. New York: AMS press."},{"key":"998_CR23","doi-asserted-by":"crossref","unstructured":"Huang, C., Allain, B., Franco, J., Navab, N., & Boyer, E. (2016). Volumetric 3D tracking by detection. In CVPR.","DOI":"10.1109\/CVPR.2016.419"},{"key":"998_CR24","doi-asserted-by":"crossref","unstructured":"Isard, M., & Blake, A. (1998). Condensation\u2014Conditional density propagation for visual tracking. International Journal of Computer Vision, 29(1), 5\u201328.","DOI":"10.1023\/A:1008078328650"},{"issue":"1","key":"998_CR25","doi-asserted-by":"crossref","first-page":"70","DOI":"10.1089\/zeb.2012.0861","volume":"10","author":"A Kalueff","year":"2013","unstructured":"Kalueff, A., Gebhardt, M., Stewart, A., Cachat, J., Brimmer, M., Chawla, J., et al. (2013). Towards a comprehensive catalog of zebrafish behavior 1.0 and beyond. Zebrafish, 10(1), 70\u201386.","journal-title":"Zebrafish"},{"key":"998_CR26","doi-asserted-by":"crossref","unstructured":"Lazebnik, S., Schmid, C., & Ponce, J. (2006). Beyond bags of features: Spatial pyramid matching for recognizing natural scene categories. In CVPR.","DOI":"10.1109\/CVPR.2006.68"},{"key":"998_CR27","doi-asserted-by":"crossref","DOI":"10.1007\/978-0-387-21752-9","volume-title":"Introduction to smooth manifolds","author":"J Lee","year":"2003","unstructured":"Lee, J. (2003). Introduction to smooth manifolds. Berlin: Springer."},{"key":"998_CR28","unstructured":"Leibe, B., Leonardis, A., & Schiele, B. (2004). Combined object categorization and segmentation with an implicit shape model (pp. 17\u201332). In ECCV workshop on statistical learning in computer vision."},{"key":"998_CR29","doi-asserted-by":"crossref","unstructured":"Mahasseni, B., & Todorovic, S. (2016). Regularizing long short term memory with 3D human-skeleton sequences for action recognition. In CVPR.","DOI":"10.1109\/CVPR.2016.333"},{"issue":"4","key":"998_CR30","doi-asserted-by":"crossref","first-page":"681","DOI":"10.1109\/JSTSP.2013.2264798","volume":"7","author":"J Manton","year":"2013","unstructured":"Manton, J. (2013). A primer on stochastic differential geometry for signal processing. IEEE Journal of Selected Topics in Signal Processing, 7(4), 681\u2013699.","journal-title":"IEEE Journal of Selected Topics in Signal Processing"},{"issue":"3","key":"998_CR31","doi-asserted-by":"crossref","first-page":"199","DOI":"10.1023\/A:1023012723347","volume":"53","author":"I Mikic","year":"2003","unstructured":"Mikic, I., Trivedi, M. M., Hunter, E., & Cosman, P. C. (2003). Human body model acquisition and tracking using voxel data. International Journal of Computer Vision, 53(3), 199\u2013223.","journal-title":"International Journal of Computer Vision"},{"key":"998_CR32","volume-title":"A mathematical introduction to robotic manipulation","author":"R Murray","year":"1994","unstructured":"Murray, R., Sastry, S., & Li, Z. (1994). A mathematical introduction to robotic manipulation. boca raton: CRC Press."},{"key":"998_CR33","doi-asserted-by":"crossref","unstructured":"Nie, X., Xiong, C., & Zhu, S. (2015). Joint action recognition and pose estimation from video. In CVPR.","DOI":"10.1109\/CVPR.2015.7298734"},{"key":"998_CR34","unstructured":"Oberweger, M., Wohlhart, P., & Lepetit, V. (2015a). Hands deep in deep learning for hand pose estimation. In Computer Vision Winter Workshop."},{"key":"998_CR35","doi-asserted-by":"crossref","unstructured":"Oberweger, M., Wohlhart, P., & Lepetit, V. (2015b). Training a feedback loop for hand pose estimation. In ICCV.","DOI":"10.1109\/ICCV.2015.379"},{"key":"998_CR36","doi-asserted-by":"crossref","unstructured":"Oikonomidis, N., & Argyros, A. (2011). Efficient model-based 3D tracking of hand articulations using Kinect. In BMVC.","DOI":"10.5244\/C.25.101"},{"key":"998_CR37","doi-asserted-by":"crossref","first-page":"4189","DOI":"10.3390\/s140304189","volume":"14","author":"X Perez-Sala","year":"2014","unstructured":"Perez-Sala, X., Escalera, S., Angulo, C., & Gonzalez, J. (2014). Survey of human motion analysis using depth imagery. Sensors, 14, 4189\u20134210.","journal-title":"Sensors"},{"key":"998_CR38","doi-asserted-by":"crossref","unstructured":"Poppe, R. (2007). Vision-based human motion analysis: An overview. Computer Vision and Image Understanding, 108(1\u20132), 4\u201318.","DOI":"10.1016\/j.cviu.2006.10.016"},{"key":"998_CR39","volume-title":"Lie groups: An approach through invariants and representations","author":"C Procesi","year":"2007","unstructured":"Procesi, C. (2007). Lie groups: An approach through invariants and representations. Berlin: Springer."},{"key":"998_CR40","doi-asserted-by":"crossref","unstructured":"Qian, C., Sun, X., Wei, Y., Tang, X., & Sun, J. (2014). Realtime and robust hand tracking from depth. In CVPR.","DOI":"10.1109\/CVPR.2014.145"},{"key":"998_CR41","doi-asserted-by":"crossref","unstructured":"Rahmani, H., & Mian, A. (2016). 3D action recognition from novel viewpoints. In CVPR.","DOI":"10.1109\/CVPR.2016.167"},{"issue":"12","key":"998_CR42","doi-asserted-by":"crossref","first-page":"2821","DOI":"10.1109\/TPAMI.2012.241","volume":"35","author":"J Shotton","year":"2013","unstructured":"Shotton, J., Girshick, R., Fitzgibbon, A., Sharp, T., Cook, M., Finocchio, M., et al. (2013). Efficient human pose estimation from single depth images. IEEE TPAMI, 35(12), 2821\u201340.","journal-title":"IEEE TPAMI"},{"key":"998_CR43","doi-asserted-by":"crossref","unstructured":"Sinha, A., Choi, C., & Ramani, K. (2016). Deephand: Robust hand pose estimation by completing a matrix imputed with deep features. In CVPR.","DOI":"10.1109\/CVPR.2016.450"},{"issue":"6\u20137","key":"998_CR44","doi-asserted-by":"crossref","first-page":"398","DOI":"10.1016\/j.imavis.2012.03.006","volume":"30","author":"A Srivastava","year":"2012","unstructured":"Srivastava, A., Turaga, P., & Kurtek, S. (2012). On advances in differential-geometric approaches for 2D and 3D shape analyses and activity recognition. Image Vision Computing, 30(6\u20137), 398\u2013416.","journal-title":"Image Vision Computing"},{"key":"998_CR45","doi-asserted-by":"crossref","unstructured":"Sun, X., Wei, Y., Liang, S., Tang, X., & Sun, J. (2015). Cascaded hand pose regression. In CVPR.","DOI":"10.1109\/CVPR.2015.7298683"},{"key":"998_CR46","doi-asserted-by":"crossref","unstructured":"Tan, D., Cashman, T., Taylor, J., Fitzgibbon, A., Tarlow, D., Khamis, S., Izadi, S., & Shotton, J. (2016). Fits like a glove: Rapid and reliable hand shape personalization. In CVPR.","DOI":"10.1109\/CVPR.2016.605"},{"key":"998_CR47","doi-asserted-by":"crossref","unstructured":"Tang, D., Taylor, J., Kohli, P., Keskin, C., Kim, T., & Shotton, J. (2015). Opening the black box: Hierarchical sampling optimization for estimating human hand pose. In ICCV.","DOI":"10.1109\/ICCV.2015.380"},{"key":"998_CR48","unstructured":"Tompson, J., Jain, A., LeCun, Y., & Bregler, C. (2014). Joint training of a convolutional network and a graphical model for human pose estimation. In NIPS."},{"key":"998_CR49","doi-asserted-by":"crossref","unstructured":"Tompson, J., Stein, M., Lecun, Y., & Perlin, K. (2014). Real-time continuous pose recovery of human hands using convolutional networks. SIGGRAPH.","DOI":"10.1145\/2629500"},{"key":"998_CR50","doi-asserted-by":"crossref","unstructured":"Tuzel, O., Porikli, F., & Meer, P. (2008). Learning on Lie groups for invariant detection and tracking. In CVPR.","DOI":"10.1109\/CVPR.2008.4587521"},{"key":"998_CR51","doi-asserted-by":"crossref","unstructured":"Vemulapalli, R., Arrate, F., & Chellappa, R. (2014). Human action recognition by representing 3D skeletons as points in a Lie group. In CVPR.","DOI":"10.1109\/CVPR.2014.82"},{"key":"998_CR52","doi-asserted-by":"crossref","unstructured":"Vemulapalli, R., & Chellappa, R. (2016). Rolling rotations for recognizing human actions from 3D skeletal data. In CVPR.","DOI":"10.1109\/CVPR.2016.484"},{"issue":"6","key":"998_CR53","doi-asserted-by":"crossref","first-page":"1121","DOI":"10.1016\/j.neuron.2015.11.031","volume":"88","author":"A Wiltschko","year":"2015","unstructured":"Wiltschko, A., Johnson, M., Iurilli, G., Peterson, R., Katon, J., Pashkovski, S., et al. (2015). Mapping sub-second structure in mouse behavior. Neuron, 88(6), 1121\u201335.","journal-title":"Neuron"},{"key":"998_CR54","doi-asserted-by":"crossref","unstructured":"Xiong, X., & la Torre, F.D. (2013). Supervised descent method and its applications to face alignment. In CVPR.","DOI":"10.1109\/CVPR.2013.75"},{"key":"998_CR55","doi-asserted-by":"crossref","unstructured":"Xu, C., & Cheng, L. (2013). Efficient hand pose estimation from a single depth image. In ICCV.","DOI":"10.1109\/ICCV.2013.429"},{"key":"998_CR56","unstructured":"Xu, C., Nanjappa, A., Zhang, X., & Cheng, L. (2015). Estimate hand poses efficiently from single depth images. International Journal of Computer Vision, 1\u201325."},{"key":"998_CR57","doi-asserted-by":"crossref","unstructured":"Yang, Y., & Ramanan, D. (2011). Articulated pose estimation with flexible mixtures-of-parts. In CVPR.","DOI":"10.1109\/CVPR.2011.5995741"},{"key":"998_CR58","unstructured":"Zhou, X., Wan, Q., Zhang, W., Xue, X. & Wei, Y. (2016). Model-based deep hand pose estimation. In IJCAI."}],"updated-by":[{"DOI":"10.1007\/s11263-018-1069-3","type":"correction","label":"Correction","source":"publisher","updated":{"date-parts":[[2018,3,10]],"date-time":"2018-03-10T00:00:00Z","timestamp":1520640000000}}],"container-title":["International Journal of Computer Vision"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s11263-017-0998-6\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11263-017-0998-6.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11263-017-0998-6.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,9,18]],"date-time":"2019-09-18T20:23:07Z","timestamp":1568838187000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s11263-017-0998-6"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2017,2,23]]},"references-count":58,"journal-issue":{"issue":"3","published-print":{"date-parts":[[2017,7]]}},"alternative-id":["998"],"URL":"https:\/\/doi.org\/10.1007\/s11263-017-0998-6","relation":{"correction":[{"id-type":"doi","id":"10.1007\/s11263-018-1069-3","asserted-by":"object"}]},"ISSN":["0920-5691","1573-1405"],"issn-type":[{"value":"0920-5691","type":"print"},{"value":"1573-1405","type":"electronic"}],"subject":[],"published":{"date-parts":[[2017,2,23]]}}}