{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,7]],"date-time":"2026-04-07T16:14:49Z","timestamp":1775578489412,"version":"3.50.1"},"publisher-location":"Cham","reference-count":50,"publisher":"Springer International Publishing","isbn-type":[{"value":"9783030012489","type":"print"},{"value":"9783030012496","type":"electronic"}],"license":[{"start":{"date-parts":[[2018,1,1]],"date-time":"2018-01-01T00:00:00Z","timestamp":1514764800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2018,1,1]],"date-time":"2018-01-01T00:00:00Z","timestamp":1514764800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2018]]},"DOI":"10.1007\/978-3-030-01249-6_49","type":"book-chapter","created":{"date-parts":[[2018,10,5]],"date-time":"2018-10-05T15:35:46Z","timestamp":1538753746000},"page":"817-834","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":37,"title":["Occlusion-Aware Hand Pose Estimation Using Hierarchical Mixture Density Network"],"prefix":"10.1007","author":[{"given":"Qi","family":"Ye","sequence":"first","affiliation":[]},{"given":"Tae-Kyun","family":"Kim","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2018,10,6]]},"reference":[{"key":"49_CR1","unstructured":"Bishop, C.M.: Mixture density networks (1994)"},{"key":"49_CR2","volume-title":"Pattern Recognition and Machine Learning","author":"CM Bishop","year":"2006","unstructured":"Bishop, C.M.: Pattern Recognition and Machine Learning. Springer, New York (2006)"},{"key":"49_CR3","doi-asserted-by":"publisher","first-page":"87","DOI":"10.1016\/j.cviu.2016.01.010","volume":"148","author":"H Chang","year":"2016","unstructured":"Chang, H., Garcia-Hernando, G., Tang, D., Kim, T.K.: Spatio-temporal hough forest for efficient detection-localisation-recognition of fingerwriting in egocentric camera. CVIU 148, 87\u201396 (2016). https:\/\/doi.org\/10.1016\/j.cviu.2016.01.010","journal-title":"CVIU"},{"key":"49_CR4","doi-asserted-by":"crossref","unstructured":"Charles, J., Pfister, T., Magee, D., Hogg, D., Zisserman, A.: Personalizing human video pose estimation. In: CVPR (2016)","DOI":"10.1109\/CVPR.2016.334"},{"key":"49_CR5","doi-asserted-by":"crossref","unstructured":"Chen, X., Yuille, A.: Parsing occluded people by flexible compositions. In: CVPR (2014)","DOI":"10.1109\/CVPR.2015.7299020"},{"issue":"6","key":"49_CR6","doi-asserted-by":"publisher","first-page":"1013","DOI":"10.1109\/TPAMI.2006.111","volume":"28","author":"C Constantinopoulos","year":"2006","unstructured":"Constantinopoulos, C., Titsias, M.K., Likas, A.: Bayesian feature and model selection for gaussian mixture models. TPAMI 28(6), 1013\u20131018 (2006). https:\/\/doi.org\/10.1109\/TPAMI.2006.111","journal-title":"TPAMI"},{"key":"49_CR7","doi-asserted-by":"crossref","unstructured":"Garcia-Hernando, G., Yuan, S., Baek, S., Kim, T.: First-person hand action benchmark with RGB-D videos and 3D hand pose annotations. CoRR abs\/1704.02463 (2017). http:\/\/arxiv.org\/abs\/1704.02463","DOI":"10.1109\/CVPR.2018.00050"},{"key":"49_CR8","doi-asserted-by":"crossref","unstructured":"Ghiasi, G., Yang, Y., Ramanan, D., Fowlkes, C.C.: Parsing occluded people. In: CVPR (2014)","DOI":"10.1109\/CVPR.2014.308"},{"key":"49_CR9","doi-asserted-by":"crossref","unstructured":"Guo, H., Wang, G., Chen, X., Zhang, C., Qiao, F., Yang, H.: Region ensemble network: improving convolutional network for hand pose estimation. In: ICIP (2017)","DOI":"10.1109\/ICIP.2017.8297136"},{"key":"49_CR10","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"160","DOI":"10.1007\/978-3-319-46448-0_10","volume-title":"Computer Vision \u2013 ECCV 2016","author":"A Haque","year":"2016","unstructured":"Haque, A., Peng, B., Luo, Z., Alahi, A., Yeung, S., Fei-Fei, L.: Towards viewpoint invariant 3D human pose estimation. In: Leibe, B., Matas, J., Sebe, N., Welling, M. (eds.) ECCV 2016. LNCS, vol. 9905, pp. 160\u2013177. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-46448-0_10"},{"key":"49_CR11","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. In: CVPR (2016)","DOI":"10.1109\/CVPR.2016.90"},{"key":"49_CR12","doi-asserted-by":"crossref","unstructured":"Hsiao, E., Hebert, M.: Occlusion reasoning for object detection under arbitrary viewpoint. In: CVPR (2012)","DOI":"10.1109\/CVPR.2012.6248048"},{"issue":"4","key":"49_CR13","doi-asserted-by":"publisher","first-page":"501","DOI":"10.1109\/TVCG.2015.2391860","volume":"21","author":"Y Jang","year":"2015","unstructured":"Jang, Y., Noh, S.T., Chang, H.J., Kim, T.K., Woo, W.: 3D finger cape: clicking action and position estimation under self-occlusions in egocentric viewpoint. IEEE Trans. Vis. Comput. Graph. (TVCG) 21(4), 501\u2013510 (2015)","journal-title":"IEEE Trans. Vis. Comput. Graph. (TVCG)"},{"key":"49_CR14","unstructured":"Kingma, D., Ba, J.: Adam: a method for stochastic optimization. In: ICLR (2014)"},{"key":"49_CR15","doi-asserted-by":"crossref","unstructured":"Kinoshita, K., Delcroix, M., Ogawa, A., Higuchi, T., Nakatani, T.: Deep mixture density network for statistical model-based feature enhancement. In: ICASSP (2017)","DOI":"10.1109\/ICASSP.2017.7952156"},{"key":"49_CR16","doi-asserted-by":"crossref","unstructured":"Mueller, F., Mehta, D., Sotnychenko, O., Sridhar, S., Casas, D., Theobalt, C.: Real-time hand tracking under occlusion from an egocentric RGB-D sensor. In: ICCV (2017)","DOI":"10.1109\/ICCV.2017.131"},{"key":"49_CR17","doi-asserted-by":"crossref","unstructured":"Navaratnam, R., Fitzgibbon, A.W., Cipolla, R.: The joint manifold model for semi-supervised multi-valued regression. In: ICCV (2007)","DOI":"10.1109\/ICCV.2007.4408976"},{"key":"49_CR18","unstructured":"Oberweger, M., Wohlhart, P., Lepetit, V.: Hands deep in deep learning for hand pose estimation. In: Computer Vision Winter Workshop (CVWW) (2015)"},{"key":"49_CR19","doi-asserted-by":"crossref","unstructured":"Oberweger, M., Wohlhart, P., Lepetit, V.: Training a feedback loop for hand pose estimation. In: ICCV (2015)","DOI":"10.1109\/ICCV.2015.379"},{"key":"49_CR20","doi-asserted-by":"crossref","unstructured":"Oberweger, M., Lepetit, V.: Deepprior++: improving fast and accurate 3D hand pose estimation. In: ICCV Workshops (2017)","DOI":"10.1109\/ICCVW.2017.75"},{"key":"49_CR21","doi-asserted-by":"crossref","unstructured":"Oikonomidis, I., Kyriazis, N., Argyros, A.: Efficient model-based 3D tracking of hand articulations using Kinect. In: BMVC (2011)","DOI":"10.5244\/C.25.101"},{"key":"49_CR22","doi-asserted-by":"crossref","unstructured":"Oikonomidis, I., Kyriazis, N., Argyros, A.: Tracking the articulated motion of two strongly interacting hands. In: CVPR (2012)","DOI":"10.1109\/CVPR.2012.6247885"},{"key":"49_CR23","doi-asserted-by":"crossref","unstructured":"Oikonomidis, I., Kyriazis, N., Argyros, A.A.: Full DOF tracking of a hand interacting with an object by modeling occlusions and physical constraints. In: ICCV (2011)","DOI":"10.1109\/ICCV.2011.6126483"},{"key":"49_CR24","doi-asserted-by":"crossref","unstructured":"Poier, G., Roditakis, K., Schulter, S., Michel, D., Bischof, H., Argyros, A.: Hybrid one-shot 3D hand pose estimation by exploiting uncertainties. In: BMVC (2015)","DOI":"10.5244\/C.29.182"},{"key":"49_CR25","doi-asserted-by":"crossref","unstructured":"Qian, C., Sun, X., Wei, Y., Tang, X., Sun, J.: Realtime and robust hand tracking from depth. In: ICCV (2014)","DOI":"10.1109\/CVPR.2014.145"},{"key":"49_CR26","doi-asserted-by":"crossref","unstructured":"Rafi, U., Gall, J., Leibe, B.: A semantic occlusion model for human pose estimation from a single depth image. In: CVPR Workshops (2015)","DOI":"10.1109\/CVPRW.2015.7301338"},{"key":"49_CR27","doi-asserted-by":"crossref","unstructured":"Rogez, G., Supancic, J.S., Ramanan, D.: First-person pose recognition using egocentric workspaces. In: CVPR (2015)","DOI":"10.1109\/CVPR.2015.7299061"},{"key":"49_CR28","unstructured":"Rogez, G., Supancic III, J.S., Khademi, M., Montiel, J.M.M., Ramanan, D.: 3D hand pose detection in egocentric RGB-D images. In: ECCV Workshops (2014)"},{"key":"49_CR29","doi-asserted-by":"crossref","unstructured":"Rohrbach, M., Amin, S., Andriluka, M., Schiele, B.: A database for fine grained activity detection of cooking activities. In: CVPR (2012)","DOI":"10.1109\/CVPR.2012.6247801"},{"key":"49_CR30","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"234","DOI":"10.1007\/978-3-319-24574-4_28","volume-title":"Medical Image Computing and Computer-Assisted Intervention \u2013 MICCAI 2015","author":"O Ronneberger","year":"2015","unstructured":"Ronneberger, O., Fischer, P., Brox, T.: U-Net: convolutional networks for biomedical image segmentation. In: Navab, N., Hornegger, J., Wells, W.M., Frangi, A.F. (eds.) MICCAI 2015. LNCS, vol. 9351, pp. 234\u2013241. Springer, Cham (2015). https:\/\/doi.org\/10.1007\/978-3-319-24574-4_28"},{"key":"49_CR31","doi-asserted-by":"crossref","unstructured":"Sharp, T., et al.: Accurate, robust, and flexible real-time hand tracking. In: CHI (2015)","DOI":"10.1145\/2702123.2702179"},{"key":"49_CR32","unstructured":"Sigal, L., Black, M.J.: Measure locally, reason globally: occlusion-sensitive articulated pose estimation. In: CVPR (2006)"},{"key":"49_CR33","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"294","DOI":"10.1007\/978-3-319-46475-6_19","volume-title":"Computer Vision \u2013 ECCV 2016","author":"S Sridhar","year":"2016","unstructured":"Sridhar, S., Mueller, F., Zollh\u00f6fer, M., Casas, D., Oulasvirta, A., Theobalt, C.: Real-time joint tracking of a hand manipulating an object from RGB-D input. In: Leibe, B., Matas, J., Sebe, N., Welling, M. (eds.) ECCV 2016. LNCS, vol. 9906, pp. 294\u2013310. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-46475-6_19"},{"key":"49_CR34","doi-asserted-by":"crossref","unstructured":"Sun, X., Wei, Y., Liang, S., Tang, X., Sun, J.: Cascaded hand pose regression. In: CVPR (2015)","DOI":"10.1109\/CVPR.2015.7298683"},{"key":"49_CR35","doi-asserted-by":"crossref","unstructured":"Tan, D.J., et al.: Fits like a glove: rapid and reliable hand shape personalization. In: CVPR (2016)","DOI":"10.1109\/CVPR.2016.605"},{"key":"49_CR36","doi-asserted-by":"crossref","unstructured":"Tang, D., Taylor, J., Kohli, P., Keskin, C., Kim, T.K., Shotton, J.: Opening the black box: hierarchical sampling optimization for estimating human hand pose. In: ICCV (2015)","DOI":"10.1109\/ICCV.2015.380"},{"key":"49_CR37","doi-asserted-by":"crossref","unstructured":"Tang, D., Chang, H.J., Tejani, A., Kim, T.K.: Latent regression forest: structured estimation of 3D hand posture. In: CVPR (2014)","DOI":"10.1109\/CVPR.2014.490"},{"key":"49_CR38","doi-asserted-by":"crossref","unstructured":"Tang, D., Yu, T.H., Kim, T.K.: Real-time articulated hand pose estimation using semi-supervised transductive regression forests. In: ICCV (2013)","DOI":"10.1109\/ICCV.2013.400"},{"issue":"4","key":"49_CR39","doi-asserted-by":"publisher","first-page":"143","DOI":"10.1145\/2897824.2925965","volume":"35","author":"J Taylor","year":"2016","unstructured":"Taylor, J., et al.: Efficient and precise interactive hand tracking through joint, continuous optimization of pose and correspondences. TOG 35(4), 143 (2016)","journal-title":"TOG"},{"key":"49_CR40","doi-asserted-by":"crossref","unstructured":"Tome, D., Russell, C., Agapito, L.: Lifting from the deep: convolutional 3D pose estimation from a single image. In: CVPR (2017)","DOI":"10.1109\/CVPR.2017.603"},{"issue":"5","key":"49_CR41","doi-asserted-by":"publisher","first-page":"169","DOI":"10.1145\/2629500","volume":"33","author":"J Tompson","year":"2014","unstructured":"Tompson, J., Stein, M., Lecun, Y., Perlin, K.: Real-time continuous pose recovery of human hands using convolutional networks. TOG 33(5), 169 (2014)","journal-title":"TOG"},{"issue":"2","key":"49_CR42","doi-asserted-by":"publisher","first-page":"172","DOI":"10.1007\/s11263-016-0895-4","volume":"118","author":"D Tzionas","year":"2016","unstructured":"Tzionas, D., Ballan, L., Srikantha, A., Aponte, P., Pollefeys, M., Gall, J.: Capturing hands in action using discriminative salient points and physics simulation. IJCV 118(2), 172\u2013193 (2016). https:\/\/doi.org\/10.1007\/s11263-016-0895-4","journal-title":"IJCV"},{"key":"49_CR43","doi-asserted-by":"crossref","unstructured":"Variani, E., McDermott, E., Heigold, G.: A Gaussian mixture model layer jointly optimized with discriminative features within a deep neural network architecture. In: ICASSP (2015)","DOI":"10.1109\/ICASSP.2015.7178776"},{"key":"49_CR44","doi-asserted-by":"crossref","unstructured":"Wang, T., He, X., Barnes, N.: Learning structured hough voting for joint object detection and occlusion reasoning. In: CVPR (2013)","DOI":"10.1109\/CVPR.2013.234"},{"key":"49_CR45","doi-asserted-by":"crossref","unstructured":"Yang, W., Ouyang, W., Li, H., Wang, X.: End-to-end learning of deformable mixture of parts and deep convolutional neural networks for human pose estimation. In: CVPR (2016)","DOI":"10.1109\/CVPR.2016.335"},{"key":"49_CR46","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"346","DOI":"10.1007\/978-3-319-46484-8_21","volume-title":"Computer Vision \u2013 ECCV 2016","author":"Q Ye","year":"2016","unstructured":"Ye, Q., Yuan, S., Kim, T.-K.: Spatial attention deep net with partial PSO for hierarchical hybrid hand pose estimation. In: Leibe, B., Matas, J., Sebe, N., Welling, M. (eds.) ECCV 2016. LNCS, vol. 9912, pp. 346\u2013361. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-46484-8_21"},{"key":"49_CR47","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"434","DOI":"10.1007\/978-3-319-46478-7_27","volume-title":"Computer Vision \u2013 ECCV 2016","author":"F Yin","year":"2016","unstructured":"Yin, F., Chai, X., Chen, X.: Iterative reference driven metric learning for signer independent isolated sign language recognition. In: Leibe, B., Matas, J., Sebe, N., Welling, M. (eds.) ECCV 2016. LNCS, vol. 9911, pp. 434\u2013450. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-46478-7_27"},{"key":"49_CR48","doi-asserted-by":"crossref","unstructured":"Yuan, S., Ye, Q., Stenger, B., Kim, T.K.: BigHand2.2M benchmark: hand pose data set and state of the art analysis. In: CVPR (2017)","DOI":"10.1109\/CVPR.2017.279"},{"key":"49_CR49","doi-asserted-by":"crossref","unstructured":"Zen, H., Senior, A.: Deep mixture density networks for acoustic modeling in statistical parametric speech synthesis. In: ICASSP (2014)","DOI":"10.1109\/ICASSP.2014.6854321"},{"key":"49_CR50","unstructured":"Zhou, X., Wan, Q., Zhang, W., Xue, X., Wei, Y.: Model-based deep hand pose estimation. In: IJCAI (2016)"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2018"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-01249-6_49","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,10,5]],"date-time":"2022-10-05T01:00:23Z","timestamp":1664931623000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-030-01249-6_49"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018]]},"ISBN":["9783030012489","9783030012496"],"references-count":50,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-01249-6_49","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2018]]},"assertion":[{"value":"6 October 2018","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Munich","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Germany","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2018","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"8 September 2018","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"14 September 2018","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"15","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2018","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2018.org\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"This content has been made available to all.","name":"free","label":"Free to read"}]}}