{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,2,21]],"date-time":"2025-02-21T11:23:24Z","timestamp":1740137004443,"version":"3.37.3"},"reference-count":37,"publisher":"Springer Science and Business Media LLC","issue":"4","license":[{"start":{"date-parts":[[2023,9,15]],"date-time":"2023-09-15T00:00:00Z","timestamp":1694736000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,9,15]],"date-time":"2023-09-15T00:00:00Z","timestamp":1694736000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100003848","name":"Industrial Technology Research Institute","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100003848","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100020950","name":"National Science and Technology Council","doi-asserted-by":"publisher","award":["111-2222-E-A49-008 \u2013"],"award-info":[{"award-number":["111-2222-E-A49-008 \u2013"]}],"id":[{"id":"10.13039\/501100020950","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Virtual Reality"],"published-print":{"date-parts":[[2023,12]]},"DOI":"10.1007\/s10055-023-00860-6","type":"journal-article","created":{"date-parts":[[2023,9,15]],"date-time":"2023-09-15T06:01:45Z","timestamp":1694757705000},"page":"3133-3148","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Robust vision-based glove pose estimation for both hands in virtual reality"],"prefix":"10.1007","volume":"27","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-9102-6801","authenticated-orcid":false,"given":"Fu-Song","family":"Hsu","sequence":"first","affiliation":[]},{"given":"Te-Mei","family":"Wang","sequence":"additional","affiliation":[]},{"given":"Liang-Hsun","family":"Chen","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2023,9,15]]},"reference":[{"key":"860_CR1","doi-asserted-by":"publisher","first-page":"669","DOI":"10.1109\/CVPR.2000.855884","volume":"1","author":"C Barron","year":"2000","unstructured":"Barron C, Kakadiaris IA (2000) Estimating anthropometry and pose from a single image. Proc IEEE Conf Comput vis Pattern Recognit 1:669\u2013676. https:\/\/doi.org\/10.1109\/CVPR.2000.855884","journal-title":"Proc IEEE Conf Comput vis Pattern Recognit"},{"key":"860_CR2","doi-asserted-by":"publisher","unstructured":"Buxton W, Myers B (1986) A study in two-handed input. In: Proceedings of the SIGCHI conference on human factors in computing systems, Boston, Massachusetts, USA., 321\u2013326. https:\/\/doi.org\/10.1145\/22627.22390","DOI":"10.1145\/22627.22390"},{"key":"860_CR3","doi-asserted-by":"publisher","unstructured":"Buxton W (1995) Chunking and phrasing and the design of human-computer dialogues. In: Baecker RM, Grudin J, Buxton WAS, Greenberg S. (Eds), Readings in human\u2013computer interaction, 494\u2013499. https:\/\/doi.org\/10.1016\/B978-0-08-051574-8.50051-0","DOI":"10.1016\/B978-0-08-051574-8.50051-0"},{"issue":"4","key":"860_CR4","doi-asserted-by":"publisher","first-page":"1074","DOI":"10.3390\/s20041074","volume":"20","author":"W Chen","year":"2020","unstructured":"Chen W, Yu C, Tu C, Lyu Z, Tang J, Ou S, Fu Y, Xue Z (2020) A Survey on hand pose estimation with wearable sensors and computer-vision-based methods. Sensors 20(4):1074. https:\/\/doi.org\/10.3390\/s20041074","journal-title":"Sensors"},{"key":"860_CR5","doi-asserted-by":"crossref","unstructured":"Chen Y, Tu Z, Ge L, Zhang D, Chen R, Yuan J (2019) SO-HandNet: self-organizing network for 3D hand pose estimation with semi-supervised learning. In: Proceedings of the IEEE\/CVF international conference on computer vision, 6961\u20136970","DOI":"10.1109\/ICCV.2019.00706"},{"key":"860_CR6","doi-asserted-by":"publisher","unstructured":"Chen Y, Tu Z, Kang D, Bao L, Zhang Y, Zhe X, Chen R, Yuan J (2021) Model-based 3D Hand Reconstruction via Self-Supervised Learning. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, 10451\u201310460. https:\/\/doi.org\/10.48550\/arXiv.2103.11703","DOI":"10.48550\/arXiv.2103.11703"},{"key":"860_CR7","doi-asserted-by":"publisher","unstructured":"Cheng W, Park JH, Ko JH (2021) HandFoldingNet: A 3D hand pose estimation network using multiscale-feature guided folding of a 2D hand skeleton. In: Proceedings of the IEEE\/CVF international conference on computer vision, 11260\u201311269. https:\/\/doi.org\/10.48550\/arXiv.2108.05545","DOI":"10.48550\/arXiv.2108.05545"},{"key":"860_CR8","doi-asserted-by":"publisher","unstructured":"Doosti B, Naha S, Mirbagheri M, Crandall DJ (2020) Hope-net: a graph-based model for hand-object pose estimation. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 6608\u20136617. https:\/\/doi.org\/10.48550\/arXiv.2004.00060","DOI":"10.48550\/arXiv.2004.00060"},{"issue":"1\u20132","key":"860_CR9","doi-asserted-by":"publisher","first-page":"52","DOI":"10.1016\/j.cviu.2006.10.012","volume":"108","author":"A Erol","year":"2007","unstructured":"Erol A, Bebis G, Nicolescu M, Boyle RD, Twombly X (2007) Vision-based hand pose estimation: a review. Comput vis Image Underst 108(1\u20132):52\u201373. https:\/\/doi.org\/10.1016\/j.cviu.2006.10.012","journal-title":"Comput vis Image Underst"},{"key":"860_CR10","doi-asserted-by":"publisher","unstructured":"Fang L, Liu X, Liu L, Xu H, Kang W (2020) JGR-P2O: Joint graph reasoning based pixel-to-offset prediction network for 3D hand pose estimation from a single depth image. In: European Conference Computer Vision, pp 120\u2013137. https:\/\/doi.org\/10.48550\/arXiv.2007.04646","DOI":"10.48550\/arXiv.2007.04646"},{"key":"860_CR11","doi-asserted-by":"publisher","unstructured":"Garcia-Hernando G, Yuan S, Baek S, Kim TK (2018) First-person hand action benchmark with RGB-D videos and 3D hand pose annotations. In: Proceedings of the IEEE conference on computer vision and pattern recognition, 409\u2013419. https:\/\/doi.org\/10.48550\/arXiv.1704.0246","DOI":"10.48550\/arXiv.1704.0246"},{"issue":"3","key":"860_CR12","doi-asserted-by":"publisher","first-page":"260","DOI":"10.1145\/292834.292849","volume":"5","author":"K Hinckley","year":"1998","unstructured":"Hinckley K, Pausch R, Proffitt D, Kassell NF (1998a) Two-handed virtual manipulation. ACM Trans Comput Hum Interact 5(3):260\u2013302. https:\/\/doi.org\/10.1145\/292834.292849","journal-title":"ACM Trans Comput Hum Interact"},{"issue":"3","key":"860_CR13","doi-asserted-by":"publisher","first-page":"260","DOI":"10.1145\/292834.292849","volume":"5","author":"K Hinckley","year":"1998","unstructured":"Hinckley K, Pausch R, Proffitt D, Kassell NF (1998b) Two-handed virtual manipulation. ACM Trans Comput Hum Interact (TOCHI) 5(3):260\u2013302. https:\/\/doi.org\/10.1145\/292834.292849","journal-title":"ACM Trans Comput Hum Interact (TOCHI)"},{"key":"860_CR14","doi-asserted-by":"publisher","unstructured":"Hinckley K, Pausch R, Proffitt D (1997) Attention and visual feedback: the bimanual frame of reference. In: Proceedings of the 1997 symposium on interactive 3D graphics, Providence, Rhode Island, USA. 121\u2013ff. https:\/\/doi.org\/10.1145\/253284.253318","DOI":"10.1145\/253284.253318"},{"key":"860_CR15","doi-asserted-by":"publisher","unstructured":"Huber PJ (1992) Robust estimation of a location parameter. In: Breakthroughs in statistics, pp 492\u2013518. https:\/\/doi.org\/10.1007\/978-1-4612-4380-9_35","DOI":"10.1007\/978-1-4612-4380-9_35"},{"key":"860_CR16","doi-asserted-by":"publisher","unstructured":"Insafutdinov E, Pishchulin L, Andres B, Andriluka M, Schiele B (2016) DeeperCut: a deeper, stronger, and faster multi-person pose estimation model. In: European conference on computer vision, pp 34\u201350. https:\/\/doi.org\/10.48550\/arXiv.1605.03170","DOI":"10.48550\/arXiv.1605.03170"},{"key":"860_CR17","doi-asserted-by":"publisher","unstructured":"Kotranza A, Quarles J, Lok B (2006) Mixed reality: are two hands better than one?. In: Proceedings of the ACM symposium on virtual reality software and technology, Limassol, Cyprus. pp 31\u201334. https:\/\/doi.org\/10.1145\/1180495.1180503","DOI":"10.1145\/1180495.1180503"},{"key":"860_CR18","doi-asserted-by":"publisher","unstructured":"Lin F, Wilhelm C, Martinez T (2021) Two-hand global 3D pose estimation using monocular RGB. In: Proceedings of the IEEE\/CVF winter conference on applications of computer vision, pp 2373\u20132381. https:\/\/doi.org\/10.48550\/arXiv.2006.01320","DOI":"10.48550\/arXiv.2006.01320"},{"key":"860_CR19","doi-asserted-by":"publisher","unstructured":"Liu S Jiang H, Xu J, Liu S, Wang X (2021) Semi-supervised 3D hand-object poses estimation with interactions in time. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 14687\u201314697. https:\/\/doi.org\/10.48550\/arXiv.2106.05266","DOI":"10.48550\/arXiv.2106.05266"},{"key":"860_CR20","doi-asserted-by":"publisher","unstructured":"Moon G, Chang JY, Lee KM (2018) V2V-PoseNet: voxel-to-voxel prediction network for accurate 3D hand and human pose estimation from a single depth map. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 5079\u20135088. https:\/\/doi.org\/10.48550\/arXiv.1711.07399","DOI":"10.48550\/arXiv.1711.07399"},{"key":"860_CR21","doi-asserted-by":"publisher","unstructured":"Mueller F, Mehta D, Sotnychenko O, Sridhar S, Casas D, Theobalt C (2017) Real-time hand tracking under occlusion from an egocentric RGB-D sensor. In: Proceedings of the IEEE international conference on computer vision, pp 1154\u20131163. https:\/\/doi.org\/10.48550\/arXiv.1704.02201","DOI":"10.48550\/arXiv.1704.02201"},{"key":"860_CR22","doi-asserted-by":"publisher","unstructured":"Mueller F, Bernard F, Sotnychenko O, Mehta D, Sridhar S, Casas D, Theobalt C (2018) GANerated hands for real-time 3D hand tracking from monocular RGB. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 49\u201359. https:\/\/doi.org\/10.48550\/arXiv.1712.01057","DOI":"10.48550\/arXiv.1712.01057"},{"key":"860_CR23","doi-asserted-by":"publisher","unstructured":"Pishchulin L, Insafutdinov E, Tang S, Andres B, Andriluka M, Gehler PV, Schiele B (2016) DeepCut: joint subset partition and labeling for multi person pose estimation. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 4929\u20134937. https:\/\/doi.org\/10.48550\/arXiv.1511.06645","DOI":"10.48550\/arXiv.1511.06645"},{"key":"860_CR24","doi-asserted-by":"publisher","unstructured":"Rad M, Oberweger M, Lepetit V (2018) Feature mapping for learning fast and accurate 3D pose inference from synthetic images. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 4663\u20134672. https:\/\/doi.org\/10.48550\/arXiv.1712.03904","DOI":"10.48550\/arXiv.1712.03904"},{"key":"860_CR25","doi-asserted-by":"publisher","unstructured":"Ren P, Sun H, Hao J, Wang J, Qi Q, Liao J (2022) Mining multi-view information: a strong self-supervised framework for depth-based 3D hand pose and mesh estimation.\u202fIn: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, 20555\u201320565. https:\/\/doi.org\/10.1109\/CVPR52688.2022.01990","DOI":"10.1109\/CVPR52688.2022.01990"},{"issue":"6","key":"860_CR26","doi-asserted-by":"publisher","first-page":"1","DOI":"10.48550\/arXiv.1609.07306","volume":"35","author":"H Rhodin","year":"2016","unstructured":"Rhodin H, Richardt C, Casas D, Insafutdinov E, Shafiei M, Seidel H-P, Schiele B, Theobalt C (2016) EgoCap: egocentric marker-less motion capture with two fisheye cameras. ACM Trans Grap 35(6):1\u201311. https:\/\/doi.org\/10.48550\/arXiv.1609.07306","journal-title":"ACM Trans Grap"},{"key":"860_CR27","doi-asserted-by":"publisher","unstructured":"Rudnev V, Golyanik V, Wang J, Seidel HP, Mueller F, Elgharib M, Theobalt C (2021) Real-time neural 3D hand pose estimation from an event stream. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp 2385\u201312395. https:\/\/doi.org\/10.48550\/arXiv.2012.06475","DOI":"10.48550\/arXiv.2012.06475"},{"key":"860_CR28","doi-asserted-by":"publisher","first-page":"23","DOI":"10.1109\/CVPR.2013.471","volume":"2013","author":"B Sapp","year":"2013","unstructured":"Sapp B, Taskar B (2013) MODEC: multimodal decomposable models for human pose estimation. IEEE Conf Comput vis Pattern Recognit 2013:23\u201328. https:\/\/doi.org\/10.1109\/CVPR.2013.471","journal-title":"IEEE Conf Comput vis Pattern Recognit"},{"key":"860_CR29","doi-asserted-by":"publisher","unstructured":"Spurr A, Dahiya A, Wang X, Zhang X, Hilliges O (2021) Self-supervised 3D hand pose estimation from monocular RGB via contrastive learning.In: Proceedings of the IEEE\/CVF international conference on computer vision, pp 11230\u201311239. https:\/\/doi.org\/10.48550\/arXiv.2106.05953","DOI":"10.48550\/arXiv.2106.05953"},{"issue":"5","key":"860_CR30","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/2629500","volume":"33","author":"J Tompson","year":"2014","unstructured":"Tompson J, Stein M, Lecun Y, Perlin K (2014) Real-time continuous pose recovery of human hands using convolutional networks. ACM Trans Grap 33(5):1\u201310. https:\/\/doi.org\/10.1145\/2629500","journal-title":"ACM Trans Grap"},{"key":"860_CR31","doi-asserted-by":"publisher","first-page":"102755","DOI":"10.1016\/j.ijhcs.2021.102755","volume":"159","author":"P Vogiatzidakis","year":"2022","unstructured":"Vogiatzidakis P, Koutsabasis P (2022) \u2018Address and command\u2019: two-handed mid-air interactions with multiple home devices. Int J Hum Comput Stud 159:102755. https:\/\/doi.org\/10.1016\/j.ijhcs.2021.102755","journal-title":"Int J Hum Comput Stud"},{"key":"860_CR32","doi-asserted-by":"publisher","unstructured":"Voigt-Antons J N, Kojic T, Ali D, M\u00f6ller S (2020) Influence of hand tracking as a way of interaction in virtual reality on user experience. In: 2020 Twelfth international conference on quality of multimedia experience (QoMEX), Athlone, Ireland, pp 1\u20134. https:\/\/doi.org\/10.1109\/QoMEX48832.2020.9123085","DOI":"10.1109\/QoMEX48832.2020.9123085"},{"key":"860_CR33","doi-asserted-by":"publisher","unstructured":"Wei SE, Ramakrishna V, Kanade T, Sheikh Y (2016) Convolutional pose machines. In: Proceedings of the IEEE conference on computer vision and pattern recognition, 4724\u20134732. https:\/\/doi.org\/10.48550\/arXiv.1602.00134","DOI":"10.48550\/arXiv.1602.00134"},{"key":"860_CR34","doi-asserted-by":"publisher","unstructured":"Xiong F, Zhang B, Xiao Y, Cao Z, Yu T, Zhou JT, Yuan J (2019) A2J: anchor-to-joint regression network for 3D articulated pose estimation from a single depth image. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp 793\u2013802. https:\/\/doi.org\/10.48550\/arXiv.1908.09999","DOI":"10.48550\/arXiv.1908.09999"},{"key":"860_CR35","doi-asserted-by":"publisher","unstructured":"Yang L, Li S, Lee D, Yao A (2019) Aligning latent spaces for 3D hand pose estimation. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp 2335\u20132343. https:\/\/doi.org\/10.1109\/ICCV.2019.00242","DOI":"10.1109\/ICCV.2019.00242"},{"key":"860_CR36","doi-asserted-by":"publisher","unstructured":"Yang L, Li K, Zhan X, Lv J, Xu W, Li J, Lu C (2022) ArtiBoost: boosting articulated 3D hand-object pose estimation via online exploration and synthesis. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 2750\u20132760. https:\/\/doi.org\/10.48550\/arXiv.2109.05488","DOI":"10.48550\/arXiv.2109.05488"},{"key":"860_CR37","doi-asserted-by":"publisher","unstructured":"Zhao Z, Zhao X, Wang Y (2021) TravelNet: self-supervised physically plausible hand motion learning from monocular color images. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp 11666\u201311676. https:\/\/doi.org\/10.1109\/ICCV48922.2021.01146","DOI":"10.1109\/ICCV48922.2021.01146"}],"container-title":["Virtual Reality"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10055-023-00860-6.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10055-023-00860-6\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10055-023-00860-6.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,11,29]],"date-time":"2023-11-29T10:17:34Z","timestamp":1701253054000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10055-023-00860-6"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,9,15]]},"references-count":37,"journal-issue":{"issue":"4","published-print":{"date-parts":[[2023,12]]}},"alternative-id":["860"],"URL":"https:\/\/doi.org\/10.1007\/s10055-023-00860-6","relation":{},"ISSN":["1359-4338","1434-9957"],"issn-type":[{"type":"print","value":"1359-4338"},{"type":"electronic","value":"1434-9957"}],"subject":[],"published":{"date-parts":[[2023,9,15]]},"assertion":[{"value":"2 February 2023","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"21 August 2023","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"15 September 2023","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors have no relevant financial or nonfinancial interests to disclose.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}]}}