{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,6]],"date-time":"2026-05-06T15:45:56Z","timestamp":1778082356558,"version":"3.51.4"},"publisher-location":"Cham","reference-count":32,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031783531","type":"print"},{"value":"9783031783548","type":"electronic"}],"license":[{"start":{"date-parts":[[2024,12,4]],"date-time":"2024-12-04T00:00:00Z","timestamp":1733270400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,12,4]],"date-time":"2024-12-04T00:00:00Z","timestamp":1733270400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-78354-8_12","type":"book-chapter","created":{"date-parts":[[2024,12,3]],"date-time":"2024-12-03T10:31:08Z","timestamp":1733221868000},"page":"178-193","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":4,"title":["SHARP: Segmentation of\u00a0Hands and\u00a0Arms by\u00a0Range Using Pseudo-depth for\u00a0Enhanced Egocentric 3D Hand Pose Estimation and\u00a0Action Recognition"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-6048-3425","authenticated-orcid":false,"given":"Wiktor","family":"Mucha","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5918-9029","authenticated-orcid":false,"given":"Michael","family":"Wray","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5217-2854","authenticated-orcid":false,"given":"Martin","family":"Kampel","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2024,12,4]]},"reference":[{"key":"12_CR1","doi-asserted-by":"publisher","unstructured":"Aboukhadra, A., Malik, J., Elhayek, A., Robertini, N., Stricker, D.: THOR-Net: end-to-end graformer-based realistic two hands and object reconstruction with self-supervision. In: Proceedings of the IEEE\/CVF Winter Conference on Applications of Computer Vision, pp. 1001\u20131010 (2023). https:\/\/doi.org\/10.1109\/WACV56688.2023.00106","DOI":"10.1109\/WACV56688.2023.00106"},{"key":"12_CR2","doi-asserted-by":"publisher","unstructured":"Carreira, J., Zisserman, A.: Quo vadis, action recognition? A new model and the kinetics dataset. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 6299\u20136308 (2017). https:\/\/doi.org\/10.1109\/CVPR.2017.502","DOI":"10.1109\/CVPR.2017.502"},{"key":"12_CR3","unstructured":"Cartas, A., Radeva, P., Dimiccoli, M.: Contextually driven first-person action recognition from videos. In: Presentation at EPIC@ ICCV2017 Workshop, p.\u00a08 (2017)"},{"key":"12_CR4","unstructured":"Chen, W., Fu, Z., Yang, D., Deng, J.: Single-image depth perception in the wild. In: Advances in Neural Information Processing Systems 29 (2016)"},{"key":"12_CR5","doi-asserted-by":"publisher","unstructured":"Cho, H., Kim, C., Kim, J., Lee, S., Ismayilzada, E., Baek, S.: Transformer-based unified recognition of two hands manipulating objects. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 4769\u20134778 (2023). https:\/\/doi.org\/10.1109\/CVPR52729.2023.00462","DOI":"10.1109\/CVPR52729.2023.00462"},{"key":"12_CR6","doi-asserted-by":"publisher","unstructured":"Damen, D., et al.: Scaling egocentric vision: the EPIC-KITCHENS dataset. In: European Conference on Computer Vision (ECCV) (2018). https:\/\/doi.org\/10.1007\/978-3-030-01225-0_44","DOI":"10.1007\/978-3-030-01225-0_44"},{"key":"12_CR7","doi-asserted-by":"publisher","unstructured":"Das, P., Ortega, A.: Symmetric sub-graph spatio-temporal graph convolution and its application in complex activity recognition. In: ICASSP 2021-2021 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 3215\u20133219. IEEE (2021). https:\/\/doi.org\/10.1109\/ICASSP39728.2021.9413833","DOI":"10.1109\/ICASSP39728.2021.9413833"},{"key":"12_CR8","unstructured":"Dosovitskiy, A., et al.: An image is worth $$16\\times 16$$ words: transformers for image recognition at scale. In: International Conference on Learning Representations (2021)"},{"key":"12_CR9","unstructured":"Eigen, D., Puhrsch, C., Fergus, R.: Depth map prediction from a single image using a multi-scale deep network. In: Advances in Neural Information Processing Systems 27 (2014)"},{"key":"12_CR10","doi-asserted-by":"publisher","unstructured":"Feichtenhofer, C., Fan, H., Malik, J., He, K.: Slowfast networks for video recognition. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 6202\u20136211 (2019). https:\/\/doi.org\/10.1109\/ICCV.2019.00630","DOI":"10.1109\/ICCV.2019.00630"},{"key":"12_CR11","doi-asserted-by":"publisher","unstructured":"Garcia-Hernando, G., Yuan, S., Baek, S., Kim, T.K.: First-person hand action benchmark with RGB-D videos and 3D hand pose annotations. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 409\u2013419 (2018). https:\/\/doi.org\/10.1109\/CVPR.2018.00050","DOI":"10.1109\/CVPR.2018.00050"},{"key":"12_CR12","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"740","DOI":"10.1007\/978-3-319-46484-8_45","volume-title":"Computer Vision \u2013 ECCV 2016","author":"R Garg","year":"2016","unstructured":"Garg, R., B.G., V.K., Carneiro, G., Reid, I.: Unsupervised CNN for single view depth estimation: geometry to the rescue. In: Leibe, B., Matas, J., Sebe, N., Welling, M. (eds.) ECCV 2016. LNCS, vol. 9912, pp. 740\u2013756. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-46484-8_45"},{"key":"12_CR13","doi-asserted-by":"publisher","unstructured":"Godard, C., Mac\u00a0Aodha, O., Firman, M., Brostow, G.J.: Digging into self-supervised monocular depth estimation. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 3828\u20133838 (2019). https:\/\/doi.org\/10.1109\/ICCV.2019.00393","DOI":"10.1109\/ICCV.2019.00393"},{"key":"12_CR14","doi-asserted-by":"publisher","unstructured":"Grauman, K., et\u00a0al.: Ego4D: around the world in 3,000 hours of egocentric video. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 18995\u201319012 (2022). https:\/\/doi.org\/10.1109\/CVPR52688.2022.01842","DOI":"10.1109\/CVPR52688.2022.01842"},{"key":"12_CR15","doi-asserted-by":"publisher","unstructured":"Hasson, Y., Tekin, B., Bogo, F., Laptev, I., Pollefeys, M., Schmid, C.: Leveraging photometric consistency over time for sparsely supervised hand-object reconstruction. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 571\u2013580 (2020). https:\/\/doi.org\/10.1109\/CVPR42600.2020.00065","DOI":"10.1109\/CVPR42600.2020.00065"},{"key":"12_CR16","doi-asserted-by":"publisher","unstructured":"Kwon, T., Tekin, B., St\u00fchmer, J., Bogo, F., Pollefeys, M.: H2O: two hands manipulating objects for first person interaction recognition. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV), pp. 10138\u201310148 (2021). https:\/\/doi.org\/10.1109\/iccv48922.2021.00998","DOI":"10.1109\/iccv48922.2021.00998"},{"key":"12_CR17","doi-asserted-by":"publisher","unstructured":"Mucha, W., Cuconasu, F., Etori, N.A., Kalokyri, V., Trappolini, G.: TEXT2TASTE: a versatile egocentric vision system for intelligent reading assistance using large language model. In: Computers Helping People with Special Needs, pp. 285\u2013291. Springer, Cham (2024). https:\/\/doi.org\/10.1007\/978-3-031-62849-8_35","DOI":"10.1007\/978-3-031-62849-8_35"},{"key":"12_CR18","doi-asserted-by":"publisher","unstructured":"Mucha, W., Kampel, M.: In my perspective, in my hands: accurate egocentric 2D hand pose and action recognition. In: 2024 IEEE 18th International Conference on Automatic Face and Gesture Recognition (FG), pp.\u00a01\u20139 (2024). https:\/\/doi.org\/10.1109\/FG59268.2024.10582035","DOI":"10.1109\/FG59268.2024.10582035"},{"key":"12_CR19","doi-asserted-by":"publisher","unstructured":"Mueller, F., Mehta, D., Sotnychenko, O., Sridhar, S., Casas, D., Theobalt, C.: Real-time hand tracking under occlusion from an egocentric RGB-D sensor. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 1154\u20131163 (2017). https:\/\/doi.org\/10.1109\/CVPR.2019.01231","DOI":"10.1109\/CVPR.2019.01231"},{"key":"12_CR20","doi-asserted-by":"publisher","unstructured":"Nguyen, X.S., Brun, L., L\u00e9zoray, O., Bougleux, S.: A neural network based on SPD manifold learning for skeleton-based hand gesture recognition. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 12036\u201312045 (2019). https:\/\/doi.org\/10.1109\/CVPR.2019.01231","DOI":"10.1109\/CVPR.2019.01231"},{"key":"12_CR21","doi-asserted-by":"publisher","first-page":"175","DOI":"10.1016\/j.neucom.2021.11.081","volume":"472","author":"A N\u00fa\u00f1ez-Marcos","year":"2022","unstructured":"N\u00fa\u00f1ez-Marcos, A., Azkune, G., Arganda-Carreras, I.: Egocentric vision-based action recognition: a survey. Neurocomputing 472, 175\u2013197 (2022). https:\/\/doi.org\/10.1016\/j.neucom.2021.11.081","journal-title":"Neurocomputing"},{"key":"12_CR22","doi-asserted-by":"publisher","unstructured":"Ohkawa, T., He, K., Sener, F., Hodan, T., Tran, L., Keskin, C.: AssemblyHands: towards egocentric activity understanding via 3D hand pose estimation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 12999\u201313008 (2023). https:\/\/doi.org\/10.1109\/CVPR52729.2023.01249","DOI":"10.1109\/CVPR52729.2023.01249"},{"key":"12_CR23","doi-asserted-by":"publisher","unstructured":"Ranftl, R., Bochkovskiy, A., Koltun, V.: Vision transformers for dense prediction. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 12179\u201312188 (2021). https:\/\/doi.org\/10.1109\/ICCV48922.2021.01196","DOI":"10.1109\/ICCV48922.2021.01196"},{"issue":"3","key":"12_CR24","doi-asserted-by":"publisher","first-page":"1623","DOI":"10.1109\/TPAMI.2020.3019967","volume":"44","author":"R Ranftl","year":"2020","unstructured":"Ranftl, R., Lasinger, K., Hafner, D., Schindler, K., Koltun, V.: Towards robust monocular depth estimation: mixing datasets for zero-shot cross-dataset transfer. IEEE Trans. Pattern Anal. Mach. Intell. 44(3), 1623\u20131637 (2020). https:\/\/doi.org\/10.1109\/TPAMI.2020.3019967","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"12_CR25","unstructured":"Tan, M., Le, Q.: EfficientNetV2: smaller models and faster training. In: International Conference on Machine Learning, pp. 10096\u201310106. PMLR (2021)"},{"key":"12_CR26","doi-asserted-by":"publisher","unstructured":"Tekin, B., Bogo, F., Pollefeys, M.: H+O: unified egocentric recognition of 3D hand-object poses and interactions. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 4511\u20134520 (2019). https:\/\/doi.org\/10.1109\/CVPR.2019.00464","DOI":"10.1109\/CVPR.2019.00464"},{"key":"12_CR27","doi-asserted-by":"publisher","unstructured":"Wang, C.Y., Bochkovskiy, A., Liao, H.Y.M.: YOLOv7: trainable bag-of-freebies sets new state-of-the-art for real-time object detectors. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 7464\u20137475 (2023). https:\/\/doi.org\/10.48550\/arXiv.2207.02696","DOI":"10.48550\/arXiv.2207.02696"},{"key":"12_CR28","doi-asserted-by":"publisher","unstructured":"Wang, X., Girshick, R., Gupta, A., He, K.: Non-local neural networks. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 7794\u20137803 (2018). https:\/\/doi.org\/10.1109\/CVPR.2018.00813","DOI":"10.1109\/CVPR.2018.00813"},{"key":"12_CR29","doi-asserted-by":"publisher","unstructured":"Wang, X., et al.: HoloAssist: an egocentric human interaction dataset for interactive AI assistants in the real world. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV), pp. 20270\u201320281 (2023). https:\/\/doi.org\/10.1109\/ICCV51070.2023.01854","DOI":"10.1109\/ICCV51070.2023.01854"},{"key":"12_CR30","doi-asserted-by":"publisher","unstructured":"Wen, Y., Pan, H., Yang, L., Pan, J., Komura, T., Wang, W.: Hierarchical temporal transformer for 3D hand pose estimation and action recognition from egocentric RGB videos. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 21243\u201321253 (2023). https:\/\/doi.org\/10.1109\/CVPR52729.2023.02035","DOI":"10.1109\/CVPR52729.2023.02035"},{"key":"12_CR31","doi-asserted-by":"publisher","unstructured":"Yamazaki, W., Ding, M., Takamatsu, J., Ogasawara, T.: Hand pose estimation and motion recognition using egocentric RGB-D video. In: 2017 IEEE International Conference on Robotics and Biomimetics (ROBIO), pp. 147\u2013152. IEEE (2017). https:\/\/doi.org\/10.1109\/ROBIO.2017.8324409","DOI":"10.1109\/ROBIO.2017.8324409"},{"key":"12_CR32","doi-asserted-by":"publisher","unstructured":"Yan, S., Xiong, Y., Lin, D.: Spatial temporal graph convolutional networks for skeleton-based action recognition. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol.\u00a032 (2018). https:\/\/doi.org\/10.1609\/aaai.v32i1.12328","DOI":"10.1609\/aaai.v32i1.12328"}],"container-title":["Lecture Notes in Computer Science","Pattern Recognition"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-78354-8_12","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,12,3]],"date-time":"2024-12-03T11:28:48Z","timestamp":1733225328000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-78354-8_12"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,12,4]]},"ISBN":["9783031783531","9783031783548"],"references-count":32,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-78354-8_12","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,12,4]]},"assertion":[{"value":"4 December 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ICPR","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Pattern Recognition","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Kolkata","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"India","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"1 December 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"5 December 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"27","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"icpr2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/icpr2024.org\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}