{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,18]],"date-time":"2026-03-18T21:42:55Z","timestamp":1773870175174,"version":"3.50.1"},"reference-count":40,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2026,7,1]],"date-time":"2026-07-01T00:00:00Z","timestamp":1782864000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2026,7,1]],"date-time":"2026-07-01T00:00:00Z","timestamp":1782864000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/legal\/tdmrep-license"},{"start":{"date-parts":[[2026,7,1]],"date-time":"2026-07-01T00:00:00Z","timestamp":1782864000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-017"},{"start":{"date-parts":[[2026,7,1]],"date-time":"2026-07-01T00:00:00Z","timestamp":1782864000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"},{"start":{"date-parts":[[2026,7,1]],"date-time":"2026-07-01T00:00:00Z","timestamp":1782864000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-012"},{"start":{"date-parts":[[2026,7,1]],"date-time":"2026-07-01T00:00:00Z","timestamp":1782864000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2026,7,1]],"date-time":"2026-07-01T00:00:00Z","timestamp":1782864000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-004"}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Expert Systems with Applications"],"published-print":{"date-parts":[[2026,7]]},"DOI":"10.1016\/j.eswa.2026.131949","type":"journal-article","created":{"date-parts":[[2026,3,9]],"date-time":"2026-03-09T09:41:33Z","timestamp":1773049293000},"page":"131949","update-policy":"https:\/\/doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":0,"special_numbering":"C","title":["DEPTH: Disentangled embeddings and priors via two-stage heterogeneous-fusion"],"prefix":"10.1016","volume":"318","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-2393-4733","authenticated-orcid":false,"given":"Kuo","family":"Li","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0009-0003-1095-3972","authenticated-orcid":false,"given":"Wei","family":"Jin","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3688-6766","authenticated-orcid":false,"given":"Nannan","family":"Li","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0814-3121","authenticated-orcid":false,"given":"Kan","family":"Huang","sequence":"additional","affiliation":[]}],"member":"78","reference":[{"key":"10.1016\/j.eswa.2026.131949_bib0001","series-title":"Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition","first-page":"1067","article-title":"Pushing the envelope for RGB-based dense 3D hand pose estimation via neural rendering","author":"Baek","year":"2019"},{"key":"10.1016\/j.eswa.2026.131949_bib0002","series-title":"Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition","first-page":"10843","article-title":"3D hand shape and pose from images in the wild","author":"Boukhayma","year":"2019"},{"key":"10.1016\/j.eswa.2026.131949_bib0003","doi-asserted-by":"crossref","first-page":"3739","DOI":"10.1109\/TPAMI.2020.2993627","article-title":"3D hand pose estimation using synthetic data and weakly labeled RGB images","volume":"43","author":"Cai","year":"2020","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"10.1016\/j.eswa.2026.131949_bib0004","series-title":"Proceedings of the European conference on computer vision (ECCV)","first-page":"666","article-title":"Weakly-supervised 3D hand pose estimation from monocular RGB images","author":"Cai","year":"2018"},{"key":"10.1016\/j.eswa.2026.131949_bib0005","series-title":"Proceedings of the IEEE\/CVF winter conference on applications of computer vision","first-page":"411","article-title":"DGGAN: Depth-image guided generative adversarial networks for disentangling RGB and depth images in 3D hand pose estimation","author":"Chen","year":"2020"},{"key":"10.1016\/j.eswa.2026.131949_bib0006","doi-asserted-by":"crossref","unstructured":"Chen, P., Chen, Y., & Yang, D. et al. I2UV-HandNet: Image-to-UV prediction network for accurate and high-fidelity 3D hand mesh modelingProceedings of the IEEE\/CVF international conference on computer vision. 12929-12938. (vol. 2021).","DOI":"10.1109\/ICCV48922.2021.01269"},{"key":"10.1016\/j.eswa.2026.131949_bib0007","doi-asserted-by":"crossref","unstructured":"Chen, X., Song, Z., Jiang, X.,et al. Handos: 3D hand reconstruction in one stage. Proceedings of the Computer Vision and Pattern Recognition Conference, 2025, 17304\u201317314.","DOI":"10.1109\/CVPR52734.2025.01613"},{"key":"10.1016\/j.eswa.2026.131949_bib0008","unstructured":"An, S., Dai, S., & Ansari, M. et al. (2025). ReJSHand: Efficient real-time hand pose estimation and mesh reconstruction using refined joint and skeleton features. Technical ReportarXiv: 2503.05995."},{"key":"10.1016\/j.eswa.2026.131949_bib0009","series-title":"Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition","first-page":"10833","article-title":"3D hand shape and pose estimation from a single RGB image","author":"Ge","year":"2019"},{"key":"10.1016\/j.eswa.2026.131949_bib0010","doi-asserted-by":"crossref","DOI":"10.1097\/JS9.0000000000004413","article-title":"Current concerns and future directions of large language model ChatGPT in medicine: A machine-learning-driven global-scale bibliometric analysis","author":"Guo","year":"2025","journal-title":"International Journal of Surgery"},{"issue":"1","key":"10.1016\/j.eswa.2026.131949_bib0011","doi-asserted-by":"crossref","first-page":"223","DOI":"10.1186\/s12943-024-02140-6","article-title":"Artificial intelligence alphafold model for molecular biology and drug discovery: A machine-learning-driven informatics investigation","volume":"23","author":"Guo","year":"2024","journal-title":"Molecular Cancer"},{"key":"10.1016\/j.eswa.2026.131949_bib0012","series-title":"Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition","first-page":"11807","article-title":"Learning joint reconstruction of hands and manipulated objects","author":"Hasson","year":"2019"},{"key":"10.1016\/j.eswa.2026.131949_bib0013","series-title":"CVPR","article-title":"Delving deep into rectifiers: Surpassing human-level performance on ImageNet classification","author":"He","year":"2015"},{"issue":"4","key":"10.1016\/j.eswa.2026.131949_bib0014","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1145\/3197517.3201302","article-title":"Robust solving of optical motion capture data by denoising","volume":"37","author":"Holden","year":"2018","journal-title":"ACM Transactions on Graphics (TOG)"},{"key":"10.1016\/j.eswa.2026.131949_bib0015","series-title":"Proceedings of the European conference on computer vision (ECCV)","first-page":"118","article-title":"Hand pose estimation via latent 2.5D heatmap regression","author":"Iqbal","year":"2018"},{"key":"10.1016\/j.eswa.2026.131949_bib0016","series-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","first-page":"8846","article-title":"A2j-transformer: Anchor-to-joint transformer network for 3D interacting hand pose estimation from a single RGB image","author":"Jiang","year":"2023"},{"key":"10.1016\/j.eswa.2026.131949_bib0017","series-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","first-page":"21169","article-title":"Im2hands: Learning attentive implicit representation of interacting two-hand shapes","author":"Lee","year":"2023"},{"key":"10.1016\/j.eswa.2026.131949_bib0018","first-page":"2752","article-title":"Multi-task deep learning for real-time 3D human pose estimation and action recognition","volume":"43","author":"Luvizon","year":"2020","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"issue":"12","key":"10.1016\/j.eswa.2026.131949_bib0019","doi-asserted-by":"crossref","first-page":"8962","DOI":"10.1109\/TPAMI.2021.3122874","article-title":"HandVoxNet : 3D hand shape and pose estimation using voxel-based neural networks","author":"Malik","year":"2021","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"10.1016\/j.eswa.2026.131949_bib0020","series-title":"Proceedings of the IEEE conference on computer vision and pattern recognition.","first-page":"5079-5088","article-title":"V2V-PoseNet: Voxel-to-voxel prediction network for accurate 3D hand and human pose estimation from a single depth map","author":"Moon","year":"2018"},{"key":"10.1016\/j.eswa.2026.131949_bib0021","series-title":"Proceedings of the IEEE conference on computer vision and pattern recognition","first-page":"49-59","article-title":"Ganerated hands for real-time 3D hand tracking from monocular RGB","author":"Mueller","year":"2018"},{"key":"10.1016\/j.eswa.2026.131949_bib0022","series-title":"Proceedings of the IEEE international conference on computer vision","first-page":"1154-1163","article-title":"Real-time hand tracking under occlusion from an egocentric RGB-D sensor","author":"Mueller","year":"2017"},{"key":"10.1016\/j.eswa.2026.131949_bib0023","series-title":"Asian conference on computer vision","article-title":"Markerless and efficient 26-DoF hand pose recovery","author":"Oikonomidis","year":"2010"},{"key":"10.1016\/j.eswa.2026.131949_bib0024","series-title":"2018 IEEE Winter Conference on Applications of Computer Vision (WACV)","first-page":"436-445","article-title":"Using a single RGB frame for real time 3D hand pose estimation in the wild","author":"Panteleris","year":"2018"},{"key":"10.1016\/j.eswa.2026.131949_bib0025","series-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","first-page":"9826","article-title":"Reconstructing hands in 3D with transformers","author":"Pavlakos","year":"2024"},{"key":"10.1016\/j.eswa.2026.131949_bib0026","series-title":"Proceedings of the IEEE conference on computer vision and pattern recognition.","first-page":"7025-7034","article-title":"Coarse-to-fine volumetric prediction for single-image 3D human pose","author":"Pavlakos","year":"2017"},{"key":"10.1016\/j.eswa.2026.131949_bib0027","series-title":"Proceedings of the IEEE conference on computer vision and pattern recognition","article-title":"Realtime and robust hand tracking from depth","author":"Qian","year":"2014"},{"key":"10.1016\/j.eswa.2026.131949_bib0028","unstructured":"Romero, J., Tzionas, D., & Black, M. (2022). Embodied hands: Modeling and capturing hands and bodies together. Technical ReportarXiv: 2201.02610."},{"key":"10.1016\/j.eswa.2026.131949_bib0029","series-title":"Medical image computing and computer-assisted intervention-MICCAI 2015: 18th international conference","first-page":"234","article-title":"U-Net: Convolutional networks for biomedical image segmentation","author":"Ronneberger","year":"2015"},{"key":"10.1016\/j.eswa.2026.131949_bib0030","doi-asserted-by":"crossref","unstructured":"Spurr, A., Song, J., Park, S., et al. Cross-modal deep variational hand pose estimationProceedings of the IEEE conference on computer vision and pattern recognition(vol. 2018). 89-98.","DOI":"10.1109\/CVPR.2018.00017"},{"issue":"5","key":"10.1016\/j.eswa.2026.131949_bib0031","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1145\/2629500","article-title":"Real-time continuous pose recovery of human hands using convolutional networks","volume":"33","author":"Tompson","year":"2014","journal-title":"ACM Transactions on Graphics (ToG)"},{"key":"10.1016\/j.eswa.2026.131949_bib0032","series-title":"Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition","first-page":"10965","article-title":"Monocular total capture: Posing face, body, and hands in the wild","author":"Xiang","year":"2019"},{"key":"10.1016\/j.eswa.2026.131949_bib0033","series-title":"Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition","first-page":"16105","article-title":"Graph stacked hourglass networks for 3D human pose estimation","author":"Xu","year":"2021"},{"key":"10.1016\/j.eswa.2026.131949_bib0034","doi-asserted-by":"crossref","unstructured":"Yang, L., Li, J., Xu, W., et al. (2020). Bihand: Recovering hand mesh with multi-stage bisected hourglass networks. Technical ReportarXiv: 2008.05079.","DOI":"10.5244\/C.34.71"},{"key":"10.1016\/j.eswa.2026.131949_bib0035","series-title":"Proceedings of the IEEE\/CVF international conference on computer vision","first-page":"2335-2343","article-title":"Aligning latent spaces for 3D hand pose estimation","author":"Yang","year":"2019"},{"key":"10.1016\/j.eswa.2026.131949_bib0036","series-title":"2017 IEEE international conference on image processing","first-page":"982","article-title":"A hand pose tracking benchmark from stereo matching","author":"Zhang","year":"2017"},{"key":"10.1016\/j.eswa.2026.131949_bib0037","series-title":"Proceedings of the IEEE\/CVF international conference on computer vision","first-page":"2354","article-title":"End-to-end hand mesh recovery from a monocular RGB image","author":"Zhang","year":"2019"},{"key":"10.1016\/j.eswa.2026.131949_bib0038","series-title":"Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition","first-page":"5346","article-title":"Monocular real-time hand shape and motion capture using multi-modal data","author":"Zhou","year":"2020"},{"key":"10.1016\/j.eswa.2026.131949_bib0039","series-title":"Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition","first-page":"1367","article-title":"A simple baseline for efficient hand mesh reconstruction","author":"Zhou","year":"2024"},{"key":"10.1016\/j.eswa.2026.131949_bib0040","series-title":"Proceedings of the IEEE international conference on computer vision","first-page":"4903-4911","article-title":"Learning to estimate 3D hand pose from single RGB images","author":"Zimmermann","year":"2017"}],"container-title":["Expert Systems with Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0957417426008626?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0957417426008626?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2026,3,18]],"date-time":"2026-03-18T14:22:23Z","timestamp":1773843743000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S0957417426008626"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,7]]},"references-count":40,"alternative-id":["S0957417426008626"],"URL":"https:\/\/doi.org\/10.1016\/j.eswa.2026.131949","relation":{},"ISSN":["0957-4174"],"issn-type":[{"value":"0957-4174","type":"print"}],"subject":[],"published":{"date-parts":[[2026,7]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"DEPTH: Disentangled embeddings and priors via two-stage heterogeneous-fusion","name":"articletitle","label":"Article Title"},{"value":"Expert Systems with Applications","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.eswa.2026.131949","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2026 Elsevier Ltd. All rights are reserved, including those for text and data mining, AI training, and similar technologies.","name":"copyright","label":"Copyright"}],"article-number":"131949"}}