{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,3]],"date-time":"2026-06-03T14:02:06Z","timestamp":1780495326899,"version":"3.54.1"},"reference-count":39,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2026,11,1]],"date-time":"2026-11-01T00:00:00Z","timestamp":1793491200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2026,11,1]],"date-time":"2026-11-01T00:00:00Z","timestamp":1793491200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/legal\/tdmrep-license"},{"start":{"date-parts":[[2026,11,1]],"date-time":"2026-11-01T00:00:00Z","timestamp":1793491200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-017"},{"start":{"date-parts":[[2026,11,1]],"date-time":"2026-11-01T00:00:00Z","timestamp":1793491200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"},{"start":{"date-parts":[[2026,11,1]],"date-time":"2026-11-01T00:00:00Z","timestamp":1793491200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-012"},{"start":{"date-parts":[[2026,11,1]],"date-time":"2026-11-01T00:00:00Z","timestamp":1793491200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2026,11,1]],"date-time":"2026-11-01T00:00:00Z","timestamp":1793491200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-004"}],"funder":[{"DOI":"10.13039\/501100012166","name":"National Key Research and Development Program of China","doi-asserted-by":"publisher","award":["2024YFB4709800"],"award-info":[{"award-number":["2024YFB4709800"]}],"id":[{"id":"10.13039\/501100012166","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Pattern Recognition"],"published-print":{"date-parts":[[2026,11]]},"DOI":"10.1016\/j.patcog.2026.113530","type":"journal-article","created":{"date-parts":[[2026,3,18]],"date-time":"2026-03-18T09:20:48Z","timestamp":1773825648000},"page":"113530","update-policy":"https:\/\/doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":0,"special_numbering":"PA","title":["Adaptive learning from noisy estimated depth maps benefits monocular RGB-based 3D human pose estimation"],"prefix":"10.1016","volume":"179","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-6332-8316","authenticated-orcid":false,"given":"Mengyuan","family":"Liu","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-9390-3343","authenticated-orcid":false,"given":"Jingting","family":"Liu","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"78","reference":[{"key":"10.1016\/j.patcog.2026.113530_bib0001","article-title":"FaTNET: feature-alignment transformer network for human pose transfer","volume":"165","author":"Luo","year":"2025","journal-title":"PR"},{"key":"10.1016\/j.patcog.2026.113530_bib0002","first-page":"0093","article-title":"Hybrid directed hypergraph learning and forecasting of skeleton-based human poses","volume":"5","author":"Cui","year":"2024","journal-title":"CBS"},{"key":"10.1016\/j.patcog.2026.113530_bib0003","first-page":"0102","article-title":"Merge-and-split graph convolutional network for skeleton-based interaction recognition","volume":"5","author":"Wang","year":"2024","journal-title":"CBS"},{"key":"10.1016\/j.patcog.2026.113530_bib0004","first-page":"0100","article-title":"A survey on 3d skeleton-based action recognition using learning method","volume":"5","author":"Ren","year":"2024","journal-title":"CBS"},{"key":"10.1016\/j.patcog.2026.113530_bib0005","series-title":"CVPR","first-page":"7103","article-title":"Cascaded pyramid network for multi-person pose estimation","author":"Chen","year":"2018"},{"key":"10.1016\/j.patcog.2026.113530_bib0006","series-title":"CVPR","first-page":"7035","article-title":"3D human pose estimation= 2d pose estimation+ matching","author":"Chen","year":"2017"},{"key":"10.1016\/j.patcog.2026.113530_bib0007","article-title":"DGFormer: Dynamic graph transformer for 3D human pose estimation","volume":"152","author":"Chen","year":"2024","journal-title":"PR"},{"key":"10.1016\/j.patcog.2026.113530_bib0008","article-title":"GraphMLP: a graph MLP-like architecture for 3D human pose estimation","volume":"158","author":"Li","year":"2025","journal-title":"PR"},{"key":"10.1016\/j.patcog.2026.113530_bib0009","article-title":"A single 2d pose with context is worth hundreds for 3d human pose estimation","volume":"36","author":"Zhao","year":"2023","journal-title":"NeurlPS"},{"key":"10.1016\/j.patcog.2026.113530_bib0010","series-title":"CVPR","first-page":"7307","article-title":"Ordinal depth supervision for 3d human pose estimation","author":"Pavlakos","year":"2018"},{"key":"10.1016\/j.patcog.2026.113530_bib0011","series-title":"ICCV","first-page":"3467","article-title":"Monocular 3d human pose estimation by predicting depth on joints","author":"Nie","year":"2017"},{"issue":"7","key":"10.1016\/j.patcog.2026.113530_bib0012","doi-asserted-by":"crossref","first-page":"1325","DOI":"10.1109\/TPAMI.2013.248","article-title":"Human3. 6m: large scale datasets and predictive methods for 3d human sensing in natural environments","volume":"36","author":"Ionescu","year":"2013","journal-title":"TPAMI"},{"key":"10.1016\/j.patcog.2026.113530_bib0013","series-title":"3DV","first-page":"506","article-title":"Monocular 3d human pose estimation in the wild using improved CNN supervision","author":"Mehta","year":"2017"},{"key":"10.1016\/j.patcog.2026.113530_bib0014","series-title":"CVPR","first-page":"15","article-title":"Human detection using depth information by kinect","author":"Xia","year":"2011"},{"key":"10.1016\/j.patcog.2026.113530_bib0015","series-title":"CVPR","first-page":"7025","article-title":"Coarse-to-fine volumetric prediction for single-image 3D human pose","author":"Pavlakos","year":"2017"},{"key":"10.1016\/j.patcog.2026.113530_bib0016","series-title":"CVPR","first-page":"5693","article-title":"Deep high-resolution representation learning for human pose estimation","author":"Sun","year":"2019"},{"key":"10.1016\/j.patcog.2026.113530_bib0017","series-title":"CVPR","first-page":"899","article-title":"Deep kinematics analysis for monocular 3d human pose estimation","author":"Xu","year":"2020"},{"issue":"1","key":"10.1016\/j.patcog.2026.113530_bib0018","first-page":"198","article-title":"Anatomy-aware 3d human pose estimation with bone-based pose decomposition","volume":"32","author":"Chen","year":"2021","journal-title":"TCSVT"},{"key":"10.1016\/j.patcog.2026.113530_bib0019","series-title":"ICCV","first-page":"14761","article-title":"Diffusion-based 3d human pose estimation with multi-hypothesis aggregation","author":"Shan","year":"2023"},{"key":"10.1016\/j.patcog.2026.113530_bib0020","series-title":"ICCV","first-page":"15977","article-title":"Diffpose: multi-hypothesis human pose estimation using diffusion models","author":"Holmquist","year":"2023"},{"key":"10.1016\/j.patcog.2026.113530_bib0021","series-title":"CVPR","first-page":"13041","article-title":"Diffpose: toward more reliable 3d pose estimation","author":"Gong","year":"2023"},{"key":"10.1016\/j.patcog.2026.113530_bib0022","series-title":"AAAI","first-page":"882","article-title":"Disentangled diffusion-based 3d human pose estimation with hierarchical spatial and temporal denoiser","volume":"38","author":"Cai","year":"2024"},{"key":"10.1016\/j.patcog.2026.113530_bib0023","first-page":"98717","article-title":"Di2Pose: discrete diffusion model for occluded 3D human pose estimation","volume":"37","author":"Wang","year":"2024","journal-title":"NeurlPS"},{"key":"10.1016\/j.patcog.2026.113530_bib0024","series-title":"WACV","first-page":"6142","article-title":"Back to optimization: diffusion-based zero-shot 3d human pose estimation","author":"Jiang","year":"2024"},{"key":"10.1016\/j.patcog.2026.113530_bib0025","unstructured":"H. Zhang, L. Carlone, CHAMP: Conformalized 3D human multi-hypothesis pose estimators, in: ICLR (2025)."},{"issue":"10","key":"10.1016\/j.patcog.2026.113530_bib0026","doi-asserted-by":"crossref","first-page":"6327","DOI":"10.1109\/TPAMI.2021.3087695","article-title":"Monocular 3d pose estimation via pose grammar and data augmentation","volume":"44","author":"Xu","year":"2021","journal-title":"TPAMI"},{"key":"10.1016\/j.patcog.2026.113530_bib0027","series-title":"CVPR","first-page":"3425","article-title":"Semantic graph convolutional networks for 3d human pose regression","author":"Zhao","year":"2019"},{"key":"10.1016\/j.patcog.2026.113530_bib0028","article-title":"Complexity-aware dynamic gradient shifting: a novel soft supervision training strategy for 3D pose estimation and regression learning","author":"Hossain","year":"2025","journal-title":"TCSVT"},{"key":"10.1016\/j.patcog.2026.113530_bib0029","first-page":"185","article-title":"Depth-based 3D human pose refinement: evaluating the refinet framework","volume":"171","author":"D\u2019Eusanio","year":"2023","journal-title":"PRL"},{"key":"10.1016\/j.patcog.2026.113530_bib0030","article-title":"What uncertainties do we need in bayesian deep learning for computer vision?","volume":"30","author":"Kendall","year":"2017","journal-title":"NeurlPS"},{"key":"10.1016\/j.patcog.2026.113530_bib0031","series-title":"Eur. Conf. Comput. Vis.","first-page":"601","article-title":"Recovering accurate 3d human pose in the wild using imus and a moving camera","author":"Von Marcard","year":"2018"},{"key":"10.1016\/j.patcog.2026.113530_bib0032","series-title":"ECCV","first-page":"740","article-title":"Microsoft coco: common objects in context","author":"Lin","year":"2014"},{"key":"10.1016\/j.patcog.2026.113530_bib0033","series-title":"CVPR","first-page":"10371","article-title":"Depth anything: unleashing the power of large-scale unlabeled data","author":"Yang","year":"2024"},{"key":"10.1016\/j.patcog.2026.113530_bib0034","series-title":"ICCV","first-page":"11656","article-title":"3D human pose estimation with spatial and temporal transformers","author":"Zheng","year":"2021"},{"issue":"12","key":"10.1016\/j.patcog.2026.113530_bib0035","first-page":"7282","article-title":"A geometric knowledge oriented single-frame 2D-to-3D human absolute pose estimation method","volume":"33","author":"Hu","year":"2023","journal-title":"TCSVT"},{"key":"10.1016\/j.patcog.2026.113530_bib0036","first-page":"21875","article-title":"Depth anything v2","volume":"37","author":"Yang","year":"2024","journal-title":"NeurlPS"},{"key":"10.1016\/j.patcog.2026.113530_bib0037","series-title":"WACV","first-page":"6920","article-title":"Motionagformer: enhancing 3d human pose estimation with a transformer-gcnformer network","author":"Mehraban","year":"2024"},{"key":"10.1016\/j.patcog.2026.113530_bib0038","article-title":"STGFormer: Spatio-temporal graphformer for 3D human pose estimation in video","author":"Liu","year":"2025","journal-title":"PR"},{"key":"10.1016\/j.patcog.2026.113530_bib0039","article-title":"Spectral compression transformer with line pose graph for monocular 3D human pose estimation","volume":"172","author":"Zheng","year":"2026","journal-title":"PR"}],"container-title":["Pattern Recognition"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0031320326004966?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0031320326004966?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2026,6,3]],"date-time":"2026-06-03T13:00:21Z","timestamp":1780491621000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S0031320326004966"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,11]]},"references-count":39,"alternative-id":["S0031320326004966"],"URL":"https:\/\/doi.org\/10.1016\/j.patcog.2026.113530","relation":{},"ISSN":["0031-3203"],"issn-type":[{"value":"0031-3203","type":"print"}],"subject":[],"published":{"date-parts":[[2026,11]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"Adaptive learning from noisy estimated depth maps benefits monocular RGB-based 3D human pose estimation","name":"articletitle","label":"Article Title"},{"value":"Pattern Recognition","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.patcog.2026.113530","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2026 Elsevier Ltd. All rights are reserved, including those for text and data mining, AI training, and similar technologies.","name":"copyright","label":"Copyright"}],"article-number":"113530"}}