{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,4]],"date-time":"2026-05-04T12:22:39Z","timestamp":1777897359543,"version":"3.51.4"},"reference-count":50,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T00:00:00Z","timestamp":1780272000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T00:00:00Z","timestamp":1780272000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/legal\/tdmrep-license"},{"start":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T00:00:00Z","timestamp":1780272000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-017"},{"start":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T00:00:00Z","timestamp":1780272000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"},{"start":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T00:00:00Z","timestamp":1780272000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-012"},{"start":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T00:00:00Z","timestamp":1780272000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T00:00:00Z","timestamp":1780272000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-004"}],"funder":[{"DOI":"10.13039\/501100011796","name":"Beijing Polytechnic College","doi-asserted-by":"publisher","award":["BGY2022KY-01Z"],"award-info":[{"award-number":["BGY2022KY-01Z"]}],"id":[{"id":"10.13039\/501100011796","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62403017"],"award-info":[{"award-number":["62403017"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100004826","name":"Beijing Natural Science Foundation","doi-asserted-by":"publisher","award":["4244088"],"award-info":[{"award-number":["4244088"]}],"id":[{"id":"10.13039\/501100004826","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Neurocomputing"],"published-print":{"date-parts":[[2026,6]]},"DOI":"10.1016\/j.neucom.2026.133333","type":"journal-article","created":{"date-parts":[[2026,3,11]],"date-time":"2026-03-11T23:50:44Z","timestamp":1773273044000},"page":"133333","update-policy":"https:\/\/doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":1,"special_numbering":"C","title":["DDCEFormer: Dual-domain cross enhanced transformer for 3D human pose estimation"],"prefix":"10.1016","volume":"681","author":[{"given":"Deliang","family":"Yang","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yanrong","family":"Ge","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ning","family":"Xu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7569-9894","authenticated-orcid":false,"given":"Rui","family":"Shi","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"78","reference":[{"key":"10.1016\/j.neucom.2026.133333_bib0005","doi-asserted-by":"crossref","DOI":"10.1016\/j.neucom.2024.128049","article-title":"Deep learning for 3D human pose estimation and mesh recovery: a survey","volume":"596","author":"Liu","year":"2024","journal-title":"Neurocomputing."},{"key":"10.1016\/j.neucom.2026.133333_bib0010","series-title":"Proceedings of the 38th International Conference on Neural Information Processing Systems","first-page":"37928","article-title":"Learnability matters: active learning for video captioning","author":"Zhang","year":"2024"},{"key":"10.1016\/j.neucom.2026.133333_bib0015","doi-asserted-by":"crossref","first-page":"2019","DOI":"10.1109\/TIP.2014.2311377","article-title":"Click prediction for web image reranking using multimodal sparse coding","volume":"23","author":"Yu","year":"2014","journal-title":"IEEE Trans. Image Process."},{"key":"10.1016\/j.neucom.2026.133333_bib0020","doi-asserted-by":"crossref","first-page":"1051","DOI":"10.1109\/TMM.2018.2818329","article-title":"Depth pooling based large-scale 3D action recognition with convolutional neural networks","volume":"20","author":"Wang","year":"2018","journal-title":"IEEE Trans. Multimed."},{"key":"10.1016\/j.neucom.2026.133333_bib0025","series-title":"Proceedings of the International Conference on Intelligent Robotics and Applications","first-page":"176","article-title":"3D human pose estimation in video for human-computer\/robot interaction","author":"Huo","year":"2023"},{"key":"10.1016\/j.neucom.2026.133333_bib0030","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1145\/3072959.3073596","article-title":"VNect: real-time 3D human pose estimation with a single RGB camera","volume":"36","author":"Mehta","year":"2017","journal-title":"ACM Trans. Graph."},{"key":"10.1016\/j.neucom.2026.133333_bib0035","doi-asserted-by":"crossref","DOI":"10.1016\/j.neucom.2024.128947","article-title":"ICFNet: interactive-complementary fusion network for monocular 3D human pose estimation","volume":"616","author":"Wang","year":"2025","journal-title":"Neurocomputing."},{"key":"10.1016\/j.neucom.2026.133333_bib0040","doi-asserted-by":"crossref","first-page":"198","DOI":"10.1109\/TCSVT.2021.3057267","article-title":"Anatomy-aware 3D human pose estimation with bone-based pose decomposition","volume":"32","author":"Chen","year":"2022","journal-title":"IEEE Trans. Circuits Syst. Video Technol."},{"key":"10.1016\/j.neucom.2026.133333_bib0045","series-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","first-page":"5064","article-title":"Attention mechanism exploits temporal contexts: real-time 3D human pose reconstruction","author":"Liu","year":"2020"},{"key":"10.1016\/j.neucom.2026.133333_bib0050","series-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","first-page":"7753","article-title":"3D human pose estimation in video with temporal convolutions and semi-supervised training","author":"Pavllo","year":"2019"},{"key":"10.1016\/j.neucom.2026.133333_bib0055","series-title":"Proceedings of the IEEE\/CVF International Conference on Computer Vision","first-page":"2272","article-title":"Exploiting spatial-temporal relationships for 3D pose estimation via graph convolutional networks","author":"Cai","year":"2019"},{"key":"10.1016\/j.neucom.2026.133333_bib0060","series-title":"Proceedings of the European Conference on Computer Vision","first-page":"764","article-title":"Motion guided 3D pose estimation from videos","author":"Wang","year":"2020"},{"key":"10.1016\/j.neucom.2026.133333_bib0065","series-title":"Proceedings of the European Conference on Computer Vision","first-page":"68","article-title":"Exploiting temporal information for 3D human pose estimation","author":"Hossain","year":"2018"},{"key":"10.1016\/j.neucom.2026.133333_bib0070","series-title":"Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition","first-page":"810","article-title":"Recurrent 3D pose sequence machines","author":"Lin","year":"2017"},{"key":"10.1016\/j.neucom.2026.133333_bib0075","series-title":"Proceedings of the IEEE\/CVF International Conference on Computer Vision","first-page":"11656","article-title":"3D human pose estimation with spatial and temporal transformers","author":"Zheng","year":"2021"},{"key":"10.1016\/j.neucom.2026.133333_bib0080","doi-asserted-by":"crossref","first-page":"1282","DOI":"10.1109\/TMM.2022.3141231","article-title":"Exploiting temporal contexts with strided transformer for 3D human pose estimation","volume":"25","author":"Li","year":"2022","journal-title":"IEEE Trans. Multimedia"},{"key":"10.1016\/j.neucom.2026.133333_bib0085","doi-asserted-by":"crossref","DOI":"10.1016\/j.patcog.2023.109631","article-title":"Multi-hypothesis representation learning for transformer-based 3D human pose estimation","volume":"141","author":"Li","year":"2023","journal-title":"Pattern Recognit."},{"key":"10.1016\/j.neucom.2026.133333_bib0090","series-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","first-page":"13232","article-title":"MixSTE: seq2seq mixed spatio-temporal encoder for 3D human pose estimation in video","author":"Zhang","year":"2022"},{"key":"10.1016\/j.neucom.2026.133333_bib0095","article-title":"Attention is all you need","volume":"30","author":"Vaswani","year":"2017","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.neucom.2026.133333_bib0100","doi-asserted-by":"crossref","first-page":"4039","DOI":"10.1109\/TMM.2023.3321438","article-title":"Global and local spatio-temporal encoder for 3D human pose estimation","volume":"26","author":"Wang","year":"2024","journal-title":"IEEE Trans. Multimed."},{"key":"10.1016\/j.neucom.2026.133333_bib0105","doi-asserted-by":"crossref","first-page":"4278","DOI":"10.1109\/TIP.2022.3182269","article-title":"Boosting monocular 3D human pose estimation with part aware attention","volume":"31","author":"Xue","year":"2022","journal-title":"IEEE Trans. Image Process."},{"key":"10.1016\/j.neucom.2026.133333_bib0110","doi-asserted-by":"crossref","DOI":"10.1016\/j.patcog.2024.110925","article-title":"GraphMLP: a graph mlp-like architecture for 3D human pose estimation","volume":"158","author":"Li","year":"2025","journal-title":"Pattern Recogn."},{"key":"10.1016\/j.neucom.2026.133333_bib0115","series-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","first-page":"4790","article-title":"3D human pose estimation with spatio-temporal criss-cross attention","author":"Tang","year":"2023"},{"key":"10.1016\/j.neucom.2026.133333_bib0120","doi-asserted-by":"crossref","DOI":"10.1016\/j.patcog.2025.112239","article-title":"STGFormer: spatio-temporal graphformer for 3D human pose estimation in video","volume":"171","author":"Liu","year":"2026","journal-title":"Pattern Recognit."},{"key":"10.1016\/j.neucom.2026.133333_bib0125","doi-asserted-by":"crossref","first-page":"6191","DOI":"10.1109\/TMM.2023.3347095","article-title":"Frame-padded multiscale transformer for monocular 3D human pose estimation","volume":"26","author":"Zhong","year":"2024","journal-title":"IEEE Trans. Multimedia"},{"key":"10.1016\/j.neucom.2026.133333_bib0130","doi-asserted-by":"crossref","first-page":"1325","DOI":"10.1109\/TPAMI.2013.248","article-title":"Human3.6M: large scale datasets and predictive methods for 3D human sensing in natural environments","volume":"36","author":"Ionescu","year":"2014","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"10.1016\/j.neucom.2026.133333_bib0135","series-title":"Proceedings of the International Conference on 3D Vision (3DV)","first-page":"506","article-title":"Monocular 3D human pose estimation in the wild using improved CNN supervision","author":"Mehta","year":"2017"},{"key":"10.1016\/j.neucom.2026.133333_bib0140","series-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","first-page":"7307","article-title":"Ordinal depth supervision for 3D human pose estimation","author":"Pavlakos","year":"2018"},{"key":"10.1016\/j.neucom.2026.133333_bib0145","series-title":"Proceedings of the European Conference on Computer Vision","first-page":"529","article-title":"Integral human pose regression","author":"Sun","year":"2018"},{"key":"10.1016\/j.neucom.2026.133333_bib0150","doi-asserted-by":"crossref","first-page":"3000","DOI":"10.1109\/TPAMI.2021.3051173","article-title":"HEMlets PoSh: learning part-centric heatmap triplets for 3D human pose and shape estimation","volume":"44","author":"Zhou","year":"2022","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"10.1016\/j.neucom.2026.133333_bib0155","series-title":"Proceedings of the IEEE International Conference on Computer Vision","first-page":"2640","article-title":"A simple yet effective baseline for 3D human pose estimation","author":"Martinez","year":"2017"},{"key":"10.1016\/j.neucom.2026.133333_bib0160","series-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","first-page":"7782","article-title":"RepNet: weakly supervised training of an adversarial reprojection network for 3D human pose estimation","author":"Wandt","year":"2019"},{"key":"10.1016\/j.neucom.2026.133333_bib0165","series-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","first-page":"9887","article-title":"Generating multiple hypotheses for 3D human pose estimation with mixture density network","author":"Li","year":"2019"},{"key":"10.1016\/j.neucom.2026.133333_bib0170","series-title":"Proceedings of the European Conference on Computer Vision","first-page":"483","article-title":"Stacked hourglass networks for human pose estimation","author":"Newell","year":"2016"},{"key":"10.1016\/j.neucom.2026.133333_bib0175","series-title":"Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition","first-page":"7103","article-title":"Cascaded pyramid network for multi-person pose estimation","author":"Chen","year":"2018"},{"key":"10.1016\/j.neucom.2026.133333_bib0180","doi-asserted-by":"crossref","first-page":"7157","DOI":"10.1109\/TPAMI.2022.3222784","article-title":"AlphaPose: whole-body regional multi-person pose estimation and tracking in real-time","volume":"45","author":"Fang","year":"2023","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"10.1016\/j.neucom.2026.133333_bib0185","series-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","first-page":"5693","article-title":"Deep high-resolution representation learning for human pose estimation","author":"Sun","year":"2019"},{"key":"10.1016\/j.neucom.2026.133333_bib0190","series-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","first-page":"4966","article-title":"Sparseness meets deepness: 3D human pose estimation from monocular video","author":"Zhou","year":"2016"},{"key":"10.1016\/j.neucom.2026.133333_bib0195","series-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","first-page":"7025","article-title":"Coarse-to-fine volumetric prediction for single-image 3D human pose","author":"Pavlakos","year":"2017"},{"key":"10.1016\/j.neucom.2026.133333_bib0200","series-title":"Proceedings of the European Conference on Computer Vision","first-page":"668","article-title":"Learning 3D human pose from structure and motion","author":"Dabral","year":"2018"},{"key":"10.1016\/j.neucom.2026.133333_bib0205","series-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","first-page":"16105","article-title":"Graph stacked hourglass networks for 3D human pose estimation","author":"Xu","year":"2021"},{"key":"10.1016\/j.neucom.2026.133333_bib0210","series-title":"Proceedings of the 29th ACM International Conference on Multimedia","first-page":"602","article-title":"Conditional directed graph convolution for 3D human pose estimation","author":"Hu","year":"2021"},{"key":"10.1016\/j.neucom.2026.133333_bib0215","doi-asserted-by":"crossref","first-page":"24","DOI":"10.1007\/s10462-024-11019-3","article-title":"A survey on deep 3D human pose estimation","volume":"58","author":"Neupane","year":"2024","journal-title":"Artif. Intell. Rev."},{"key":"10.1016\/j.neucom.2026.133333_bib0220","series-title":"Proceedings of the IEEE\/CVF Winter Conference on Applications of Computer Vision","first-page":"2903","article-title":"Uplift and upsample: efficient 3D human pose estimation with uplifting transformers","author":"Einfalt","year":"2023"},{"key":"10.1016\/j.neucom.2026.133333_bib0225","series-title":"Proceedings of the European Conference on Computer Vision","first-page":"461","article-title":"P-STMO: pre-trained spatial temporal many-to-one model for 3D human pose estimation","author":"Shan","year":"2022"},{"key":"10.1016\/j.neucom.2026.133333_bib0230","series-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","first-page":"13147","article-title":"MHFormer: multi-hypothesis transformer for 3D human pose estimation","author":"Li","year":"2022"},{"key":"10.1016\/j.neucom.2026.133333_bib0235","doi-asserted-by":"crossref","DOI":"10.1016\/j.neucom.2025.131247","article-title":"Optimizing the temporal adjacency matrix for 3D human pose estimation through clustering","volume":"653","author":"Wang","year":"2025","journal-title":"Neurocomputing."},{"key":"10.1016\/j.neucom.2026.133333_bib0240","doi-asserted-by":"crossref","first-page":"2555","DOI":"10.1007\/s00371-023-02936-5","article-title":"ConvFormer: parameter reduction in transformer models for 3D human pose estimation by leveraging dynamic multi-headed convolutional attention","volume":"40","author":"Diaz-Arias","year":"2024","journal-title":"The Vis. Comput."},{"key":"10.1016\/j.neucom.2026.133333_bib0245","doi-asserted-by":"crossref","first-page":"12581","DOI":"10.1109\/TPAMI.2023.3282631","article-title":"UniFormer: unifying convolution and self-attention for visual recognition","volume":"45","author":"Li","year":"2023","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"10.1016\/j.neucom.2026.133333_bib0250","series-title":"Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV)","first-page":"15085","article-title":"MotionBERT: a unified perspective on learning human motion representations","author":"Zhu","year":"2023"}],"container-title":["Neurocomputing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0925231226007307?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0925231226007307?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2026,4,23]],"date-time":"2026-04-23T20:26:06Z","timestamp":1776975966000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S0925231226007307"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,6]]},"references-count":50,"alternative-id":["S0925231226007307"],"URL":"https:\/\/doi.org\/10.1016\/j.neucom.2026.133333","relation":{},"ISSN":["0925-2312"],"issn-type":[{"value":"0925-2312","type":"print"}],"subject":[],"published":{"date-parts":[[2026,6]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"DDCEFormer: Dual-domain cross enhanced transformer for 3D human pose estimation","name":"articletitle","label":"Article Title"},{"value":"Neurocomputing","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.neucom.2026.133333","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2026 Elsevier B.V. All rights are reserved, including those for text and data mining, AI training, and similar technologies.","name":"copyright","label":"Copyright"}],"article-number":"133333"}}