{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,16]],"date-time":"2026-04-16T13:46:56Z","timestamp":1776347216905,"version":"3.51.2"},"reference-count":43,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2026,11,1]],"date-time":"2026-11-01T00:00:00Z","timestamp":1793491200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2026,11,1]],"date-time":"2026-11-01T00:00:00Z","timestamp":1793491200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/legal\/tdmrep-license"},{"start":{"date-parts":[[2026,11,1]],"date-time":"2026-11-01T00:00:00Z","timestamp":1793491200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-017"},{"start":{"date-parts":[[2026,11,1]],"date-time":"2026-11-01T00:00:00Z","timestamp":1793491200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"},{"start":{"date-parts":[[2026,11,1]],"date-time":"2026-11-01T00:00:00Z","timestamp":1793491200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-012"},{"start":{"date-parts":[[2026,11,1]],"date-time":"2026-11-01T00:00:00Z","timestamp":1793491200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2026,11,1]],"date-time":"2026-11-01T00:00:00Z","timestamp":1793491200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-004"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62476065"],"award-info":[{"award-number":["62476065"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Pattern Recognition"],"published-print":{"date-parts":[[2026,11]]},"DOI":"10.1016\/j.patcog.2026.113618","type":"journal-article","created":{"date-parts":[[2026,3,28]],"date-time":"2026-03-28T16:05:26Z","timestamp":1774713926000},"page":"113618","update-policy":"https:\/\/doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":0,"special_numbering":"PB","title":["Learning robust representations for highly dynamic pose estimation via mixture of spatiotemporal experts"],"prefix":"10.1016","volume":"179","author":[{"given":"Jiangning","family":"Wei","sequence":"first","affiliation":[]},{"given":"Hao","family":"Yu","sequence":"additional","affiliation":[]},{"given":"Lixiong","family":"Qin","sequence":"additional","affiliation":[]},{"given":"Bo","family":"Yu","sequence":"additional","affiliation":[]},{"given":"Dandan","family":"Xiao","sequence":"additional","affiliation":[]},{"given":"Ke","family":"Li","sequence":"additional","affiliation":[]},{"given":"Jun","family":"Liu","sequence":"additional","affiliation":[]}],"member":"78","reference":[{"key":"10.1016\/j.patcog.2026.113618_bib0001","series-title":"2009 IEEE International Conference on Robotics and Automation","first-page":"3571","article-title":"Pose estimation and adaptive robot behaviour for human-robot interaction","author":"Svenstrup","year":"2009"},{"key":"10.1016\/j.patcog.2026.113618_bib0002","series-title":"IECON 2020 the 46th Annual Conference of the IEEE Industrial Electronics Society","first-page":"4769","article-title":"Multi-view human pose estimation in human-robot interaction","author":"Xu","year":"2020"},{"key":"10.1016\/j.patcog.2026.113618_bib0003","series-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","first-page":"5908","article-title":"Photo wake-up: 3D character animation from a single photo","author":"Weng","year":"2019"},{"issue":"3","key":"10.1016\/j.patcog.2026.113618_bib0004","first-page":"1","article-title":"Vid2Player: controllable video sprites that behave and appear like professional tennis players","volume":"40","author":"Zhang","year":"2021","journal-title":"ACM Trans. Graph. (TOG)"},{"key":"10.1016\/j.patcog.2026.113618_bib0005","doi-asserted-by":"crossref","DOI":"10.1016\/j.jvcir.2021.103055","article-title":"Human pose estimation and its application to action recognition: a survey","volume":"76","author":"Song","year":"2021","journal-title":"J. Vis. Commun Image Represent."},{"key":"10.1016\/j.patcog.2026.113618_bib0006","series-title":"Proceedings of the AAAI Conference on Artificial Intelligence","article-title":"Spatial temporal graph convolutional networks for skeleton-based action recognition","volume":"Vol. 32","author":"Yan","year":"2018"},{"key":"10.1016\/j.patcog.2026.113618_bib0007","series-title":"Proceedings of the 27th ACM International Conference on Multimedia","first-page":"374","article-title":"AI coach: deep human pose estimation and analysis for personalized athletic training assistance","author":"Wang","year":"2019"},{"key":"10.1016\/j.patcog.2026.113618_bib0008","series-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","first-page":"5109","article-title":"Monocular 3D human pose estimation for sports broadcasts using partial sports field registration","author":"Baumgartner","year":"2023"},{"key":"10.1016\/j.patcog.2026.113618_bib0009","series-title":"Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition","first-page":"7291","article-title":"Realtime multi-person 2D pose estimation using part affinity fields","author":"Cao","year":"2017"},{"key":"10.1016\/j.patcog.2026.113618_bib0010","unstructured":"G. Jocher, A. Chaurasia, J. Qiu, J. Qiu, Ultralytics YOLO, 2023, https:\/\/github.com\/ultralytics\/ultralytics."},{"key":"10.1016\/j.patcog.2026.113618_bib0011","series-title":"Proceedings of the IEEE\/CVF International Conference on Computer Vision","first-page":"11656","article-title":"3D human pose estimation with spatial and temporal transformers","author":"Zheng","year":"2021"},{"key":"10.1016\/j.patcog.2026.113618_bib0012","series-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","first-page":"13232","article-title":"MixSTE: Seq2seq mixed spatio-temporal encoder for 3D human pose estimation in video","author":"Zhang","year":"2022"},{"key":"10.1016\/j.patcog.2026.113618_bib0013","series-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","first-page":"20438","article-title":"GraFormer: graph-oriented transformer for 3D pose estimation","author":"Zhao","year":"2022"},{"key":"10.1016\/j.patcog.2026.113618_bib0014","series-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","first-page":"1123","article-title":"KTPFormer: kinematics and trajectory prior knowledge-enhanced transformer for 3D human pose estimation","author":"Peng","year":"2024"},{"key":"10.1016\/j.patcog.2026.113618_bib0015","series-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","first-page":"13147","article-title":"MHFormer: multi-hypothesis transformer for 3D human pose estimation","author":"Li","year":"2022"},{"key":"10.1016\/j.patcog.2026.113618_bib0016","series-title":"Proceedings of the IEEE\/CVF International Conference on Computer Vision","first-page":"15085","article-title":"MotionBERT: a unified perspective on learning human motion representations","author":"Zhu","year":"2023"},{"key":"10.1016\/j.patcog.2026.113618_bib0017","series-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","first-page":"4790","article-title":"3D human pose estimation with spatio-temporal criss-cross attention","author":"Tang","year":"2023"},{"key":"10.1016\/j.patcog.2026.113618_bib0018","series-title":"Proceedings of the 30th ACM International Conference on Multimedia","first-page":"5496","article-title":"A lightweight graph transformer network for human mesh reconstruction from 2D human pose","author":"Zheng","year":"2022"},{"issue":"7","key":"10.1016\/j.patcog.2026.113618_bib0019","doi-asserted-by":"crossref","first-page":"1325","DOI":"10.1109\/TPAMI.2013.248","article-title":"Human3.6M: large scale datasets and predictive methods for 3D human sensing in natural environments","volume":"36","author":"Ionescu","year":"2013","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"10.1016\/j.patcog.2026.113618_bib0020","series-title":"Proceedings of the European Conference on Computer Vision (ECCV)","first-page":"601","article-title":"Recovering accurate 3d human pose in the wild using imus and a moving camera","author":"Von Marcard","year":"2018"},{"issue":"1","key":"10.1016\/j.patcog.2026.113618_bib0021","doi-asserted-by":"crossref","first-page":"4","DOI":"10.1007\/s11263-009-0273-6","article-title":"HumanEva: synchronized video and motion capture dataset and baseline algorithm for evaluation of articulated human motion","volume":"87","author":"Sigal","year":"2010","journal-title":"Int. J. Comput. Vis."},{"key":"10.1016\/j.patcog.2026.113618_bib0022","series-title":"2017 International Conference on 3D Vision (3DV)","first-page":"506","article-title":"Monocular 3d human pose estimation in the wild using improved cnn supervision","author":"Mehta","year":"2017"},{"key":"10.1016\/j.patcog.2026.113618_bib0023","series-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","first-page":"5219","article-title":"SportsPose-a dynamic 3D sports pose dataset","author":"Ingwersen","year":"2023"},{"key":"10.1016\/j.patcog.2026.113618_bib0024","series-title":"Proceedings of the 8th International ACM Workshop on Multimedia Content Analysis in Sports","first-page":"8","article-title":"AthleticsPose: authentic sports motion dataset on athletic field and evaluation of monocular 3D pose estimation ability","author":"Suzuki","year":"2025"},{"key":"10.1016\/j.patcog.2026.113618_bib0025","series-title":"Proceedings of the AAAI Conference on Artificial Intelligence","first-page":"1296","article-title":"Pose-oriented transformer with uncertainty-guided refinement for 2D-to-3D human pose estimation","volume":"Vol. 37","author":"Li","year":"2023"},{"key":"10.1016\/j.patcog.2026.113618_bib0026","series-title":"Proceedings of the IEEE\/CVF International Conference on Computer Vision","first-page":"8818","article-title":"GLA-GCN: global-local adaptive graph convolutional network for 3D human pose estimation from monocular video","author":"Yu","year":"2023"},{"key":"10.1016\/j.patcog.2026.113618_bib0027","doi-asserted-by":"crossref","DOI":"10.1016\/j.patcog.2024.110925","article-title":"GraphMLP: a graph MLP-like architecture for 3D human pose estimation","volume":"158","author":"Li","year":"2025","journal-title":"Pattern Recognit."},{"key":"10.1016\/j.patcog.2026.113618_bib0028","doi-asserted-by":"crossref","DOI":"10.1016\/j.patcog.2024.110446","article-title":"DGFormer: dynamic graph transformer for 3D human pose estimation","volume":"152","author":"Chen","year":"2024","journal-title":"Pattern Recognit."},{"key":"10.1016\/j.patcog.2026.113618_bib0029","doi-asserted-by":"crossref","first-page":"1282","DOI":"10.1109\/TMM.2022.3141231","article-title":"Exploiting temporal contexts with strided transformer for 3d human pose estimation","volume":"25","author":"Li","year":"2022","journal-title":"IEEE Trans. Multimed."},{"key":"10.1016\/j.patcog.2026.113618_bib0030","series-title":"European Conference on Computer Vision","first-page":"111","article-title":"3D human pose estimation via non-causal retentive networks","author":"Zheng","year":"2024"},{"key":"10.1016\/j.patcog.2026.113618_bib0031","doi-asserted-by":"crossref","DOI":"10.1016\/j.patcog.2025.111562","article-title":"HMSFT: hierarchical multi-scale spatial-frequency-temporal collaborative transformer for 3D human pose estimation","volume":"164","author":"Zhang","year":"2025","journal-title":"Pattern Recognit."},{"key":"10.1016\/j.patcog.2026.113618_bib0032","series-title":"Proceedings of the Computer Vision and Pattern Recognition Conference","first-page":"16807","article-title":"HiPART: hierarchical pose autoregressive transformer for occluded 3d human pose estimation","author":"Zheng","year":"2025"},{"key":"10.1016\/j.patcog.2026.113618_bib0033","doi-asserted-by":"crossref","DOI":"10.1109\/TCSVT.2026.3666928","article-title":"Efficient diffusion-based 3D human pose estimation with hierarchical temporal pruning","author":"Bi","year":"2026","journal-title":"IEEE Trans. Circuits Syst. Video Technol."},{"key":"10.1016\/j.patcog.2026.113618_bib0034","series-title":"2025 IEEE International Workshop on Multimedia Signal Processing (MMSP)","first-page":"72","article-title":"HyperDiff: hypergraph guided diffusion model for 3D human pose estimation","author":"Han","year":"2025"},{"key":"10.1016\/j.patcog.2026.113618_bib0035","unstructured":"Q. Cai, L. Zhang, X. Hu, S. Hou, Y. Huang, FastDDHPose: towards unified, efficient, and disentangled 3D human pose estimation, (2025) arXiv: 2512.14162."},{"key":"10.1016\/j.patcog.2026.113618_bib0036","series-title":"Computer Vision\u2013ECCV 2014: 13th European Conference, Zurich, Switzerland, September 6\u201312, 2014, Proceedings, Part v 13","first-page":"740","article-title":"Microsoft COCO: common objects in context","author":"Lin","year":"2014"},{"key":"10.1016\/j.patcog.2026.113618_bib0037","series-title":"Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition","first-page":"3686","article-title":"2D human pose estimation: new benchmark and state of the art analysis","author":"Andriluka","year":"2014"},{"key":"10.1016\/j.patcog.2026.113618_bib0038","series-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","first-page":"21978","article-title":"FreeMan: towards benchmarking 3D human pose estimation under real-world conditions","author":"Wang","year":"2024"},{"key":"10.1016\/j.patcog.2026.113618_bib0039","doi-asserted-by":"crossref","unstructured":"A. Nibali, J. Millward, Z. He, S. Morgan, ASPset: an outdoor sports pose video dataset with 3D keypoint annotations, Image Vis. Comput. 111 (2021) 104196.","DOI":"10.1016\/j.imavis.2021.104196"},{"key":"10.1016\/j.patcog.2026.113618_bib0040","unstructured":"Y. Sun, L. Dong, S. Huang, S. Ma, Y. Xia, J. Xue, J. Wang, F. Wei, Retentive network: a successor to transformer for large language models, (2023) arXiv: 2307.08621."},{"key":"10.1016\/j.patcog.2026.113618_bib0041","series-title":"Proceedings of the 8th International ACM Workshop on Multimedia Content Analysis in Sports","first-page":"95","article-title":"KASportsFormer: kinematic anatomy enhanced transformer for 3D human pose estimation on short sports scene video","author":"Yin","year":"2025"},{"key":"10.1016\/j.patcog.2026.113618_bib0042","series-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","first-page":"5693","article-title":"Deep high-resolution representation learning for human pose estimation","author":"Sun","year":"2019"},{"key":"10.1016\/j.patcog.2026.113618_bib0043","series-title":"Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition","first-page":"7103","article-title":"Cascaded pyramid network for multi-person pose estimation","author":"Chen","year":"2018"}],"container-title":["Pattern Recognition"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0031320326005832?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0031320326005832?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2026,4,16]],"date-time":"2026-04-16T12:54:36Z","timestamp":1776344076000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S0031320326005832"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,11]]},"references-count":43,"alternative-id":["S0031320326005832"],"URL":"https:\/\/doi.org\/10.1016\/j.patcog.2026.113618","relation":{},"ISSN":["0031-3203"],"issn-type":[{"value":"0031-3203","type":"print"}],"subject":[],"published":{"date-parts":[[2026,11]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"Learning robust representations for highly dynamic pose estimation via mixture of spatiotemporal experts","name":"articletitle","label":"Article Title"},{"value":"Pattern Recognition","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.patcog.2026.113618","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2026 Elsevier Ltd. All rights are reserved, including those for text and data mining, AI training, and similar technologies.","name":"copyright","label":"Copyright"}],"article-number":"113618"}}