{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,11]],"date-time":"2026-03-11T17:42:57Z","timestamp":1773250977697,"version":"3.50.1"},"reference-count":46,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2026,4,1]],"date-time":"2026-04-01T00:00:00Z","timestamp":1775001600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2026,4,1]],"date-time":"2026-04-01T00:00:00Z","timestamp":1775001600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/legal\/tdmrep-license"},{"start":{"date-parts":[[2026,4,1]],"date-time":"2026-04-01T00:00:00Z","timestamp":1775001600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-017"},{"start":{"date-parts":[[2026,4,1]],"date-time":"2026-04-01T00:00:00Z","timestamp":1775001600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"},{"start":{"date-parts":[[2026,4,1]],"date-time":"2026-04-01T00:00:00Z","timestamp":1775001600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-012"},{"start":{"date-parts":[[2026,4,1]],"date-time":"2026-04-01T00:00:00Z","timestamp":1775001600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2026,4,1]],"date-time":"2026-04-01T00:00:00Z","timestamp":1775001600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-004"}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Image and Vision Computing"],"published-print":{"date-parts":[[2026,4]]},"DOI":"10.1016\/j.imavis.2026.105927","type":"journal-article","created":{"date-parts":[[2026,2,6]],"date-time":"2026-02-06T17:03:57Z","timestamp":1770397437000},"page":"105927","update-policy":"https:\/\/doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":0,"special_numbering":"C","title":["Mamba-Driven Topology Fusion for monocular 3D human pose estimation"],"prefix":"10.1016","volume":"168","author":[{"given":"Zenghao","family":"Zheng","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6210-2534","authenticated-orcid":false,"given":"Lianping","family":"Yang","sequence":"additional","affiliation":[]},{"given":"Jinshan","family":"Pan","sequence":"additional","affiliation":[]},{"given":"Hegui","family":"Zhu","sequence":"additional","affiliation":[]}],"member":"78","reference":[{"issue":"8","key":"10.1016\/j.imavis.2026.105927_b1","first-page":"2752","article-title":"Multi-task deep learning for real-time 3D human pose estimation and action recognition","volume":"43","author":"Luvizon","year":"2020","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"10.1016\/j.imavis.2026.105927_b2","doi-asserted-by":"crossref","first-page":"133330","DOI":"10.1109\/ACCESS.2020.3010248","article-title":"The progress of human pose estimation: A survey and taxonomy of models applied in 2D human pose estimation","volume":"8","author":"Munea","year":"2020","journal-title":"IEEE Access"},{"key":"10.1016\/j.imavis.2026.105927_b3","doi-asserted-by":"crossref","unstructured":"A.S. Jackson, C. Manafas, G. Tzimiropoulos, 3d human body reconstruction from a single image via volumetric regression, in: Proceedings of the European Conference on Computer Vision Workshops, ECCV, 2018.","DOI":"10.1007\/978-3-030-11018-5_6"},{"key":"10.1016\/j.imavis.2026.105927_b4","doi-asserted-by":"crossref","unstructured":"A. Kanazawa, M.J. Black, D.W. Jacobs, J. Malik, End-to-end recovery of human shape and pose, in: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, 2018, pp. 7122\u20137131.","DOI":"10.1109\/CVPR.2018.00744"},{"key":"10.1016\/j.imavis.2026.105927_b5","series-title":"2020 IEEE\/RSJ International Conference on Intelligent Robots and Systems","first-page":"10676","article-title":"Traffic control gesture recognition for autonomous vehicles","author":"Wiederer","year":"2020"},{"key":"10.1016\/j.imavis.2026.105927_b6","series-title":"Stacked Hourglass Networks for Human Pose Estimation","author":"Newell","year":"2016"},{"key":"10.1016\/j.imavis.2026.105927_b7","doi-asserted-by":"crossref","unstructured":"K. Sun, B. Xiao, D. Liu, J. Wang, Deep high-resolution representation learning for human pose estimation, in: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 2019, pp. 5693\u20135703.","DOI":"10.1109\/CVPR.2019.00584"},{"key":"10.1016\/j.imavis.2026.105927_b8","doi-asserted-by":"crossref","DOI":"10.1016\/j.imavis.2025.105425","article-title":"EHGFormer: An efficient hypergraph-injected transformer for 3D human pose estimation","volume":"154","author":"Zheng","year":"2025","journal-title":"Image Vis. Comput."},{"key":"10.1016\/j.imavis.2026.105927_b9","doi-asserted-by":"crossref","first-page":"1282","DOI":"10.1109\/TMM.2022.3141231","article-title":"Exploiting temporal contexts with strided transformer for 3d human pose estimation","volume":"25","author":"Li","year":"2022","journal-title":"IEEE Trans. Multimed."},{"key":"10.1016\/j.imavis.2026.105927_b10","doi-asserted-by":"crossref","DOI":"10.1016\/j.imavis.2024.105142","article-title":"STAFFormer: Spatio-temporal adaptive fusion transformer for efficient 3D human pose estimation","volume":"149","author":"Hao","year":"2024","journal-title":"Image Vis. Comput."},{"key":"10.1016\/j.imavis.2026.105927_b11","doi-asserted-by":"crossref","unstructured":"W. Li, M. Liu, H. Liu, P. Wang, J. Cai, N. Sebe, Hourglass Tokenizer for Efficient Transformer-Based 3D Human Pose Estimation, in: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 2024, pp. 604\u2013613.","DOI":"10.1109\/CVPR52733.2024.00064"},{"key":"10.1016\/j.imavis.2026.105927_b12","series-title":"Mamba: Linear-time sequence modeling with selective state spaces","author":"Gu","year":"2023"},{"key":"10.1016\/j.imavis.2026.105927_b13","series-title":"PoseMamba: Monocular 3D human pose estimation with bidirectional global-local spatio-temporal state space model","author":"Huang","year":"2024"},{"key":"10.1016\/j.imavis.2026.105927_b14","series-title":"Pose magic: Efficient and temporally consistent human pose estimation with a hybrid mamba-GCN network","author":"Zhang","year":"2024"},{"key":"10.1016\/j.imavis.2026.105927_b15","series-title":"HGMamba: Enhancing 3D human pose estimation with a hypergcn-mamba network","author":"Cui","year":"2025"},{"key":"10.1016\/j.imavis.2026.105927_b16","series-title":"Efficiently modeling long sequences with structured state spaces","author":"Gu","year":"2021"},{"key":"10.1016\/j.imavis.2026.105927_b17","article-title":"Convolutional neural networks on graphs with fast localized spectral filtering","volume":"29","author":"Defferrard","year":"2016","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.imavis.2026.105927_b18","first-page":"2127","article-title":"Hamba: Single-view 3d hand reconstruction with graph-guided bi-scanning mamba","volume":"37","author":"Dong","year":"2024","journal-title":"Adv. Neural Inf. Process. Syst."},{"issue":"7","key":"10.1016\/j.imavis.2026.105927_b19","doi-asserted-by":"crossref","first-page":"1325","DOI":"10.1109\/TPAMI.2013.248","article-title":"Human3. 6m: Large scale datasets and predictive methods for 3d human sensing in natural environments","volume":"36","author":"Ionescu","year":"2013","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"10.1016\/j.imavis.2026.105927_b20","series-title":"2017 International Conference on 3D Vision","first-page":"506","article-title":"Monocular 3d human pose estimation in the wild using improved cnn supervision","author":"Mehta","year":"2017"},{"key":"10.1016\/j.imavis.2026.105927_b21","doi-asserted-by":"crossref","unstructured":"J. Zhang, Z. Tu, J. Yang, Y. Chen, J. Yuan, Mixste: Seq2seq mixed spatio-temporal encoder for 3d human pose estimation in video, in: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 2022, pp. 13232\u201313242.","DOI":"10.1109\/CVPR52688.2022.01288"},{"key":"10.1016\/j.imavis.2026.105927_b22","doi-asserted-by":"crossref","unstructured":"S. Li, L. Ke, K. Pratama, Y.-W. Tai, C.-K. Tang, K.-T. Cheng, Cascaded deep monocular 3d human pose estimation with evolutionary training data, in: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 2020, pp. 6173\u20136183.","DOI":"10.1109\/CVPR42600.2020.00621"},{"key":"10.1016\/j.imavis.2026.105927_b23","series-title":"European Conference on Computer Vision","first-page":"461","article-title":"P-stmo: Pre-trained spatial temporal many-to-one model for 3d human pose estimation","author":"Shan","year":"2022"},{"key":"10.1016\/j.imavis.2026.105927_b24","doi-asserted-by":"crossref","unstructured":"D. Pavllo, C. Feichtenhofer, D. Grangier, M. Auli, 3D human pose estimation in video with temporal convolutions and semi-supervised training, in: Conference on Computer Vision and Pattern Recognition, CVPR, 2019.","DOI":"10.1109\/CVPR.2019.00794"},{"key":"10.1016\/j.imavis.2026.105927_b25","doi-asserted-by":"crossref","unstructured":"W. Zhu, X. Ma, Z. Liu, L. Liu, W. Wu, Y. Wang, Motionbert: A unified perspective on learning human motion representations, in: Proceedings of the IEEE\/CVF International Conference on Computer Vision, 2023, pp. 15085\u201315099.","DOI":"10.1109\/ICCV51070.2023.01385"},{"key":"10.1016\/j.imavis.2026.105927_b26","doi-asserted-by":"crossref","unstructured":"S. Mehraban, V. Adeli, B. Taati, MotionAGFormer: Enhancing 3D Human Pose Estimation with a Transformer-GCNFormer Network, in: Proceedings of the IEEE\/CVF Winter Conference on Applications of Computer Vision, 2024, pp. 6920\u20136930.","DOI":"10.1109\/WACV57701.2024.00677"},{"key":"10.1016\/j.imavis.2026.105927_b27","doi-asserted-by":"crossref","unstructured":"Z. Tang, Z. Qiu, Y. Hao, R. Hong, T. Yao, 3D human pose estimation with spatio-temporal criss-cross attention, in: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 2023, pp. 4790\u20134799.","DOI":"10.1109\/CVPR52729.2023.00464"},{"key":"10.1016\/j.imavis.2026.105927_b28","doi-asserted-by":"crossref","DOI":"10.1016\/j.imavis.2023.104863","article-title":"STRFormer: Spatial\u2013temporal\u2013retemporal transformer for 3D human pose estimation","volume":"140","author":"Liu","year":"2023","journal-title":"Image Vis. Comput."},{"key":"10.1016\/j.imavis.2026.105927_b29","doi-asserted-by":"crossref","unstructured":"M. Einfalt, K. Ludwig, R. Lienhart, Uplift and upsample: Efficient 3d human pose estimation with uplifting transformers, in: Proceedings of the IEEE\/CVF Winter Conference on Applications of Computer Vision, 2023, pp. 2903\u20132913.","DOI":"10.1109\/WACV56688.2023.00292"},{"key":"10.1016\/j.imavis.2026.105927_b30","doi-asserted-by":"crossref","unstructured":"Q. Zhao, C. Zheng, M. Liu, P. Wang, C. Chen, Poseformerv2: Exploring frequency domain for efficient and robust 3d human pose estimation, in: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 2023, pp. 8877\u20138886.","DOI":"10.1109\/CVPR52729.2023.00857"},{"key":"10.1016\/j.imavis.2026.105927_b31","series-title":"Vision mamba: Efficient visual representation learning with bidirectional state space model","author":"Zhu","year":"2024"},{"key":"10.1016\/j.imavis.2026.105927_b32","series-title":"Vmamba: Visual state space model","author":"Liu","year":"2024"},{"key":"10.1016\/j.imavis.2026.105927_b33","doi-asserted-by":"crossref","unstructured":"Q. Cai, X. Hu, S. Hou, L. Yao, Y. Huang, Disentangled Diffusion-Based 3D Human Pose Estimation with Hierarchical Spatial and Temporal Denoiser, in: Proceedings of the AAAI Conference on Artificial Intelligence, Vol. 38, 2024, pp. 882\u2013890.","DOI":"10.1609\/aaai.v38i2.27847"},{"key":"10.1016\/j.imavis.2026.105927_b34","article-title":"BCDPose: Diffusion-based 3D human pose estimation with bone-chain prior knowledge","author":"Liu","year":"2025","journal-title":"Image Vis. Comput."},{"key":"10.1016\/j.imavis.2026.105927_b35","doi-asserted-by":"crossref","first-page":"4278","DOI":"10.1109\/TIP.2022.3182269","article-title":"Boosting monocular 3d human pose estimation with part aware attention","volume":"31","author":"Xue","year":"2022","journal-title":"IEEE Trans. Image Process."},{"key":"10.1016\/j.imavis.2026.105927_b36","doi-asserted-by":"crossref","unstructured":"Y. Cai, L. Ge, J. Liu, J. Cai, T.-J. Cham, J. Yuan, N.M. Thalmann, Exploiting spatial-temporal relationships for 3d pose estimation via graph convolutional networks, in: Proceedings of the IEEE\/CVF International Conference on Computer Vision, 2019, pp. 2272\u20132281.","DOI":"10.1109\/ICCV.2019.00236"},{"key":"10.1016\/j.imavis.2026.105927_b37","doi-asserted-by":"crossref","first-page":"906","DOI":"10.1109\/TIP.2021.3136613","article-title":"Limb pose aware networks for monocular 3D pose estimation","volume":"31","author":"Wu","year":"2021","journal-title":"IEEE Trans. Image Process."},{"key":"10.1016\/j.imavis.2026.105927_b38","first-page":"572","article-title":"Combining recurrent, convolutional, and continuous-time models with linear state space layers","volume":"34","author":"Gu","year":"2021","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.imavis.2026.105927_b39","series-title":"Learning multi-dimensional edge feature-based au relation graph for facial action unit recognition","author":"Luo","year":"2022"},{"key":"10.1016\/j.imavis.2026.105927_b40","doi-asserted-by":"crossref","unstructured":"C. Zheng, S. Zhu, M. Mendieta, T. Yang, C. Chen, Z. Ding, 3d human pose estimation with spatial and temporal transformers, in: Proceedings of the IEEE\/CVF International Conference on Computer Vision, 2021, pp. 11656\u201311665.","DOI":"10.1109\/ICCV48922.2021.01145"},{"key":"10.1016\/j.imavis.2026.105927_b41","series-title":"Decoupled weight decay regularization","author":"Loshchilov","year":"2017"},{"key":"10.1016\/j.imavis.2026.105927_b42","article-title":"Frame-padded multiscale transformer for monocular 3D human pose estimation","author":"Zhong","year":"2023","journal-title":"IEEE Trans. Multimed."},{"key":"10.1016\/j.imavis.2026.105927_b43","doi-asserted-by":"crossref","unstructured":"W. Li, H. Liu, H. Tang, P. Wang, L. Van Gool, Mhformer: Multi-hypothesis transformer for 3d human pose estimation, in: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 2022, pp. 13147\u201313156.","DOI":"10.1109\/CVPR52688.2022.01280"},{"issue":"2","key":"10.1016\/j.imavis.2026.105927_b44","doi-asserted-by":"crossref","first-page":"911","DOI":"10.1109\/TCSVT.2023.3286402","article-title":"FTCM: Frequency-temporal collaborative module for efficient 3D human pose estimation in video","volume":"34","author":"Tang","year":"2023","journal-title":"IEEE Trans. Circuits Syst. Video Technol."},{"key":"10.1016\/j.imavis.2026.105927_b45","doi-asserted-by":"crossref","unstructured":"J. Peng, Y. Zhou, P. Mok, KTPFormer: Kinematics and Trajectory Prior Knowledge-Enhanced Transformer for 3D Human Pose Estimation, in: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 2024, pp. 1123\u20131132.","DOI":"10.1109\/CVPR52733.2024.00113"},{"key":"10.1016\/j.imavis.2026.105927_b46","series-title":"RePOSE: 3D Human Pose Estimation via Spatio-Temporal Depth Relational Consistency","author":"Sun","year":"2024"}],"container-title":["Image and Vision Computing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0262885626000338?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0262885626000338?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2026,3,10]],"date-time":"2026-03-10T22:04:12Z","timestamp":1773180252000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S0262885626000338"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,4]]},"references-count":46,"alternative-id":["S0262885626000338"],"URL":"https:\/\/doi.org\/10.1016\/j.imavis.2026.105927","relation":{},"ISSN":["0262-8856"],"issn-type":[{"value":"0262-8856","type":"print"}],"subject":[],"published":{"date-parts":[[2026,4]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"Mamba-Driven Topology Fusion for monocular 3D human pose estimation","name":"articletitle","label":"Article Title"},{"value":"Image and Vision Computing","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.imavis.2026.105927","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2026 Elsevier B.V. All rights are reserved, including those for text and data mining, AI training, and similar technologies.","name":"copyright","label":"Copyright"}],"article-number":"105927"}}