{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,18]],"date-time":"2026-03-18T01:52:42Z","timestamp":1773798762574,"version":"3.50.1"},"reference-count":48,"publisher":"Springer Science and Business Media LLC","issue":"6","license":[{"start":{"date-parts":[[2023,9,20]],"date-time":"2023-09-20T00:00:00Z","timestamp":1695168000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,9,20]],"date-time":"2023-09-20T00:00:00Z","timestamp":1695168000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61771420"],"award-info":[{"award-number":["61771420"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62001413"],"award-info":[{"award-number":["62001413"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100003787","name":"Natural Science Foundation of Hebei Province","doi-asserted-by":"publisher","award":["F2020203064"],"award-info":[{"award-number":["F2020203064"]}],"id":[{"id":"10.13039\/501100003787","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Science and Technology Project of Hebei Education Department","award":["BJK2023117"],"award-info":[{"award-number":["BJK2023117"]}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Vis Comput"],"published-print":{"date-parts":[[2024,6]]},"DOI":"10.1007\/s00371-023-03088-2","type":"journal-article","created":{"date-parts":[[2023,9,20]],"date-time":"2023-09-20T09:01:46Z","timestamp":1695200506000},"page":"4387-4399","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":9,"title":["A fused convolutional spatio-temporal progressive approach for 3D human pose estimation"],"prefix":"10.1007","volume":"40","author":[{"given":"Hehao","family":"Zhang","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0300-6144","authenticated-orcid":false,"given":"Zhengping","family":"Hu","sequence":"additional","affiliation":[]},{"given":"Zhe","family":"Sun","sequence":"additional","affiliation":[]},{"given":"Mengyao","family":"Zhao","sequence":"additional","affiliation":[]},{"given":"Shuai","family":"Bi","sequence":"additional","affiliation":[]},{"given":"Jirui","family":"Di","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2023,9,20]]},"reference":[{"key":"3088_CR1","doi-asserted-by":"publisher","first-page":"429","DOI":"10.1007\/s00371-021-02339-4","volume":"39","author":"K Wang","year":"2023","unstructured":"Wang, K., Zhang, G., Yang, J.: 3D human pose and shape estimation with dense correspondence from a single depth image. Vis. Comput. 39, 429\u2013441 (2023)","journal-title":"Vis. Comput."},{"key":"3088_CR2","doi-asserted-by":"publisher","first-page":"1401","DOI":"10.1007\/s00371-019-01740-4","volume":"36","author":"J Wu","year":"2020","unstructured":"Wu, J., Hu, D., Xiang, F., et al.: 3D human pose estimation by depth map. Vis. Comput. 36, 1401\u20131410 (2020)","journal-title":"Vis. Comput."},{"key":"3088_CR3","doi-asserted-by":"crossref","unstructured":"Gao, B., Zhang, Z., Wu, C., et al.: Staged cascaded network for monocular 3D human pose estimation. Appl. Intell. (2022)","DOI":"10.1007\/s10489-022-03516-1"},{"key":"3088_CR4","doi-asserted-by":"publisher","first-page":"2191","DOI":"10.1007\/s00371-022-02473-7","volume":"39","author":"Z Qiu","year":"2023","unstructured":"Qiu, Z., Zhang, H., Deng, W., et al.: Effective skeleton topology and semantics-guided adaptive graph convolution network for action recognition. Vis. Comput. 39, 2191\u20132203 (2023)","journal-title":"Vis. Comput."},{"key":"3088_CR5","doi-asserted-by":"publisher","first-page":"4243","DOI":"10.1007\/s00521-022-07083-x","volume":"35","author":"H Yuan","year":"2023","unstructured":"Yuan, H., Lee, J.H., Zhang, S.: Research on simulation of 3D human animation vision technology based on an enhanced machine learning algorithm. Neural Comput. Appl. 35, 4243\u20134254 (2023)","journal-title":"Neural Comput. Appl."},{"key":"3088_CR6","doi-asserted-by":"publisher","first-page":"2023","DOI":"10.1007\/s00371-021-02263-7","volume":"38","author":"M Mofarreh-Bonab","year":"2022","unstructured":"Mofarreh-Bonab, M., Seyedarabi, H., Mozaffari Tazehkand, B., et al.: 3D hand pose estimation using RGBD images and hybrid deep learning networks. Vis. Comput. 38, 2023\u20132032 (2022)","journal-title":"Vis. Comput."},{"key":"3088_CR7","doi-asserted-by":"crossref","unstructured":"Hua, G., Liu, H., Li, W., Zhang, Q., Ding, R., Xu, X.: Weakly-supervised 3D human pose estimation with cross-view U-shaped graph convolutional network. IEEE Trans. Multimedia (2022)","DOI":"10.1109\/TMM.2022.3171102"},{"key":"3088_CR8","doi-asserted-by":"crossref","unstructured":"Martinez, J., Hossain, R., Romero, J., Little, J.J.: A simple yet effective baseline for 3d human pose estimation. In: IEEE International Conference on Computer Vision, pp. 2640\u20132649 (2017)","DOI":"10.1109\/ICCV.2017.288"},{"key":"3088_CR9","doi-asserted-by":"crossref","unstructured":"Chen, X., Lin, K., Liu, W., Qian, C., Lin, L.: Weakly-supervised discovery of geometry-aware representation for 3d human pose estimation. In: IEEE Conference on Computer Vision and Pattern Recognition, pp. 10895\u201310904 (2019)","DOI":"10.1109\/CVPR.2019.01115"},{"key":"3088_CR10","doi-asserted-by":"crossref","unstructured":"Hossain, M.R.I., Little, J.J.: Exploiting temporal information for 3d human pose estimation. In: European conference on computer vision, pp. 69\u201386 (2018)","DOI":"10.1007\/978-3-030-01249-6_5"},{"key":"3088_CR11","doi-asserted-by":"crossref","unstructured":"Chen, T., Fang, C., Shen, X., Zhu, Y., Chen, Z., Luo, J.: Anatomy-aware 3D human pose estimation with bone-based pose decomposition. IEEE Trans. Circuits Syst. Video Technol. (2021)","DOI":"10.1109\/TCSVT.2021.3057267"},{"key":"3088_CR12","doi-asserted-by":"crossref","unstructured":"Liu, R., Shen, J., Wang, H., Chen, C., Cheung, S., Asari, V.: Attention mechanism exploits temporal contexts: Real-time 3D human pose reconstruction. In: IEEE Conference on Computer Vision and Pattern Recognition, 5064\u20135073 (2020)","DOI":"10.1109\/CVPR42600.2020.00511"},{"key":"3088_CR13","doi-asserted-by":"crossref","unstructured":"Wang, J., Yan, S., Xiong, Y., Lin, D.: Motion guided 3D pose estimation from videos. In: European conference on computer vision, pp. 764\u2013780 (2020)","DOI":"10.1007\/978-3-030-58601-0_45"},{"key":"3088_CR14","doi-asserted-by":"publisher","first-page":"7914","DOI":"10.1109\/TIP.2021.3109517","volume":"30","author":"J Zhang","year":"2021","unstructured":"Zhang, J., Wang, Y., Zhou, Z., Luan, T., Wang, Z., Qiao, Y.: learning dynamical human-joint affinity for 3D pose estimation in videos. IEEE Trans. Image Process. 30, 7914\u20137925 (2021)","journal-title":"IEEE Trans. Image Process."},{"key":"3088_CR15","doi-asserted-by":"crossref","unstructured":"Zeng, A., Sun, X., Yang, L., Zhao, N., Liu, M., Xu, Q.: Learning skeletal graph neural networks for hard 3D pose estimation. In: IEEE International Conference on Computer Vision, pp. 11416\u201311425 (2021)","DOI":"10.1109\/ICCV48922.2021.01124"},{"key":"3088_CR16","unstructured":"Dosovitskiy, A., et al.: An image is worth $$16\\times 16$$ words: transformers for image recognition at scale. In: International Conference on Learning Representations (2021)"},{"key":"3088_CR17","doi-asserted-by":"crossref","unstructured":"Shuai, H., Wu, L., Liu, Q.: Adaptive multi-view and temporal fusing transformer for 3D human pose estimation. IEEE Trans. Pattern Anal. Mach. Intell. (2022)","DOI":"10.1109\/TPAMI.2022.3188716"},{"key":"3088_CR18","doi-asserted-by":"crossref","unstructured":"Lin, K., Wang, L., Liu, Z.: End-to-end human pose and mesh reconstruction with transformers. In: IEEE Conference on Computer Vision and Pattern Recognition, pp. 1954\u20131963 (2021)","DOI":"10.1109\/CVPR46437.2021.00199"},{"key":"3088_CR19","doi-asserted-by":"crossref","unstructured":"Zheng, C., Zhu, S., Mendieta, M., Yang, T., Chen, C., Ding, Z.: 3D human pose estimation with spatial and temporal transformers. In: IEEE International Conference on Computer Vision, pp. 11636\u201311645 (2021)","DOI":"10.1109\/ICCV48922.2021.01145"},{"key":"3088_CR20","doi-asserted-by":"crossref","unstructured":"Ma, X., Su, J., Wang, C., Ci, H., Wang, Y.: Context modeling in 3d human pose estimation: a unified perspective. IEEE Conference on Computer Vision and Pattern Recognition, 6238\u20136247 (2021)","DOI":"10.1109\/CVPR46437.2021.00617"},{"key":"3088_CR21","doi-asserted-by":"publisher","first-page":"494","DOI":"10.1109\/TPAMI.2019.2894422","volume":"42","author":"J Liu","year":"2019","unstructured":"Liu, J., Ding, H., Shahroudy, A., Duan, L., Jiang, X., Wang, G., Kot, A.C.: Feature boosting network for 3d pose estimation. IEEE Trans. Pattern Anal. Mach. Intell. 42, 494\u2013501 (2019)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"3088_CR22","doi-asserted-by":"crossref","unstructured":"Sun, X., Xiao, B., Wei, F., Liang, S., Wei, Y.: Integral human pose regression. In: European Conference on Computer Vision, pp. 529\u2013545 (2018)","DOI":"10.1109\/ICCV.2017.284"},{"key":"3088_CR23","doi-asserted-by":"crossref","unstructured":"Li, W., Liu, H., Tang, H., Wang, P., Gool, L.V.: MHFormer: Multi-hypothesis transformer for 3D human pose estimation. In: IEEE Conference on Computer Vision and Pattern Recognition, pp. 13147\u201313156 (2022)","DOI":"10.1109\/CVPR52688.2022.01280"},{"key":"3088_CR24","doi-asserted-by":"crossref","unstructured":"Li, C., Lee, G.H.: Generating multiple hypotheses for 3D human pose estimation with mixture density network. In: IEEE Conference on Computer Vision and Pattern Recognition, pp. 9887\u20139895 (2019)","DOI":"10.1109\/CVPR.2019.01012"},{"key":"3088_CR25","doi-asserted-by":"crossref","unstructured":"Ci, H., Wang, C., Ma, X., Wang, Y.: Optimizing network structure for 3d human pose estimation. In: IEEE International Conference on Computer Vision, pp. 2262\u20132271 (2019)","DOI":"10.1109\/ICCV.2019.00235"},{"key":"3088_CR26","doi-asserted-by":"crossref","unstructured":"Chen, Y., Wang, Z., Peng, Y., Zhang, Z., Yu, G., Sun, J.: Cascaded pyramid network for multi-person pose estimation. In: IEEE Conference on Computer Vision and Pattern Recognition, pp. 7103\u20137112 (2018)","DOI":"10.1109\/CVPR.2018.00742"},{"key":"3088_CR27","doi-asserted-by":"crossref","unstructured":"He, K., Gkioxari, G., Dollar, P., Girshick, R.: Mask r-cnn. In: IEEE International Conference on Computer Vision, pp. 2961\u20132969 (2017)","DOI":"10.1109\/ICCV.2017.322"},{"key":"3088_CR28","doi-asserted-by":"crossref","unstructured":"Sun, K., Xiao, B., Liu, D., Wang, J.: Deep high-resolution representation learning for human pose estimation. In: IEEE Conference on Computer Vision and Pattern Recognition, pp. 5686\u20135696 (2019)","DOI":"10.1109\/CVPR.2019.00584"},{"key":"3088_CR29","doi-asserted-by":"crossref","unstructured":"Zhao, L., Peng, X., Tian, Y., Kapadia, M., Metaxas, D.N.: Semantic graph convolutional networks for 3d human pose regression. In: IEEE Conference on Computer Vision and Pattern Recognition, pp. 3425\u20133435 (2019)","DOI":"10.1109\/CVPR.2019.00354"},{"key":"3088_CR30","doi-asserted-by":"crossref","unstructured":"Xu, T., Takano, W.: Graph stacked hourglass networks for 3d human pose estimation. In: IEEE Conference on Computer Vision and Pattern Recognition, pp. 16105\u201316114 (2021)","DOI":"10.1109\/CVPR46437.2021.01584"},{"key":"3088_CR31","doi-asserted-by":"crossref","unstructured":"Zou, Z., Tang, W.: Modulated graph convolutional network for 3D human pose estimation. In: IEEE International Conference on Computer Vision, pp. 11477\u201311487 (2021)","DOI":"10.1109\/ICCV48922.2021.01128"},{"key":"3088_CR32","doi-asserted-by":"crossref","unstructured":"Cai, Y., Ge, L., Liu, J., Cai, J., Cham, T.J., Yuan, J., Thalmann, N.M.: Exploiting spatio-temporal relationships for 3d pose estimation via graph convolutional networks. In: IEEE International Conference on Computer Vision, pp. 2272\u20132281 (2019)","DOI":"10.1109\/ICCV.2019.00236"},{"key":"3088_CR33","doi-asserted-by":"crossref","unstructured":"Pavllo, D., Feichtenhofer, C., Grangier, D., Auli, M.: 3D human pose estimation in video with temporal convolutions and semi-supervised training. In: IEEE Conference on Computer Vision and Pattern Recognition, pp. 7753\u20137762 (2019)","DOI":"10.1109\/CVPR.2019.00794"},{"key":"3088_CR34","doi-asserted-by":"crossref","unstructured":"Cheng, Y., Yang, B., Wang, B., Wending, Y., Tan, R.: Occlusion-aware networks for 3d human pose estimation in video. In: IEEE International Conference on Computer Vision, pp. 723\u2013732 (2019)","DOI":"10.1109\/ICCV.2019.00081"},{"key":"3088_CR35","doi-asserted-by":"crossref","unstructured":"Zhang, J., Tu, Z., Yang, J., Chen, Y., Yuan, J.: MixSTE: Seq2seq mixed spatio-temporal encoder for 3D human pose estimation in video. In: IEEE Conference on Computer Vision and Pattern Recognition, pp. 13232\u201313242 (2022)","DOI":"10.1109\/CVPR52688.2022.01288"},{"key":"3088_CR36","doi-asserted-by":"crossref","unstructured":"Li, W., Liu, H., Ding, R., Liu, M., Wang, P., Yang, W.: Exploiting temporal contexts with strided transformer for 3D human pose estimation. IEEE Trans. Multimedia (2022)","DOI":"10.1109\/TMM.2022.3141231"},{"key":"3088_CR37","doi-asserted-by":"crossref","unstructured":"Yang, S., Quan, Z., Nie, M., Yang, W.: TransPose: keypoint localization via transformer. In: IEEE International Conference on Computer Vision, pp. 11782\u201311792 (2021)","DOI":"10.1109\/ICCV48922.2021.01159"},{"key":"3088_CR38","unstructured":"Ioffe, S., Szegedy, C.: Batch normalization: accelerating deep network training by reducing internal covariate shift. In: International Conference on Machine Learning, pp. 448\u2013456 (2015)"},{"key":"3088_CR39","unstructured":"Nair, V., Hinton, G.E.: Rectified linear units improve restricted Boltzmann machines. In: International Conference on Machine Learning (ICML), pp. 807\u2013814 (2010)"},{"key":"3088_CR40","doi-asserted-by":"publisher","first-page":"1325","DOI":"10.1109\/TPAMI.2013.248","volume":"36","author":"C Ionescu","year":"2013","unstructured":"Ionescu, C., Papava, D., Olaru, V., Sminchisescu, C.: Human3. 6m: large scale datasets and predictive methods for 3d human sensing in natural environments. IEEE Trans. Pattern Anal. Mach. Intell. 36, 1325\u20131339 (2013)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"3088_CR41","doi-asserted-by":"publisher","first-page":"4","DOI":"10.1007\/s11263-009-0273-6","volume":"87","author":"L Sigal","year":"2010","unstructured":"Sigal, L., Balan, A.O., Black, M.J.: HumanEva: synchronized video and motion capture dataset and baseline algorithm for evaluation of articulated human motion. Int. J. Comput. Vision 87, 4\u201327 (2010)","journal-title":"Int. J. Comput. Vision"},{"key":"3088_CR42","doi-asserted-by":"crossref","unstructured":"Mehta, D., Rhodin, H., Casas, D., Fua, P., Sotnychenko, O., Xu, W., Theobalt, C.: Monocular 3d human pose estimation in the wild using improved cnn supervision. In: International Conference on 3D Vision (3DV), pp. 506\u2013516 (2017)","DOI":"10.1109\/3DV.2017.00064"},{"key":"3088_CR43","doi-asserted-by":"crossref","unstructured":"Gong, K., Zhang, J., Feng, J.: Poseaug: A differentiable pose augmentation framework for 3d human pose estimation. In: IEEE Conference on Computer Vision and Pattern Recognition, pp. 8575\u20138584 (2021)","DOI":"10.1109\/CVPR46437.2021.00847"},{"key":"3088_CR44","first-page":"8163","volume":"32","author":"R Yeh","year":"2019","unstructured":"Yeh, R., Hu, Y., Schwing, A.: Chirality nets for human pose regression. Adv. Neural. Inf. Process. Syst. 32, 8163\u20138173 (2019)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"3088_CR45","doi-asserted-by":"crossref","unstructured":"Zeng, A., Sun, X., Huang, F., Liu, M., Xu, Q., Lin, S.: Srnet: improving generalization in 3d human pose estimation with a split-and-recombine approach. In: European Conference on Computer Vision, pp. 507\u2013523 (2020)","DOI":"10.1007\/978-3-030-58568-6_30"},{"key":"3088_CR46","unstructured":"Lin, J., Lee, G.H.: Trajectory space factorization for deep video-based 3d human pose estimation. In: British Machine Vision Conference (2019)"},{"key":"3088_CR47","doi-asserted-by":"crossref","unstructured":"Pavlakos, G., Zhou, X., Derpanis, K.G., Daniilidis, K.: Coarse-to-fine volumetric prediction for single-image 3d human pose. In: IEEE Conference on Computer Vision and Pattern Recognition, pp. 7025\u20137034 (2017)","DOI":"10.1109\/CVPR.2017.139"},{"key":"3088_CR48","doi-asserted-by":"crossref","unstructured":"Lee, K., Lee, I., Lee, S.: Propagating lstm: 3d pose estimation based on joint interdependency. In: European Conference on Computer Vision, pp. 119\u2013135 (2018)","DOI":"10.1007\/978-3-030-01234-2_8"}],"container-title":["The Visual Computer"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00371-023-03088-2.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00371-023-03088-2\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00371-023-03088-2.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,6,6]],"date-time":"2024-06-06T11:09:55Z","timestamp":1717672195000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00371-023-03088-2"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,9,20]]},"references-count":48,"journal-issue":{"issue":"6","published-print":{"date-parts":[[2024,6]]}},"alternative-id":["3088"],"URL":"https:\/\/doi.org\/10.1007\/s00371-023-03088-2","relation":{},"ISSN":["0178-2789","1432-2315"],"issn-type":[{"value":"0178-2789","type":"print"},{"value":"1432-2315","type":"electronic"}],"subject":[],"published":{"date-parts":[[2023,9,20]]},"assertion":[{"value":"30 August 2023","order":1,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"20 September 2023","order":2,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare that we do not have any commercial or associative interest that represents a conflict of interest in connection with the work submitted. The authors declare that they have no conflicts of interest to this work.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}]}}