{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,26]],"date-time":"2026-02-26T15:33:29Z","timestamp":1772120009479,"version":"3.50.1"},"reference-count":33,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2024,12,5]],"date-time":"2024-12-05T00:00:00Z","timestamp":1733356800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,12,5]],"date-time":"2024-12-05T00:00:00Z","timestamp":1733356800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["SIViP"],"published-print":{"date-parts":[[2025,1]]},"DOI":"10.1007\/s11760-024-03670-8","type":"journal-article","created":{"date-parts":[[2024,12,5]],"date-time":"2024-12-05T05:09:43Z","timestamp":1733375383000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Split-and-recombine and vision transformer based 3D human pose estimation"],"prefix":"10.1007","volume":"19","author":[{"given":"Xinyi","family":"Lu","sequence":"first","affiliation":[]},{"given":"Fan","family":"Xu","sequence":"additional","affiliation":[]},{"given":"Shuiyi","family":"Hu","sequence":"additional","affiliation":[]},{"given":"Tianqi","family":"Yu","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7822-4635","authenticated-orcid":false,"given":"Jianling","family":"Hu","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,12,5]]},"reference":[{"key":"3670_CR1","doi-asserted-by":"crossref","unstructured":"Zeng, A., Sun, X., Huang, F., Liu, M., Xu, Q., Lin, S.: SRNet: Improving generalization in 3D human pose 
estimation with a split-and-recombine approach. In: CVPR, pp. 507\u2013523 (2020)","DOI":"10.1007\/978-3-030-58568-6_30"},{"key":"3670_CR2","unstructured":"Vaswani, A., Shazeer, N.M., Parmar, N., Uszkoreit, J., Jones, L., Gomez, A.N., Kaiser, L., Polosukhin, I.: Attention is all you need. In: NeurIPS, pp. 5998\u20136008 (2017)"},{"key":"3670_CR3","doi-asserted-by":"crossref","unstructured":"Martinez, J., Hossain, R., Romero, J., Little, J.: A simple yet effective baseline for 3D human pose estimation. In: ICCV, pp. 2659\u20132668 (2017)","DOI":"10.1109\/ICCV.2017.288"},{"key":"3670_CR4","doi-asserted-by":"crossref","unstructured":"Fang, H., Xu, Y., Wang, W., Liu, X., Zhu, S.: Learning pose grammar to encode human body configuration for 3D pose estimation. In: AAAI, pp. 6821\u20136828 (2018)","DOI":"10.1609\/aaai.v32i1.12270"},{"key":"3670_CR5","doi-asserted-by":"crossref","unstructured":"Tekin, B., M\u00e1rquez-Neila, P., Salzmann, M., Fua, P.: Learning to fuse 2D and 3D image cues for monocular body pose estimation. In: ICCV, pp. 3961\u20133970 (2017)","DOI":"10.1109\/ICCV.2017.425"},{"key":"3670_CR6","doi-asserted-by":"crossref","unstructured":"Ci H., Wang C., Ma X., Wang Y.: Optimizing network structure for 3D human pose estimation. In: ICCV, pp. 2262\u20132271 (2019)","DOI":"10.1109\/ICCV.2019.00235"},{"key":"3670_CR7","unstructured":"Defferrard, M., Bresson, X., Vandergheynst, P.: Convolutional neural networks on graphs with fast localized spectral filtering. In: NeurIPS, pp. 3844\u20133852 (2016)"},{"key":"3670_CR8","doi-asserted-by":"crossref","unstructured":"Zhao L, Peng X, Tian Y.: Semantic graph convolutional networks for 3D human pose regression. In: CVPR, pp. 3420\u20133430 (2019)","DOI":"10.1109\/CVPR.2019.00354"},{"key":"3670_CR9","doi-asserted-by":"crossref","unstructured":"Li, S., Ke, L., Pratama, K., Tai, Y., Tang, C., Cheng, K.: Cascaded deep monocular 3D human pose estimation with evolutionary training data. In: CVPR, pp. 6172\u20136182. 
(2020)","DOI":"10.1109\/CVPR42600.2020.00621"},{"key":"3670_CR10","doi-asserted-by":"publisher","first-page":"693","DOI":"10.1007\/s11760-019-01602-5","volume":"14","author":"D Miki","year":"2020","unstructured":"Miki, D., Abe, S., Chen, S., Demachi, K.: Robust human pose estimation from distorted wide-angle images through iterative search of transformation parameters. SIViP 14, 693\u2013700 (2020)","journal-title":"SIViP"},{"key":"3670_CR11","doi-asserted-by":"publisher","first-page":"321","DOI":"10.1007\/s11760-021-01922-5","volume":"16","author":"B Li","year":"2021","unstructured":"Li, B., Ji, Y., Li, Y., Xu, Y., Liu, C.: Pose knowledge transfer for multi-person pose estimation. SIViP 16, 321\u2013328 (2021)","journal-title":"SIViP"},{"key":"3670_CR12","doi-asserted-by":"publisher","first-page":"551","DOI":"10.1007\/s11760-021-01999-y","volume":"16","author":"T Wan","year":"2022","unstructured":"Wan, T., Luo, Y., Zhang, Z., Ou, Z.: TSNet: tree structure network for human pose estimation. SIViP 16, 551\u2013558 (2022)","journal-title":"SIViP"},{"key":"3670_CR13","doi-asserted-by":"crossref","unstructured":"Tang Z., Li J., Hao Y., Hong R.: MLP-JCG: Multi-Layer Perceptron with Joint-Coordinate Gating for Efficient 3D Human Pose Estimation. IEEE Trans. Multimedia, pp. 1\u201313 (2023)","DOI":"10.1109\/TMM.2023.3240455"},{"key":"3670_CR14","doi-asserted-by":"crossref","unstructured":"Cai Y, Ge L, Liu J, et al. Exploiting spatial-temporal relationships for 3D pose estimation via graph convolutional networks. In: ICCV, pp. 2272\u20132281 (2019)","DOI":"10.1109\/ICCV.2019.00236"},{"key":"3670_CR15","doi-asserted-by":"crossref","unstructured":"Pavllo, D., Feichtenhofer, C., Grangier, D., Auli, M.: 3D human pose estimation in video with temporal convolutions and semi-supervised training.In: CVPR, pp. 
7745\u20137754 (2019)","DOI":"10.1109\/CVPR.2019.00794"},{"key":"3670_CR16","doi-asserted-by":"crossref","unstructured":"Cheng Y., Yang B., Wang B., Wending Y., Tan R.: Occlusion-aware networks for 3D human pose estimation in video. In: ICCV, pp. 723\u2013732 (2019)","DOI":"10.1109\/ICCV.2019.00081"},{"key":"3670_CR17","doi-asserted-by":"publisher","first-page":"1047","DOI":"10.1016\/j.jvcir.2020.102866","volume":"71","author":"P Verma","year":"2020","unstructured":"Verma, P., Rajeev, S.: Three stage deep network for 3D human pose reconstruction by exploiting spatial and temporal data via its 2D pose. J. Vis. Commun. Image Represent. 71, 1047\u20133203 (2020)","journal-title":"J. Vis. Commun. Image Represent."},{"issue":"6","key":"3670_CR18","doi-asserted-by":"publisher","first-page":"600","DOI":"10.1080\/21681163.2021.1902400","volume":"9","author":"P Verma","year":"2021","unstructured":"Verma, P., Rajeev, S.: Reconsideration of multi-stage deep network for human pose estimation. Comput. Methods Biomech. Biomed. Eng. Imaging Vis. 9(6), 600\u2013612 (2021)","journal-title":"Comput. Methods Biomech. Biomed. Eng. Imaging Vis."},{"key":"3670_CR19","doi-asserted-by":"publisher","first-page":"2417","DOI":"10.1007\/s00371-021-02120-7","volume":"38","author":"P Verma","year":"2022","unstructured":"Verma, P., Srivastava, R.: Two-stage multi-view deep network for 3D human pose reconstruction using images and its 2D joint heatmaps through enhanced stack-hourglass approach. Vis. Comput. 38, 2417\u20132430 (2022)","journal-title":"Vis. Comput."},{"issue":"1","key":"3670_CR20","doi-asserted-by":"publisher","first-page":"198","DOI":"10.1109\/TCSVT.2021.3057267","volume":"32","author":"T Chen","year":"2022","unstructured":"Chen, T., Fang, C., Shen, X., Zhu, Y., Chen, Z., Luo, J.: Anatomy-aware 3D human pose estimation with bone-based pose decomposition. IEEE Trans. Circuits Syst. Video Technol. 32(1), 198\u2013209 (2022)","journal-title":"IEEE Trans. Circuits Syst. 
Video Technol."},{"key":"3670_CR21","doi-asserted-by":"crossref","unstructured":"Liu, Z., Feng, R., Chen, H., Wu, S., Gao, Y., Gao, Y., Wang, X.: Temporal Feature Alignment and Mutual Information Maximization for Video-Based Human Pose Estimation. In: CVPR, pp. 10996\u201311006 (2022)","DOI":"10.1109\/CVPR52688.2022.01073"},{"key":"3670_CR22","unstructured":"Dosovitskiy, A., Beyer, L., Kolesnikov, A., Weissenborn, D., Zhai, X., Unterthiner, T., Dehghani, M., Minderer, M., Heigold, G., Gelly, S., Uszkoreit, J., Houlsby, N.: An image is worth 16x16 words: Transformers for image recognition at scale. arXiv preprint arXiv: 2010.11929 (2020)"},{"key":"3670_CR23","doi-asserted-by":"crossref","unstructured":"Yang, S., Quan, Z., Nie, M., Yang, W.:TransPose: Keypoint localization via Transformer. In: IEEE International Conference on Computer Vision, pp. 11802\u201311812 (2021)","DOI":"10.1109\/ICCV48922.2021.01159"},{"key":"3670_CR24","unstructured":"Yuan, Y., Fu, R., Huang, L., Lin, W., Zhang, C., Chen, X., Wang, J.: HRFormer: High-Resolution Transformer for Dense Prediction. arXiv preprint arXiv: 2110.09408 (2021)"},{"key":"3670_CR25","doi-asserted-by":"crossref","unstructured":"Sun, K., Xiao, B., Liu, D., Wang, J.: Deep High-Resolution Representation Learning for Human Pose Estimation. In: CVPR, pp. 5686\u20135696 (2019)","DOI":"10.1109\/CVPR.2019.00584"},{"key":"3670_CR26","doi-asserted-by":"crossref","unstructured":"Zhao, W., Tian, Y., Ye, Q., Jiao, J., Wang, W.: GraFormer: Graph-oriented Transformer for 3D Pose Estimation. In: CVPR, pp. 20406\u201320415 (2022)","DOI":"10.1109\/CVPR52688.2022.01979"},{"key":"3670_CR27","unstructured":"Park S., Kwak N.: 3D human pose estimation with relational networks. 
arXiv preprint arXiv:1805.08961 (2018)"},{"issue":"7","key":"3670_CR28","doi-asserted-by":"publisher","first-page":"1325","DOI":"10.1109\/TPAMI.2013.248","volume":"36","author":"C Ionescu","year":"2014","unstructured":"Ionescu, C., Papava, D., Olaru, V., Sminchisescu, C.: Human3.6M: large scale datasets and predictive methods for 3D human sensing in natural environments. IEEE Trans. Pattern Anal. Machine Intell. 36(7), 1325\u20131339 (2014)","journal-title":"IEEE Trans. Pattern Anal. Machine Intell."},{"key":"3670_CR29","doi-asserted-by":"crossref","unstructured":"Mehta, D., Rhodin, H., Casas, D., Fua, P.V., Sotnychenko, O., Xu, W., Theobalt, C.: Monocular 3D human pose estimation in the wild using improved CNN supervision. In: 3DV, pp. 506\u2013516 (2017)","DOI":"10.1109\/3DV.2017.00064"},{"key":"3670_CR30","unstructured":"Paszke, A., Gross, S., Massa, F., Lerer, A., Bradbury, J., Chanan, G., Killeen, T., Lin, Z., Gimelshein, N., Antiga, L., Desmaison, A., K\u00f6pf, A., Yang, E., DeVito, Z., Raison, M., Tejani, A., Chilamkurthy, S., Steiner, B., Fang, L., Bai, J., Chintala, S.: Pytorch: An imperative style, high-performance deep learning library. arXiv preprint arXiv:1912.01703 (2019)"},{"key":"3670_CR31","unstructured":"Reddi, S.J., Kale, S., Kumar, S.: On the convergence of Adam and Beyond. arXiv preprint arXiv: 1904.09237 (2018)"},{"key":"3670_CR32","doi-asserted-by":"crossref","unstructured":"Chen, Y., Wang, Z., Peng, Y., Zhang, Z., Yu, G., Sun, J.: Cascaded pyramid network for multi-person pose estimation. In: CVPR, pp. 7103\u20137112 (2018)","DOI":"10.1109\/CVPR.2018.00742"},{"key":"3670_CR33","unstructured":"Lin J., Lee G. H.: Trajectory space factorization for deep video-based 3d human pose estimation. 
arXiv preprint arXiv: 1908.08289 (2019)"}],"container-title":["Signal, Image and Video Processing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11760-024-03670-8.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11760-024-03670-8\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11760-024-03670-8.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,1,28]],"date-time":"2025-01-28T12:49:53Z","timestamp":1738068593000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11760-024-03670-8"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,12,5]]},"references-count":33,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2025,1]]}},"alternative-id":["3670"],"URL":"https:\/\/doi.org\/10.1007\/s11760-024-03670-8","relation":{"has-preprint":[{"id-type":"doi","id":"10.21203\/rs.3.rs-3237608\/v1","asserted-by":"object"}]},"ISSN":["1863-1703","1863-1711"],"issn-type":[{"value":"1863-1703","type":"print"},{"value":"1863-1711","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,12,5]]},"assertion":[{"value":"5 August 2023","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"24 August 2024","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"26 September 2024","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"5 December 2024","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article 
History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare that they have no Conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}},{"value":"This article does not contain any studies with human participants.","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethics approval"}},{"value":"Not applicable.","order":4,"name":"Ethics","group":{"name":"EthicsHeading","label":"Consent to participate"}},{"value":"Not applicable.","order":5,"name":"Ethics","group":{"name":"EthicsHeading","label":"Consent for publication"}}],"article-number":"65"}}