{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,24]],"date-time":"2026-01-24T17:20:24Z","timestamp":1769275224641,"version":"3.49.0"},"reference-count":35,"publisher":"Springer Science and Business Media LLC","issue":"12","license":[{"start":{"date-parts":[[2025,6,10]],"date-time":"2025-06-10T00:00:00Z","timestamp":1749513600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,6,10]],"date-time":"2025-06-10T00:00:00Z","timestamp":1749513600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"name":"National Key Research and Development Program of China","award":["2023YFF1203503"],"award-info":[{"award-number":["2023YFF1203503"]}]},{"name":"Natural Science Foundation of Shanghai","award":["22ZR1424200"],"award-info":[{"award-number":["22ZR1424200"]}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Vis Comput"],"published-print":{"date-parts":[[2025,9]]},"DOI":"10.1007\/s00371-025-04010-8","type":"journal-article","created":{"date-parts":[[2025,6,10]],"date-time":"2025-06-10T02:41:53Z","timestamp":1749523313000},"page":"9919-9932","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["TGST: A transformer-graph framework for enhanced spatiotemporal modeling in 3D human pose estimation"],"prefix":"10.1007","volume":"41","author":[{"given":"Aolei","family":"Yang","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yinghong","family":"Zhou","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Chenchen","family":"Lv","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Banghua","family":"Yang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zhonghua","family":"Miao","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Minrui","family":"Fei","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2025,6,10]]},"reference":[{"issue":"4","key":"4010_CR1","doi-asserted-by":"publisher","first-page":"496","DOI":"10.1007\/s40436-021-00363-0","volume":"9","author":"A Yang","year":"2021","unstructured":"Yang, A., Ren, H., Fei, M., Naeem, W.: Multi-person vision tracking approach based on human body localization features. Advances in Manufacturing 9(4), 496\u2013508 (2021)","journal-title":"Advances in Manufacturing"},{"key":"4010_CR2","doi-asserted-by":"publisher","first-page":"102630","DOI":"10.1016\/j.mechatronics.2021.102630","volume":"78","author":"A Yang","year":"2021","unstructured":"Yang, A., Chen, Y., Naeem, W., Fei, M., Chen, L.: Humanoid motion planning of robotic arm based on human arm action feature and reinforcement learning. Mechatronics 78, 102630 (2021)","journal-title":"Mechatronics"},{"issue":"1","key":"4010_CR3","doi-asserted-by":"publisher","first-page":"73","DOI":"10.1007\/s00371-022-02766-x","volume":"40","author":"A Yang","year":"2024","unstructured":"Yang, A., Jin, Z., Guo, S., et al.: Unconstrained human gaze estimation approach for medium-distance scene based on monocular vision. Visual Computer 40(1), 73\u201385 (2024)","journal-title":"Visual Computer"},{"key":"4010_CR4","doi-asserted-by":"crossref","unstructured":"Sun, X., Shang, J., Liang, S., et al.: Compositional human pose regression. In: Proceedings of the IEEE International Conference on Computer Vision, pp 2621\u20132630 (2017).","DOI":"10.1109\/ICCV.2017.284"},{"key":"4010_CR5","doi-asserted-by":"crossref","unstructured":"Pavlakos, G., Zhou, X., Daniilidis, K.: Ordinal depth supervision for 3D human pose estimation. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp 7307\u20137316 (2018).","DOI":"10.1109\/CVPR.2018.00763"},{"key":"4010_CR6","doi-asserted-by":"crossref","unstructured":"Wang Z, Nie X, Qu X et al (2022) Distribution-aware single stage models for multi-person 3d pose estimation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp 13086\u201313095.","DOI":"10.1109\/CVPR52688.2022.01275"},{"key":"4010_CR7","doi-asserted-by":"crossref","unstructured":"Chen, C., Ramanan, D.: 3D human pose estimation= 2D pose estimation+ matching. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp 5759\u20135767 (2017).","DOI":"10.1109\/CVPR.2017.610"},{"key":"4010_CR8","doi-asserted-by":"crossref","unstructured":"Moreno-Noguer, F.: 3D human pose estimation from a single image via distance matrix regression. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp 1561\u20131570 (2017).","DOI":"10.1109\/CVPR.2017.170"},{"key":"4010_CR9","doi-asserted-by":"crossref","unstructured":"Martinez, J., Hossain, R., Romero, J. et al.: A simple yet effective baseline for 3D human pose estimation. In: Proceedings of the IEEE international conference on computer vision, pp 2659\u20132668 (2017).","DOI":"10.1109\/ICCV.2017.288"},{"key":"4010_CR10","unstructured":"Ronchi, M. R., Aodha, O. M., Eng, R., Perona, P.: It's all relative: monocular 3D human pose estimation from weakly supervised data. In: Proceedings of the 29th British Machine Vision Conference (BMVC) (2018)."},{"key":"4010_CR11","doi-asserted-by":"crossref","unstructured":"Tekin, B., M\u00e1rquez-Neila, P., Salzmann, M. et al.: Learning to fuse 2D and 3D image cues for monocular body pose estimation. In: Proceedings of the IEEE International Conference on Computer Vision, pp 3961\u20133970 (2017).","DOI":"10.1109\/ICCV.2017.425"},{"key":"4010_CR12","doi-asserted-by":"crossref","unstructured":"Hossain, M. R. I., James, J.: Exploiting temporal information for 3D human pose estimation. In: Proceedings of the European Conference on Computer Vision, pp 69\u201386 (2018).","DOI":"10.1007\/978-3-030-01249-6_5"},{"key":"4010_CR13","doi-asserted-by":"crossref","unstructured":"Dabral, R., Mundhada, A., Kusupati, U. et al.: Learning 3D human pose from structure and motion. In: Lecture Notes in Computer Science, pp 679\u2013696 (2018).","DOI":"10.1007\/978-3-030-01240-3_41"},{"key":"4010_CR14","doi-asserted-by":"crossref","unstructured":"Cai, Y., Ge, L., Liu, J., et al.: Exploiting spatial-temporal relationships for 3D pose estimation via graph convolutional networks. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp 2272\u20132281 (2019).","DOI":"10.1109\/ICCV.2019.00236"},{"issue":"1","key":"4010_CR15","doi-asserted-by":"publisher","first-page":"198","DOI":"10.1109\/TCSVT.2021.3057267","volume":"32","author":"T Chen","year":"2021","unstructured":"Chen, T., Fang, C., Shen, X., et al.: Anatomy-aware 3D human pose estimation with bone-based pose decomposition. IEEE Trans. Circuits Syst. Video Technol. 32(1), 198\u2013209 (2021)","journal-title":"IEEE Trans. Circuits Syst. Video Technol."},{"key":"4010_CR16","doi-asserted-by":"crossref","unstructured":"Liu, J., Liu, M., Liu, H., Li, W.: TCPFormer: learning temporal correlation with implicit pose proxy for 3D human pose estimation. arXiv preprint arXiv:2501.01770. (2025)","DOI":"10.1609\/aaai.v39i5.32583"},{"key":"4010_CR17","doi-asserted-by":"publisher","first-page":"110967","DOI":"10.1016\/j.patcog.2024.110967","volume":"158","author":"Y Li","year":"2025","unstructured":"Li, Y., Chen, D., Tang, T., et al.: HTR-VT: Handwritten text recognition with vision transformer. Pattern Recogn. 158, 110967 (2025)","journal-title":"Pattern Recogn."},{"key":"4010_CR18","doi-asserted-by":"publisher","first-page":"110925","DOI":"10.1016\/j.patcog.2024.110925","volume":"158","author":"W Li","year":"2025","unstructured":"Li, W., Liu, M., Liu, H., et al.: GraphMLP: A graph MLP-like architecture for 3D human pose estimation. Pattern Recogn. 158, 110925 (2025)","journal-title":"Pattern Recogn."},{"key":"4010_CR19","doi-asserted-by":"publisher","first-page":"113045","DOI":"10.1016\/j.knosys.2025.113045","volume":"311","author":"U Kilic","year":"2025","unstructured":"Kilic, U., Karadag, O.O., Ozyer, G.T.: AGMS-GCN: Attention-guided multi-scale graph convolutional networks for skeleton-based action recognition[J]. Knowl.-Based Syst. 311, 113045 (2025)","journal-title":"Knowl.-Based Syst."},{"issue":"1","key":"4010_CR20","doi-asserted-by":"publisher","first-page":"172","DOI":"10.1109\/TPAMI.2019.2929257","volume":"43","author":"Z Cao","year":"2021","unstructured":"Cao, Z., Hidalgo, G., Simon, T., et al.: OpenPose: realtime multi-person 2D pose estimation using part affinity fields. IEEE Trans. Pattern Anal. Mach. Intell. 43(1), 172\u2013186 (2021)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"4010_CR21","doi-asserted-by":"crossref","unstructured":"Osokin, D.: Real-time 2D multi-person pose estimation on CPU: lightweight OpenPose. In: Proceedings of the 8th International Conference on Pattern Recognition Applications and Methods, pp 744\u2013748 (2019).","DOI":"10.5220\/0007555407440748"},{"key":"4010_CR22","doi-asserted-by":"crossref","unstructured":"Zhang, J., Yang, H., Deng, Y.: Enhanced human pose estimation with attention-augmented HRNet. In: Proceedings of the 2024 6th International Conference on Image Processing and Machine Vision (IPMV'24). pp 88\u201393 (2024).","DOI":"10.1145\/3645259.3645274"},{"key":"4010_CR23","doi-asserted-by":"crossref","unstructured":"Liu, J., Rojas, J. et al.: A graph attention spatio-temporal convolutional network for 3D human pose estimation in video. In: IEEE International Conference on Robotics and Automation, pp 3374\u20133380 (2021).","DOI":"10.1109\/ICRA48506.2021.9561605"},{"key":"4010_CR24","doi-asserted-by":"crossref","unstructured":"Zhao, L., Peng, X. et al.: Semantic graph convolutional networks for 3d human pose regression. In: IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp 3420\u20133430 (2019).","DOI":"10.1109\/CVPR.2019.00354"},{"key":"4010_CR25","doi-asserted-by":"publisher","first-page":"9563","DOI":"10.1007\/s12652-023-04629-2","volume":"4","author":"A Yang","year":"2023","unstructured":"Yang, A., Liu, G., Naeem, W., et al.: A monocular 3D human pose estimation approach for virtual character skeleton retargeting. J. Ambient. Intell. Humaniz. Comput. 4, 9563\u20139574 (2023)","journal-title":"J. Ambient. Intell. Humaniz. Comput."},{"issue":"7","key":"4010_CR26","doi-asserted-by":"publisher","first-page":"1325","DOI":"10.1109\/TPAMI.2013.248","volume":"36","author":"C Ionescu","year":"2014","unstructured":"Ionescu, C., Papava, D., Olaru, V., et al.: Human3.6M: large scale datasets and predictive methods for 3D human sensing in natural environments. IEEE Trans. Pattern Anal. Mach. Intell. 36(7), 1325\u20131339 (2014)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"4010_CR27","doi-asserted-by":"crossref","unstructured":"Xie, Y., Du, J., Tsung, F., et al.: FIT3D: real-time flatness inspection algorithm for ceramic tiles using the structured light 3d scanner. In: 2024 IEEE 20th International Conference on Automation Science and Engineering (CASE), pp 897\u2013903 (2024).","DOI":"10.1109\/CASE59546.2024.10711536"},{"key":"4010_CR28","doi-asserted-by":"crossref","unstructured":"Pavllo, D., Feichtenhofer, C., Grangier, D. et al.: 3D human pose estimation in video with temporal convolutions and semi-supervised training. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp 7753\u20137762 (2019).","DOI":"10.1109\/CVPR.2019.00794"},{"key":"4010_CR29","doi-asserted-by":"crossref","unstructured":"Liu, R., Shen, J., Wang, H., et al.: Attention mechanism exploits temporal contexts: real-time 3D human pose reconstruction. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp 5064\u20135073 (2020).","DOI":"10.1109\/CVPR42600.2020.00511"},{"key":"4010_CR30","doi-asserted-by":"crossref","unstructured":"Zhao, W., Wang, W., Tian, Y.: GraFormer: Graph-oriented transformer for 3D pose estimation. In: IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp 20406\u201320415 (2022).","DOI":"10.1109\/CVPR52688.2022.01979"},{"key":"4010_CR31","doi-asserted-by":"crossref","unstructured":"Moon, G., Chang, J., Lee, K. et al.: Camera distance-aware top-down approach for 3D multi-person pose estimation from a single RGB image. In: Proceedings of the IEEE International Conference on Computer Vision, pp 10132\u201310141 (2019).","DOI":"10.1109\/ICCV.2019.01023"},{"issue":"7","key":"4010_CR32","doi-asserted-by":"publisher","first-page":"1654","DOI":"10.1109\/TPAMI.2019.2901875","volume":"42","author":"Y Chen","year":"2019","unstructured":"Chen, Y., Shen, C., et al.: Adversarial learning of structure-aware fully convolutional networks for landmark localization. IEEE Trans. Pattern Anal. Mach. Intell. 42(7), 1654\u20131669 (2019)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"4010_CR33","doi-asserted-by":"crossref","unstructured":"Li, Y., Li, K., Jiang, S. et al.: Geometry-driven self-supervised method for 3D human pose estimation. In: AAAI Conference on Artificial Intelligence, pp 11442\u201311449 (2020).","DOI":"10.1609\/aaai.v34i07.6808"},{"key":"4010_CR34","doi-asserted-by":"crossref","unstructured":"Kolotouros, N., Pavlakos, G. et al.: Learning to reconstruct 3D human pose and shape via model-fitting in the loop. In: IEEE\/CVF International Conference on Computer Vision, pp 2252\u20132261 (2019).","DOI":"10.1109\/ICCV.2019.00234"},{"key":"4010_CR35","unstructured":"Zanfir, A., Marinoiu, E., Zanfir, M. et al.: Deep network for the integrated 3d sensing of multiple people in natural images. In: Advances in Neural Information Processing Systems, pp 8410\u20138419 (2018)."}],"container-title":["The Visual Computer"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00371-025-04010-8.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00371-025-04010-8\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00371-025-04010-8.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,15]],"date-time":"2025-09-15T09:35:50Z","timestamp":1757928950000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00371-025-04010-8"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,6,10]]},"references-count":35,"journal-issue":{"issue":"12","published-print":{"date-parts":[[2025,9]]}},"alternative-id":["4010"],"URL":"https:\/\/doi.org\/10.1007\/s00371-025-04010-8","relation":{},"ISSN":["0178-2789","1432-2315"],"issn-type":[{"value":"0178-2789","type":"print"},{"value":"1432-2315","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,6,10]]},"assertion":[{"value":"14 May 2025","order":1,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"10 June 2025","order":2,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare no competing interests.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interests"}}]}}