{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,3]],"date-time":"2026-06-03T08:51:25Z","timestamp":1780476685497,"version":"3.54.1"},"publisher-location":"Cham","reference-count":46,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031726545","type":"print"},{"value":"9783031726552","type":"electronic"}],"license":[{"start":{"date-parts":[[2024,12,6]],"date-time":"2024-12-06T00:00:00Z","timestamp":1733443200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,12,6]],"date-time":"2024-12-06T00:00:00Z","timestamp":1733443200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-72655-2_18","type":"book-chapter","created":{"date-parts":[[2024,12,5]],"date-time":"2024-12-05T10:11:57Z","timestamp":1733393517000},"page":"309-325","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":9,"title":["RePOSE: 3D Human Pose Estimation via\u00a0Spatio-Temporal Depth Relational Consistency"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0009-0001-8515-9189","authenticated-orcid":false,"given":"Ziming","family":"Sun","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0942-9781","authenticated-orcid":false,"given":"Yuan","family":"Liang","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-9536-5231","authenticated-orcid":false,"given":"Zejun","family":"Ma","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-4467-5863","authenticated-orcid":false,"given":"Tianle","family":"Zhang","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9543-3754","authenticated-orcid":false,"given":"Linchao","family":"Bao","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4598-1522","authenticated-orcid":false,"given":"Guiqing","family":"Li","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3802-4644","authenticated-orcid":false,"given":"Shengfeng","family":"He","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"297","published-online":{"date-parts":[[2024,12,6]]},"reference":[{"key":"18_CR1","doi-asserted-by":"crossref","unstructured":"Cai, Y., et al.: Exploiting spatial-temporal relationships for 3D pose estimation via graph convolutional networks. In: ICCV, pp. 2272\u20132281 (2019)","DOI":"10.1109\/ICCV.2019.00236"},{"issue":"1","key":"18_CR2","first-page":"198","volume":"32","author":"T Chen","year":"2021","unstructured":"Chen, T., Fang, C., Shen, X., Zhu, Y., Chen, Z., Luo, J.: Anatomy-aware 3D human pose estimation with bone-based pose decomposition. IEEE TCSVT 32(1), 198\u2013209 (2021)","journal-title":"IEEE TCSVT"},{"key":"18_CR3","doi-asserted-by":"crossref","unstructured":"Chen, Y., Wang, Z., Peng, Y., Zhang, Z., Yu, G., Sun, J.: Cascaded pyramid network for multi-person pose estimation. In: CVPR, pp. 7103\u20137112 (2018)","DOI":"10.1109\/CVPR.2018.00742"},{"key":"18_CR4","doi-asserted-by":"crossref","unstructured":"Chen, Y., Tu, Z., Ge, L., Zhang, D., Chen, R., Yuan, J.: So-handnet: self-organizing network for 3D hand pose estimation with semi-supervised learning. In: ICCV, pp. 6961\u20136970 (2019)","DOI":"10.1109\/ICCV.2019.00706"},{"key":"18_CR5","doi-asserted-by":"crossref","unstructured":"Ci, H., Wang, C., Ma, X., Wang, Y.: Optimizing network structure for 3D human pose estimation. In: ICCV, pp. 2262\u20132271 (2019)","DOI":"10.1109\/ICCV.2019.00235"},{"key":"18_CR6","doi-asserted-by":"crossref","unstructured":"Ci, H., et al.: GFPose: learning 3D human pose prior with gradient fields. In: CVPR, pp. 4800\u20134810 (2023)","DOI":"10.1109\/CVPR52729.2023.00465"},{"key":"18_CR7","doi-asserted-by":"crossref","unstructured":"Fang, H.S., Xie, S., Tai, Y.W., Lu, C.: RMPE: regional multi-person pose estimation. In: ICCV, pp. 2334\u20132343 (2017)","DOI":"10.1109\/ICCV.2017.256"},{"key":"18_CR8","doi-asserted-by":"crossref","unstructured":"Gong, J., Fan, Z., Ke, Q., Rahmani, H., Liu, J.: Meta agent teaming active learning for pose estimation. In: CVPR, pp. 11079\u201311089 (2022)","DOI":"10.1109\/CVPR52688.2022.01080"},{"key":"18_CR9","doi-asserted-by":"crossref","unstructured":"Hossain, M.R.I., Little, J.J.: Exploiting temporal information for 3D human pose estimation. In: ECCV, pp. 68\u201384 (2018)","DOI":"10.1007\/978-3-030-01249-6_5"},{"issue":"2","key":"18_CR10","first-page":"764","volume":"27","author":"S Huang","year":"2017","unstructured":"Huang, S., Wang, W., He, S., Lau, R.W.: Egocentric temporal action proposals. IEEE TIP 27(2), 764\u2013777 (2017)","journal-title":"IEEE TIP"},{"key":"18_CR11","doi-asserted-by":"crossref","unstructured":"Ionescu, C., Papava, D., Olaru, V., Sminchisescu, C.: Human3. 6m: large scale datasets and predictive methods for 3D human sensing in natural environments. IEEE TPAMI 36(7), 1325\u20131339 (2013)","DOI":"10.1109\/TPAMI.2013.248"},{"key":"18_CR12","doi-asserted-by":"crossref","unstructured":"Jiang, Y., et al.: Diffuse3D: wide-angle 3D photography via bilateral diffusion. In: ICCV, pp. 8998\u20139008 (2023)","DOI":"10.1109\/ICCV51070.2023.00826"},{"key":"18_CR13","doi-asserted-by":"publisher","first-page":"1282","DOI":"10.1109\/TMM.2022.3141231","volume":"25","author":"W Li","year":"2022","unstructured":"Li, W., Liu, H., Ding, R., Liu, M., Wang, P., Yang, W.: Exploiting temporal contexts with strided transformer for 3d human pose estimation. IEEE Trans. Multimed. 25, 1282\u20131293 (2022)","journal-title":"IEEE Trans. Multimed."},{"key":"18_CR14","doi-asserted-by":"crossref","unstructured":"Li, W., Liu, H., Tang, H., Wang, P., Van\u00a0Gool, L.: MHFormer: multi-hypothesis transformer for 3D human pose estimation. In: CVPR, pp. 13147\u201313156 (2022)","DOI":"10.1109\/CVPR52688.2022.01280"},{"key":"18_CR15","doi-asserted-by":"crossref","unstructured":"Lin, K., Wang, L., Liu, Z.: End-to-end human pose and mesh reconstruction with transformers. In: CVPR, pp. 1954\u20131963 (2021)","DOI":"10.1109\/CVPR46437.2021.00199"},{"key":"18_CR16","doi-asserted-by":"crossref","unstructured":"Lin, M., Lin, L., Liang, X., Wang, K., Cheng, H.: Recurrent 3D pose sequence machines. In: CVPR, pp. 810\u2013819 (2017)","DOI":"10.1109\/CVPR.2017.588"},{"key":"18_CR17","doi-asserted-by":"crossref","unstructured":"Liu, R., Shen, J., Wang, H., Chen, C., Cheung, S., Asari, V.: Attention mechanism exploits temporal contexts: real-time 3D human pose reconstruction. In: CVPR, pp. 5064\u20135073 (2020)","DOI":"10.1109\/CVPR42600.2020.00511"},{"key":"18_CR18","unstructured":"Loshchilov, I., Hutter, F.: Decoupled weight decay regularization. In: ICLR (2019)"},{"key":"18_CR19","doi-asserted-by":"crossref","unstructured":"Martinez, J., Hossain, R., Romero, J., Little, J.J.: A simple yet effective baseline for 3D human pose estimation. In: ICCV, pp. 2640\u20132649 (2017)","DOI":"10.1109\/ICCV.2017.288"},{"key":"18_CR20","doi-asserted-by":"crossref","unstructured":"Mehta, D., et al.: Monocular 3D human pose estimation in the wild using improved CNN supervision. In: 2017 International Conference on 3D Vision (3DV), pp. 506\u2013516. IEEE (2017)","DOI":"10.1109\/3DV.2017.00064"},{"issue":"4","key":"18_CR21","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3072959.3073596","volume":"36","author":"D Mehta","year":"2017","unstructured":"Mehta, D., et al.: Vnect: real-time 3D human pose estimation with a single RGB camera. ACM TOG 36(4), 1\u201314 (2017)","journal-title":"ACM TOG"},{"key":"18_CR22","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"752","DOI":"10.1007\/978-3-030-58571-6_44","volume-title":"Computer Vision \u2013 ECCV 2020","author":"G Moon","year":"2020","unstructured":"Moon, G., Lee, K.M.: I2L-MeshNet: image-to-lixel prediction network for accurate 3D human pose and mesh estimation from a single RGB image. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.-M. (eds.) ECCV 2020. LNCS, vol. 12352, pp. 752\u2013768. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58571-6_44"},{"key":"18_CR23","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"483","DOI":"10.1007\/978-3-319-46484-8_29","volume-title":"Computer Vision \u2013 ECCV 2016","author":"A Newell","year":"2016","unstructured":"Newell, A., Yang, K., Deng, J.: Stacked hourglass networks for human pose estimation. In: Leibe, B., Matas, J., Sebe, N., Welling, M. (eds.) ECCV 2016. LNCS, vol. 9912, pp. 483\u2013499. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-46484-8_29"},{"key":"18_CR24","unstructured":"Paszke, A., et al.: Automatic differentiation in PyTorch. In: NeurIPS (2017)"},{"key":"18_CR25","doi-asserted-by":"crossref","unstructured":"Pavlakos, G., Zhou, X., Daniilidis, K.: Ordinal depth supervision for 3D human pose estimation. In: CVPR, pp. 7307\u20137316 (2018)","DOI":"10.1109\/CVPR.2018.00763"},{"key":"18_CR26","doi-asserted-by":"crossref","unstructured":"Pavllo, D., Feichtenhofer, C., Grangier, D., Auli, M.: 3D human pose estimation in video with temporal convolutions and semi-supervised training. In: CVPR, pp. 7753\u20137762 (2019)","DOI":"10.1109\/CVPR.2019.00794"},{"key":"18_CR27","series-title":"LNCS","doi-asserted-by":"publisher","first-page":"461","DOI":"10.1007\/978-3-031-20065-6_27","volume-title":"ECCV 2022","author":"W Shan","year":"2022","unstructured":"Shan, W., Liu, Z., Zhang, X., Wang, S., Ma, S., Gao, W.: P-STMO: pre-trained spatial temporal many-to-one model for 3D human pose estimation. In: Avidan, S., Brostow, G., Ciss\u00e9, M., Farinella, G.M., Hassner, T. (eds.) ECCV 2022. LNCS, vol. 13665, pp. 461\u2013478. Springer, Cham (2022). https:\/\/doi.org\/10.1007\/978-3-031-20065-6_27"},{"key":"18_CR28","doi-asserted-by":"crossref","unstructured":"Shan, W., et al.: Diffusion-based 3D human pose estimation with multi-hypothesis aggregation. In: ICCV, pp. 14761\u201314771 (2023)","DOI":"10.1109\/ICCV51070.2023.01356"},{"key":"18_CR29","doi-asserted-by":"crossref","unstructured":"Mehraban, S., Adeli, V., Taati, B.: Motionagformer: enhancing 3D human pose estimation with a transformer-gcnformer network. In: WACV (2024)","DOI":"10.1109\/WACV57701.2024.00677"},{"key":"18_CR30","doi-asserted-by":"crossref","unstructured":"Tang, Z., Qiu, Z., Hao, Y., Hong, R., Yao, T.: 3D human pose estimation with spatio-temporal criss-cross attention. In: CVPR, pp. 4790\u20134799 (2023)","DOI":"10.1109\/CVPR52729.2023.00464"},{"key":"18_CR31","unstructured":"Vaswani, A., et al.: Attention is all you need. In: NeurIPS, vol. 30 (2017)"},{"key":"18_CR32","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"764","DOI":"10.1007\/978-3-030-58601-0_45","volume-title":"Computer Vision \u2013 ECCV 2020","author":"J Wang","year":"2020","unstructured":"Wang, J., Yan, S., Xiong, Y., Lin, D.: Motion guided 3D pose estimation from videos. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.-M. (eds.) ECCV 2020. LNCS, vol. 12358, pp. 764\u2013780. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58601-0_45"},{"issue":"10","key":"18_CR33","doi-asserted-by":"publisher","first-page":"3349","DOI":"10.1109\/TPAMI.2020.2983686","volume":"43","author":"J Wang","year":"2020","unstructured":"Wang, J., et al.: Deep high-resolution representation learning for visual recognition. IEEE TPAMI 43(10), 3349\u20133364 (2020)","journal-title":"IEEE TPAMI"},{"issue":"8","key":"18_CR34","first-page":"3761","volume":"32","author":"Y Xu","year":"2020","unstructured":"Xu, Y., Han, C., Qin, J., Xu, X., Han, G., He, S.: Transductive zero-shot action recognition via visually connected graph convolutional networks. IEEE TNNLS 32(8), 3761\u20133769 (2020)","journal-title":"IEEE TNNLS"},{"key":"18_CR35","doi-asserted-by":"crossref","unstructured":"Yan, S., Xiong, Y., Lin, D.: Spatial temporal graph convolutional networks for skeleton-based action recognition. In: AAAI, vol.\u00a032 (2018)","DOI":"10.1609\/aaai.v32i1.12328"},{"key":"18_CR36","unstructured":"Yang, X., Xu, K., Chen, S., He, S., Yin, B.Y., Lau, R.: Active matting. In: NeurIPS, vol. 31 (2018)"},{"key":"18_CR37","first-page":"379","volume":"31","author":"M Ye","year":"2021","unstructured":"Ye, M., Li, H., Du, B., Shen, J., Shao, L., Hoi, S.C.: Collaborative refining for person re-identification with label noise. IEEE TIP 31, 379\u2013391 (2021)","journal-title":"IEEE TIP"},{"key":"18_CR38","doi-asserted-by":"crossref","unstructured":"Yoon, J.S., Liu, L., Golyanik, V., Sarkar, K., Park, H.S., Theobalt, C.: Pose-guided human animation from a single image in the wild. In: CVPR, pp. 15039\u201315048 (2021)","DOI":"10.1109\/CVPR46437.2021.01479"},{"key":"18_CR39","doi-asserted-by":"crossref","unstructured":"Yu, B.X., Zhang, Z., Liu, Y., Zhong, S., Liu, Y., Chen, C.W.: GLA-GCN: global-local adaptive graph convolutional network for 3D human pose estimation from monocular video. In: ICCV, pp. 8818\u20138829 (2023)","DOI":"10.1109\/ICCV51070.2023.00810"},{"key":"18_CR40","doi-asserted-by":"crossref","unstructured":"Zhang, C., Yang, T., Weng, J., Cao, M., Wang, J., Zou, Y.: Unsupervised pre-training for temporal action localization tasks. In: CVPR, pp. 14031\u201314041 (2022)","DOI":"10.1109\/CVPR52688.2022.01364"},{"issue":"1","key":"18_CR41","doi-asserted-by":"publisher","first-page":"46","DOI":"10.1049\/cit2.12012","volume":"7","author":"J Zhang","year":"2022","unstructured":"Zhang, J., et al.: A spatial attentive and temporal dilated (SATD) GCN for skeleton-based action recognition. CAAI Trans. Intell. Technol. 7(1), 46\u201355 (2022)","journal-title":"CAAI Trans. Intell. Technol."},{"key":"18_CR42","doi-asserted-by":"crossref","unstructured":"Zhang, J., Tu, Z., Yang, J., Chen, Y., Yuan, J.: Mixste: seq2seq mixed spatio-temporal encoder for 3D human pose estimation in video. In: CVPR, pp. 13232\u201313242 (2022)","DOI":"10.1109\/CVPR52688.2022.01288"},{"key":"18_CR43","doi-asserted-by":"crossref","unstructured":"Zhao, L., Peng, X., Tian, Y., Kapadia, M., Metaxas, D.N.: Semantic graph convolutional networks for 3D human pose regression. In: CVPR, pp. 3425\u20133435 (2019)","DOI":"10.1109\/CVPR.2019.00354"},{"key":"18_CR44","doi-asserted-by":"crossref","unstructured":"Zhao, Q., Zheng, C., Liu, M., Wang, P., Chen, C.: Poseformerv2: exploring frequency domain for efficient and robust 3D human pose estimation. In: CVPR, pp. 8877\u20138886 (2023)","DOI":"10.1109\/CVPR52729.2023.00857"},{"key":"18_CR45","doi-asserted-by":"crossref","unstructured":"Zheng, C., Zhu, S., Mendieta, M., Yang, T., Chen, C., Ding, Z.: 3D human pose estimation with spatial and temporal transformers. In: ICCV, pp. 11656\u201311665 (2021)","DOI":"10.1109\/ICCV48922.2021.01145"},{"key":"18_CR46","doi-asserted-by":"crossref","unstructured":"Zhu, W., Ma, X., Liu, Z., Liu, L., Wu, W., Wang, Y.: MotionBERT: a unified perspective on learning human motion representations. In: ICCV, pp. 15085\u201315099 (2023)","DOI":"10.1109\/ICCV51070.2023.01385"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2024"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-72655-2_18","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,12,5]],"date-time":"2024-12-05T11:31:34Z","timestamp":1733398294000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-72655-2_18"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,12,6]]},"ISBN":["9783031726545","9783031726552"],"references-count":46,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-72655-2_18","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,12,6]]},"assertion":[{"value":"6 December 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Milan","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 September 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4 October 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2024.ecva.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}