{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,2]],"date-time":"2026-01-02T03:10:34Z","timestamp":1767323434867,"version":"3.48.0"},"publisher-location":"Singapore","reference-count":35,"publisher":"Springer Nature Singapore","isbn-type":[{"value":"9789819557363","type":"print"},{"value":"9789819557370","type":"electronic"}],"license":[{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2026]]},"DOI":"10.1007\/978-981-95-5737-0_18","type":"book-chapter","created":{"date-parts":[[2026,1,2]],"date-time":"2026-01-02T03:08:13Z","timestamp":1767323293000},"page":"247-261","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Diffusion in\u00a0Time and\u00a0Frequency Domains for\u00a0Efficient 3D Human Pose Estimation in\u00a0Videos"],"prefix":"10.1007","author":[{"given":"Yong","family":"Gu","sequence":"first","affiliation":[]},{"given":"Yao","family":"Yao","sequence":"additional","affiliation":[]},{"given":"Longjie","family":"Huang","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2026,1,2]]},"reference":[{"key":"18_CR1","doi-asserted-by":"crossref","unstructured":"Xiaohan\u00a0Nie, B., Xiong, C., Zhu, S.-C.: Joint action recognition and pose estimation from video. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp.\u00a01293\u20131301 (2015)","DOI":"10.1109\/CVPR.2015.7298734"},{"issue":"1","key":"18_CR2","first-page":"8857748","volume":"2020","author":"Q Fu","year":"2020","unstructured":"Fu, Q., Zhang, X., Xu, J., Zhang, H.: Capture of 3D human motion pose in virtual reality based on video recognition. Complexity 2020(1), 8857748 (2020)","journal-title":"Complexity"},{"key":"18_CR3","doi-asserted-by":"crossref","unstructured":"Huo, R., Gao, Q., Qi, J., Ju, Z.: 3D human pose estimation in video for human-computer\/robot interaction. In: International Conference on Intelligent Robotics and Applications, pp.\u00a0176\u2013187, Springer, Heidelberg (2023)","DOI":"10.1007\/978-981-99-6498-7_16"},{"issue":"1","key":"18_CR4","doi-asserted-by":"publisher","first-page":"172","DOI":"10.1109\/TPAMI.2019.2929257","volume":"43","author":"Z Cao","year":"2019","unstructured":"Cao, Z., Hidalgo, G., Simon, T., Wei, S.-E., Sheikh, Y.: OpenPose: realtime multi-person 2D pose estimation using part affinity fields. IEEE Trans. Pattern Anal. Mach. Intell. 43(1), 172\u2013186 (2019)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"issue":"6","key":"18_CR5","doi-asserted-by":"publisher","first-page":"7157","DOI":"10.1109\/TPAMI.2022.3222784","volume":"45","author":"H-S Fang","year":"2022","unstructured":"Fang, H.-S., et al.: AlphaPose: whole-body regional multi-person pose estimation and tracking in real-time. IEEE Trans. Pattern Anal. Mach. Intell. 45(6), 7157\u20137173 (2022)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"18_CR6","unstructured":"Kang, H., Wang, Y., Liu, M., Wu, D., Liu, P., Yang, W.: Double-chain constraints for 3D human pose estimation in images and videos. arXiv preprint arXiv:2308.05298 (2023)"},{"key":"18_CR7","doi-asserted-by":"crossref","unstructured":"Moreno-Noguer, F.: 3D human pose estimation from a single image via distance matrix regression. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp.\u00a02823\u20132832 (2017)","DOI":"10.1109\/CVPR.2017.170"},{"key":"18_CR8","unstructured":"Granlund, G.H., Knutsson, H.: Signal Processing for Computer Vision. Springer, Heidelberg (2013)"},{"key":"18_CR9","doi-asserted-by":"crossref","unstructured":"Zhao, Q., Zheng, C., Liu, M., Wang, P., Chen, C.: PoseFormerV2: exploring frequency domain for efficient and robust 3D human pose estimation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp.\u00a08877\u20138886 (2023)","DOI":"10.1109\/CVPR52729.2023.00857"},{"key":"18_CR10","doi-asserted-by":"crossref","unstructured":"Chen, Y., Wang, Z., Peng, Y., Zhang, Z., Yu, G., Sun, J.: Cascaded pyramid network for multi-person pose estimation. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp.\u00a07103\u20137112 (2018)","DOI":"10.1109\/CVPR.2018.00742"},{"key":"18_CR11","doi-asserted-by":"crossref","unstructured":"Wehrbein, T., Rudolph, M., Rosenhahn, B., Wandt, B.: Probabilistic monocular 3D human pose estimation with normalizing flows. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp.\u00a011199\u201311208 (2021)","DOI":"10.1109\/ICCV48922.2021.01101"},{"key":"18_CR12","doi-asserted-by":"crossref","unstructured":"Ci, H., et al.: GFPose: learning 3D human pose prior with gradient fields. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp.\u00a04800\u20134810 (2023)","DOI":"10.1109\/CVPR52729.2023.00465"},{"key":"18_CR13","doi-asserted-by":"crossref","unstructured":"Kang, H., et al.: Diffusion-based pose refinement and multi-hypothesis generation for 3D human pose estimation. In: ICASSP 2024-2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp.\u00a05130\u20135134. IEEE (2024)","DOI":"10.1109\/ICASSP48485.2024.10445850"},{"issue":"7","key":"18_CR14","doi-asserted-by":"publisher","first-page":"1325","DOI":"10.1109\/TPAMI.2013.248","volume":"36","author":"C Ionescu","year":"2013","unstructured":"Ionescu, C., Papava, D., Olaru, V., Sminchisescu, C.: Human3.6M: large scale datasets and predictive methods for 3D human sensing in natural environments. IEEE Trans. Pattern Anal. Mach. Intell. 36(7), 1325\u20131339 (2013)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"18_CR15","doi-asserted-by":"crossref","unstructured":"Mehta, D., et al.: Monocular 3D human pose estimation in the wild using improved CNN supervision. In: 2017 international conference on 3D vision (3DV), pp.\u00a0506\u2013516. IEEE (2017)","DOI":"10.1109\/3DV.2017.00064"},{"issue":"4","key":"18_CR16","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3072959.3073596","volume":"36","author":"D Mehta","year":"2017","unstructured":"Mehta, D., et al.: VNect: real-time 3D human pose estimation with a single RGB camera. ACM Trans. Graph. (ToG) 36(4), 1\u201314 (2017)","journal-title":"ACM Trans. Graph. (ToG)"},{"key":"18_CR17","first-page":"1296","volume":"37","author":"H Li","year":"2023","unstructured":"Li, H., et al.: Pose-oriented transformer with uncertainty-guided refinement for 2D-to-3D human pose estimation. Proc. AAAI Conf. Artif. Intell. 37, 1296\u20131304 (2023)","journal-title":"Proc. AAAI Conf. Artif. Intell."},{"key":"18_CR18","first-page":"98717","volume":"37","author":"W Wang","year":"2024","unstructured":"Wang, W., Xiao, J., Wang, C., Liu, W., Wang, Z., Chen, L.: D$$i^2pose$$: discrete diffusion model for occluded 3D human pose estimation. Adv. Neural. Inf. Process. Syst. 37, 98717\u201398741 (2024)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"18_CR19","doi-asserted-by":"crossref","unstructured":"Pavllo, D., Feichtenhofer, C., Grangier, D., Auli, M.: 3D human pose estimation in video with temporal convolutions and semi-supervised training. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp.\u00a07753\u20137762 (2019)","DOI":"10.1109\/CVPR.2019.00794"},{"key":"18_CR20","doi-asserted-by":"crossref","unstructured":"Cai, Y., Ge, L., Liu, J., Cai, J., Cham, T.-J., Yuan, J., Thalmann, N.M.: Exploiting spatial-temporal relationships for 3D pose estimation via graph convolutional networks. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp.\u00a02272\u20132281 (2019)","DOI":"10.1109\/ICCV.2019.00236"},{"key":"18_CR21","doi-asserted-by":"crossref","unstructured":"Dabral, R., Mundhada, A., Kusupati, U., Afaque, S., Sharma, A., Jain, A.: Learning 3D human pose from structure and motion. In: Proceedings of the European Conference on Computer Vision (ECCV), pp.\u00a0668\u2013683 (2018)","DOI":"10.1007\/978-3-030-01240-3_41"},{"key":"18_CR22","doi-asserted-by":"crossref","unstructured":"Shan, W., Liu, Z., Zhang, X., Wang, S., Ma, S., Gao, W.: P-STMO: pre-trained spatial temporal many-to-one model for 3d human pose estimation. In: European Conference on Computer Vision, pp.\u00a0461\u2013478. Springer, Heidelberg (2022)","DOI":"10.1007\/978-3-031-20065-6_27"},{"key":"18_CR23","doi-asserted-by":"crossref","unstructured":"Shan, W., et al.: Diffusion-based 3D human pose estimation with multi-hypothesis aggregation. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp.\u00a014761\u201314771 (2023)","DOI":"10.1109\/ICCV51070.2023.01356"},{"key":"18_CR24","doi-asserted-by":"crossref","unstructured":"Gong, J., Foo, L.G., Fan, Z., Ke, Q., Rahmani, H., Liu, J.: DiffPose: toward more reliable 3D pose estimation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp.\u00a013041\u201313051 (2023)","DOI":"10.1109\/CVPR52729.2023.01253"},{"key":"18_CR25","first-page":"6840","volume":"33","author":"J Ho","year":"2020","unstructured":"Ho, J., Jain, A., Abbeel, P.: Denoising diffusion probabilistic models. Adv. Neural. Inf. Process. Syst. 33, 6840\u20136851 (2020)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"18_CR26","unstructured":"Song, J., Meng, C., Ermon, S.: Denoising diffusion implicit models. arXiv preprint arXiv:2010.02502 (2020)"},{"key":"18_CR27","doi-asserted-by":"publisher","first-page":"1282","DOI":"10.1109\/TMM.2022.3141231","volume":"25","author":"W Li","year":"2022","unstructured":"Li, W., Liu, H., Ding, R., Liu, M., Wang, P., Yang, W.: Exploiting temporal contexts with strided transformer for 3D human pose estimation. IEEE Trans. Multimedia 25, 1282\u20131293 (2022)","journal-title":"IEEE Trans. Multimedia"},{"key":"18_CR28","doi-asserted-by":"crossref","unstructured":"Zhang, J., Tu, Z., Yang, J., Chen, Y., Yuan, J.: MixSTE: seq2seq mixed spatio-temporal encoder for 3D human pose estimation in video. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp.\u00a013232\u201313242 (2022)","DOI":"10.1109\/CVPR52688.2022.01288"},{"key":"18_CR29","doi-asserted-by":"crossref","unstructured":"Li, W., Liu, H., Tang, H., Wang, P., Van Gool, L.: MHFormer: multi-hypothesis transformer for 3D human pose estimation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp.\u00a013147\u201313156 (2022)","DOI":"10.1109\/CVPR52688.2022.01280"},{"key":"18_CR30","doi-asserted-by":"crossref","unstructured":"Einfalt, M., Ludwig, K., Lienhart, R.: Uplift and upsample: efficient 3D human pose estimation with uplifting transformers. In: Proceedings of the IEEE\/CVF Winter Conference on Applications of Computer Vision, pp.\u00a02903\u20132913 (2023)","DOI":"10.1109\/WACV56688.2023.00292"},{"key":"18_CR31","doi-asserted-by":"crossref","unstructured":"Cai, J., Liu, H., Ding, R., Li, W., Wu, J., Ban, M.: HTNet: human topology aware network for 3D human pose estimation. In: 2023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), ICASSP 2023, pp.\u00a01\u20135. IEEE (2023)","DOI":"10.1109\/ICASSP49357.2023.10095949"},{"key":"18_CR32","doi-asserted-by":"crossref","unstructured":"Zheng, C., Zhu, S., Mendieta, M., Yang, T., Chen, C., Ding, Z.: 3D human pose estimation with spatial and temporal transformers. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp.\u00a011656\u201311665 (2021)","DOI":"10.1109\/ICCV48922.2021.01145"},{"key":"18_CR33","doi-asserted-by":"publisher","first-page":"104174","DOI":"10.1016\/j.jvcir.2024.104174","volume":"101","author":"Z Islam","year":"2024","unstructured":"Islam, Z., Hamza, A.B.: Multi-hop graph transformer network for 3D human pose estimation. J. Vis. Commun. Image Represent. 101, 104174 (2024)","journal-title":"J. Vis. Commun. Image Represent."},{"key":"18_CR34","doi-asserted-by":"publisher","first-page":"110925","DOI":"10.1016\/j.patcog.2024.110925","volume":"158","author":"W Li","year":"2025","unstructured":"Li, W., et al.: GraphMLP: a graph MLP-like architecture for 3D human pose estimation. Pattern Recogn. 158, 110925 (2025)","journal-title":"Pattern Recogn."},{"key":"18_CR35","doi-asserted-by":"crossref","unstructured":"Zeng, X., Qin, H., Kong, M., Chen, L., Zhu, Q.: Probablistic restoration with adaptive noise sampling for 3D human pose estimation. In: 2024 IEEE International Conference on Multimedia and Expo (ICME), pp.\u00a01\u20136. IEEE (2024)","DOI":"10.1109\/ICME57554.2024.10687670"}],"container-title":["Lecture Notes in Computer Science","Pattern Recognition and Computer Vision"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-95-5737-0_18","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,1,2]],"date-time":"2026-01-02T03:08:17Z","timestamp":1767323297000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-95-5737-0_18"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026]]},"ISBN":["9789819557363","9789819557370"],"references-count":35,"URL":"https:\/\/doi.org\/10.1007\/978-981-95-5737-0_18","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026]]},"assertion":[{"value":"2 January 2026","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"PRCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Chinese Conference on Pattern Recognition and Computer Vision  (PRCV)","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Shanghai","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"15 October 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18 October 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"8","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ccprcv2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/2025.prcv.cn\/index.asp","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}