{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,1]],"date-time":"2025-12-01T11:28:40Z","timestamp":1764588520844,"version":"3.40.3"},"publisher-location":"Singapore","reference-count":44,"publisher":"Springer Nature Singapore","isbn-type":[{"type":"print","value":"9789819720941"},{"type":"electronic","value":"9789819720958"}],"license":[{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024]]},"DOI":"10.1007\/978-981-97-2095-8_15","type":"book-chapter","created":{"date-parts":[[2024,3,29]],"date-time":"2024-03-29T13:01:41Z","timestamp":1711717301000},"page":"274-294","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["A U-Shaped Spatio-Temporal Transformer as\u00a0Solver for\u00a0Motion Capture"],"prefix":"10.1007","author":[{"given":"Huabin","family":"Yang","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zhongjian","family":"Zhang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yan","family":"Wang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Deyu","family":"Guan","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Kangshuai","family":"Guo","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yu","family":"Chang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yanru","family":"Zhang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2024,3,30]]},"reference":[{"key":"15_CR1","doi-asserted-by":"crossref","unstructured":"Aksan, E., Kaufmann, M., Hilliges, O.: Structured prediction helps 3D human motion modelling. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 7144\u20137153 (2019)","DOI":"10.1109\/ICCV.2019.00724"},{"issue":"2","key":"15_CR2","doi-asserted-by":"publisher","first-page":"297","DOI":"10.1111\/cgf.13362","volume":"37","author":"A Aristidou","year":"2018","unstructured":"Aristidou, A., Cohen-Or, D., Hodgins, J.K., Shamir, A.: Self-similarity analysis for motion capture cleaning. Comput. Graph. Forum 37(2), 297\u2013309 (2018)","journal-title":"Comput. Graph. Forum"},{"issue":"1","key":"15_CR3","doi-asserted-by":"publisher","first-page":"7","DOI":"10.1007\/s00371-011-0671-y","volume":"29","author":"A Aristidou","year":"2013","unstructured":"Aristidou, A., Lasenby, J.: Real-time marker prediction and COR estimation in optical motion capture. Visual Comput. 29(1), 7\u201326 (2013)","journal-title":"Visual Comput."},{"key":"15_CR4","doi-asserted-by":"crossref","unstructured":"Bao, L., Yang, Z., Wang, S., Bai, D., Lee, J.: Real image denoising based on multi-scale residual dense block and cascaded u-net with block-connection. In: 2020 IEEE\/CVF Conference on Computer Vision and Pattern Recognition Workshops (CVPRW), pp. 1823\u20131831 (2020)","DOI":"10.1109\/CVPRW50498.2020.00232"},{"issue":"2","key":"15_CR5","doi-asserted-by":"publisher","first-page":"239","DOI":"10.1109\/34.121791","volume":"14","author":"P Besl","year":"1992","unstructured":"Besl, P., McKay, N.D.: A method for registration of 3-D shapes. IEEE Trans. Pattern Anal. Mach. Intell. 14(2), 239\u2013256 (1992)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"issue":"9","key":"15_CR6","doi-asserted-by":"publisher","first-page":"1854","DOI":"10.1016\/j.jbiomech.2016.04.016","volume":"49","author":"M Burke","year":"2016","unstructured":"Burke, M., Lasenby, J.: Estimating missing marker positions using low dimensional Kalman smoothing. J. Biomech. 49(9), 1854\u20131858 (2016)","journal-title":"J. Biomech."},{"issue":"3","key":"15_CR7","doi-asserted-by":"publisher","first-page":"686","DOI":"10.1145\/1073204.1073248","volume":"24","author":"J Chai","year":"2005","unstructured":"Chai, J., Hodgins, J.K.: Performance animation from low-dimensional control signals. ACM Trans. Graph. 24(3), 686\u2013696 (2005)","journal-title":"ACM Trans. Graph."},{"key":"15_CR8","doi-asserted-by":"crossref","unstructured":"Chai, J., Hodgins, J.K.: Constraint-based motion optimization using a statistical dynamic model. ACM Trans. Graph. 26(3), 8-es (2007)","DOI":"10.1145\/1276377.1276387"},{"issue":"4","key":"15_CR9","first-page":"1","volume":"40","author":"K Chen","year":"2021","unstructured":"Chen, K., Wang, Y., Zhang, S., Xu, S., Zhang, W., Hu, S.: MoCap-solver: a neural solver for optical motion capture data. ACM Trans. Graph. 40(4), 1\u201311 (2021)","journal-title":"ACM Trans. Graph."},{"key":"15_CR10","unstructured":"CMU. CMU MoCap Dataset (2000)"},{"key":"15_CR11","doi-asserted-by":"crossref","unstructured":"Cui, Q., Sun, H., Li, Y., Kong, Y.: A deep bi-directional attention network for human motion recovery. In: Proceedings of the Twenty-Eighth International Joint Conference on Artificial Intelligence, IJCAI 2019, pp. 701\u2013707. International Joint Conferences on Artificial Intelligence Organization (2019)","DOI":"10.24963\/ijcai.2019\/99"},{"key":"15_CR12","unstructured":"Devlin, J., Chang, M.W., Lee, K., Toutanova, K.: Bert: pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805 (2018)"},{"key":"15_CR13","unstructured":"Dorfm\u00fcller-Ulhaas, K.: Robust optical user motion tracking using a Kalman filter (2007)"},{"key":"15_CR14","unstructured":"Dosovitskiy, A., et al.: An image is worth 16x16 words: transformers for image recognition at scale. arXiv preprint arXiv:2010.11929 (2020)"},{"issue":"12","key":"15_CR15","doi-asserted-by":"publisher","first-page":"2693","DOI":"10.1109\/TCYB.2014.2381659","volume":"45","author":"Y Feng","year":"2015","unstructured":"Feng, Y., et al.: Mining spatial-temporal patterns and structural sparsity for human motion data denoising. IEEE Trans. Cybern. 45(12), 2693\u20132706 (2015)","journal-title":"IEEE Trans. Cybern."},{"key":"15_CR16","doi-asserted-by":"publisher","first-page":"777","DOI":"10.1016\/j.ins.2014.03.013","volume":"277","author":"Y Feng","year":"2014","unstructured":"Feng, Y., Xiao, J., Zhuang, Y., Yang, X., Zhang, J.J., Song, R.: Exploiting temporal stability and low-rank structure for motion capture data refinement. Inf. Sci. 277, 777\u2013793 (2014)","journal-title":"Inf. Sci."},{"key":"15_CR17","doi-asserted-by":"crossref","unstructured":"Ghorbani, N., Black, M.J.: SOMA: solving optical marker-based mocap automatically. In: Proceedings of International Conference on Computer Vision (ICCV), pp. 11117\u201311126 (2021)","DOI":"10.1109\/ICCV48922.2021.01093"},{"key":"15_CR18","doi-asserted-by":"crossref","unstructured":"Herda, L., Fua, P., Pl\u00e4nkers, R., Boulic, R., Thalmann, D.: Skeleton-based motion capture for robust reconstruction of human motion. In: CA 2000, USA, p. 77. IEEE Computer Society (2000)","DOI":"10.1109\/CA.2000.889046"},{"issue":"4","key":"15_CR19","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3197517.3201302","volume":"37","author":"D Holden","year":"2018","unstructured":"Holden, D.: Robust solving of optical motion capture data by denoising. ACM Trans. Graph. 37(4), 1\u201312 (2018)","journal-title":"ACM Trans. Graph."},{"key":"15_CR20","doi-asserted-by":"crossref","unstructured":"Holden, D., Saito, J., Komura, T., Joyce, T.: Learning motion manifolds with convolutional autoencoders. In: SA 2015, New York, NY, USA. Association for Computing Machinery (2015)","DOI":"10.1145\/2820903.2820918"},{"key":"15_CR21","unstructured":"Kingma, D., Ba, J.: Adam: a method for stochastic optimization. In: International Conference on Learning Representations (2014)"},{"key":"15_CR22","doi-asserted-by":"crossref","unstructured":"Kirk, A., O\u2019Brien, J., Forsyth, D.: Skeletal parameter estimation from optical motion capture data. In: 2005 IEEE Computer Society Conference on Computer Vision and Pattern Recognition (CVPR 2005), vol. 2, pp. 782\u2013788 (2005)","DOI":"10.1109\/CVPR.2005.326"},{"key":"15_CR23","unstructured":"Lai, R.Y.Q., Yuen, P.C., Lee, K.K.W.: Motion capture data completion and denoising by singular value thresholding. In: Avis, N., Lefebvre, S. (eds.) Eurographics 2011 - Short Papers. The Eurographics Association (2011)"},{"key":"15_CR24","unstructured":"Li, L., McCann, J., Pollard, N., Faloutsos, C.: Bolero: a principled technique for including bone length constraints in motion capture occlusion filling. In: Proceedings of the 2010 ACM SIGGRAPH\/Eurographics Symposium on Computer Animation, SCA 2010, pp. 179\u2013188, Goslar, DEU (2010)"},{"key":"15_CR25","doi-asserted-by":"publisher","first-page":"92","DOI":"10.1016\/j.cag.2019.03.010","volume":"81","author":"S Li","year":"2019","unstructured":"Li, S., Zhou, Y., Zhu, H., Xie, W., Zhao, Y., Liu, X.: Bidirectional recurrent autoencoder for 3D skeleton motion data refinement. Comput. Graph. 81, 92\u2013103 (2019)","journal-title":"Comput. Graph."},{"issue":"9","key":"15_CR26","doi-asserted-by":"publisher","first-page":"721","DOI":"10.1007\/s00371-006-0080-9","volume":"22","author":"G Liu","year":"2006","unstructured":"Liu, G., McMillan, L.: Estimation of missing markers in human motion capture. Vis. Comput. 22(9), 721\u2013728 (2006)","journal-title":"Vis. Comput."},{"key":"15_CR27","doi-asserted-by":"publisher","first-page":"350","DOI":"10.1016\/j.sigpro.2014.06.009","volume":"105","author":"X Liu","year":"2014","unstructured":"Liu, X., Cheung, Y.M., Peng, S.-J., Cui, Z., Zhong, B., Du, J.-X.: Automatic motion capture data denoising via filtered subspace clustering and low rank matrix approximation. Signal Process. 105, 350\u2013362 (2014)","journal-title":"Signal Process."},{"key":"15_CR28","doi-asserted-by":"crossref","unstructured":"Loper, M., Mahmood, N., Romero, J., Pons-Moll, G., Black, M.J.: SMPL: a skinned multi-person linear model. ACM Trans. Graph. 34(6) (2015)","DOI":"10.1145\/2816795.2818013"},{"key":"15_CR29","doi-asserted-by":"crossref","unstructured":"Luan, J., Jiang, H., Diao, J., Wang, Y., Xiao, J.: Memformer: transformer-based 3D human motion estimation from mocap markers. In: SIGGRAPH Asia 2022 Posters, pp. 1\u20132 (2022)","DOI":"10.1145\/3550082.3564197"},{"key":"15_CR30","doi-asserted-by":"crossref","unstructured":"Mahmood, N., Ghorbani, N., Troje, N.F., Pons-Moll, G., Black, M.: Amass: archive of motion capture as surface shapes. In: 2019 IEEE\/CVF International Conference on Computer Vision (ICCV), pp. 5441\u20135450 (2019)","DOI":"10.1109\/ICCV.2019.00554"},{"issue":"9","key":"15_CR31","doi-asserted-by":"publisher","first-page":"2513","DOI":"10.1007\/s11263-021-01483-7","volume":"129","author":"W Mao","year":"2021","unstructured":"Mao, W., Liu, M., Salzmann, M., Li, H.: Multi-level motion attention for human motion prediction. Int. J. Comput. Vision 129(9), 2513\u20132535 (2021)","journal-title":"Int. J. Comput. Vision"},{"key":"15_CR32","doi-asserted-by":"crossref","unstructured":"Mei, J., Chen, X., Wang, C., Yuille, A., Lan, X., Zeng, W.: Learning to refine 3D human pose sequences. In: 2019 International Conference on 3D Vision (3DV), pp. 358\u2013366. IEEE (2019)","DOI":"10.1109\/3DV.2019.00047"},{"key":"15_CR33","unstructured":"M\u00fcller, M., R\u00f6der, T., Clausen, M., Eberhardt, B., Kr\u00fcger, B., Weber, A.: Documentation mocap database HDM05. Technical report CG-2007-2, Universit\u00e4t Bonn (2007)"},{"key":"15_CR34","series-title":"LNCS","doi-asserted-by":"publisher","first-page":"234","DOI":"10.1007\/978-3-319-24574-4_28","volume-title":"Medical Image Computing and Computer-Assisted Intervention - MICCAI 2015","author":"O Ronneberger","year":"2015","unstructured":"Ronneberger, O., Fischer, P., Brox, T.: U-net: convolutional networks for biomedical image segmentation. In: Navab, N., Hornegger, J., Wells, W., Frangi, A. (eds.) MICCAI 2015. LNCS, vol. 9351, pp. 234\u2013241. Springer, Cham (2015). https:\/\/doi.org\/10.1007\/978-3-319-24574-4_28"},{"issue":"8","key":"15_CR35","doi-asserted-by":"publisher","first-page":"1627","DOI":"10.1021\/ac60214a047","volume":"36","author":"A Savitzky","year":"1964","unstructured":"Savitzky, A., Golay, M.J.: Smoothing and differentiation of data by simplified least squares procedures. Anal. Chem. 36(8), 1627\u20131639 (1964)","journal-title":"Anal. Chem."},{"issue":"3","key":"15_CR36","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/1966394.1966397","volume":"30","author":"J Tautges","year":"2011","unstructured":"Tautges, J., et al.: Motion reconstruction using sparse accelerometer data. ACM Trans. Graph. 30(3), 1\u201312 (2011)","journal-title":"ACM Trans. Graph."},{"key":"15_CR37","unstructured":"Vaswani, A., et al.: Attention is all you need. In: Advances in Neural Information Processing Systems, vol. 30 (2017)"},{"key":"15_CR38","unstructured":"Vicon. Vicon software (2023)"},{"key":"15_CR39","doi-asserted-by":"crossref","unstructured":"Wang, W., et al.: Pyramid vision transformer: a versatile backbone for dense prediction without convolutions. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 568\u2013578 (2021)","DOI":"10.1109\/ICCV48922.2021.00061"},{"key":"15_CR40","doi-asserted-by":"crossref","unstructured":"Wang, Z., Cun, X., Bao, J., Zhou, W., Liu, J., Li, H.: Uformer: a general U-shaped transformer for image restoration. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 17683\u201317693 (2022)","DOI":"10.1109\/CVPR52688.2022.01716"},{"issue":"2\u20133","key":"15_CR41","doi-asserted-by":"publisher","first-page":"221","DOI":"10.1002\/cav.413","volume":"22","author":"J Xiao","year":"2011","unstructured":"Xiao, J., Feng, Y., Hu, W.: Predicting missing markers in human motion capture using L1-sparse representation. Comput. Animat. Virtual Worlds 22(2\u20133), 221\u2013228 (2011)","journal-title":"Comput. Animat. Virtual Worlds"},{"key":"15_CR42","doi-asserted-by":"crossref","unstructured":"Zamir, S.W., Arora, A., Khan, S., Hayat, M., Khan, F.S., Yang, M.H.: Restormer: efficient transformer for high-resolution image restoration. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 5728\u20135739 (2022)","DOI":"10.1109\/CVPR52688.2022.00564"},{"key":"15_CR43","series-title":"LNCS","doi-asserted-by":"publisher","first-page":"625","DOI":"10.1007\/978-3-031-20065-6_36","volume-title":"Computer Vision - ECCV 2022","author":"A Zeng","year":"2022","unstructured":"Zeng, A., Yang, L., Ju, X., Li, J., Wang, J., Xu, Q.: Smoothnet: a plug-and-play network for refining human poses in videos. In: Avidan, S., Brostow, G., Ciss\u00e9, M., Farinella, G.M., Hassner, T. (eds.) ECCV 2022. LNCS, vol. 13665, pp. 625\u2013642. Springer, Heidelberg (2022). https:\/\/doi.org\/10.1007\/978-3-031-20065-6_36"},{"key":"15_CR44","doi-asserted-by":"crossref","unstructured":"Zhou, Y., Barnes, C., Lu, J., Yang, J., Li, H.: On the continuity of rotation representations in neural networks. In: 2019 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 5738\u20135746 (2019)","DOI":"10.1109\/CVPR.2019.00589"}],"container-title":["Lecture Notes in Computer Science","Computational Visual Media"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-97-2095-8_15","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,11,15]],"date-time":"2024-11-15T08:11:33Z","timestamp":1731658293000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-97-2095-8_15"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024]]},"ISBN":["9789819720941","9789819720958"],"references-count":44,"URL":"https:\/\/doi.org\/10.1007\/978-981-97-2095-8_15","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2024]]},"assertion":[{"value":"30 March 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"CVM","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Computational Visual Media","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Wellington","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"New Zealand","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"10 April 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"12 April 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"cvm2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"CVM submission system","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"212","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"34","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"16% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"4","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"No","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}