{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,7,21]],"date-time":"2026-07-21T09:44:30Z","timestamp":1784627070896,"version":"3.55.0"},"publisher-location":"New York, NY, USA","reference-count":50,"publisher":"ACM","funder":[{"DOI":"10.13039\/501100006374","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["82472116"],"award-info":[{"award-number":["82472116"]}],"id":[{"id":"10.13039\/501100006374","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100006374","name":"Natural Science Foundation of Shanghai","doi-asserted-by":"publisher","award":["24ZR1404100"],"award-info":[{"award-number":["24ZR1404100"]}],"id":[{"id":"10.13039\/501100006374","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,6,30]]},"DOI":"10.1145\/3731715.3733437","type":"proceedings-article","created":{"date-parts":[[2025,6,25]],"date-time":"2025-06-25T18:29:43Z","timestamp":1750876183000},"page":"1849-1857","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["SSD-Poser: Avatar Pose Estimation with State Space Duality from Sparse Observations"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0006-0996-0835","authenticated-orcid":false,"given":"Shuting","family":"Zhao","sequence":"first","affiliation":[{"name":"Academy for Engineering &amp; Technology, Fudan University, Shanghai, China and Shanghai Key Laboratory of Medical Image Computing and Computer Assisted Intervention, Shanghai, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-3410-3803","authenticated-orcid":false,"given":"Linxin","family":"Bai","sequence":"additional","affiliation":[{"name":"Academy for Engineering &amp; Technology, Fudan University, Shanghai, China and Shanghai Key Laboratory of Medical Image Computing and Computer Assisted Intervention, Shanghai, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-9176-579X","authenticated-orcid":false,"given":"Liangjing","family":"Shao","sequence":"additional","affiliation":[{"name":"Academy for Engineering &amp; Technology, Fudan University, Shanghai, China and Shanghai Key Laboratory of Medical Image Computing and Computer Assisted Intervention, Shanghai, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8012-6364","authenticated-orcid":false,"given":"Ye","family":"Zhang","sequence":"additional","affiliation":[{"name":"College of Vocational and Technical Teacher Education, Shanghai Polytechnic University, Shanghai, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4186-0678","authenticated-orcid":false,"given":"Xinrong","family":"Chen","sequence":"additional","affiliation":[{"name":"Academy for Engineering &amp; Technology, Fudan University, Shanghai, China and Shanghai Key Laboratory of Medical Image Computing and Computer Assisted Intervention, Shanghai, China"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2025,6,30]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"Advanced Computing Center for the Arts and Design. [n.d.]. ACCAD MoCap Dataset. https:\/\/accad.osu.edu\/research\/motion-lab\/mocap-system-and-data"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298751"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01290"},{"key":"e_1_3_2_1_4_1","unstructured":"Carnegie Mellon University. [n.d.]. Carnegie Mellon University MoCap Dataset. http:\/\/mocap.cs.cmu.edu"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCVW60793.2023.00456"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1145\/1186822.1073248"},{"key":"e_1_3_2_1_7_1","volume-title":"Enhancing Motion Reconstruction From Sparse Tracking Inputs With Kinematic Constraints","author":"Dai Xiaokun","year":"2024","unstructured":"Xiaokun Dai, Xinkang Zhang, Shiman Li, and Xinrong Chen. 2024. Enhancing Motion Reconstruction From Sparse Tracking Inputs With Kinematic Constraints. IEEE Transactions on Automation Science and Engineering (2024)."},{"key":"e_1_3_2_1_8_1","volume-title":"Transformers are SSMs: Generalized models and efficient algorithms through structured state space duality. arXiv preprint arXiv:2405.21060","author":"Dao Tri","year":"2024","unstructured":"Tri Dao and Albert Gu. 2024. Transformers are SSMs: Generalized models and efficient algorithms through structured state space duality. arXiv preprint arXiv:2405.21060 (2024)."},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW59228.2023.00280"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01148"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00054"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00023"},{"key":"e_1_3_2_1_13_1","volume-title":"MoVi: A Large Multipurpose Motion and Video Dataset. arXiv","author":"Ghorbani Saeed","year":"2020","unstructured":"Saeed Ghorbani, K Mahdaviani, Anne Thaler, K Kording, DJ Cook, G Blohm, and NF Troje. 2020. MoVi: A Large Multipurpose Motion and Video Dataset. arXiv 2020. arXiv preprint arXiv:2003.01888 (2020)."},{"key":"e_1_3_2_1_14_1","volume-title":"Mamba: Linear-time sequence modeling with selective state spaces. arXiv preprint arXiv:2312.00752","author":"Gu Albert","year":"2023","unstructured":"Albert Gu and Tri Dao. 2023. Mamba: Linear-time sequence modeling with selective state spaces. arXiv preprint arXiv:2312.00752 (2023)."},{"key":"e_1_3_2_1_15_1","volume-title":"Efficiently modeling long sequences with structured state spaces. arXiv preprint arXiv:2111.00396","author":"Gu Albert","year":"2021","unstructured":"Albert Gu, Karan Goel, and Christopher R\u00e9. 2021a. Efficiently modeling long sequences with structured state spaces. arXiv preprint arXiv:2111.00396 (2021)."},{"key":"e_1_3_2_1_16_1","volume-title":"Combining recurrent, convolutional, and continuous-time models with linear state space layers. Advances in neural information processing systems","author":"Gu Albert","year":"2021","unstructured":"Albert Gu, Isys Johnson, Karan Goel, Khaled Saab, Tri Dao, Atri Rudra, and Christopher R\u00e9. 2021b. Combining recurrent, convolutional, and continuous-time models with linear state space layers. Advances in neural information processing systems, Vol. 34 (2021), 572--585."},{"key":"e_1_3_2_1_17_1","volume-title":"Learning human-to-humanoid real-time whole-body teleoperation. arXiv preprint arXiv:2403.04436","author":"He Tairan","year":"2024","unstructured":"Tairan He, Zhengyi Luo, Wenli Xiao, Chong Zhang, Kris Kitani, Changliu Liu, and Guanya Shi. 2024. Learning human-to-humanoid real-time whole-body teleoperation. arXiv preprint arXiv:2403.04436 (2024)."},{"key":"e_1_3_2_1_18_1","volume-title":"Ml-mamba: Efficient multi-modal large language model utilizing mamba-2. arXiv preprint arXiv:2407.19832","author":"Huang Wenjun","year":"2024","unstructured":"Wenjun Huang and Jianguo Hu. 2024. Ml-mamba: Efficient multi-modal large language model utilizing mamba-2. arXiv preprint arXiv:2407.19832 (2024)."},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1145\/3272127.3275108","article-title":"Deep inertial poser: Learning to reconstruct human pose from sparse inertial measurements in real time","volume":"37","author":"Huang Yinghao","year":"2018","unstructured":"Yinghao Huang, Manuel Kaufmann, Emre Aksan, Michael J Black, Otmar Hilliges, and Gerard Pons-Moll. 2018. Deep inertial poser: Learning to reconstruct human pose from sparse inertial measurements in real time. ACM Transactions on Graphics (TOG), Vol. 37, 6 (2018), 1--15.","journal-title":"ACM Transactions on Graphics (TOG)"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-20065-6_26"},{"key":"e_1_3_2_1_21_1","volume-title":"Transformer inertial poser: attention-based real-time human motion reconstruction from sparse IMUs. arXiv e-prints, arXiv--2203. doi: 10.48550. arXiv preprint arXiv.2203.15720","author":"Jiang Y","year":"2022","unstructured":"Y Jiang, Y Ye, D Gopinath, J Won, AW Winkler, and CK Liu. 2022b. Transformer inertial poser: attention-based real-time human motion reconstruction from sparse IMUs. arXiv e-prints, arXiv--2203. doi: 10.48550. arXiv preprint arXiv.2203.15720 (2022)."},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1145\/3550469.3555428"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"crossref","unstructured":"Rudolph Emil Kalman. 1960. A new approach to linear filtering and prediction problems. (1960).","DOI":"10.1115\/1.3662552"},{"key":"e_1_3_2_1_24_1","volume-title":"Adam: A method for stochastic optimization. arXiv preprint arXiv:1412.6980","author":"Kingma Diederik P","year":"2014","unstructured":"Diederik P Kingma. 2014. Adam: A method for stochastic optimization. arXiv preprint arXiv:1412.6980 (2014)."},{"key":"e_1_3_2_1_25_1","volume-title":"Jamba: A hybrid transformer-mamba language model. arXiv preprint arXiv:2403.19887","author":"Lieber Opher","year":"2024","unstructured":"Opher Lieber, Barak Lenz, Hofit Bata, Gal Cohen, Jhonathan Osin, Itay Dalmedigos, Erez Safahi, Shaked Meirom, Yonatan Belinkov, Shai Shalev-Shwartz, et al. 2024. Jamba: A hybrid transformer-mamba language model. arXiv preprint arXiv:2403.19887 (2024)."},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1145\/2661229.2661273"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1145\/3596711.3596800"},{"key":"e_1_3_2_1_28_1","unstructured":"Eyes JAPAN Co. Ltd. [n. d.]. Eyes Japan MoCap Dataset. http:\/\/mocapdata.com"},{"key":"e_1_3_2_1_29_1","volume-title":"U-mamba: Enhancing long-range dependency for biomedical image segmentation. arXiv preprint arXiv:2401.04722","author":"Ma Jun","year":"2024","unstructured":"Jun Ma, Feifei Li, and Bo Wang. 2024. U-mamba: Enhancing long-range dependency for biomedical image segmentation. arXiv preprint arXiv:2401.04722 (2024)."},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00554"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICAR.2015.7251476"},{"key":"e_1_3_2_1_32_1","volume-title":"Long range language modeling via gated state spaces. arXiv preprint arXiv:2206.13947","author":"Mehta Harsh","year":"2022","unstructured":"Harsh Mehta, Ankit Gupta, Ashok Cutkosky, and Behnam Neyshabur. 2022. Long range language modeling via gated state spaces. arXiv preprint arXiv:2206.13947 (2022)."},{"key":"e_1_3_2_1_33_1","volume-title":"Mocap database hdm05","author":"M\u00fcller Meinard","year":"2007","unstructured":"Meinard M\u00fcller, Tido R\u00f6der, Michael Clausen, Bernhard Eberhardt, Bj\u00f6rn Kr\u00fcger, and Andreas Weber. 2007. Mocap database hdm05. Institut f\u00fcr Informatik II, Universit\u00e4t Bonn, Vol. 2, 7 (2007)."},{"key":"e_1_3_2_1_34_1","volume-title":"Simba: Simplified mamba-based architecture for vision and multivariate time series. arXiv preprint arXiv:2403.15360","author":"Patro Badri N","year":"2024","unstructured":"Badri N Patro and Vijay S Agneeswaran. 2024. Simba: Simplified mamba-based architecture for vision and multivariate time series. arXiv preprint arXiv:2403.15360 (2024)."},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01129"},{"key":"e_1_3_2_1_36_1","volume-title":"Glu variants improve transformer. arXiv preprint arXiv:2002.05202","author":"Shazeer Noam","year":"2020","unstructured":"Noam Shazeer. 2020. Glu variants improve transformer. arXiv preprint arXiv:2002.05202 (2020)."},{"key":"e_1_3_2_1_37_1","volume-title":"Humaneva: Synchronized video and motion capture dataset and baseline algorithm for evaluation of articulated human motion. International journal of computer vision","author":"Sigal Leonid","year":"2010","unstructured":"Leonid Sigal, Alexandru O Balan, and Michael J Black. 2010. Humaneva: Synchronized video and motion capture dataset and baseline algorithm for evaluation of articulated human motion. International journal of computer vision, Vol. 87, 1 (2010), 4--27."},{"key":"e_1_3_2_1_38_1","volume-title":"Simplified state space layers for sequence modeling. arXiv preprint arXiv:2208.04933","author":"Smith Jimmy TH","year":"2022","unstructured":"Jimmy TH Smith, Andrew Warrington, and Scott W Linderman. 2022. Simplified state space layers for sequence modeling. arXiv preprint arXiv:2208.04933 (2022)."},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1167\/2.5.2"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.5244\/C.31.14"},{"key":"e_1_3_2_1_41_1","unstructured":"Simon Fraser University and National University of Singapore. [n. d.]. SFU Motion Capture Database. http:\/\/mocap.cs.sfu.ca\/"},{"key":"e_1_3_2_1_42_1","unstructured":"Aaron Van Den Oord Oriol Vinyals et al. 2017. Neural discrete representation learning. Advances in neural information processing systems Vol. 30 (2017)."},{"key":"e_1_3_2_1_43_1","volume-title":"Attention is all you need. Advances in Neural Information Processing Systems","author":"Vaswani A","year":"2017","unstructured":"A Vaswani. 2017. Attention is all you need. Advances in Neural Information Processing Systems (2017)."},{"key":"e_1_3_2_1_44_1","volume-title":"Computer graphics forum","author":"Marcard Timo Von","unstructured":"Timo Von Marcard, Bodo Rosenhahn, Michael J Black, and Gerard Pons-Moll. 2017. Sparse inertial poser: Automatic 3d human pose estimation from sparse imus. In Computer graphics forum, Vol. 36. Wiley Online Library, 349--360."},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1145\/3550469.3555411"},{"key":"e_1_3_2_1_46_1","volume-title":"Lobstr: Real-time lower-body pose prediction from sparse upper-body tracking signals. In Computer Graphics Forum","author":"Yang Dongseok","year":"2021","unstructured":"Dongseok Yang, Doyeon Kim, and Sung-Hee Lee. 2021. Lobstr: Real-time lower-body pose prediction from sparse upper-body tracking signals. In Computer Graphics Forum, Vol. 40. Wiley Online Library, 265--275."},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01282"},{"key":"e_1_3_2_1_48_1","first-page":"1","article-title":"Transpose: Real-time 3d human translation and pose estimation with six inertial sensors","volume":"40","author":"Yi Xinyu","year":"2021","unstructured":"Xinyu Yi, Yuxiao Zhou, and Feng Xu. 2021. Transpose: Real-time 3d human translation and pose estimation with six inertial sensors. ACM Transactions On Graphics (TOG), Vol. 40, 4 (2021), 1--13.","journal-title":"ACM Transactions On Graphics (TOG)"},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.01349"},{"key":"e_1_3_2_1_50_1","volume-title":"Vision mamba: Efficient visual representation learning with bidirectional state space model. arXiv preprint arXiv:2401.09417","author":"Zhu Lianghui","year":"2024","unstructured":"Lianghui Zhu, Bencheng Liao, Qian Zhang, Xinlong Wang, Wenyu Liu, and Xinggang Wang. 2024. Vision mamba: Efficient visual representation learning with bidirectional state space model. arXiv preprint arXiv:2401.09417 (2024)."}],"event":{"name":"ICMR '25: International Conference on Multimedia Retrieval","location":"Chicago IL USA","acronym":"ICMR '25","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["Proceedings of the 2025 International Conference on Multimedia Retrieval"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3731715.3733437","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,21]],"date-time":"2025-08-21T04:08:48Z","timestamp":1755749328000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3731715.3733437"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,6,30]]},"references-count":50,"alternative-id":["10.1145\/3731715.3733437","10.1145\/3731715"],"URL":"https:\/\/doi.org\/10.1145\/3731715.3733437","relation":{},"subject":[],"published":{"date-parts":[[2025,6,30]]},"assertion":[{"value":"2025-06-30","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}