{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,23]],"date-time":"2026-04-23T07:59:42Z","timestamp":1776931182871,"version":"3.51.2"},"publisher-location":"New York, NY, USA","reference-count":97,"publisher":"ACM","content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,12,15]]},"DOI":"10.1145\/3757377.3763948","type":"proceedings-article","created":{"date-parts":[[2025,12,8]],"date-time":"2025-12-08T16:27:29Z","timestamp":1765211249000},"page":"1-10","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Learning Human Motion with Temporally Conditional Mamba"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0007-1831-5763","authenticated-orcid":false,"given":"Quang","family":"Nguyen","sequence":"first","affiliation":[{"name":"FPT Software AI Center, Hanoi, Vietnam"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2546-5651","authenticated-orcid":false,"given":"Tri","family":"Le","sequence":"additional","affiliation":[{"name":"FPT Software AI Center, Hanoi, Vietnam"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4421-652X","authenticated-orcid":false,"given":"Baoru","family":"Huang","sequence":"additional","affiliation":[{"name":"University of Liverpool, Liverpool, United Kingdom and Imperial College London, London, United Kingdom"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0692-8830","authenticated-orcid":false,"given":"Minh Nhat","family":"Vu","sequence":"additional","affiliation":[{"name":"Vienna University of Technology, Wien, Austria"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2571-0511","authenticated-orcid":false,"given":"Ngan","family":"Le","sequence":"additional","affiliation":[{"name":"University of Arkansas at Little Rock, Arkansas, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7957-5648","authenticated-orcid":false,"given":"Thieu","family":"Vo","sequence":"additional","affiliation":[{"name":"National University of Singapore, Singapore, Singapore"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1449-211X","authenticated-orcid":false,"given":"Anh","family":"Nguyen","sequence":"additional","affiliation":[{"name":"Department of Computer Science, University of Liverpool, Liverpool, United Kingdom"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2025,12,14]]},"reference":[{"key":"e_1_3_3_2_2_1","doi-asserted-by":"crossref","unstructured":"Simon Alexanderson Rajmund Nagy Jonas Beskow and Gustav\u00a0Eje Henter. 2023. Listen denoise action! audio-driven motion synthesis with diffusion models. ACM TOG (2023).","DOI":"10.1145\/3592458"},{"key":"e_1_3_3_2_3_1","doi-asserted-by":"publisher","DOI":"10.1145\/3637528.3672044"},{"key":"e_1_3_3_2_4_1","doi-asserted-by":"crossref","unstructured":"Corentin Briat. 2014. Linear parameter-varying and time-delay systems. Analysis observation filtering & control (2014).","DOI":"10.1007\/978-3-662-44050-6"},{"key":"e_1_3_3_2_5_1","doi-asserted-by":"crossref","unstructured":"Anthony Brohan Noah Brown Justice Carbajal Yevgen Chebotar Joseph Dabis Chelsea Finn Keerthana Gopalakrishnan Karol Hausman Alex Herzog Jasmine Hsu et\u00a0al. 2022. Rt-1: Robotics transformer for real-world control at scale. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2212.06817 (2022).","DOI":"10.15607\/RSS.2023.XIX.025"},{"key":"e_1_3_3_2_6_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00603"},{"key":"e_1_3_3_2_7_1","unstructured":"Junsong Chen Jincheng Yu Chongjian Ge Lewei Yao Enze Xie Yue Wu Zhongdao Wang James Kwok Ping Luo Huchuan Lu et\u00a0al. 2023b. Fast training of diffusion transformer for photorealistic text-to-image synthesis. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2310.00426 (2023)."},{"key":"e_1_3_3_2_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01726"},{"key":"e_1_3_3_2_9_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00190"},{"key":"e_1_3_3_2_10_1","doi-asserted-by":"crossref","unstructured":"Cheng Chi Zhenjia Xu Siyuan Feng Eric Cousineau Yilun Du Benjamin Burchfiel Russ Tedrake and Shuran Song. 2023. Diffusion policy: Visuomotor policy learning via action diffusion. The International Journal of Robotics Research (2023).","DOI":"10.15607\/RSS.2023.XIX.026"},{"key":"e_1_3_3_2_11_1","volume-title":"ECCV","author":"Dai Wenxun","year":"2024","unstructured":"Wenxun Dai, Ling-Hao Chen, Jingbo Wang, Jinpeng Liu, Bo Dai, and Yansong Tang. 2024. Motionlcm: Real-time controllable motion generation via latent consistency model. In ECCV."},{"key":"e_1_3_3_2_12_1","unstructured":"Prafulla Dhariwal Heewoo Jun Christine Payne Jong\u00a0Wook Kim Alec Radford and Ilya Sutskever. 2020. Jukebox: A generative model for music. arXiv:https:\/\/arXiv.org\/abs\/2005.00341 (2020)."},{"key":"e_1_3_3_2_13_1","unstructured":"Prafulla Dhariwal and Alexander Nichol. 2021. Diffusion models beat gans on image synthesis. NeurIPS (2021)."},{"key":"e_1_3_3_2_14_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00054"},{"key":"e_1_3_3_2_15_1","unstructured":"Vincent Dumoulin Jonathon Shlens and Manjunath Kudlur. 2017. A learned representation for artistic style. ICLR (2017)."},{"key":"e_1_3_3_2_16_1","doi-asserted-by":"publisher","DOI":"10.1145\/3503161.3548099"},{"key":"e_1_3_3_2_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01834"},{"key":"e_1_3_3_2_18_1","unstructured":"Albert Gu and Tri Dao. 2023. Mamba: Linear-time sequence modeling with selective state spaces. arXiv:https:\/\/arXiv.org\/abs\/2312.00752 (2023)."},{"key":"e_1_3_3_2_19_1","volume-title":"ICLR","author":"Gu Albert","year":"2022","unstructured":"Albert Gu, Karan Goel, and Christopher R\u00e9. 2022. Efficiently Modeling Long Sequences with Structured State Spaces. In ICLR."},{"key":"e_1_3_3_2_20_1","unstructured":"Albert Gu Isys Johnson Karan Goel Khaled Saab Tri Dao Atri Rudra and Christopher R\u00e9. 2021. Combining recurrent convolutional and continuous-time models with linear state space layers. NeurIPS (2021)."},{"key":"e_1_3_3_2_21_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00509"},{"key":"e_1_3_3_2_22_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00292"},{"key":"e_1_3_3_2_23_1","unstructured":"Ali Hatamizadeh and Jan Kautz. 2024. Mambavision: A hybrid mamba-transformer vision backbone. arXiv:https:\/\/arXiv.org\/abs\/2407.08083 (2024)."},{"key":"e_1_3_3_2_24_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"e_1_3_3_2_25_1","unstructured":"Jonathan Ho Ajay Jain and Pieter Abbeel. 2020. Denoising diffusion probabilistic models. NeurIPS (2020)."},{"key":"e_1_3_3_2_26_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52734.2025.00503"},{"key":"e_1_3_3_2_27_1","volume-title":"ECCV","author":"Hu Vincent\u00a0Tao","year":"2024","unstructured":"Vincent\u00a0Tao Hu, Stefan\u00a0Andreas Baumann, Ming Gui, Olga Grebenkova, Pingchuan Ma, Johannes Fischer, and Bj\u00f6rn Ommer. 2024. Zigma: A dit-style zigzag mamba diffusion model. In ECCV."},{"key":"e_1_3_3_2_28_1","volume-title":"ICLR","author":"Huang Ruozi","year":"2020","unstructured":"Ruozi Huang, Huang Hu, Wei Wu, Kei Sawada, Mi Zhang, and Daxin Jiang. 2020. Dance revolution: Long-term dance generation with music via curriculum learning. In ICLR."},{"key":"e_1_3_3_2_29_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01607"},{"key":"e_1_3_3_2_30_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.167"},{"key":"e_1_3_3_2_31_1","volume-title":"ECCV","author":"Huang Zikai","year":"2024","unstructured":"Zikai Huang, Xuemiao Xu, Cheng Xu, Huaidong Zhang, Chenxi Zheng, Jing Qin, and Shengfeng He. 2024. Beat-It: Beat-Synchronized Multi-Condition 3D Dance Generation. In ECCV."},{"key":"e_1_3_3_2_32_1","volume-title":"ICML","author":"Ioffe Sergey","year":"2015","unstructured":"Sergey Ioffe and Christian Szegedy. 2015. Batch normalization: Accelerating deep network training by reducing internal covariate shift. In ICML."},{"key":"e_1_3_3_2_33_1","unstructured":"Biao Jiang Xin Chen Wen Liu Jingyi Yu Gang Yu and Tao Chen. 2023. Motiongpt: Human motion as a foreign language. NeurIPS (2023)."},{"key":"e_1_3_3_2_34_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-20065-6_26"},{"key":"e_1_3_3_2_35_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00453"},{"key":"e_1_3_3_2_36_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00205"},{"key":"e_1_3_3_2_37_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00348"},{"key":"e_1_3_3_2_38_1","doi-asserted-by":"crossref","unstructured":"Taesup Kim Inchul Song and Yoshua Bengio. 2017. Dynamic layer normalization for adaptive neural acoustic modeling in speech recognition. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1707.06065 (2017).","DOI":"10.21437\/Interspeech.2017-556"},{"key":"e_1_3_3_2_39_1","doi-asserted-by":"publisher","DOI":"10.1145\/3551626.3564936"},{"key":"e_1_3_3_2_40_1","unstructured":"Nhat Le Tuong Do Khoa Do Hien Nguyen Erman Tjiputra Quang\u00a0D Tran and Anh Nguyen. 2023a. Controllable group choreography using contrastive diffusion. ACM TOG (2023)."},{"key":"e_1_3_3_2_41_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00838"},{"key":"e_1_3_3_2_42_1","volume-title":"NeurIPS","author":"Lee Byung-Kwan","year":"2024","unstructured":"Byung-Kwan Lee, Chae\u00a0Won Kim, Beomchan Park, and Yong\u00a0Man Ro. 2024. Meteor: Mamba-based Traversal of Rationale for Large Language and Vision Models. In NeurIPS, A.\u00a0Globerson, L.\u00a0Mackey, D.\u00a0Belgrave, A.\u00a0Fan, U.\u00a0Paquet, J.\u00a0Tomczak, and C.\u00a0Zhang (Eds.)."},{"key":"e_1_3_3_2_43_1","unstructured":"Haopeng Li Jinyue Yang Kexin Wang Xuerui Qiu Yuhong Chou Xin Li and Guoqi Li. 2024b. Scalable autoregressive image generation with mamba. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2408.12245 (2024)."},{"key":"e_1_3_3_2_44_1","volume-title":"ECCV","author":"Li Jiaman","year":"2024","unstructured":"Jiaman Li, Alexander Clegg, Roozbeh Mottaghi, Jiajun Wu, Xavier Puig, and C\u00a0Karen Liu. 2024a. Controllable human-object interaction synthesis. In ECCV."},{"key":"e_1_3_3_2_45_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01644"},{"key":"e_1_3_3_2_46_1","unstructured":"Jiaman Li Jiajun Wu and C\u00a0Karen Liu. 2023b. Object motion guided human motion synthesis. ACM TOG (2023)."},{"key":"e_1_3_3_2_47_1","unstructured":"Jiaman Li Yihang Yin Hang Chu Yi Zhou Tingwu Wang Sanja Fidler and Hao Li. 2020. Learning to generate diverse dance motions with transformer. arXiv:https:\/\/arXiv.org\/abs\/2008.08171 (2020)."},{"key":"e_1_3_3_2_48_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01315"},{"key":"e_1_3_3_2_49_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00151"},{"key":"e_1_3_3_2_50_1","unstructured":"Shufan Li Harkanwar Singh and Aditya Grover. 2025. Mamba-ND: Selective State Space Modeling for Multi-Dimensional Data. ECCV (2025)."},{"key":"e_1_3_3_2_51_1","volume-title":"NeurIPS","author":"Liang Dingkang","year":"2024","unstructured":"Dingkang Liang, Xin Zhou, Wei Xu, Xingkui Zhu, Zhikang Zou, Xiaoqing Ye, Xiao Tan, and Xiang Bai. 2024. Pointmamba: A simple state space model for point cloud analysis. In NeurIPS."},{"key":"e_1_3_3_2_52_1","unstructured":"Bencheng Liao Wenyu Liu Xinggang Wang Xinlong Wang Qian Zhang and Lianghui Zhu. 2024. Vision mamba: Efficient visual representation learning with bidirectional state space model. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2401.09417 (2024)."},{"key":"e_1_3_3_2_53_1","unstructured":"Haohe Liu Zehua Chen Yi Yuan Xinhao Mei Xubo Liu Danilo Mandic Wenwu Wang and Mark\u00a0D Plumbley. 2023. Audioldm: Text-to-audio generation with latent diffusion models. ICML (2023)."},{"key":"e_1_3_3_2_54_1","unstructured":"Jiuming Liu Ruiji Yu Yian Wang Yu Zheng Tianchen Deng Weicai Ye and Hesheng Wang. 2024. Point mamba: A novel point cloud backbone based on state space model with octree-based ordering strategy. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2403.06467 (2024)."},{"key":"e_1_3_3_2_55_1","doi-asserted-by":"publisher","DOI":"10.1145\/3596711.3596800"},{"key":"e_1_3_3_2_56_1","unstructured":"Zhengyi Luo Ryo Hachiuma Ye Yuan and Kris Kitani. 2021. Dynamics-regulated kinematic policy for egocentric pose estimation. NeuRIPS (2021)."},{"key":"e_1_3_3_2_57_1","doi-asserted-by":"publisher","DOI":"10.1109\/WACV57701.2024.00677"},{"key":"e_1_3_3_2_58_1","doi-asserted-by":"crossref","unstructured":"Ambuj Mehrish Navonil Majumder Rishabh Bharadwaj Rada Mihalcea and Soujanya Poria. 2023. A review of deep learning techniques for speech processing. Information Fusion (2023).","DOI":"10.1016\/j.inffus.2023.101869"},{"key":"e_1_3_3_2_59_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4614-1833-7"},{"key":"e_1_3_3_2_60_1","unstructured":"Quang Nguyen Nhat Le Baoru Huang Minh\u00a0Nhat Vu Chengcheng Tang Van Nguyen Ngan Le Thieu Vo and Anh Nguyen. 2025. EgoMusic-driven Human Dance Motion Estimation with Skeleton Mamba. arXiv:https:\/\/arXiv.org\/abs\/2508.10522 (2025)."},{"key":"e_1_3_3_2_61_1","doi-asserted-by":"publisher","DOI":"10.1145\/3371382.3378386"},{"key":"e_1_3_3_2_62_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00387"},{"key":"e_1_3_3_2_63_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.11671"},{"key":"e_1_3_3_2_64_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01080"},{"key":"e_1_3_3_2_65_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-20047-2_28"},{"key":"e_1_3_3_2_66_1","unstructured":"Hao Phung Quan Dao Trung Dao Hoang Phan Dimitris Metaxas and Anh Tran. 2024. DiMSUM: Diffusion Mamba\u2013A Scalable and Unified Spatial-Frequency Method for Image Generation. arXiv:https:\/\/arXiv.org\/abs\/2411.04168 (2024)."},{"key":"e_1_3_3_2_67_1","volume-title":"ECCV","author":"Pinyoanuntapong Ekkasit","year":"2024","unstructured":"Ekkasit Pinyoanuntapong, Muhammad\u00a0Usama Saleem, Pu Wang, Minwoo Lee, Srijan Das, and Chen Chen. 2024. BAMM: bidirectional autoregressive motion model. In ECCV."},{"key":"e_1_3_3_2_68_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01129"},{"key":"e_1_3_3_2_69_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01077"},{"key":"e_1_3_3_2_70_1","doi-asserted-by":"crossref","unstructured":"Sebastian Starke Ian Mason and Taku Komura. 2022. Deepphase: Periodic autoencoders for learning motion phase manifolds. ACM ToG (2022).","DOI":"10.1145\/3528223.3530178"},{"key":"e_1_3_3_2_71_1","doi-asserted-by":"crossref","unstructured":"Sebastian Starke Paul Starke Nicky He Taku Komura and Yuting Ye. 2024. Categorical codebook matching for embodied character controllers. ACM TOG (2024).","DOI":"10.1145\/3658209"},{"key":"e_1_3_3_2_72_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01291"},{"key":"e_1_3_3_2_73_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.02008"},{"key":"e_1_3_3_2_74_1","unstructured":"Octo\u00a0Model Team Dibya Ghosh Homer Walke Karl Pertsch Kevin Black Oier Mees Sudeep Dasari Joey Hejna Tobias Kreiman Charles Xu et\u00a0al. 2024. Octo: An open-source generalist robot policy. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2405.12213 (2024)."},{"key":"e_1_3_3_2_75_1","unstructured":"Yao Teng Yue Wu Han Shi Xuefei Ning Guohao Dai Yu Wang Zhenguo Li and Xihui Liu. 2024. Dim: Diffusion mamba for efficient high-resolution image synthesis. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2405.14224 (2024)."},{"key":"e_1_3_3_2_76_1","volume-title":"ICLR","author":"Tevet Guy","year":"2023","unstructured":"Guy Tevet, Sigal Raab, Brian Gordon, Yoni Shafir, Daniel Cohen-or, and Amit\u00a0Haim Bermano. 2023. Human Motion Diffusion Model. In ICLR."},{"key":"e_1_3_3_2_77_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00051"},{"key":"e_1_3_3_2_78_1","unstructured":"Mehmet\u00a0Ozgur Turkoglu Alexander Becker H\u00fcseyin\u00a0Anil G\u00fcnd\u00fcz Mina Rezaei Bernd Bischl Rodrigo\u00a0Caye Daudt Stefano D\u2019Aronco Jan Wegner and Konrad Schindler. 2022. Film-ensemble: Probabilistic deep learning via feature-wise linear modulation. NeurIPS (2022)."},{"key":"e_1_3_3_2_79_1","unstructured":"Ashish Vaswani Noam Shazeer Niki Parmar Jakob Uszkoreit Llion Jones Aidan\u00a0N Gomez \u0141ukasz Kaiser and Illia Polosukhin. 2017. Attention is all you need. NeurIPS (2017)."},{"key":"e_1_3_3_2_80_1","unstructured":"Chloe Wang Oleksii Tsepa Jun Ma and Bo Wang. 2024c. Graph-mamba: Towards long-range graph sequence modeling with selective state spaces. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2402.00789 (2024)."},{"key":"e_1_3_3_2_81_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00080"},{"key":"e_1_3_3_2_82_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01252"},{"key":"e_1_3_3_2_83_1","unstructured":"Xinghan Wang Zixi Kang and Yadong Mu. 2024b. Text-controlled motion mamba: text-instructed temporal grounding of human motion. arXiv:https:\/\/arXiv.org\/abs\/2404.11375 (2024)."},{"key":"e_1_3_3_2_84_1","unstructured":"Ziyang Wang Jian-Qing Zheng Yichi Zhang Ge Cui and Lei Li. 2024d. Mamba-unet: Unet-like pure visual mamba for medical image segmentation. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2402.05079 (2024)."},{"key":"e_1_3_3_2_85_1","unstructured":"Jingjing Xu Xu Sun Zhiyuan Zhang Guangxiang Zhao and Junyang Lin. 2019. Understanding and improving layer normalization. NeurIPS (2019)."},{"key":"e_1_3_3_2_86_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00787"},{"key":"e_1_3_3_2_87_1","unstructured":"Weihao Yuan Yisheng He Weichao Shen Yuan Dong Xiaodong Gu Zilong Dong Liefeng Bo and Qixing Huang. 2024. Mogents: Motion generation based on spatial-temporal joint modeling. NeurIPS (2024)."},{"key":"e_1_3_3_2_88_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.01018"},{"key":"e_1_3_3_2_89_1","volume-title":"CVPR","author":"Zhang Canyu","year":"2024","unstructured":"Canyu Zhang, Youbao Tang, Ning Zhang, Ruei-Sung Lin, Mei Han, Jing Xiao, and Song Wang. 2024c. Bidirectional autoregessive diffusion model for dance generation. In CVPR."},{"key":"e_1_3_3_2_90_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01415"},{"key":"e_1_3_3_2_91_1","doi-asserted-by":"crossref","unstructured":"Mingyuan Zhang Zhongang Cai Liang Pan Fangzhou Hong Xinying Guo Lei Yang and Ziwei Liu. 2024a. Motiondiffuse: Text-driven human motion generation with diffusion model. TPAMI (2024).","DOI":"10.1109\/TPAMI.2024.3355414"},{"key":"e_1_3_3_2_92_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP49660.2025.10889111"},{"key":"e_1_3_3_2_93_1","volume-title":"ECCV","author":"Zhang Zeyu","year":"2024","unstructured":"Zeyu Zhang, Akide Liu, Ian Reid, Richard Hartley, Bohan Zhuang, and Hao Tang. 2024b. Motion mamba: Efficient and long sequence motion generation. In ECCV."},{"key":"e_1_3_3_2_94_1","volume-title":"ECCV","author":"Zhao Amy","year":"2024","unstructured":"Amy Zhao, Chengcheng Tang, Lezi Wang, Yijing Li, Mihika Dave, Lingling Tao, Christopher\u00a0D Twigg, and Robert\u00a0Y Wang. 2024. EgoBody3M: Egocentric Body Tracking on a VR Headset using a Diverse Dataset. In ECCV."},{"key":"e_1_3_3_2_95_1","unstructured":"Wentao Zhu Xiaoxuan Ma Dongwoo Ro Hai Ci Jinlu Zhang Jiaxin Shi Feng Gao Qi Tian and Yizhou Wang. 2023. Human motion generation: A survey. TPAMI (2023)."},{"key":"e_1_3_3_2_96_1","doi-asserted-by":"crossref","unstructured":"Wenlin Zhuang Congyi Wang Jinxiang Chai Yangang Wang Ming Shao and Siyu Xia. 2022. Music2dance: Dancenet for music-driven dance generation. ACM TOMM (2022).","DOI":"10.1145\/3485664"},{"key":"e_1_3_3_2_97_1","unstructured":"Bochao Zou Zizheng Guo Xiaocheng Hu and Huimin Ma. 2024. Rhythmmamba: Fast remote physiological measurement with arbitrary length videos. arXiv:https:\/\/arXiv.org\/abs\/2404.06483 (2024)."},{"key":"e_1_3_3_2_98_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v39i10.33204"}],"event":{"name":"SA Conference Papers '25: SIGGRAPH Asia 2025 Conference Papers","location":"Hong Kong Hong Kong","acronym":"SA Conference Papers '25","sponsor":["SIGGRAPH ACM Special Interest Group on Computer Graphics and Interactive Techniques"]},"container-title":["Proceedings of the SIGGRAPH Asia 2025 Conference Papers"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3757377.3763948","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,9]],"date-time":"2025-12-09T03:27:06Z","timestamp":1765250826000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3757377.3763948"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,12,14]]},"references-count":97,"alternative-id":["10.1145\/3757377.3763948","10.1145\/3757377"],"URL":"https:\/\/doi.org\/10.1145\/3757377.3763948","relation":{},"subject":[],"published":{"date-parts":[[2025,12,14]]},"assertion":[{"value":"2025-12-14","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}