{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,8,23]],"date-time":"2025-08-23T19:10:03Z","timestamp":1755976203358,"version":"3.44.0"},"publisher-location":"New York, NY, USA","reference-count":47,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,10,28]],"date-time":"2024-10-28T00:00:00Z","timestamp":1730073600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc\/4.0\/"}],"funder":[{"DOI":"10.13039\/501100006374","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62320106007,62032022,62236006"],"award-info":[{"award-number":["62320106007,62032022,62236006"]}],"id":[{"id":"10.13039\/501100006374","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Scientific Research Program of Beijing Municipal Education Commission","award":["KZ201911417048"],"award-info":[{"award-number":["KZ201911417048"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,10,28]]},"DOI":"10.1145\/3688865.3689476","type":"proceedings-article","created":{"date-parts":[[2024,10,20]],"date-time":"2024-10-20T14:10:52Z","timestamp":1729433452000},"page":"13-22","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["AdaptControl: Adaptive Human Motion Control and Generation via User Prompt and Spatial Trajectory Guidance"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0003-5877-2126","authenticated-orcid":false,"given":"Qingyuan","family":"Liu","sequence":"first","affiliation":[{"name":"University of the Chinese Academy of Sciences, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-1732-8627","authenticated-orcid":false,"given":"Zehai","family":"Niu","sequence":"additional","affiliation":[{"name":"University of Chinese Academy of Sciences, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0176-3088","authenticated-orcid":false,"given":"Ke","family":"Lu","sequence":"additional","affiliation":[{"name":"University of the Chinese Academy of Sciences &amp; Peng Cheng Laboratory, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-3714-1650","authenticated-orcid":false,"given":"Kun","family":"Dong","sequence":"additional","affiliation":[{"name":"University of the Chinese Academy of Sciences, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9460-802X","authenticated-orcid":false,"given":"Jian","family":"Xue","sequence":"additional","affiliation":[{"name":"University of the Chinese Academy of Sciences, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9720-3220","authenticated-orcid":false,"given":"Xiaoyu","family":"Qin","sequence":"additional","affiliation":[{"name":"Tsinghua University, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5916-8965","authenticated-orcid":false,"given":"Jinbao","family":"Wang","sequence":"additional","affiliation":[{"name":"Shenzhen University, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2024,10,28]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2018.8460608"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1109\/3DV.2019.00084"},{"key":"e_1_3_2_1_3_1","volume-title":"Ilvr: Conditioning method for denoising diffusion probabilistic models. arXiv preprint arXiv:2108.02938","author":"Choi Jooyoung","year":"2021","unstructured":"Jooyoung Choi, Sungwon Kim, Yonghyun Jeong, Youngjune Gwon, and Sungroh Yoon. 2021. Ilvr: Conditioning method for denoising diffusion probabilistic models. arXiv preprint arXiv:2108.02938 (2021)."},{"key":"e_1_3_2_1_4_1","first-page":"25683","article-title":"Improving diffusion models for inverse problems using manifold constraints","volume":"35","author":"Chung Hyungjin","year":"2022","unstructured":"Hyungjin Chung, Byeongsu Sim, Dohoon Ryu, and Jong Chul Ye. 2022. Improving diffusion models for inverse problems using manifold constraints. Advances in Neural Information Processing Systems, Vol. 35 (2022), 25683--25696.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_5_1","volume-title":"Diffusion models beat gans on image synthesis. Advances in neural information processing systems","author":"Dhariwal Prafulla","year":"2021","unstructured":"Prafulla Dhariwal and Alexander Nichol. 2021. Diffusion models beat gans on image synthesis. Advances in neural information processing systems, Vol. 34 (2021), 8780--8794."},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00054"},{"key":"e_1_3_2_1_7_1","volume-title":"Sen Wang, and Li Cheng.","author":"Guo Chuan","year":"2023","unstructured":"Chuan Guo, Yuxuan Mu, Muhammad Gohar Javed, Sen Wang, and Li Cheng. 2023. MoMask: Generative Masked Modeling of 3D Human Motions. (2023). arxiv: 2312.00063 [cs.CV]"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00509"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1145\/3394171.3413635"},{"key":"e_1_3_2_1_10_1","unstructured":"Jonathan Ho William Chan Chitwan Saharia Jay Whang Ruiqi Gao Alexey Gritsenko Diederik P Kingma Ben Poole Mohammad Norouzi David J Fleet et al. 2022. Imagen video: High definition video generation with diffusion models. arXiv preprint arXiv:2210.02303 (2022)."},{"key":"e_1_3_2_1_11_1","volume-title":"Denoising diffusion probabilistic models. Advances in neural information processing systems","author":"Ho Jonathan","year":"2020","unstructured":"Jonathan Ho, Ajay Jain, and Pieter Abbeel. 2020. Denoising diffusion probabilistic models. Advances in neural information processing systems, Vol. 33 (2020), 6840--6851."},{"key":"e_1_3_2_1_12_1","volume-title":"Classifier-free diffusion guidance. arXiv preprint arXiv:2207.12598","author":"Ho Jonathan","year":"2022","unstructured":"Jonathan Ho and Tim Salimans. 2022. Classifier-free diffusion guidance. arXiv preprint arXiv:2207.12598 (2022)."},{"key":"e_1_3_2_1_13_1","unstructured":"Jessica Hodgins. 2015. Carnegie Mellon University graphics lab motion capture database."},{"key":"e_1_3_2_1_14_1","volume-title":"Advances in Neural Information Processing Systems","volume":"36","author":"Jiang Biao","year":"2024","unstructured":"Biao Jiang, Xin Chen, Wen Liu, Jingyi Yu, Gang Yu, and Tao Chen. 2024. Motiongpt: Human motion as a foreign language. Advances in Neural Information Processing Systems, Vol. 36 (2024)."},{"key":"e_1_3_2_1_15_1","volume-title":"Gmd: Controllable human motion synthesis via guided diffusion models. arXiv preprint arXiv:2305.12577","author":"Karunratanakul Korrawe","year":"2023","unstructured":"Korrawe Karunratanakul, Konpat Preechakul, Supasorn Suwajanakorn, and Siyu Tang. 2023. Gmd: Controllable human motion synthesis via guided diffusion models. arXiv preprint arXiv:2305.12577 (2023)."},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00205"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.01360"},{"key":"e_1_3_2_1_18_1","volume-title":"Diffwave: A versatile diffusion model for audio synthesis. arXiv preprint arXiv:2009.09761","author":"Kong Zhifeng","year":"2020","unstructured":"Zhifeng Kong, Wei Ping, Jiaji Huang, Kexin Zhao, and Bryan Catanzaro. 2020. Diffwave: A versatile diffusion model for audio synthesis. arXiv preprint arXiv:2009.09761 (2020)."},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.02027"},{"key":"e_1_3_2_1_20_1","volume-title":"Decoupled weight decay regularization. arXiv preprint arXiv:1711.05101","author":"Loshchilov Ilya","year":"2017","unstructured":"Ilya Loshchilov and Frank Hutter. 2017. Decoupled weight decay regularization. arXiv preprint arXiv:1711.05101 (2017)."},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00554"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICAR.2015.7251476"},{"key":"e_1_3_2_1_23_1","volume-title":"International conference on machine learning. PMLR, 8162--8171","author":"Nichol Alexander Quinn","year":"2021","unstructured":"Alexander Quinn Nichol and Prafulla Dhariwal. 2021. Improved denoising diffusion probabilistic models. In International conference on machine learning. PMLR, 8162--8171."},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-20047-2_28"},{"key":"e_1_3_2_1_25_1","volume-title":"MMM: Generative Masked Motion Model. arXiv preprint arXiv:2312.03596","author":"Pinyoanuntapong Ekkasit","year":"2023","unstructured":"Ekkasit Pinyoanuntapong, Pu Wang, Minwoo Lee, and Chen Chen. 2023. MMM: Generative Masked Motion Model. arXiv preprint arXiv:2312.03596 (2023)."},{"key":"e_1_3_2_1_26_1","volume-title":"The KIT motion-language dataset. Big data","author":"Plappert Matthias","year":"2016","unstructured":"Matthias Plappert, Christian Mandery, and Tamim Asfour. 2016. The KIT motion-language dataset. Big data, Vol. 4, 4 (2016), 236--252."},{"key":"e_1_3_2_1_27_1","volume-title":"International conference on machine learning. PMLR, 8748--8763","author":"Radford Alec","year":"2021","unstructured":"Alec Radford, Jong Wook Kim, Chris Hallacy, Aditya Ramesh, Gabriel Goh, Sandhini Agarwal, Girish Sastry, Amanda Askell, Pamela Mishkin, Jack Clark, et al. 2021. Learning transferable visual models from natural language supervision. In International conference on machine learning. PMLR, 8748--8763."},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"e_1_3_2_1_29_1","volume-title":"Human motion diffusion as a generative prior. arXiv preprint arXiv:2303.01418","author":"Shafir Yonatan","year":"2023","unstructured":"Yonatan Shafir, Guy Tevet, Roy Kapon, and Amit H Bermano. 2023. Human motion diffusion as a generative prior. arXiv preprint arXiv:2303.01418 (2023)."},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00855"},{"key":"e_1_3_2_1_31_1","volume-title":"Human motion diffusion model. arXiv preprint arXiv:2209.14916","author":"Tevet Guy","year":"2022","unstructured":"Guy Tevet, Sigal Raab, Brian Gordon, Yonatan Shafir, Daniel Cohen-Or, and Amit H Bermano. 2022. Human motion diffusion model. arXiv preprint arXiv:2209.14916 (2022)."},{"key":"e_1_3_2_1_32_1","unstructured":"Aaron Van Den Oord Oriol Vinyals et al. 2017. Neural discrete representation learning. Advances in neural information processing systems Vol. 30 (2017)."},{"key":"e_1_3_2_1_33_1","volume-title":"Attention is all you need. Advances in neural information processing systems","author":"Vaswani Ashish","year":"2017","unstructured":"Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N Gomez, \u0141ukasz Kaiser, and Illia Polosukhin. 2017. Attention is all you need. Advances in neural information processing systems, Vol. 30 (2017)."},{"key":"e_1_3_2_1_34_1","volume-title":"T2M-HiFiGPT: Generating High Quality Human Motion from Textual Descriptions with Residual Discrete Representations. (Dec","author":"Wang Congyi","year":"2023","unstructured":"Congyi Wang. 2023. T2M-HiFiGPT: Generating High Quality Human Motion from Textual Descriptions with Residual Discrete Representations. (Dec 2023)."},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01281"},{"key":"e_1_3_2_1_36_1","volume-title":"Estimating Egocentric 3D Human Pose in Global Space. international conference on computer vision","author":"Wang Jian","year":"2021","unstructured":"Jian Wang, Lingjie Liu, Weipeng Xu, Kripasindhu Sarkar, and Christian Theobalt. 2021. Estimating Egocentric 3D Human Pose in Global Space. international conference on computer vision (2021)."},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01252"},{"key":"e_1_3_2_1_38_1","volume-title":"The Twelfth International Conference on Learning Representations.","author":"Xie Yiming","year":"2024","unstructured":"Yiming Xie, Varun Jampani, Lei Zhong, Deqing Sun, and Huaizu Jiang. 2024. OmniControl: Control Any Joint at Any Time for Human Motion Generation. In The Twelfth International Conference on Learning Representations."},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01282"},{"key":"e_1_3_2_1_40_1","first-page":"1","article-title":"TransPose: real-time 3D human translation and pose estimation with six inertial sensors","volume":"40","author":"Yi Xinyu","year":"2021","unstructured":"Xinyu Yi, Yuxiao Zhou, and Feng Xu. 2021. TransPose: real-time 3D human translation and pose estimation with six inertial sensors. ACM Transactions on Graphics (TOG), Vol. 40, 4 (2021), 1--13.","journal-title":"ACM Transactions on Graphics (TOG)"},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2021.3129994"},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01415"},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00355"},{"key":"e_1_3_2_1_44_1","volume-title":"Motiondiffuse: Text-driven human motion generation with diffusion model","author":"Zhang Mingyuan","year":"2024","unstructured":"Mingyuan Zhang, Zhongang Cai, Liang Pan, Fangzhou Hong, Xinying Guo, Lei Yang, and Ziwei Liu. 2024. Motiondiffuse: Text-driven human motion generation with diffusion model. IEEE Transactions on Pattern Analysis and Machine Intelligence (2024)."},{"key":"e_1_3_2_1_45_1","volume-title":"Dynamic Inertial Poser (DynaIP): Part-Based Motion Dynamics Learning for Enhanced Human Pose Estimation with Sparse Inertial Sensors. arXiv preprint arXiv:2312.02196","author":"Zhang Yu","year":"2023","unstructured":"Yu Zhang, Songpengcheng Xia, Lei Chu, Jiarui Yang, Qi Wu, and Ling Pei. 2023. Dynamic Inertial Poser (DynaIP): Part-Based Motion Dynamics Learning for Enhanced Human Pose Estimation with Sparse Inertial Sensors. arXiv preprint arXiv:2312.02196 (2023)."},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00053"},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-96530-3"}],"event":{"name":"MM '24: The 32nd ACM International Conference on Multimedia","sponsor":["SIGMM ACM Special Interest Group on Multimedia"],"location":"Melbourne VIC Australia","acronym":"MM '24"},"container-title":["Proceedings of the 5th International Workshop on Human-centric Multimedia Analysis"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3688865.3689476","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3688865.3689476","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,23]],"date-time":"2025-08-23T18:38:42Z","timestamp":1755974322000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3688865.3689476"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10,28]]},"references-count":47,"alternative-id":["10.1145\/3688865.3689476","10.1145\/3688865"],"URL":"https:\/\/doi.org\/10.1145\/3688865.3689476","relation":{},"subject":[],"published":{"date-parts":[[2024,10,28]]},"assertion":[{"value":"2024-10-28","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}