{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,28]],"date-time":"2026-03-28T03:16:41Z","timestamp":1774667801104,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":63,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,7,13]],"date-time":"2024-07-13T00:00:00Z","timestamp":1720828800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,7,13]]},"DOI":"10.1145\/3641519.3657492","type":"proceedings-article","created":{"date-parts":[[2024,7,12]],"date-time":"2024-07-12T10:39:28Z","timestamp":1720780768000},"page":"1-11","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":4,"title":["SuperPADL: Scaling Language-Directed Physics-Based Control with Progressive Supervised Distillation"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-2080-7074","authenticated-orcid":false,"given":"Jordan","family":"Juravsky","sequence":"first","affiliation":[{"name":"NVIDIA, Canada and Stanford University, United States of America"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7468-6162","authenticated-orcid":false,"given":"Yunrong","family":"Guo","sequence":"additional","affiliation":[{"name":"NVIDIA, Canada"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1040-3260","authenticated-orcid":false,"given":"Sanja","family":"Fidler","sequence":"additional","affiliation":[{"name":"NVIDIA, Canada and University of Toronto, Canada"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3677-5655","authenticated-orcid":false,"given":"Xue Bin","family":"Peng","sequence":"additional","affiliation":[{"name":"NVIDIA, Canada and Simon Fraser University, Canada"}]}],"member":"320","published-online":{"date-parts":[[2024,7,13]]},"reference":[{"key":"e_1_3_2_2_1_1","volume-title":"Task-based Locomotion. ACM Transactions on Graphics (Proc. SIGGRAPH 2016)","author":"Agrawal Shailen","year":"2016","unstructured":"Shailen Agrawal and Michiel van\u00a0de Panne. 2016. Task-based Locomotion. ACM Transactions on Graphics (Proc. SIGGRAPH 2016) 35, 4 (2016)."},{"key":"e_1_3_2_2_2_1","doi-asserted-by":"publisher","DOI":"10.1109\/3DV.2019.00084"},{"key":"e_1_3_2_2_3_1","doi-asserted-by":"publisher","DOI":"10.1109\/3DV57658.2022.00053"},{"key":"e_1_3_2_2_4_1","doi-asserted-by":"publisher","DOI":"10.1145\/3355089.3356536"},{"key":"e_1_3_2_2_5_1","volume-title":"Language Models are Few-Shot Learners. CoRR abs\/2005.14165","author":"Brown B.","year":"2020","unstructured":"Tom\u00a0B. Brown, Benjamin Mann, Nick Ryder, Melanie Subbiah, Jared Kaplan, Prafulla Dhariwal, Arvind Neelakantan, Pranav Shyam, Girish Sastry, Amanda Askell, Sandhini Agarwal, Ariel Herbert-Voss, Gretchen Krueger, Tom Henighan, Rewon Child, Aditya Ramesh, Daniel\u00a0M. Ziegler, Jeffrey Wu, Clemens Winter, Christopher Hesse, Mark Chen, Eric Sigler, Mateusz Litwin, Scott Gray, Benjamin Chess, Jack Clark, Christopher Berner, Sam McCandlish, Alec Radford, Ilya Sutskever, and Dario Amodei. 2020. Language Models are Few-Shot Learners. CoRR abs\/2005.14165 (2020). arXiv:2005.14165https:\/\/arxiv.org\/abs\/2005.14165"},{"key":"e_1_3_2_2_6_1","doi-asserted-by":"publisher","DOI":"10.1145\/3272127.3275048"},{"key":"e_1_3_2_2_7_1","article-title":"Robust Task-based Control Policies for Physics-based Characters","volume":"28","author":"Coros Stelian","year":"2009","unstructured":"Stelian Coros, Philippe Beaudoin, and Michiel van\u00a0de Panne. 2009. Robust Task-based Control Policies for Physics-based Characters. ACM Trans. Graph. (Proc. SIGGRAPH Asia) 28, 5 (2009), Article 170.","journal-title":"ACM Trans. Graph. (Proc. SIGGRAPH Asia)"},{"key":"e_1_3_2_2_8_1","volume-title":"Simulation of Human Motion Data using Short\u2010Horizon Model\u2010Predictive Control. Computer Graphics Forum 27","author":"da Silva Marco","year":"2008","unstructured":"Marco da Silva, Yeuhi Abe, and Jovan Popovi\u0107. 2008. Simulation of Human Motion Data using Short\u2010Horizon Model\u2010Predictive Control. Computer Graphics Forum 27 (2008)."},{"key":"e_1_3_2_2_9_1","doi-asserted-by":"crossref","unstructured":"Rishabh Dabral Muhammad\u00a0Hamza Mughal Vladislav Golyanik and Christian Theobalt. 2023. MoFusion: A Framework for Denoising-Diffusion-based Motion Synthesis. In Computer Vision and Pattern Recognition (CVPR).","DOI":"10.1109\/CVPR52729.2023.00941"},{"key":"e_1_3_2_2_10_1","doi-asserted-by":"publisher","DOI":"10.1145\/1778765.1781157"},{"key":"e_1_3_2_2_11_1","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.1810.04805"},{"key":"e_1_3_2_2_12_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00509"},{"key":"e_1_3_2_2_13_1","unstructured":"Jonathan Ho and Tim Salimans. 2022. Classifier-Free Diffusion Guidance. arxiv:2207.12598\u00a0[cs.LG]"},{"key":"e_1_3_2_2_14_1","doi-asserted-by":"publisher","DOI":"10.1145\/218380.218414"},{"key":"e_1_3_2_2_15_1","doi-asserted-by":"publisher","DOI":"10.1145\/3072959.3073663"},{"key":"e_1_3_2_2_16_1","volume-title":"MotionGPT: Human Motion as a Foreign Language. arXiv preprint arXiv:2306.14795","author":"Jiang Biao","year":"2023","unstructured":"Biao Jiang, Xin Chen, Wen Liu, Jingyi Yu, Gang Yu, and Tao Chen. 2023. MotionGPT: Human Motion as a Foreign Language. arXiv preprint arXiv:2306.14795 (2023)."},{"key":"e_1_3_2_2_17_1","doi-asserted-by":"publisher","DOI":"10.1145\/3550469.3555391"},{"key":"e_1_3_2_2_18_1","doi-asserted-by":"publisher","DOI":"10.1145\/3450626.3459826"},{"key":"e_1_3_2_2_19_1","doi-asserted-by":"publisher","DOI":"10.1145\/3450626.3459774"},{"key":"e_1_3_2_2_20_1","doi-asserted-by":"publisher","DOI":"10.1145\/1778765.1781155"},{"key":"e_1_3_2_2_21_1","doi-asserted-by":"publisher","DOI":"10.1145\/1882261.1866160"},{"key":"e_1_3_2_2_22_1","volume-title":"Proceedings of the Visually Grounded Interaction and Language Workshop at NeurIPS","author":"Lin S.","year":"2018","unstructured":"Angela\u00a0S. Lin, Lemeng Wu, Rodolfo Corona, Kevin Tai, Qixing Huang, and Raymond\u00a0J. Mooney. 2018. Generating Animated Videos of Human Activities from Natural Language Descriptions. In Proceedings of the Visually Grounded Interaction and Language Workshop at NeurIPS 2018. http:\/\/www.cs.utexas.edu\/users\/ai-labpub-view.php?PubID=127730"},{"key":"e_1_3_2_2_23_1","doi-asserted-by":"publisher","DOI":"10.1145\/3386569.3392422"},{"key":"e_1_3_2_2_24_1","doi-asserted-by":"publisher","DOI":"10.1145\/3197517.3201315"},{"key":"e_1_3_2_2_25_1","doi-asserted-by":"publisher","unstructured":"Yinhan Liu Myle Ott Naman Goyal Jingfei Du Mandar Joshi Danqi Chen Omer Levy Mike Lewis Luke Zettlemoyer and Veselin Stoyanov. 2019. RoBERTa: A Robustly Optimized BERT Pretraining Approach. https:\/\/doi.org\/10.48550\/ARXIV.1907.11692","DOI":"10.48550\/ARXIV.1907.11692"},{"key":"e_1_3_2_2_26_1","volume-title":"Perpetual Humanoid Control for Real-time Simulated Avatars. In International Conference on Computer Vision (ICCV).","author":"Luo Zhengyi","year":"2023","unstructured":"Zhengyi Luo, Jinkun Cao, Alexander\u00a0W. Winkler, Kris Kitani, and Weipeng Xu. 2023. Perpetual Humanoid Control for Real-time Simulated Avatars. In International Conference on Computer Vision (ICCV)."},{"key":"e_1_3_2_2_27_1","volume-title":"AMASS: Archive of Motion Capture as Surface Shapes. arxiv:1904.03278\u00a0[cs.CV]","author":"Mahmood Naureen","year":"2019","unstructured":"Naureen Mahmood, Nima Ghorbani, Nikolaus\u00a0F. Troje, Gerard Pons-Moll, and Michael\u00a0J. Black. 2019. AMASS: Archive of Motion Capture as Surface Shapes. arxiv:1904.03278\u00a0[cs.CV]"},{"key":"e_1_3_2_2_28_1","volume-title":"Isaac Gym: High Performance GPU-Based Physics Simulation For Robot Learning. CoRR abs\/2108.10470","author":"Makoviychuk Viktor","year":"2021","unstructured":"Viktor Makoviychuk, Lukasz Wawrzyniak, Yunrong Guo, Michelle Lu, Kier Storey, Miles Macklin, David Hoeller, Nikita Rudin, Arthur Allshire, Ankur Handa, and Gavriel State. 2021. Isaac Gym: High Performance GPU-Based Physics Simulation For Robot Learning. CoRR abs\/2108.10470 (2021). arXiv:2108.10470https:\/\/arxiv.org\/abs\/2108.10470"},{"key":"e_1_3_2_2_29_1","volume-title":"Neural Probabilistic Motor Primitives for Humanoid Control. In International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=BJl6TjRcY7","author":"Merel Josh","year":"2019","unstructured":"Josh Merel, Leonard Hasenclever, Alexandre Galashov, Arun Ahuja, Vu Pham, Greg Wayne, Yee\u00a0Whye Teh, and Nicolas Heess. 2019. Neural Probabilistic Motor Primitives for Humanoid Control. In International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=BJl6TjRcY7"},{"key":"e_1_3_2_2_30_1","doi-asserted-by":"publisher","DOI":"10.1145\/3386569.3392474"},{"key":"e_1_3_2_2_31_1","doi-asserted-by":"publisher","DOI":"10.1145\/3197517.3201311"},{"key":"e_1_3_2_2_32_1","doi-asserted-by":"publisher","DOI":"10.1145\/3072959.3073602"},{"key":"e_1_3_2_2_33_1","doi-asserted-by":"publisher","DOI":"10.1145\/3528223.3530110"},{"key":"e_1_3_2_2_34_1","doi-asserted-by":"publisher","DOI":"10.1145\/3450626.3459670"},{"key":"e_1_3_2_2_35_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-20047-2_28"},{"key":"e_1_3_2_2_36_1","volume-title":"Learning a bidirectional mapping between human whole-body motion and natural language using deep recurrent neural networks. CoRR abs\/1705.06400","author":"Plappert Matthias","year":"2017","unstructured":"Matthias Plappert, Christian Mandery, and Tamim Asfour. 2017. Learning a bidirectional mapping between human whole-body motion and natural language using deep recurrent neural networks. CoRR abs\/1705.06400 (2017). arXiv:1705.06400http:\/\/arxiv.org\/abs\/1705.06400"},{"key":"e_1_3_2_2_37_1","volume-title":"The Eleventh International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=FjNys5c7VyY","author":"Poole Ben","year":"2023","unstructured":"Ben Poole, Ajay Jain, Jonathan\u00a0T. Barron, and Ben Mildenhall. 2023. DreamFusion: Text-to-3D using 2D Diffusion. In The Eleventh International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=FjNys5c7VyY"},{"key":"e_1_3_2_2_38_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00078"},{"key":"e_1_3_2_2_39_1","doi-asserted-by":"publisher","unstructured":"Colin Raffel Noam Shazeer Adam Roberts Katherine Lee Sharan Narang Michael Matena Yanqi Zhou Wei Li and Peter\u00a0J. Liu. 2019. Exploring the Limits of Transfer Learning with a Unified Text-to-Text Transformer. https:\/\/doi.org\/10.48550\/ARXIV.1910.10683","DOI":"10.48550\/ARXIV.1910.10683"},{"key":"e_1_3_2_2_40_1","volume-title":"InsActor: Instruction-driven Physics-based Characters. NeurIPS","author":"Ren Jiawei","year":"2023","unstructured":"Jiawei Ren, Mingyuan Zhang, Cunjun Yu, Xiao Ma, Liang Pan, and Ziwei Liu. 2023. InsActor: Instruction-driven Physics-based Characters. NeurIPS (2023)."},{"key":"e_1_3_2_2_41_1","unstructured":"Stephane Ross Geoffrey\u00a0J. Gordon and J.\u00a0Andrew Bagnell. 2011. A Reduction of Imitation Learning and Structured Prediction to No-Regret Online Learning. arxiv:1011.0686\u00a0[cs.LG]"},{"key":"e_1_3_2_2_42_1","volume-title":"Raphael Gontijo-Lopes, Burcu\u00a0Karagol Ayan, Tim Salimans, Jonathan Ho, David\u00a0J. Fleet, and Mohammad Norouzi.","author":"Saharia Chitwan","year":"2022","unstructured":"Chitwan Saharia, William Chan, Saurabh Saxena, Lala Li, Jay Whang, Emily Denton, Seyed Kamyar\u00a0Seyed Ghasemipour, Raphael Gontijo-Lopes, Burcu\u00a0Karagol Ayan, Tim Salimans, Jonathan Ho, David\u00a0J. Fleet, and Mohammad Norouzi. 2022. Photorealistic Text-to-Image Diffusion Models with Deep Language Understanding. In Advances in Neural Information Processing Systems, Alice\u00a0H. Oh, Alekh Agarwal, Danielle Belgrave, and Kyunghyun Cho (Eds.). https:\/\/openreview.net\/forum?id=08Yk-n5l2Al"},{"key":"e_1_3_2_2_43_1","volume-title":"Proximal Policy Optimization Algorithms. CoRR abs\/1707.06347","author":"Schulman John","year":"2017","unstructured":"John Schulman, Filip Wolski, Prafulla Dhariwal, Alec Radford, and Oleg Klimov. 2017. Proximal Policy Optimization Algorithms. CoRR abs\/1707.06347 (2017). arxiv:1707.06347http:\/\/arxiv.org\/abs\/1707.06347"},{"key":"e_1_3_2_2_44_1","doi-asserted-by":"publisher","DOI":"10.1145\/3355089.3356505"},{"key":"e_1_3_2_2_45_1","volume-title":"Prompt","author":"Sun Jingkai","unstructured":"Jingkai Sun, Qiang Zhang, Yiqun Duan, Xiaoyang Jiang, Chong Cheng, and Renjing Xu. 2023. Prompt, Plan, Perform: LLM-based Humanoid Control via Quantized Imitation Learning. arxiv:2309.11359\u00a0[cs.RO]"},{"key":"e_1_3_2_2_46_1","doi-asserted-by":"publisher","DOI":"10.1145\/2601097.2601121"},{"key":"e_1_3_2_2_47_1","doi-asserted-by":"publisher","unstructured":"Guy Tevet Brian Gordon Amir Hertz Amit\u00a0H. Bermano and Daniel Cohen-Or. 2022. MotionCLIP: Exposing Human Motion Generation to CLIP Space. https:\/\/doi.org\/10.48550\/ARXIV.2203.08063","DOI":"10.48550\/ARXIV.2203.08063"},{"key":"e_1_3_2_2_48_1","volume-title":"Human Motion Diffusion Model. In The Eleventh International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=SJ1kSyO2jwu","author":"Tevet Guy","year":"2023","unstructured":"Guy Tevet, Sigal Raab, Brian Gordon, Yoni Shafir, Daniel Cohen-or, and Amit\u00a0Haim Bermano. 2023. Human Motion Diffusion Model. In The Eleventh International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=SJ1kSyO2jwu"},{"key":"e_1_3_2_2_49_1","doi-asserted-by":"publisher","DOI":"10.1145\/1275808.1276386"},{"key":"e_1_3_2_2_50_1","unstructured":"Nolan Wagener Andrey Kolobov Felipe\u00a0Vieira Frujeri Ricky Loynd Ching-An Cheng and Matthew Hausknecht. 2023. MoCapAct: A Multi-Task Dataset for Simulated Humanoid Control. arxiv:2208.07363\u00a0[cs.RO]"},{"key":"e_1_3_2_2_51_1","doi-asserted-by":"publisher","DOI":"10.1145\/1661412.1618514"},{"key":"e_1_3_2_2_52_1","doi-asserted-by":"publisher","DOI":"10.1145\/2185520.2185521"},{"key":"e_1_3_2_2_53_1","unstructured":"Tingwu Wang Yunrong Guo Maria Shugrina and Sanja Fidler. 2020. UniCon: Universal Neural Controller For Physics-based Character Motion. arxiv:2011.15119\u00a0[cs.GR]"},{"key":"e_1_3_2_2_54_1","doi-asserted-by":"publisher","DOI":"10.1145\/3550469.3555411"},{"key":"e_1_3_2_2_55_1","doi-asserted-by":"publisher","DOI":"10.1145\/3386569.3392381"},{"key":"e_1_3_2_2_56_1","doi-asserted-by":"publisher","DOI":"10.1145\/3450626.3459761"},{"key":"e_1_3_2_2_57_1","doi-asserted-by":"publisher","DOI":"10.1145\/3528223.3530067"},{"key":"e_1_3_2_2_58_1","doi-asserted-by":"publisher","DOI":"10.1111\/cgf.14115"},{"key":"e_1_3_2_2_59_1","unstructured":"Zhaoming Xie Sebastian Starke Hung\u00a0Yu Ling and Michiel van\u00a0de Panne. 2022. Learning Soccer Juggling Skills with Layer-wise Mixture-of-Experts. (2022)."},{"key":"e_1_3_2_2_60_1","doi-asserted-by":"publisher","DOI":"10.1145\/3550454.3555434"},{"key":"e_1_3_2_2_61_1","volume-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR).","author":"Zhang Jianrong","year":"2023","unstructured":"Jianrong Zhang, Yangsong Zhang, Xiaodong Cun, Shaoli Huang, Yong Zhang, Hongwei Zhao, Hongtao Lu, and Xi Shen. 2023b. T2M-GPT: Generating Human Motion from Textual Descriptions with Discrete Representations. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR)."},{"key":"e_1_3_2_2_62_1","volume-title":"MotionGPT: Finetuned LLMs are General-Purpose Motion Generators. arXiv preprint arXiv:2306.10900","author":"Zhang Yaqi","year":"2023","unstructured":"Yaqi Zhang, Di Huang, Bin Liu, Shixiang Tang, Yan Lu, Lu Chen, Lei Bai, Qi Chu, Nenghai Yu, and Wanli Ouyang. 2023a. MotionGPT: Finetuned LLMs are General-Purpose Motion Generators. arXiv preprint arXiv:2306.10900 (2023)."},{"key":"e_1_3_2_2_63_1","doi-asserted-by":"publisher","DOI":"10.1145\/3414685.3417868"}],"event":{"name":"SIGGRAPH '24: Special Interest Group on Computer Graphics and Interactive Techniques Conference","location":"Denver CO USA","acronym":"SIGGRAPH '24","sponsor":["SIGGRAPH ACM Special Interest Group on Computer Graphics and Interactive Techniques"]},"container-title":["Special Interest Group on Computer Graphics and Interactive Techniques Conference Conference Papers"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3641519.3657492","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3641519.3657492","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T00:05:50Z","timestamp":1750291550000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3641519.3657492"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,7,13]]},"references-count":63,"alternative-id":["10.1145\/3641519.3657492","10.1145\/3641519"],"URL":"https:\/\/doi.org\/10.1145\/3641519.3657492","relation":{},"subject":[],"published":{"date-parts":[[2024,7,13]]},"assertion":[{"value":"2024-07-13","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}