{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,21]],"date-time":"2026-01-21T11:16:52Z","timestamp":1768994212288,"version":"3.49.0"},"reference-count":43,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"3","license":[{"start":{"date-parts":[[2024,3,1]],"date-time":"2024-03-01T00:00:00Z","timestamp":1709251200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0\/"}],"funder":[{"name":"National Key R&amp;D Program of China","award":["2022YFF0902202"],"award-info":[{"award-number":["2022YFF0902202"]}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Robot. Autom. Lett."],"published-print":{"date-parts":[[2024,3]]},"DOI":"10.1109\/lra.2024.3359544","type":"journal-article","created":{"date-parts":[[2024,1,29]],"date-time":"2024-01-29T18:49:02Z","timestamp":1706554142000},"page":"2718-2725","source":"Crossref","is-referenced-by-count":10,"title":["GesGPT: Speech Gesture Synthesis With Text Parsing From ChatGPT"],"prefix":"10.1109","volume":"9","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-9547-1304","authenticated-orcid":false,"given":"Nan","family":"Gao","sequence":"first","affiliation":[{"name":"Institute of Automation, Chinese Academy of Sciences, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-6612-9731","authenticated-orcid":false,"given":"Zeyu","family":"Zhao","sequence":"additional","affiliation":[{"name":"University of Chinese Academy of Sciences, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2150-2088","authenticated-orcid":false,"given":"Zhi","family":"Zeng","sequence":"additional","affiliation":[{"name":"Beijing University of Posts and Telecommunications, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6013-6351","authenticated-orcid":false,"given":"Shuwu","family":"Zhang","sequence":"additional","affiliation":[{"name":"Beijing University of Posts and Telecommunications, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2352-0896","authenticated-orcid":false,"given":"Dongdong","family":"Weng","sequence":"additional","affiliation":[{"name":"Beijing Engineering Research Center of Mixed Reality and Advanced Display, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4250-2258","authenticated-orcid":false,"given":"Yihua","family":"Bao","sequence":"additional","affiliation":[{"name":"Beijing Engineering Research Center of Mixed Reality and Advanced Display, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1145\/3414685.3417836"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1111\/cgf.14734"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01089"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1145\/3472306.3478335"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1145\/3267851.3267878"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-77817-0_25"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1145\/3550454.3555435"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01016"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1145\/3536221.3558058"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1111\/cgf.14776"},{"key":"ref11","first-page":"1877","article-title":"Language models are few-shot learners","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Brown","year":"2020"},{"key":"ref12","article-title":"BERT: Pre-training of deep bidirectional transformers for language understanding","author":"Devlin","year":"2018"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1145\/3560815"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2023.3310935"},{"key":"ref15","article-title":"ChatGPT for robotics: Design principles and model abilities","volume":"2","author":"Vemprala","year":"2023","journal-title":"Microsoft Auton. Syst. Robot. Res."},{"key":"ref16","article-title":"Human-Assisted continual robot learning with foundation models","author":"Parakh","year":"2023"},{"key":"ref17","first-page":"127","article-title":"ROGUE: Robot gesture engine","volume-title":"Proc. AAAI Spring Symp. Ser.","author":"Holladay","year":"2016"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1145\/3472749.3474789"},{"key":"ref19","article-title":"Multimodal analysis of the predictability of hand-gesture properties","author":"Kucherenko","year":"2021"},{"key":"ref20","volume-title":"Gesture Generation by Imitation: From Human Behavior to Computer Character Animation","author":"Kipp","year":"2005"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2023\/650"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00230"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1162\/jocn.2007.19.4.605"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1145\/192161.192272"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/ROMAN.2009.5326136"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1145\/3536221.3558063"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1145\/3528233.3530750"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP43922.2022.9747380"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01022"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/IROS47612.2022.9981734"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.7208\/chicago\/9780226514642.001.0001"},{"key":"ref32","article-title":"Grounded decoding: Guiding text generation with grounded models for robot control","author":"Huang","year":"2023"},{"key":"ref33","article-title":"VoxPoser: Composable 3D value maps for robotic manipulation with language models","author":"Huang","year":"2023"},{"key":"ref34","article-title":"MotionGPT: Human motion as a foreign language","author":"Jiang","year":"2023"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1016\/B978-0-08-015867-9.50013-7"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-20071-7_36"},{"key":"ref37","first-page":"11050","article-title":"Bailando: 3D dance generation by actor-critic GPT with choreographic memory","volume-title":"Proc. IEEE\/CVF Conf. Comput. Vis. Pattern Recognit.","author":"Li","year":"2022"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1145\/2207676.2208639"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2017-1386"},{"key":"ref40","article-title":"MediaPipe: A framework for building perception pipelines","author":"Lugaresi","year":"2019"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1145\/3414685.3417838"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.3115\/v1\/d14-1179"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1017\/CBO9780511620850.012"}],"container-title":["IEEE Robotics and Automation Letters"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/7083369\/10409903\/10415498.pdf?arnumber=10415498","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,12,23]],"date-time":"2024-12-23T19:31:33Z","timestamp":1734982293000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10415498\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,3]]},"references-count":43,"journal-issue":{"issue":"3"},"URL":"https:\/\/doi.org\/10.1109\/lra.2024.3359544","relation":{},"ISSN":["2377-3766","2377-3774"],"issn-type":[{"value":"2377-3766","type":"electronic"},{"value":"2377-3774","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,3]]}}}