{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,4]],"date-time":"2026-03-04T17:20:56Z","timestamp":1772644856571,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":23,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,11,24]],"date-time":"2024-11-24T00:00:00Z","timestamp":1732406400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,11,24]]},"DOI":"10.1145\/3687272.3690915","type":"proceedings-article","created":{"date-parts":[[2024,11,20]],"date-time":"2024-11-20T00:24:28Z","timestamp":1732062268000},"page":"453-455","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":1,"title":["A Learning-based Co-Speech Gesture Generation System for Social Robots"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0005-6020-6590","authenticated-orcid":false,"given":"Xiangqi","family":"Li","sequence":"first","affiliation":[{"name":"School of Mathematical and Computer Science, Heriot-Watt University, United Kingdom"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4821-5871","authenticated-orcid":false,"given":"Christian","family":"Dondrup","sequence":"additional","affiliation":[{"name":"School of Mathematical and Computer Sciences, Heriot-Watt University, United Kingdom"}]}],"member":"320","published-online":{"date-parts":[[2024,11,24]]},"reference":[{"key":"e_1_3_2_2_1_1","unstructured":"[n. d.]. Hardware overview - PAL SDK 23.1 documentation. https:\/\/docs.pal-robotics.com\/ari\/sdk\/23.1\/hardware\/hardware_overview.html"},{"key":"e_1_3_2_2_2_1","unstructured":"[n. d.]. NAO - Documentation \u2014 Aldebaran 2.8.7.4 documentation. http:\/\/doc.aldebaran.com\/2-8\/home_nao.html"},{"key":"e_1_3_2_2_3_1","unstructured":"[n. d.]. Pepper - Documentation \u2014 Aldebaran 2.8.7.4 documentation. http:\/\/doc.aldebaran.com\/2-8\/home_pepper.html"},{"key":"e_1_3_2_2_4_1","doi-asserted-by":"publisher","DOI":"10.1145\/3550454.3555435"},{"key":"e_1_3_2_2_5_1","volume-title":"IEEE\/ACM International Conference on Human-Robot Interaction. https:\/\/api.semanticscholar.org\/CorpusID:4411615","author":"Bartneck Christoph","year":"2008","unstructured":"Christoph Bartneck, Dana Kuli\u0107, and Elizabeth\u00a0A. Croft. 2008. Measuring the anthropomorphism, animacy, likeability, perceived intelligence, and perceived safety of robots. In IEEE\/ACM International Conference on Human-Robot Interaction. https:\/\/api.semanticscholar.org\/CorpusID:4411615"},{"key":"e_1_3_2_2_6_1","doi-asserted-by":"publisher","DOI":"10.1145\/3474085.3475223"},{"key":"e_1_3_2_2_7_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.robot.2022.104154"},{"key":"e_1_3_2_2_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2019.2912988"},{"key":"e_1_3_2_2_9_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP49357.2023.10095344"},{"key":"e_1_3_2_2_10_1","doi-asserted-by":"publisher","DOI":"10.1145\/3308532.3329472"},{"key":"e_1_3_2_2_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00085"},{"key":"e_1_3_2_2_12_1","doi-asserted-by":"publisher","DOI":"10.1109\/TVCG.2023.3276973"},{"key":"e_1_3_2_2_13_1","volume-title":"Learning Hierarchical Cross-Modal Association for Co-Speech Gesture Generation. 2022 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR)","author":"Liu Xian","year":"2022","unstructured":"Xian Liu, Qianyi Wu, Hang Zhou, Yinghao Xu, Rui Qian, Xinyi Lin, Xiaowei Zhou, Wayne Wu, Bo Dai, and Bolei Zhou. 2022. Learning Hierarchical Cross-Modal Association for Co-Speech Gesture Generation. 2022 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR) (2022), 10452\u201310462. https:\/\/api.semanticscholar.org\/CorpusID:247627826"},{"key":"e_1_3_2_2_14_1","volume-title":"Gesticulating with NAO: Real-time Context-Aware Co-Speech Gesture Generation for Human-Robot Interaction. Companion Publication of the 25th International Conference on Multimodal Interaction","author":"Viet\u00a0Tuyen Nguyen Tan","year":"2023","unstructured":"Tan Viet\u00a0Tuyen Nguyen, Viktor Schmuck, and Oya \u00c7eliktutan. 2023. Gesticulating with NAO: Real-time Context-Aware Co-Speech Gesture Generation for Human-Robot Interaction. Companion Publication of the 25th International Conference on Multimodal Interaction (2023). https:\/\/api.semanticscholar.org\/CorpusID:263776453"},{"key":"e_1_3_2_2_15_1","unstructured":"palrobot. [n. d.]. ARI - The social and collaborative robot. https:\/\/pal-robotics.com\/robots\/ari\/"},{"key":"e_1_3_2_2_16_1","volume-title":"Speech Drives Templates: Co-Speech Gesture Synthesis with Learned Templates. 2021 IEEE\/CVF International Conference on Computer Vision (ICCV) (2021","author":"Qian Shenhan","year":"2021","unstructured":"Shenhan Qian, Zhi Tu, Yihao Zhi, Wen Liu, and Shenghua Gao. 2021. Speech Drives Templates: Co-Speech Gesture Synthesis with Learned Templates. 2021 IEEE\/CVF International Conference on Computer Vision (ICCV) (2021), 11057\u201311066. https:\/\/api.semanticscholar.org\/CorpusID:237194673"},{"key":"e_1_3_2_2_17_1","first-page":"203","article-title":"Hand and Mind","volume":"37","author":"Studdert-Kennedy Michael","year":"1994","unstructured":"Michael Studdert-Kennedy. 1994. Hand and Mind: What Gestures Reveal About Thought.Language and Speech 37 (1994), 203 \u2013 209. https:\/\/api.semanticscholar.org\/CorpusID:13569413","journal-title":"What Gestures Reveal About Thought.Language and Speech"},{"key":"e_1_3_2_2_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00231"},{"key":"e_1_3_2_2_19_1","doi-asserted-by":"publisher","DOI":"10.1017\/S0047404507240059"},{"key":"e_1_3_2_2_20_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.specom.2013.09.008"},{"key":"e_1_3_2_2_21_1","volume-title":"Robots Learn Social Skills: End-to-End Learning of Co-Speech Gesture Generation for Humanoid Robots. 2019 International Conference on Robotics and Automation (ICRA)","author":"Yoon Youngwoo","year":"2018","unstructured":"Youngwoo Yoon, Woo-Ri Ko, Minsu Jang, Jaeyeon Lee, Jaehong Kim, and Geehyuk Lee. 2018. Robots Learn Social Skills: End-to-End Learning of Co-Speech Gesture Generation for Humanoid Robots. 2019 International Conference on Robotics and Automation (ICRA) (2018), 4303\u20134309. https:\/\/api.semanticscholar.org\/CorpusID:53116592"},{"key":"e_1_3_2_2_22_1","volume-title":"Speech-Driven Robot Face Action Generation with Deep Generative Model for Social Robots. In International Conference on Software Reuse. https:\/\/api.semanticscholar.org\/CorpusID:256549090","author":"Yu Chuang","year":"2022","unstructured":"Chuang Yu, Heng Zhang, Zhegong Shangguan, Xiaoxuan Hei, Angelo Cangelosi, and Adriana Tapus. 2022. Speech-Driven Robot Face Action Generation with Deep Generative Model for Social Robots. In International Conference on Software Reuse. https:\/\/api.semanticscholar.org\/CorpusID:256549090"},{"key":"e_1_3_2_2_23_1","volume-title":"Taming Diffusion Models for Audio-Driven Co-Speech Gesture Generation. 2023 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR)","author":"Zhu Lingting","year":"2023","unstructured":"Lingting Zhu, Xian Liu, Xuan Liu, Rui Qian, Ziwei Liu, and Lequan Yu. 2023. Taming Diffusion Models for Audio-Driven Co-Speech Gesture Generation. 2023 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR) (2023), 10544\u201310553. https:\/\/api.semanticscholar.org\/CorpusID:257557708"}],"event":{"name":"HAI '24: International Conference on Human-Agent Interaction","location":"Swansea United Kingdom","acronym":"HAI '24","sponsor":["SIGCHI ACM Special Interest Group on Computer-Human Interaction"]},"container-title":["Proceedings of the 12th International Conference on Human-Agent Interaction"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3687272.3690915","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3687272.3690915","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T12:40:34Z","timestamp":1755866434000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3687272.3690915"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,11,24]]},"references-count":23,"alternative-id":["10.1145\/3687272.3690915","10.1145\/3687272"],"URL":"https:\/\/doi.org\/10.1145\/3687272.3690915","relation":{},"subject":[],"published":{"date-parts":[[2024,11,24]]},"assertion":[{"value":"2024-11-24","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}