{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T14:40:08Z","timestamp":1755873608449,"version":"3.44.0"},"publisher-location":"New York, NY, USA","reference-count":44,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,9,19]],"date-time":"2023-09-19T00:00:00Z","timestamp":1695081600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,9,19]]},"DOI":"10.1145\/3570945.3607336","type":"proceedings-article","created":{"date-parts":[[2023,12,22]],"date-time":"2023-12-22T06:07:02Z","timestamp":1703225222000},"page":"1-3","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["How Far ahead Can Model Predict Gesture Pose from Speech and Spoken Text?"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0001-3849-1656","authenticated-orcid":false,"given":"Ryo","family":"Ishii","sequence":"first","affiliation":[{"name":"NTT Corporation, Minato-ku, Tokyo, Japan"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8389-7463","authenticated-orcid":false,"given":"Akira","family":"Morikawa","sequence":"additional","affiliation":[{"name":"NTT Corporation, Minato-ku, Tokyo, Japan"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4351-3530","authenticated-orcid":false,"given":"Shinichiro","family":"Eitoku","sequence":"additional","affiliation":[{"name":"NTT Corporation, Minato-ku, Tokyo, Japan"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2133-016X","authenticated-orcid":false,"given":"Atsushi","family":"Fukayama","sequence":"additional","affiliation":[{"name":"NTT Corporation, Minato-ku, Tokyo, Japan"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8267-871X","authenticated-orcid":false,"given":"Takao","family":"Nakamura","sequence":"additional","affiliation":[{"name":"NTT Corporation, Minato-ku, Tokyo, Japan"}]}],"member":"320","published-online":{"date-parts":[[2023,12,22]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.findings-emnlp.170"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01991"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58523-5_15"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1145\/3340555.3353725"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1111\/cgf.13946"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2014.471"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1145\/3550454.3555435"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/VR50410.2021.00037"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.specom.2020.02.005"},{"key":"e_1_3_2_1_10_1","volume-title":"workshop at 2022 ACM\/IEEE International Conference on Human-Robot Interaction","author":"Deichler A.","year":"2022","unstructured":"A. Deichler, S. Wang, S. Alexanderson, and J. Beskow. Towards context-aware human-like pointing gestures with rl motion imitation. In Context-Awareness in Human-Robot Interaction: Approaches and Challenges, workshop at 2022 ACM\/IEEE International Conference on Human-Robot Interaction, 2022."},{"key":"e_1_3_2_1_11_1","first-page":"6","article-title":"Zero-shot style transfer for gesture animation driven by text and speech using adversarial disentanglement of multimodal style encoding","author":"Fares Mireille","year":"2023","unstructured":"Mireille Fares, Catherine Pelachaud, and Nicolas Obin. Zero-shot style transfer for gesture animation driven by text and speech using adversarial disentanglement of multimodal style encoding. Frontiers in Artificial Intelligence, 6, 2023.","journal-title":"Frontiers in Artificial Intelligence"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1002\/cav.2016"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1145\/3536221.3558068"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00361"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1145\/3528233.3530750"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1145\/3472306.3478335"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1145\/3267851.3267878"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2018.2856281"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1145\/3383652.3423908"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.2197\/ipsjjip.29.30"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1145\/3267851.3267869"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1145\/3267851.3267866"},{"key":"e_1_3_2_1_23_1","volume-title":"Naoshi Kaneko, and Hedvig Kjellstr\u00f6m. Analyzing input and output representations for speech-driven gesture generation. In IVA, page 97--104","author":"Kucherenko Taras","year":"2019","unstructured":"Taras Kucherenko, Dai Hasegawa, Gustav Eje Henter, Naoshi Kaneko, and Hedvig Kjellstr\u00f6m. Analyzing input and output representations for speech-driven gesture generation. In IVA, page 97--104, 2019."},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1080\/10447318.2021.1883883"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1145\/3382507.3418815"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.5555\/3535850.3535937"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1145\/3461615.3485408"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01110"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01022"},{"key":"e_1_3_2_1_30_1","first-page":"1884","volume-title":"Advances in Neural Information Processing Systems (NeurIPS)","author":"Heusel","year":"2017","unstructured":"Heusel M., Ramsauer H., Unterthiner T., Nessler B., and Hochreiter S. Gans trained by a two time-scale update rule converge to a local nash equilibrium. In Advances in Neural Information Processing Systems (NeurIPS), volume 30, pages 1884--1895. Association for Computational Linguistics, 2017."},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1145\/3340555.3353736"},{"key":"e_1_3_2_1_32_1","first-page":"01","article-title":"A comprehensive review of data-driven co-speech gesture generation","volume":"42","author":"Nyatsanga Simbarashe","year":"2023","unstructured":"Simbarashe Nyatsanga, Taras Kucherenko, Chaitanya Ahuja, Gustav Henter, and Michael Neff. A comprehensive review of data-driven co-speech gesture generation. In EUROGRAPHICS, volume 42, 01 2023.","journal-title":"EUROGRAPHICS"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01089"},{"key":"e_1_3_2_1_34_1","first-page":"573","volume-title":"Passing a non-verbal turing test: Evaluating gesture animations generated from speech. 2021 IEEE Virtual Reality and 3D User Interfaces (VR)","author":"Rebol Manuel","year":"2021","unstructured":"Manuel Rebol, Christian G\u00fctl, and Krzysztof Pietroszek. Passing a non-verbal turing test: Evaluating gesture animations generated from speech. 2021 IEEE Virtual Reality and 3D User Interfaces (VR), pages 573--581, 2021."},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.specom.2019.04.005"},{"key":"e_1_3_2_1_36_1","volume-title":"Cmcf: An architecture for realtime gesture generation by clustering gestures by motion and communicative function. page 1136--1144","author":"Saund Carolyn","year":"2021","unstructured":"Carolyn Saund, Andrei B\u00eerl\u0103deanu, and Stacy Marsella. Cmcf: An architecture for realtime gesture generation by clustering gestures by motion and communicative function. page 1136--1144, 2021."},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1145\/3461615.3485407"},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.aiopen.2021.12.002"},{"key":"e_1_3_2_1_39_1","volume-title":"Speech gesture generation from the trimodal context of text, audio, and speaker identity. 39(6)","author":"Yoon Youngwoo","year":"2020","unstructured":"Youngwoo Yoon, Bok Cha, Joo-Haeng Lee, Minsu Jang, Jaeyeon Lee, Jaehong Kim, and Geehyuk Lee. Speech gesture generation from the trimodal context of text, audio, and speaker identity. 39(6), 2020."},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2019.8793720"},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1145\/3536221.3558058"},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1145\/3536221.3558063"},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00341"},{"key":"e_1_3_2_1_44_1","volume-title":"International Joint Conference on Artificial Intelligence","author":"Zhuang Wenlin","year":"2022","unstructured":"Wenlin Zhuang, Jinwei Qi, Peng Zhang, Bang Zhang, and Ping Tan. Text\/speech-driven full-body animation. In International Joint Conference on Artificial Intelligence, 2022."}],"event":{"name":"IVA '23: ACM International Conference on Intelligent Virtual Agents","sponsor":["SIGAI ACM Special Interest Group on Artificial Intelligence"],"location":"W\u00fcrzburg Germany","acronym":"IVA '23"},"container-title":["Proceedings of the 23rd ACM International Conference on Intelligent Virtual Agents"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3570945.3607336","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3570945.3607336","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T14:26:46Z","timestamp":1755872806000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3570945.3607336"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,9,19]]},"references-count":44,"alternative-id":["10.1145\/3570945.3607336","10.1145\/3570945"],"URL":"https:\/\/doi.org\/10.1145\/3570945.3607336","relation":{},"subject":[],"published":{"date-parts":[[2023,9,19]]},"assertion":[{"value":"2023-12-22","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}