{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,27]],"date-time":"2025-03-27T01:39:34Z","timestamp":1743039574041,"version":"3.40.3"},"publisher-location":"Cham","reference-count":18,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783031816871"},{"type":"electronic","value":"9783031816888"}],"license":[{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-81688-8_3","type":"book-chapter","created":{"date-parts":[[2025,2,25]],"date-time":"2025-02-25T07:23:18Z","timestamp":1740468198000},"page":"30-44","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Towards More Expressive Human-Robot Interactions: Combining Latent Representations and\u00a0Diffusion Models for\u00a0Co-speech Gesture Generation"],"prefix":"10.1007","author":[{"given":"Filippo","family":"Favali","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Viktor","family":"Schmuck","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Valeria","family":"Villani","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Oya","family":"Celiktutan","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2025,2,26]]},"reference":[{"doi-asserted-by":"crossref","unstructured":"Cassell, J., et al.: Animated conversation: rule-based generation of facial expression, gesture & spoken intonation for multiple conversational agents. In: Proceedings of the 21st Annual Conference on Computer Graphics and Interactive Techniques, pp. 413\u2013420 (1994)","key":"3_CR1","DOI":"10.1145\/192161.192272"},{"doi-asserted-by":"crossref","unstructured":"Cassell, J.: A framework for gesture generation and interpretation. Comput. Vis. Human-Mach. Interact., 191\u2013215 (1998)","key":"3_CR2","DOI":"10.1017\/CBO9780511569937.013"},{"doi-asserted-by":"crossref","unstructured":"Lee, G., Deng, Z., Ma, S., Shiratori, T., Srinivasa, S.S., Sheikh, Y.: Talking with hands 16.2 m: a large-scale dataset of synchronized body-finger motion and audio for conversational motion analysis and synthesis. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 763\u2013772 (2019)","key":"3_CR3","DOI":"10.1109\/ICCV.2019.00085"},{"doi-asserted-by":"crossref","unstructured":"Liu, H., et al.: Beat: a large-scale semantic and emotional multi-modal dataset for conversational gestures synthesis. In: European Conference on Computer Vision, pp. 612\u2013630. Springer (2022)","key":"3_CR4","DOI":"10.1007\/978-3-031-20071-7_36"},{"doi-asserted-by":"crossref","unstructured":"Takeuchi, K., Hasegawa, D., Shirakawa, S., Kaneko, N., Sakuta, H., Sumi, K.: Speech-to-gesture generation: a challenge in deep learning approach with bi-directional LSTM. In: Proceedings of the 5th International Conference on Human Agent Interaction, pp. 365\u2013369 (2017)","key":"3_CR5","DOI":"10.1145\/3125739.3132594"},{"doi-asserted-by":"crossref","unstructured":"Hasegawa, D., Kaneko, N., Shirakawa, S., Sakuta, H., Sumi, K.: Evaluation of speech-to-gesture generation using bi-directional LSTM network. In: Proceedings of the 18th International Conference on Intelligent Virtual Agents, pp. 79\u201386 (2018)","key":"3_CR6","DOI":"10.1145\/3267851.3267878"},{"doi-asserted-by":"crossref","unstructured":"Korzun, V., Beloborodova, A., Ilin, A.: The FineMotion entry to the GENEA challenge 2023: DeepPhase for conversational gestures generation. In: Proceedings of the 25th International Conference on Multimodal Interaction, pp. 786\u2013791 (2023)","key":"3_CR7","DOI":"10.1145\/3577190.3616119"},{"doi-asserted-by":"crossref","unstructured":"Tang, H., Wang, W., Xu, D., Yan, Y., Sebe, N.: GestureGAN for hand gesture-to-gesture translation in the wild. In: Proceedings of the 26th ACM International Conference on Multimedia, pp. 774\u2013782 (2018)","key":"3_CR8","DOI":"10.1145\/3240508.3240704"},{"doi-asserted-by":"crossref","unstructured":"Tuyen, N.T.V., Elibol, A., Chong, N.Y.: A GAN-based approach to communicative gesture generation for social robots. In: 2021 IEEE International Conference on Advanced Robotics and Its Social Impacts (ARSO), pp. 58\u201364. IEEE (2021)","key":"3_CR9","DOI":"10.1109\/ARSO51874.2021.9542828"},{"doi-asserted-by":"crossref","unstructured":"Nyatsanga, S., Kucherenko, T., Ahuja, C., Henter, G.E., Neff, M.: A comprehensive review of data-driven co-speech gesture generation. Comput. Graph. Forum 42, 569\u2013596. Wiley Online Library (2023)","key":"3_CR10","DOI":"10.1111\/cgf.14776"},{"unstructured":"Tevet, G., Raab, S., Gordon, B., Shafir, Y., Cohen-Or, D., Bermano, A.H.: Human motion diffusion model. arXiv preprint arXiv:2209.14916 (2022)","key":"3_CR11"},{"doi-asserted-by":"crossref","unstructured":"Yang, S., et al.: DiffuseStyleGesture: stylized audio-driven co-speech gesture generation with diffusion models. arXiv preprint arXiv:2305.04919 (2023)","key":"3_CR12","DOI":"10.24963\/ijcai.2023\/650"},{"doi-asserted-by":"crossref","unstructured":"Yang, S., et al.: The DiffuseStyleGesture+ entry to the GENEA challenge 2023. In: Proceedings of the 25th International Conference on Multimodal Interaction, pp. 779\u2013785 (2023)","key":"3_CR13","DOI":"10.1145\/3577190.3616114"},{"key":"3_CR14","first-page":"6840","volume":"33","author":"J Ho","year":"2020","unstructured":"Ho, J., Jain, A., Abbeel, P.: Denoising diffusion probabilistic models. Adv. Neural. Inf. Process. Syst. 33, 6840\u20136851 (2020)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"unstructured":"Van Den\u00a0Oord, A., Vinyals, O., et\u00a0al.: Neural discrete representation learning. In: Advances in Neural Information Processing Systems, vol. 30 (2017)","key":"3_CR15"},{"doi-asserted-by":"crossref","unstructured":"Chang, C.-J., Zhang, S., Kapadia, M.: The IVI lab entry to the GENEA challenge 2022\u2013a tacotron2 based method for co-speech gesture generation with locality-constraint attention mechanism. In: Proceedings of the 2022 International Conference on Multimodal Interaction, pp. 784\u2013789 (2022)","key":"3_CR16","DOI":"10.1145\/3536221.3558060"},{"unstructured":"Mikolov, T., Grave, E., Bojanowski, P., Puhrsch, C., Joulin, A.: Advances in pre-training distributed word representations. arXiv preprint arXiv:1712.09405 (2017)","key":"3_CR17"},{"doi-asserted-by":"crossref","unstructured":"Kucherenko, T., et al.: The GENEA challenge 2023: a large-scale evaluation of gesture generation models in monadic and dyadic settings. In: Proceedings of the 25th International Conference on Multimodal Interaction, pp. 792\u2013801 (2023)","key":"3_CR18","DOI":"10.1145\/3577190.3616120"}],"container-title":["Springer Proceedings in Advanced Robotics","Human-Friendly Robotics 2024"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-81688-8_3","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,2,25]],"date-time":"2025-02-25T07:23:30Z","timestamp":1740468210000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-81688-8_3"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025]]},"ISBN":["9783031816871","9783031816888"],"references-count":18,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-81688-8_3","relation":{},"ISSN":["2511-1256","2511-1264"],"issn-type":[{"type":"print","value":"2511-1256"},{"type":"electronic","value":"2511-1264"}],"subject":[],"published":{"date-parts":[[2025]]},"assertion":[{"value":"26 February 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"HFR","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Workshop on Human-Friendly Robotics","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Lugano","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Switzerland","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"30 September 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"1 October 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"17","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"hfr2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/sites.google.com\/view\/hfr2024\/home","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}