{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,20]],"date-time":"2025-10-20T10:29:56Z","timestamp":1760956196901,"version":"3.40.3"},"publisher-location":"Cham","reference-count":32,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783031246661"},{"type":"electronic","value":"9783031246678"}],"license":[{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2022]]},"DOI":"10.1007\/978-3-031-24667-8_6","type":"book-chapter","created":{"date-parts":[[2023,1,31]],"date-time":"2023-01-31T09:03:44Z","timestamp":1675155824000},"page":"61-74","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["Speech-Driven Robot Face Action Generation with\u00a0Deep Generative Model for\u00a0Social Robots"],"prefix":"10.1007","author":[{"given":"Chuang","family":"Yu","sequence":"first","affiliation":[]},{"given":"Heng","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"Zhegong","family":"Shangguan","sequence":"additional","affiliation":[]},{"given":"Xiaoxuan","family":"Hei","sequence":"additional","affiliation":[]},{"given":"Angelo","family":"Cangelosi","sequence":"additional","affiliation":[]},{"given":"Adriana","family":"Tapus","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2023,2,1]]},"reference":[{"key":"6_CR1","doi-asserted-by":"publisher","unstructured":"Yu, C., Tapus, A.: Interactive robot learning for multimodal emotion recognition. In: Salichs, M.A., et al. (eds.) ICSR 2019. LNCS (LNAI), vol. 11876, pp. 633\u2013642. Springer, Cham (2019). https:\/\/doi.org\/10.1007\/978-3-030-35888-4_59","DOI":"10.1007\/978-3-030-35888-4_59"},{"issue":"6","key":"6_CR2","doi-asserted-by":"publisher","first-page":"721","DOI":"10.1016\/j.robot.2014.03.003","volume":"62","author":"K Noda","year":"2014","unstructured":"Noda, K., Arie, H., Suga, Y., Ogata, T.: Multimodal integration learning of robot behavior using deep neural networks. Robot. Autonom. Syst. 62(6), 721\u2013736 (2014)","journal-title":"Robot. Autonom. Syst."},{"key":"6_CR3","doi-asserted-by":"crossref","unstructured":"Yu, C., Tapus, A.: Multimodal emotion recognition with thermal and RGB-D cameras for human-robot interaction. In: Companion of the ACM\/IEEE International Conference on Human-Robot Interaction, vol. 2020, pp. 532\u2013534 (2020)","DOI":"10.1145\/3371382.3378342"},{"key":"6_CR4","doi-asserted-by":"crossref","unstructured":"Yu, C., Changzeng, F., Chen, R., Tapus, A.: First attempt of gender-free speech style transfer for genderless robot. In ACM\/IEEE International Conference on Human-Robot Interaction, vol. 2022, pp. 1110\u20131113 (2022)","DOI":"10.1109\/HRI53351.2022.9889533"},{"key":"6_CR5","doi-asserted-by":"publisher","first-page":"6","DOI":"10.1145\/3414685.3417838","volume":"39","author":"Y Yoon","year":"2020","unstructured":"Yoon, Y., et al.: Speech gesture generation from the trimodal context of text, audio, and speaker identity. ACM Trans. Graph. 39, 6 (2020)","journal-title":"ACM Trans. Graph."},{"key":"6_CR6","doi-asserted-by":"crossref","unstructured":"Vougioukas, K., Petridis, S., Pantic, M.: End-to-end speech-driven realistic facial animation with temporal gans. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition Workshops, pp. 37\u201340 (2019)","DOI":"10.1007\/s11263-019-01251-8"},{"key":"6_CR7","doi-asserted-by":"crossref","unstructured":"Habibie, I., Holden, D., Schwarz, J., Yearsley, J., Komura, T.: A recurrent variational autoencoder for human motion synthesis. In: 28th British Machine Vision Conference (2017)","DOI":"10.5244\/C.31.119"},{"key":"6_CR8","doi-asserted-by":"crossref","unstructured":"Yu, C., Tapus, A.: Srg 3: Speech-driven robot gesture generation with GAN. In: 2020 16th International Conference on Control, Automation, Robotics and Vision (ICARCV), pp. 759\u2013766. IEEE (2020)","DOI":"10.1109\/ICARCV50220.2020.9305330"},{"key":"6_CR9","doi-asserted-by":"crossref","unstructured":"Zhang, H., Yu, C., Tapus, A.: Why do you think this joke told by robot is funny? The humor style matters. In: 2022 31st IEEE International Conference on Robot and Human Interactive Communication (RO-MAN), pp. 572\u2013577. IEEE (2022)","DOI":"10.1109\/RO-MAN53752.2022.9900515"},{"key":"6_CR10","unstructured":"Yu, C.: Robot behavior generation and human behavior understanding in natural human-robot interaction. Ph.D. dissertation, Institut Polytechnique de Paris (2021)"},{"key":"6_CR11","doi-asserted-by":"publisher","unstructured":"Lee, J., Marsella, S.: Nonverbal behavior generator for embodied conversational agents. In: Gratch, J., Young, M., Aylett, R., Ballin, D., Olivier, P. (eds.) IVA 2006. LNCS (LNAI), vol. 4133, pp. 243\u2013255. Springer, Heidelberg (2006). https:\/\/doi.org\/10.1007\/11821830_20","DOI":"10.1007\/11821830_20"},{"key":"6_CR12","doi-asserted-by":"crossref","unstructured":"Aly, A., Tapus, A.: Multimodal adapted robot behavior synthesis within a narrative human-robot interaction. In: 2015 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS), pp. 2986\u20132993. IEEE (2015)","DOI":"10.1109\/IROS.2015.7353789"},{"issue":"3\u20134","key":"6_CR13","doi-asserted-by":"publisher","first-page":"443","DOI":"10.1007\/s10846-014-0066-1","volume":"78","author":"JW Park","year":"2015","unstructured":"Park, J.W., Lee, H.S., Chung, M.J.: Generation of realistic robot facial expressions for human robot interaction. J. Intell. Robot. Syst. 78(3\u20134), 443\u2013462 (2015)","journal-title":"J. Intell. Robot. Syst."},{"key":"6_CR14","unstructured":"Foster, D.: Generative deep learning: teaching machines to paint, write, compose, and play. O\u2019Reilly Media (2019)"},{"key":"6_CR15","doi-asserted-by":"crossref","unstructured":"Gupta, A., Johnson, J., Fei-Fei, L., Savarese, S., Alahi, A.: Social gan: Socially acceptable trajectories with generative adversarial networks. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 2255\u20132264 (2018)","DOI":"10.1109\/CVPR.2018.00240"},{"key":"6_CR16","unstructured":"Gulrajani, I., Ahmed, F., Arjovsky, M., Dumoulin, V., Courville, A.C.: Improved training of wasserstein gans. In: Advances in Neural Information Processing Systems, pp. 5767\u20135777 (2017)"},{"key":"6_CR17","doi-asserted-by":"crossref","unstructured":"Isola, P., Zhu, J.-Y., Zhou, T., Efros, A.A.: Image-to-image translation with conditional adversarial networks. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 1125\u20131134 (2017)","DOI":"10.1109\/CVPR.2017.632"},{"key":"6_CR18","unstructured":"Rezende, D., Mohamed, S.: Variational inference with normalizing flows. In: International Conference on Machine Learning, pp. 1530\u20131538 (2015)"},{"key":"6_CR19","doi-asserted-by":"crossref","unstructured":"Henter, G.E., Alexanderson, S., Beskow, J.: Moglow: Probabilistic and controllable motion synthesis using normalising flows. arXiv preprint arXiv:1905.06598 (2019)","DOI":"10.1145\/3414685.3417836"},{"key":"6_CR20","doi-asserted-by":"crossref","unstructured":"Blanz, V., Vetter, T.: A morphable model for the synthesis of 3d faces. In: Proceedings of the 26th Annual Conference on Computer Graphics and Interactive Techniques, pp. 187\u2013194 (1999)","DOI":"10.1145\/311535.311556"},{"key":"6_CR21","doi-asserted-by":"crossref","unstructured":"Egger, B., et al.: 3d morphable face models-past, present, and future. ACM Trans. Graph. 39(5), 1\u201338 (2020)","DOI":"10.1145\/3395208"},{"issue":"6","key":"6_CR22","first-page":"1","volume":"39","author":"Y Zhou","year":"2020","unstructured":"Zhou, Y., Han, X., Shechtman, E., Echevarria, J., Kalogerakis, E., Li, D.: Makelttalk: Speaker-aware talking-head animation. ACM Trans. Graph. 39(6), 1\u201315 (2020)","journal-title":"ACM Trans. Graph."},{"key":"6_CR23","unstructured":"Sadoughi, N., Busso, C.: Speech-driven expressive talking lips with conditional sequential generative adversarial networks. IEEE Trans. Affect. Comput. (2019)"},{"key":"6_CR24","doi-asserted-by":"crossref","unstructured":"Hussen Abdelaziz, A., Theobald, B.-J., Dixon, P., Knothe, R., Apostoloff, N., Kajareker, S.: Modality dropout for improved performance-driven talking faces. In: Proceedings of the 2020 International Conference on Multimodal Interaction, pp. 378\u2013386 (2020)","DOI":"10.1145\/3382507.3418840"},{"key":"6_CR25","doi-asserted-by":"crossref","unstructured":"Ishi, C.T., Minato, T., Ishiguro, H.: Analysis and generation of laughter motions, and evaluation in an android robot. APSIPA Trans. Signal Inf. Process. 8 (2019)","DOI":"10.1017\/ATSIP.2018.32"},{"key":"6_CR26","unstructured":"Arjovsky, M., Chintala, S., Bottou, L.: Wasserstein generative adversarial networks. In: Proceedings of the 34th International Conference on Machine Learning, vol. 70, pp. 214\u2013223 (2017)"},{"issue":"6","key":"6_CR27","doi-asserted-by":"publisher","first-page":"591","DOI":"10.1109\/TMM.2010.2052239","volume":"12","author":"G Fanelli","year":"2010","unstructured":"Fanelli, G., Gall, J., Romsdorfer, H., Weise, T., Van Gool, L.: A 3-d audio-visual corpus of affective communication. IEEE Trans. Multim. 12(6), 591\u2013598 (2010)","journal-title":"IEEE Trans. Multim."},{"issue":"4","key":"6_CR28","doi-asserted-by":"publisher","first-page":"543","DOI":"10.1016\/j.specom.2011.11.004","volume":"54","author":"M Sahidullah","year":"2012","unstructured":"Sahidullah, M., Saha, G.: Design, analysis and experimental evaluation of block based transformation in MFCC computation for speaker recognition. Speech Commun. 54(4), 543\u2013565 (2012)","journal-title":"Speech Commun."},{"key":"6_CR29","first-page":"1755","volume":"10","author":"DE King","year":"2009","unstructured":"King, D.E.: Dlib-ml: A machine learning toolkit. J. Mach. Learn. Res. 10, 1755\u20131758 (2009)","journal-title":"J. Mach. Learn. Res."},{"issue":"6","key":"6_CR30","first-page":"1","volume":"1","author":"N Dave","year":"2013","unstructured":"Dave, N.: Feature extraction methods LPC, PLP and MFCC in speech recognition. Int. J. Adv. Res. Eng. Technol. 1(6), 1\u20134 (2013)","journal-title":"Int. J. Adv. Res. Eng. Technol."},{"key":"6_CR31","unstructured":"Kingma, D.P., Ba, J.: Adam: A method for stochastic optimization. arXiv preprint arXiv:1412.6980 (2014)"},{"key":"6_CR32","doi-asserted-by":"crossref","unstructured":"Hasegawa, D., Kaneko, N., Shirakawa, S., Sakuta, H., Sumi, K.: Evaluation of speech-to-gesture generation using bi-directional LSTM network. In: Proceedings of the 18th International Conference on Intelligent Virtual Agents, pp. 79\u201386 (2018)","DOI":"10.1145\/3267851.3267878"}],"container-title":["Lecture Notes in Computer Science","Social Robotics"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-24667-8_6","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,1,31]],"date-time":"2023-01-31T09:06:10Z","timestamp":1675155970000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-24667-8_6"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022]]},"ISBN":["9783031246661","9783031246678"],"references-count":32,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-24667-8_6","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2022]]},"assertion":[{"value":"1 February 2023","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ICSR","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Social Robotics","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Florence","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2022","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"13 December 2022","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"16 December 2022","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"14","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"socrob2022","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/www.icsr2022.it\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Single-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"EquinOCS","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"143","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"111","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"78% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"2","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"No","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}