{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,26]],"date-time":"2026-03-26T03:37:20Z","timestamp":1774496240451,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":77,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,10,9]],"date-time":"2023-10-09T00:00:00Z","timestamp":1696809600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,10,9]]},"DOI":"10.1145\/3610661.3616547","type":"proceedings-article","created":{"date-parts":[[2023,10,9]],"date-time":"2023-10-09T16:51:22Z","timestamp":1696870282000},"page":"228-237","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":9,"title":["Towards the generation of synchronized and believable non-verbal facial behaviors of a talking virtual agent"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-4815-5074","authenticated-orcid":false,"given":"Alice","family":"Delbosc","sequence":"first","affiliation":[{"name":"DAVI The Humanizers, France and CNRS, LIS, Aix Marseille University, France and CNRS, LISN, Paris-Saclay University, France"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7919-5688","authenticated-orcid":false,"given":"Magalie","family":"Ochs","sequence":"additional","affiliation":[{"name":"CNRS, LIS, Aix Marseille University, France"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7458-6732","authenticated-orcid":false,"given":"Nicolas","family":"Sabouret","sequence":"additional","affiliation":[{"name":"CNRS, LISN, Paris-Saclay University, France"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6824-4800","authenticated-orcid":false,"given":"Brian","family":"Ravenet","sequence":"additional","affiliation":[{"name":"CNRS, LISN, Paris-Saclay University, France"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2982-7127","authenticated-orcid":false,"given":"Stephane","family":"Ayache","sequence":"additional","affiliation":[{"name":"CNRS, LIS, Aix Marseille University, France"}]}],"member":"320","published-online":{"date-parts":[[2023,10,9]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1177\/1078390317719321"},{"key":"e_1_3_2_1_2_1","volume-title":"Wasserstein gan. arXiv","author":"Arjovsky Martin","year":"2017","unstructured":"Martin Arjovsky, Soumith Chintala, and L\u00e9on Bottou. 2017. Wasserstein gan. arXiv 2017. arXiv preprint arXiv:1701.07875 30, 4 (2017)."},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1109\/WACV.2016.7477553"},{"key":"e_1_3_2_1_4_1","volume-title":"Text2gestures: A transformer-based network for generating emotive body gestures for virtual agents. In 2021 IEEE virtual reality and 3D user interfaces (VR)","author":"Bhattacharya Uttaran","unstructured":"Uttaran Bhattacharya, Nicholas Rewkowski, Abhishek Banerjee, Pooja Guhan, Aniket Bera, and Dinesh Manocha. 2021. Text2gestures: A transformer-based network for generating emotive body gestures for virtual agents. In 2021 IEEE virtual reality and 3D user interfaces (VR). IEEE, 1\u201310."},{"key":"e_1_3_2_1_5_1","volume-title":"Studying social interactions through immersive virtual environment technology: virtues, pitfalls, and future challenges. 
Frontiers in psychology 6","author":"Bombari Dario","year":"2015","unstructured":"Dario Bombari, Marianne Schmid\u00a0Mast, Elena Canadas, and Manuel Bachmann. 2015. Studying social interactions through immersive virtual environment technology: virtues, pitfalls, and future challenges. Frontiers in psychology 6 (2015), 869."},{"key":"e_1_3_2_1_6_1","volume-title":"Rigid head motion in expressive speech animation: Analysis and synthesis","author":"Busso Carlos","year":"2007","unstructured":"Carlos Busso, Zhigang Deng, Michael Grimm, Ulrich Neumann, and Shrikanth Narayanan. 2007. Rigid head motion in expressive speech animation: Analysis and synthesis. IEEE transactions on audio, speech, and language processing 15, 3 (2007), 1075\u20131086."},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"crossref","first-page":"413","DOI":"10.1109\/TVCG.2013.249","article-title":"Facewarehouse: A 3d facial expression database for visual computing","volume":"20","author":"Cao Chen","year":"2013","unstructured":"Chen Cao, Yanlin Weng, Shun Zhou, Yiying Tong, and Kun Zhou. 2013. Facewarehouse: A 3d facial expression database for visual computing. IEEE Transactions on Visualization and Computer Graphics 20, 3 (2013), 413\u2013425.","journal-title":"IEEE Transactions on Visualization and Computer Graphics"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1145\/332051.332075"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1145\/192161.192272"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.5555\/2615731.2615857"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1145\/3536220.3558806"},{"key":"e_1_3_2_1_12_1","volume-title":"DAE-Talker: High Fidelity Speech-Driven Talking Face Generation with Diffusion Autoencoder. arXiv preprint arXiv:2303.17550","author":"Du Chenpng","year":"2023","unstructured":"Chenpng Du, Qi Chen, Tianyu He, Xu Tan, Xie Chen, Kai Yu, Sheng Zhao, and Jiang Bian. 2023. DAE-Talker: High Fidelity Speech-Driven Talking Face Generation with Diffusion Autoencoder. arXiv preprint arXiv:2303.17550 (2023)."},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00054"},{"key":"e_1_3_2_1_14_1","volume-title":"Salt Lake City","author":"Ekman Paul","year":"2002","unstructured":"Paul Ekman. 2002. Facial action coding system (FACS). A Human Face, Salt Lake City (2002)."},{"key":"e_1_3_2_1_15_1","volume-title":"Facial action coding system. Environmental Psychology & Nonverbal Behavior","author":"Ekman Paul","year":"1978","unstructured":"Paul Ekman and Wallace\u00a0V Friesen. 1978. Facial action coding system. Environmental Psychology & Nonverbal Behavior (1978)."},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1145\/1873951.1874246"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.3389\/frai.2023.1142997"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1145\/3359566.3360053"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1002\/cav.2016"},{"key":"e_1_3_2_1_20_1","volume-title":"Generative adversarial nets. Advances in neural information processing systems 27","author":"Goodfellow Ian","year":"2014","unstructured":"Ian Goodfellow, Jean Pouget-Abadie, Mehdi Mirza, Bing Xu, David Warde-Farley, Sherjil Ozair, Aaron Courville, and Yoshua Bengio. 2014. Generative adversarial nets. Advances in neural information processing systems 27 (2014)."},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"crossref","unstructured":"David Greenwood Stephen Laycock and Iain Matthews. 
2017. Predicting head pose from speech with a conditional variational autoencoder. ISCA.","DOI":"10.21437\/Interspeech.2017-894"},{"key":"e_1_3_2_1_22_1","volume-title":"Improved training of wasserstein gans. Advances in neural information processing systems 30","author":"Gulrajani Ishaan","year":"2017","unstructured":"Ishaan Gulrajani, Faruk Ahmed, Martin Arjovsky, Vincent Dumoulin, and Aaron\u00a0C Courville. 2017. Improved training of wasserstein gans. Advances in neural information processing systems 30 (2017)."},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1145\/3528233.3530750"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.5244\/C.31.119"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1145\/3472306.3478335"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1145\/3267851.3267878"},{"key":"e_1_3_2_1_27_1","volume-title":"Proceedings of Linguistics and Phonetics: Item Order in Language and Speech (LP 98","author":"Honda Kiyoshi","year":"2000","unstructured":"Kiyoshi Honda. 2000. Interactions between vowel articulation and F0 control. In Proceedings of Linguistics and Phonetics: Item Order in Language and Speech (LP 98 (2000)."},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1145\/3383652.3423911"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1145\/3462244.3479957"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1145\/3072959.3073658"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1017\/CBO9780511807572"},{"key":"e_1_3_2_1_32_1","volume-title":"Lecture video synthesis from audio. arXiv preprint arXiv:1907.02253","author":"Kim Byung-Hak","year":"2019","unstructured":"Byung-Hak Kim and Varun Ganapathi. 2019. Lumi\u00e8reNet: Lecture video synthesis from audio. arXiv preprint arXiv:1907.02253 (2019)."},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1109\/CA.2002.1017547"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1145\/3308532.3329472"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1080\/10447318.2021.1883883"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1145\/3382507.3418815"},{"key":"e_1_3_2_1_37_1","volume-title":"Multimodal analysis of the predictability of hand-gesture properties. arXiv preprint arXiv:2108.05762","author":"Kucherenko Taras","year":"2021","unstructured":"Taras Kucherenko, Rajmund Nagy, Michael Neff, Hedvig Kjellstr\u00f6m, and Gustav\u00a0Eje Henter. 2021. Multimodal analysis of the predictability of hand-gesture properties. arXiv preprint arXiv:2108.05762 (2021)."},{"key":"e_1_3_2_1_38_1","volume-title":"Evaluating gesture-generation in a large-scale open challenge: The GENEA Challenge","author":"Kucherenko Taras","year":"2022","unstructured":"Taras Kucherenko, Pieter Wolfert, Youngwoo Yoon, Carla Viegas, Teodor Nikolov, Mihail Tsakov, and Gustav\u00a0Eje Henter. 2023. Evaluating gesture-generation in a large-scale open challenge: The GENEA Challenge 2022. arXiv preprint arXiv:2303.08737 (2023)."},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"crossref","unstructured":"Sergey Levine Christian Theobalt and Vladlen Koltun. 2009. Real-time prosody-driven synthesis of body language. In ACM SIGGRAPH Asia 2009 papers. 
1\u201310.","DOI":"10.1145\/1661412.1618518"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01110"},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1145\/3130800.3130813"},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1109\/TASL.2012.2201476"},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1145\/2485895.2485900"},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1017\/CBO9780511620850"},{"key":"e_1_3_2_1_45_1","volume-title":"Unrolled generative adversarial networks. arXiv preprint arXiv:1611.02163","author":"Metz Luke","year":"2016","unstructured":"Luke Metz, Ben Poole, David Pfau, and Jascha Sohl-Dickstein. 2016. Unrolled generative adversarial networks. arXiv preprint arXiv:1611.02163 (2016)."},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.1111\/j.0963-7214.2004.01502010.x"},{"key":"e_1_3_2_1_47_1","volume-title":"Capturing Behavior in Small Doses: A Review of Comparative Research in Evaluating Thin Slices for Behavioral Measurement. Frontiers in psychology 12","author":"Murphy A","year":"2021","unstructured":"Nora\u00a0A Murphy and Judith\u00a0A Hall. 2021. Capturing Behavior in Small Doses: A Review of Comparative Research in Evaluating Thin Slices for Behavioral Measurement. Frontiers in psychology 12 (2021), 667326."},{"key":"e_1_3_2_1_48_1","volume-title":"Context-Aware Body Gesture Generation for Social Robots. In ICRA 2022 Workshop on Prediction and Anticipation Reasoning for Human-Robot Interaction.","author":"Viet\u00a0Tuyen Nguyen Tan","year":"2022","unstructured":"Tan Viet\u00a0Tuyen Nguyen and Oya Celiktutan. 2022. Context-Aware Body Gesture Generation for Social Robots. In ICRA 2022 Workshop on Prediction and Anticipation Reasoning for Human-Robot Interaction."},{"key":"e_1_3_2_1_49_1","volume-title":"A Comprehensive Review of Data-Driven Co-Speech Gesture Generation. arXiv preprint arXiv:2301.05339","author":"Nyatsanga Simbarashe","year":"2023","unstructured":"Simbarashe Nyatsanga, Taras Kucherenko, Chaitanya Ahuja, Gustav\u00a0Eje Henter, and Michael Neff. 2023. A Comprehensive Review of Data-Driven Co-Speech Gesture Generation. arXiv preprint arXiv:2301.05339 (2023)."},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"publisher","DOI":"10.1007\/s12193-018-0289-8"},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"publisher","DOI":"10.3389\/fcomp.2023.1081586"},{"key":"e_1_3_2_1_52_1","doi-asserted-by":"crossref","unstructured":"Pascal Paysan Reinhard Knothe Brian Amberg Sami Romdhani and Thomas Vetter. 2009. A 3D face model for pose and illumination invariant face recognition. In 2009 sixth IEEE international conference on advanced video and signal based surveillance. Ieee 296\u2013301.","DOI":"10.1109\/AVSS.2009.58"},{"key":"e_1_3_2_1_53_1","volume-title":"Proceedings of the 2015 International Conference on Autonomous Agents and Multiagent Systems. 5\u20135.","author":"Pelachaud Catherine","year":"2015","unstructured":"Catherine Pelachaud. 2015. Greta: an interactive expressive embodied conversational agent. In Proceedings of the 2015 International Conference on Autonomous Agents and Multiagent Systems. 5\u20135."},{"key":"e_1_3_2_1_54_1","doi-asserted-by":"publisher","DOI":"10.1145\/3242969.3243017"},{"key":"e_1_3_2_1_55_1","volume-title":"Corpus \u00abCHEESE!\u00bb. TIPA. Travaux interdisciplinaires sur la parole et le langage38","author":"Priego-Valverde B\u00e9atrice","year":"2022","unstructured":"B\u00e9atrice Priego-Valverde, Brigitte Bigi, and Mary Amoyal. 
2022. CHEESE!: Corpus \u00abCHEESE!\u00bb. TIPA. Travaux interdisciplinaires sur la parole et le langage38 (2022)."},{"key":"e_1_3_2_1_56_1","volume-title":"Automating the production of communicative gestures in embodied characters. Frontiers in psychology 9","author":"Ravenet Brian","year":"2018","unstructured":"Brian Ravenet, Catherine Pelachaud, Chlo\u00e9 Clavel, and Stacy Marsella. 2018. Automating the production of communicative gestures in embodied characters. Frontiers in psychology 9 (2018), 1144."},{"key":"e_1_3_2_1_57_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-24574-4_28"},{"key":"e_1_3_2_1_58_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2018.8461967"},{"key":"e_1_3_2_1_59_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.specom.2019.04.005"},{"key":"e_1_3_2_1_60_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2007.70797"},{"key":"e_1_3_2_1_61_1","doi-asserted-by":"publisher","DOI":"10.5334\/jors.187"},{"key":"e_1_3_2_1_62_1","doi-asserted-by":"publisher","DOI":"10.1145\/3125739.3132594"},{"key":"e_1_3_2_1_63_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.chb.2010.10.018"},{"key":"e_1_3_2_1_64_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICME.2006.262464"},{"key":"e_1_3_2_1_65_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-019-01251-8"},{"key":"e_1_3_2_1_66_1","doi-asserted-by":"publisher","DOI":"10.1145\/3462244.3479889"},{"key":"e_1_3_2_1_67_1","doi-asserted-by":"publisher","DOI":"10.1109\/THMS.2022.3149173"},{"key":"e_1_3_2_1_68_1","doi-asserted-by":"publisher","DOI":"10.3390\/electronics10030228"},{"key":"e_1_3_2_1_69_1","volume-title":"Computer Graphics Forum, Vol.\u00a039","author":"Yang Yanzhe","unstructured":"Yanzhe Yang, Jimei Yang, and Jessica Hodgins. 2020. Statistics-based Motion Synthesis for Social Conversations. In Computer Graphics Forum, Vol.\u00a039. Wiley Online Library, 201\u2013212."},{"key":"e_1_3_2_1_70_1","volume-title":"5th Seminar on Speech Production: Models and Data.","author":"Yehia Hani","unstructured":"Hani Yehia, Takaaki Kuratate, and Eric Vatikiotis-Bateson. 2000. Facial animation and head motion driven by speech acoustics. In 5th Seminar on Speech Production: Models and Data. Kloster Seeon, Germany, 265\u2013268."},{"key":"e_1_3_2_1_71_1","doi-asserted-by":"publisher","DOI":"10.1145\/3414685.3417838"},{"key":"e_1_3_2_1_72_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2019.8793720"},{"key":"e_1_3_2_1_73_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-27077-2_18"},{"key":"e_1_3_2_1_74_1","doi-asserted-by":"publisher","DOI":"10.1145\/3536221.3558063"},{"key":"e_1_3_2_1_75_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.33019299"},{"key":"e_1_3_2_1_76_1","volume-title":"arXiv preprint arXiv:2205.15573","author":"Zhuang Wenlin","year":"2022","unstructured":"Wenlin Zhuang, Jinwei Qi, Peng Zhang, Bang Zhang, and Ping Tan. 2022. Text\/speech-driven full-body animation. arXiv preprint arXiv:2205.15573 (2022)."},{"key":"e_1_3_2_1_77_1","volume-title":"Multimodal Signals: Cognitive and Algorithmic Issues: COST Action 2102 and euCognition International School Vietri sul Mare, Italy","author":"Zoric Goranka","year":"2008","unstructured":"Goranka Zoric, Karlo Smid, and Igor\u00a0S Pandzic. 2009. Towards facial gestures generation by speech signal analysis using huge architecture. In Multimodal Signals: Cognitive and Algorithmic Issues: COST Action 2102 and euCognition International School Vietri sul Mare, Italy, April 21-26, 2008 Revised Selected and Invited Papers. 
Springer, 112\u2013120."}],"event":{"name":"ICMI '23: INTERNATIONAL CONFERENCE ON MULTIMODAL INTERACTION","location":"Paris, France","acronym":"ICMI '23","sponsor":["SIGCHI ACM Special Interest Group on Computer-Human Interaction"]},"container-title":["International Conference on Multimodal Interaction"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3610661.3616547","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3610661.3616547","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T19:31:16Z","timestamp":1755891076000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3610661.3616547"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,10,9]]},"references-count":77,"alternative-id":["10.1145\/3610661.3616547","10.1145\/3610661"],"URL":"https:\/\/doi.org\/10.1145\/3610661.3616547","relation":{},"subject":[],"published":{"date-parts":[[2023,10,9]]},"assertion":[{"value":"2023-10-09","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}