{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,26]],"date-time":"2025-03-26T11:02:59Z","timestamp":1742986979553,"version":"3.40.3"},"publisher-location":"Cham","reference-count":44,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783031358937"},{"type":"electronic","value":"9783031358944"}],"license":[{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023]]},"DOI":"10.1007\/978-3-031-35894-4_33","type":"book-chapter","created":{"date-parts":[[2023,7,8]],"date-time":"2023-07-08T23:05:33Z","timestamp":1688857533000},"page":"435-452","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["I-Brow: Hierarchical and\u00a0Multimodal Transformer Model for\u00a0Eyebrows Animation Synthesis"],"prefix":"10.1007","author":[{"given":"Mireille","family":"Fares","sequence":"first","affiliation":[]},{"given":"Catherine","family":"Pelachaud","sequence":"additional","affiliation":[]},{"given":"Nicolas","family":"Obin","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2023,7,9]]},"reference":[{"key":"33_CR1","doi-asserted-by":"crossref","unstructured":"Ahuja, C., Lee, D.W., Ishii, R., Morency, L.P.: No gestures left behind: Learning relationships between spoken language and freeform gestures. In: Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing: Findings, pp. 1884\u20131895 (2020)","DOI":"10.18653\/v1\/2020.findings-emnlp.170"},{"key":"33_CR2","unstructured":"Bahdanau, D., Cho, K., Bengio, Y.: Neural machine translation by jointly learning to align and translate. arXiv preprint arXiv:1409.0473 (2014)"},{"key":"33_CR3","unstructured":"Baken, R.J., Orlikoff, R.F.: Clinical measurement of speech and voice. Cengage Learning (2000)"},{"issue":"4","key":"33_CR4","doi-asserted-by":"publisher","first-page":"1283","DOI":"10.1145\/1095878.1095881","volume":"24","author":"Y Cao","year":"2005","unstructured":"Cao, Y., Tien, W.C., Faloutsos, P., Pighin, F.: Expressive speech-driven facial animation. ACM Trans. Graph. (TOG) 24(4), 1283\u20131302 (2005)","journal-title":"ACM Trans. Graph. (TOG)"},{"key":"33_CR5","doi-asserted-by":"crossref","unstructured":"Cav\u00e9, C., Gua\u00eftella, I., Bertrand, R., Santi, S., Harlay, F., Espesser, R.: About the relationship between eyebrow movements and fo variations. In: Proceeding of Fourth International Conference on Spoken Language Processing. ICSLP\u201996. vol. 4, pp. 2175\u20132178. IEEE (1996)","DOI":"10.21437\/ICSLP.1996-551"},{"issue":"1\u20134","key":"33_CR6","doi-asserted-by":"publisher","first-page":"163","DOI":"10.1080\/08351819109389361","volume":"25","author":"N Chovil","year":"1991","unstructured":"Chovil, N.: Discourse-oriented facial displays in conversation. Res. Lang. Social Interact. 25(1\u20134), 163\u2013194 (1991)","journal-title":"Res. Lang. Social Interact."},{"key":"33_CR7","doi-asserted-by":"crossref","unstructured":"Cudeiro, D., Bolkart, T., Laidlaw, C., Ranjan, A., Black, M.J.: Capture, learning, and synthesis of 3d speaking styles. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 10101\u201310111 (2019)","DOI":"10.1109\/CVPR.2019.01034"},{"key":"33_CR8","series-title":"Lecture Notes in Computer Science (Lecture Notes in Artificial Intelligence)","doi-asserted-by":"publisher","first-page":"217","DOI":"10.1007\/978-3-642-40415-3_19","volume-title":"Intelligent Virtual Agents","author":"Yu Ding","year":"2013","unstructured":"Ding, Yu., Pelachaud, C., Arti\u00e8res, T.: Modeling multimodal behaviors from speech prosody. In: Aylett, R., Krenn, B., Pelachaud, C., Shimodaira, H. (eds.) IVA 2013. LNCS (LNAI), vol. 8108, pp. 217\u2013228. Springer, Heidelberg (2013). https:\/\/doi.org\/10.1007\/978-3-642-40415-3_19"},{"key":"33_CR9","doi-asserted-by":"crossref","unstructured":"Duarte, A.C., et al.: Wav2pix: Speech-conditioned face generation using generative adversarial networks. In: ICASSP, pp. 8633\u20138637 (2019)","DOI":"10.1109\/ICASSP.2019.8682970"},{"key":"33_CR10","volume-title":"What the face reveals: Basic and applied studies of spontaneous expression using the Facial Action Coding System (FACS)","author":"R Ekman","year":"1997","unstructured":"Ekman, R.: What the face reveals: Basic and applied studies of spontaneous expression using the Facial Action Coding System (FACS). Oxford University Press, USA (1997)"},{"key":"33_CR11","doi-asserted-by":"crossref","unstructured":"Fares, M.: Towards multimodal human-like characteristics and expressive visual prosody in virtual agents. In: Proceedings of the 2020 International Conference on Multimodal Interaction, pp. 743\u2013747 (2020)","DOI":"10.1145\/3382507.3421155"},{"key":"33_CR12","doi-asserted-by":"publisher","first-page":"117","DOI":"10.1016\/j.cag.2020.04.007","volume":"89","author":"Y Ferstl","year":"2020","unstructured":"Ferstl, Y., Neff, M., McDonnell, R.: Adversarial gesture generation with realistic gesture phasing. Compu. Graph. 89, 117\u2013130 (2020)","journal-title":"Compu. Graph."},{"key":"33_CR13","doi-asserted-by":"crossref","unstructured":"Garrido, P., et al.: Vdub: Modifying face video of actors for plausible visual alignment to a dubbed audio track. In: Computer graphics forum. vol. 34, pp. 193\u2013204. Wiley Online Library (2015)","DOI":"10.1111\/cgf.12552"},{"key":"33_CR14","doi-asserted-by":"crossref","unstructured":"Ginosar, S., Bar, A., Kohavi, G., Chan, C., Owens, A., Malik, J.: Learning individual styles of conversational gesture. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR) (June 2019)","DOI":"10.1109\/CVPR.2019.00361"},{"key":"33_CR15","unstructured":"Guo, Y.: A survey on methods and theories of quantized neural networks. arXiv preprint arXiv:1808.04752 (2018)"},{"key":"33_CR16","series-title":"Lecture Notes in Computer Science (Lecture Notes in Artificial Intelligence)","doi-asserted-by":"publisher","first-page":"198","DOI":"10.1007\/978-3-319-47665-0_18","volume-title":"Intelligent Virtual Agents","author":"K Haag","year":"2016","unstructured":"Haag, K., Shimodaira, H.: Bidirectional LSTM networks employing stacked bottleneck features for expressive speech-driven head motion synthesis. In: Traum, D., Swartout, W., Khooshabeh, P., Kopp, S., Scherer, S., Leuski, A. (eds.) IVA 2016. LNCS (LNAI), vol. 10011, pp. 198\u2013207. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-47665-0_18"},{"key":"33_CR17","doi-asserted-by":"crossref","unstructured":"Hofer, G., Shimodaira, H.: Automatic head motion prediction from speech data (2007)","DOI":"10.21437\/Interspeech.2007-299"},{"issue":"11","key":"33_CR18","doi-asserted-by":"publisher","first-page":"1767","DOI":"10.1007\/s11263-019-01150-y","volume":"127","author":"A Jamaludin","year":"2019","unstructured":"Jamaludin, A., Chung, J.S., Zisserman, A.: You said that?: synthesising talking faces from audio. Int. J. Comput. Vision 127(11), 1767\u20131779 (2019)","journal-title":"Int. J. Comput. Vision"},{"issue":"4","key":"33_CR19","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3072959.3073658","volume":"36","author":"T Karras","year":"2017","unstructured":"Karras, T., Aila, T., Laine, S., Herva, A., Lehtinen, J.: Audio-driven facial animation by joint end-to-end learning of pose and emotion. ACM Trans. Graph.(TOG) 36(4), 1\u201312 (2017)","journal-title":"ACM Trans. Graph.(TOG)"},{"key":"33_CR20","unstructured":"Knapp, M.L., Hall, J.A., Horgan, T.G.: Nonverbal communication in human interaction. Cengage Learning (2013)"},{"key":"33_CR21","doi-asserted-by":"crossref","unstructured":"Kucherenko, T., et al.: Gesticulator: A framework for semantically-aware speech-driven gesture generation. In: Proceedings of the 2020 International Conference on Multimodal Interaction, pp. 242\u2013250 (2020)","DOI":"10.1145\/3382507.3418815"},{"key":"33_CR22","doi-asserted-by":"publisher","DOI":"10.1016\/j.displa.2022.102260","volume":"74","author":"X Li","year":"2022","unstructured":"Li, X., Zhang, J., Liu, Y.: Speech driven facial animation generation based on gan. Displays 74, 102260 (2022)","journal-title":"Displays"},{"key":"33_CR23","doi-asserted-by":"crossref","unstructured":"Lu, J., Shimodaira, H.: Prediction of head motion from speech waveforms with a canonical-correlation-constrained autoencoder. arXiv preprint arXiv:2002.01869 (2020)","DOI":"10.21437\/Interspeech.2020-1218"},{"issue":"8","key":"33_CR24","doi-asserted-by":"publisher","first-page":"2329","DOI":"10.1109\/TASL.2012.2201476","volume":"20","author":"S Mariooryad","year":"2012","unstructured":"Mariooryad, S., Busso, C.: Generating human-like behaviors using joint, speech-driven models for conversational agents. IEEE Trans. Audio Speech Lang. Process. 20(8), 2329\u20132340 (2012)","journal-title":"IEEE Trans. Audio Speech Lang. Process."},{"key":"33_CR25","doi-asserted-by":"crossref","unstructured":"Monzo, C., Iriondo, I., Socor\u00f3, J.C.: Voice quality modelling for expressive speech synthesis. The Scientific World Journal 2014 (2014)","DOI":"10.1155\/2014\/627189"},{"key":"33_CR26","doi-asserted-by":"crossref","unstructured":"Oh, T.H., Dekel, T., Kim, C., Mosseri, I., Freeman, W.T., Rubinstein, M., Matusik, W.: Speech2face: Learning the face behind a voice. In: Proceedings of the IEEE\/CVF Conference On Computer Vision and Pattern Recognition, pp. 7539\u20137548 (2019)","DOI":"10.1109\/CVPR.2019.00772"},{"issue":"11","key":"33_CR27","doi-asserted-by":"publisher","first-page":"487","DOI":"10.3390\/e19110487","volume":"19","author":"WQ Ong","year":"2017","unstructured":"Ong, W.Q., Tan, A.W.C., Vengadasalam, V.V., Tan, C.H., Ooi, T.H.: Real-time robust voice activity detection using the upper envelope weighted entropy measure and the dual-rate adaptive nonlinear filter. Entropy 19(11), 487 (2017)","journal-title":"Entropy"},{"key":"33_CR28","doi-asserted-by":"crossref","unstructured":"Palan, S., Schitter, C.: Prolific. ac-a subject pool for online experiments. J. Behav. Experiment. Finance 17, 22\u201327 (2018)","DOI":"10.1016\/j.jbef.2017.12.004"},{"key":"33_CR29","doi-asserted-by":"publisher","first-page":"90","DOI":"10.1016\/j.specom.2019.04.005","volume":"110","author":"N Sadoughi","year":"2019","unstructured":"Sadoughi, N., Busso, C.: Speech-driven animation with meaningful behaviors. Speech Commun. 110, 90\u2013100 (2019)","journal-title":"Speech Commun."},{"key":"33_CR30","doi-asserted-by":"crossref","unstructured":"Salem, M., Rohlfing, K., Kopp, S., Joublin, F.: A friendly gesture: Investigating the effect of multimodal robot behavior in human-robot interaction. In: 2011 Ro-Man, pp. 247\u2013252. IEEE (2011)","DOI":"10.1109\/ROMAN.2011.6005285"},{"key":"33_CR31","doi-asserted-by":"crossref","unstructured":"Song, Y., Zhu, J., Li, D., Wang, X., Qi, H.: Talking face generation by conditional recurrent adversarial network. arXiv preprint arXiv:1804.04786 (2018)","DOI":"10.24963\/ijcai.2019\/129"},{"issue":"4","key":"33_CR32","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3072959.3073640","volume":"36","author":"S Suwajanakorn","year":"2017","unstructured":"Suwajanakorn, S., Seitz, S.M., Kemelmacher-Shlizerman, I.: Synthesizing obama: learning lip sync from audio. ACM Trans. Graph. (ToG) 36(4), 1\u201313 (2017)","journal-title":"ACM Trans. Graph. (ToG)"},{"issue":"4","key":"33_CR33","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3072959.3073699","volume":"36","author":"S Taylor","year":"2017","unstructured":"Taylor, S., Kim, T., Yue, Y., Mahler, M., Krahe, J., Rodriguez, A.G., Hodgins, J., Matthews, I.: A deep learning approach for generalized speech animation. ACM Trans. Graph. (TOG) 36(4), 1\u201311 (2017)","journal-title":"ACM Trans. Graph. (TOG)"},{"key":"33_CR34","unstructured":"Titze, I.: Principles of Voice Production. Prentice-Hall Inc. (1994)"},{"key":"33_CR35","unstructured":"Vaswani, A., et al.: Attention is all you need. arXiv preprint arXiv:1706.03762 (2017)"},{"key":"33_CR36","doi-asserted-by":"crossref","unstructured":"Vougioukas, K., Petridis, S., Pantic, M.: Realistic speech-driven facial animation with gans. Int. J. Comput. Vision, pp. 1\u201316 (2019)","DOI":"10.1007\/s11263-019-01251-8"},{"key":"33_CR37","unstructured":"Wan, V., et al.: Photo-realistic expressive text to talking head synthesis. In: INTERSPEECH, pp. 2667\u20132669 (2013)"},{"key":"33_CR38","doi-asserted-by":"crossref","unstructured":"Wolfert, P., Robinson, N., Belpaeme, T.: A review of evaluation practices of gesture generation in embodied conversational agents. arXiv preprint arXiv:2101.03769 (2021)","DOI":"10.1109\/THMS.2022.3149173"},{"issue":"6","key":"33_CR39","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3414685.3417838","volume":"39","author":"Y Yoon","year":"2020","unstructured":"Yoon, Y., et al.: Speech gesture generation from the trimodal context of text, audio, and speaker identity. ACM Trans. Graph.(TOG) 39(6), 1\u201316 (2020)","journal-title":"ACM Trans. Graph.(TOG)"},{"key":"33_CR40","doi-asserted-by":"publisher","first-page":"101","DOI":"10.1016\/j.neucom.2021.07.072","volume":"462","author":"Y Zhang","year":"2021","unstructured":"Zhang, Y., Wang, J., Zhang, X.: Conciseness is better: recurrent attention lstm model for document-level sentiment analysis. Neurocomputing 462, 101\u2013112 (2021)","journal-title":"Neurocomputing"},{"key":"33_CR41","doi-asserted-by":"crossref","unstructured":"Zhou, H., Liu, Y., Liu, Z., Luo, P., Wang, X.: Talking face generation by adversarially disentangled audio-visual representation. In: Proceedings of the AAAI conference on artificial intelligence. vol. 33, pp. 9299\u20139306 (2019)","DOI":"10.1609\/aaai.v33i01.33019299"},{"issue":"1","key":"33_CR42","doi-asserted-by":"publisher","first-page":"165","DOI":"10.1007\/s11042-010-0526-y","volume":"54","author":"G Zoric","year":"2011","unstructured":"Zoric, G., Forchheimer, R., Pandzic, I.S.: On creating multimodal virtual humans-real time speech driven facial gesturing. Multimedia Tools Appl. 54(1), 165\u2013179 (2011)","journal-title":"Multimedia Tools Appl."},{"key":"33_CR43","doi-asserted-by":"crossref","unstructured":"Zoric, G., Smid, K., Pandzic, I.S.: Automated gesturing for embodied animated agent: Speech-driven and text-driven approaches. J. Multimedia 1(1)","DOI":"10.4304\/jmm.1.1.62-68"},{"key":"33_CR44","doi-asserted-by":"crossref","unstructured":"Zoric, G., Smid, K., Pandzic, I.S.: Facial gestures: taxonomy and application of non-verbal, non-emotional facial displays for embodied conversational agents. Conversational Informatics: An Engineering Approach, pp. 161\u2013182 (2007)","DOI":"10.1002\/9780470512470.ch9"}],"container-title":["Lecture Notes in Computer Science","Artificial Intelligence in HCI"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-35894-4_33","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,9,3]],"date-time":"2023-09-03T12:04:38Z","timestamp":1693742678000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-35894-4_33"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023]]},"ISBN":["9783031358937","9783031358944"],"references-count":44,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-35894-4_33","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2023]]},"assertion":[{"value":"9 July 2023","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"HCII","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Human-Computer Interaction","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Copenhagen","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Denmark","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2023","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"23 July 2023","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"28 July 2023","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"25","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"hcii2023","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/2023.hci.international\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Single-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"CMS","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"7472","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"1578","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"396","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"21% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"2","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"2","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"No","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}