{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,19]],"date-time":"2026-06-19T01:41:39Z","timestamp":1781833299223,"version":"3.54.5"},"publisher-location":"New York, NY, USA","reference-count":46,"publisher":"ACM","license":[{"start":{"date-parts":[[2019,10,14]],"date-time":"2019-10-14T00:00:00Z","timestamp":1571011200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/100000001","name":"National Science Foundation","doi-asserted-by":"publisher","award":["1722822"],"award-info":[{"award-number":["1722822"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2019,10,14]]},"DOI":"10.1145\/3340555.3353725","type":"proceedings-article","created":{"date-parts":[[2019,10,17]],"date-time":"2019-10-17T12:49:48Z","timestamp":1571316588000},"page":"74-84","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":52,"title":["To React or not to React: End-to-End Visual Pose Forecasting for Personalized Avatar during Dyadic Conversations"],"prefix":"10.1145","author":[{"given":"Chaitanya","family":"Ahuja","sequence":"first","affiliation":[{"name":"Carnegie Mellon University, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Shugao","family":"Ma","sequence":"additional","affiliation":[{"name":"Facebook Reality Labs, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Louis-Philippe","family":"Morency","sequence":"additional","affiliation":[{"name":"Carnegie Mellon University, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Yaser","family":"Sheikh","sequence":"additional","affiliation":[{"name":"Facebook Reality Labs, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2019,10,14]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1145\/2897824.2925893"},{"key":"e_1_3_2_1_2_1","unstructured":"Chaitanya Ahuja and Louis-Philippe Morency. 2018. Lattice Recurrent Unit: Improving Convergence and Statistical Efficiency for Sequence Modeling. In AAAI-18. 4996\u20135003. https:\/\/www.aaai.org\/ocs\/index.php\/AAAI\/AAAI18\/paper\/view\/17394  Chaitanya Ahuja and Louis-Philippe Morency. 2018. Lattice Recurrent Unit: Improving Convergence and Statistical Efficiency for Sequence Modeling. In AAAI-18. 4996\u20135003. https:\/\/www.aaai.org\/ocs\/index.php\/AAAI\/AAAI18\/paper\/view\/17394"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2014.471"},{"key":"e_1_3_2_1_4_1","unstructured":"Shaojie Bai J\u00a0Zico Kolter and Vladlen Koltun. 2018. An empirical evaluation of generic convolutional and recurrent networks for sequence modeling. arXiv preprint arXiv:1803.01271(2018).  Shaojie Bai J\u00a0Zico Kolter and Vladlen Koltun. 2018. An empirical evaluation of generic convolutional and recurrent networks for sequence modeling. arXiv preprint arXiv:1803.01271(2018)."},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1162\/pres.15.4.359"},{"key":"e_1_3_2_1_6_1","volume-title":"Multimodal machine learning: A survey and taxonomy","author":"Baltru\u0161aitis Tadas","year":"2018","unstructured":"Tadas Baltru\u0161aitis , Chaitanya Ahuja , and Louis-Philippe Morency . 2018. Multimodal machine learning: A survey and taxonomy . IEEE Transactions on Pattern Analysis and Machine Intelligence ( 2018 ). Tadas Baltru\u0161aitis, Chaitanya Ahuja, and Louis-Philippe Morency. 2018. Multimodal machine learning: A survey and taxonomy. IEEE Transactions on Pattern Analysis and Machine Intelligence (2018)."},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1145\/311535.311537"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1080\/088395199117360"},{"key":"e_1_3_2_1_9_1","volume-title":"Life-Like Characters","author":"Cassell Justine","unstructured":"Justine Cassell , Hannes\u00a0H\u00f6gni Vilhj\u00e1lmsson , and Timothy Bickmore . 2004. Beat: the behavior expression animation toolkit . In Life-Like Characters . Springer , 163\u2013185. Justine Cassell, Hannes\u00a0H\u00f6gni Vilhj\u00e1lmsson, and Timothy Bickmore. 2004. Beat: the behavior expression animation toolkit. In Life-Like Characters. Springer, 163\u2013185."},{"key":"e_1_3_2_1_10_1","unstructured":"Yu-Wei Chao Jimei Yang Brian\u00a0L Price Scott Cohen and Jia Deng. [n. d.]. Forecasting Human Dynamics from Static Images.  Yu-Wei Chao Jimei Yang Brian\u00a0L Price Scott Cohen and Jia Deng. [n. d.]. Forecasting Human Dynamics from Static Images."},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-23974-8_14"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-21996-7_17"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00743"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1145\/2601097.2601119"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"crossref","unstructured":"Allen\u00a0T Dittmann. 1972. The body movement-speech rhythm relationship as a cue to speech encoding. Studies in dyadic communication(1972) 135\u2013152.  Allen\u00a0T Dittmann. 1972. The body movement-speech rhythm relationship as a cue to speech encoding. Studies in dyadic communication(1972) 135\u2013152.","DOI":"10.1016\/B978-0-08-015867-9.50011-3"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1109\/TAFFC.2015.2457417"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1145\/2502081.2502224"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"crossref","unstructured":"Tony Ezzat Gadi Geiger and Tomaso Poggio. 2002. Trainable videorealistic speech animation. Vol.\u00a021. ACM.  Tony Ezzat Gadi Geiger and Tomaso Poggio. 2002. Trainable videorealistic speech animation. Vol.\u00a021. ACM.","DOI":"10.1145\/566654.566594"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.494"},{"key":"e_1_3_2_1_20_1","unstructured":"Ruohan Gao Rogerio Feris and Kristen Grauman. 2018. Learning to separate object sounds by watching unlabeled video. arXiv preprint arXiv:1804.01665(2018).  Ruohan Gao Rogerio Feris and Kristen Grauman. 2018. Learning to separate object sounds by watching unlabeled video. arXiv preprint arXiv:1804.01665(2018)."},{"key":"e_1_3_2_1_21_1","volume-title":"Supervised sequence labelling with recurrent neural networks","author":"Graves Alex","unstructured":"Alex Graves . 2012. Supervised sequence labelling . In Supervised sequence labelling with recurrent neural networks . Springer , 5\u201313. Alex Graves. 2012. Supervised sequence labelling. In Supervised sequence labelling with recurrent neural networks. Springer, 5\u201313."},{"key":"e_1_3_2_1_22_1","volume-title":"A Recurrent Variational Autoencoder for Human Motion Synthesis. BMVC17","author":"Habibie Ikhsanul","year":"2017","unstructured":"Ikhsanul Habibie , Daniel Holden , Jonathan Schwarz , Joe Yearsley , and Taku Komura . 2017. A Recurrent Variational Autoencoder for Human Motion Synthesis. BMVC17 ( 2017 ). Ikhsanul Habibie, Daniel Holden, Jonathan Schwarz, Joe Yearsley, and Taku Komura. 2017. A Recurrent Variational Autoencoder for Human Motion Synthesis. BMVC17 (2017)."},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1177\/002383098402700404"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1145\/3197517.3201399"},{"key":"e_1_3_2_1_25_1","volume-title":"Long short-term memory. Neural computation 9, 8","author":"Hochreiter Sepp","year":"1997","unstructured":"Sepp Hochreiter and J\u00fcrgen Schmidhuber . 1997. Long short-term memory. Neural computation 9, 8 ( 1997 ), 1735\u20131780. Sepp Hochreiter and J\u00fcrgen Schmidhuber. 1997. Long short-term memory. Neural computation 9, 8 (1997), 1735\u20131780."},{"key":"e_1_3_2_1_26_1","volume-title":"Multimodal human\u2013computer interaction: A survey. Computer vision and image understanding 108, 1-2","author":"Jaimes Alejandro","year":"2007","unstructured":"Alejandro Jaimes and Nicu Sebe . 2007. Multimodal human\u2013computer interaction: A survey. Computer vision and image understanding 108, 1-2 ( 2007 ), 116\u2013134. Alejandro Jaimes and Nicu Sebe. 2007. Multimodal human\u2013computer interaction: A survey. Computer vision and image understanding 108, 1-2 (2007), 116\u2013134."},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1111\/j.1460-2466.2002.tb02559.x"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1145\/3072959.3073658"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1007\/11821830_20"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1145\/3197517.3201401"},{"key":"e_1_3_2_1_31_1","unstructured":"Dario Pavllo David Grangier and Michael Auli. 2018. QuaterNet: A Quaternion-based Recurrent Model for Human Motion. arXiv preprint arXiv:1805.06485(2018).  Dario Pavllo David Grangier and Michael Auli. 2018. QuaterNet: A Quaternion-based Recurrent Model for Human Motion. arXiv preprint arXiv:1805.06485(2018)."},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1145\/3197517.3201311"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-33197-8_47"},{"key":"e_1_3_2_1_34_1","volume-title":"Recognizing emotions expressed by body pose: A biologically inspired neural model. Neural networks 21, 9","author":"Schindler Konrad","year":"2008","unstructured":"Konrad Schindler , Luc Van\u00a0Gool , and Beatrice de Gelder . 2008. Recognizing emotions expressed by body pose: A biologically inspired neural model. Neural networks 21, 9 ( 2008 ), 1238\u20131246. Konrad Schindler, Luc Van\u00a0Gool, and Beatrice de Gelder. 2008. Recognizing emotions expressed by body pose: A biologically inspired neural model. Neural networks 21, 9 (2008), 1238\u20131246."},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"crossref","unstructured":"Tomas Simon Hanbyul Joo Iain\u00a0A Matthews and Yaser Sheikh. 2017. Hand Keypoint Detection in Single Images Using Multiview Bootstrapping.. In CVPR Vol.\u00a01. 2.  Tomas Simon Hanbyul Joo Iain\u00a0A Matthews and Yaser Sheikh. 2017. Hand Keypoint Detection in Single Images Using Multiview Bootstrapping.. In CVPR Vol.\u00a01. 2.","DOI":"10.1109\/CVPR.2017.494"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICICES.2017.8070720"},{"key":"e_1_3_2_1_37_1","volume-title":"Immersed in Media","author":"Steed Anthony","unstructured":"Anthony Steed and Ralph Schroeder . 2015. Collaboration in Immersive and Non-immersive Virtual Environments . In Immersed in Media . Springer , 263\u2013282. Anthony Steed and Ralph Schroeder. 2015. Collaboration in Immersive and Non-immersive Virtual Environments. In Immersed in Media. Springer, 263\u2013282."},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1145\/3072959.3073640"},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1145\/3125739.3132594"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1145\/3072959.3073699"},{"key":"e_1_3_2_1_41_1","unstructured":"A\u00e4ron Van Den\u00a0Oord Sander Dieleman Heiga Zen Karen Simonyan Oriol Vinyals Alex Graves Nal Kalchbrenner Andrew\u00a0W Senior and Koray Kavukcuoglu. [n. d.]. WaveNet: A generative model for raw audio.  A\u00e4ron Van Den\u00a0Oord Sander Dieleman Heiga Zen Karen Simonyan Oriol Vinyals Alex Graves Nal Kalchbrenner Andrew\u00a0W Senior and Koray Kavukcuoglu. [n. d.]. WaveNet: A generative model for raw audio."},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"crossref","unstructured":"Petra Wagner Zofia Malisz and Stefan Kopp. 2014. Gesture and speech in interaction: An overview.  Petra Wagner Zofia Malisz and Stefan Kopp. 2014. Gesture and speech in interaction: An overview.","DOI":"10.1016\/j.specom.2013.09.008"},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1016\/S0378-2166(99)00109-5"},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.imavis.2012.03.001"},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"crossref","unstructured":"Amir Zadeh Paul\u00a0Pu Liang Soujanya Poria Prateek Vij Erik Cambria and Louis-Philippe Morency. 2018. Multi-attention recurrent network for human communication comprehension. arXiv preprint arXiv:1802.00923(2018).  Amir Zadeh Paul\u00a0Pu Liang Soujanya Poria Prateek Vij Erik Cambria and Louis-Philippe Morency. 2018. Multi-attention recurrent network for human communication comprehension. arXiv preprint arXiv:1802.00923(2018).","DOI":"10.1609\/aaai.v32i1.12024"},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"crossref","unstructured":"Hang Zhao Chuang Gan Andrew Rouditchenko Carl Vondrick Josh McDermott and Antonio Torralba. 2018. The sound of pixels. arXiv preprint arXiv:1804.03160(2018).  Hang Zhao Chuang Gan Andrew Rouditchenko Carl Vondrick Josh McDermott and Antonio Torralba. 2018. The sound of pixels. arXiv preprint arXiv:1804.03160(2018).","DOI":"10.1007\/978-3-030-01246-5_35"}],"event":{"name":"ICMI '19: INTERNATIONAL CONFERENCE ON MULTIMODAL INTERACTION","location":"Suzhou China","acronym":"ICMI '19"},"container-title":["2019 International Conference on Multimodal Interaction"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3340555.3353725","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3340555.3353725","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3340555.3353725","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T22:02:43Z","timestamp":1750197763000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3340555.3353725"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019,10,14]]},"references-count":46,"alternative-id":["10.1145\/3340555.3353725","10.1145\/3340555"],"URL":"https:\/\/doi.org\/10.1145\/3340555.3353725","relation":{},"subject":[],"published":{"date-parts":[[2019,10,14]]},"assertion":[{"value":"2019-10-14","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}