{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,2]],"date-time":"2026-04-02T03:18:02Z","timestamp":1775099882483,"version":"3.50.1"},"reference-count":47,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"8","license":[{"start":{"date-parts":[[2012,10,1]],"date-time":"2012-10-01T00:00:00Z","timestamp":1349049600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Audio Speech Lang. Process."],"published-print":{"date-parts":[[2012,10]]},"DOI":"10.1109\/tasl.2012.2201476","type":"journal-article","created":{"date-parts":[[2012,5,29]],"date-time":"2012-05-29T15:36:04Z","timestamp":1338305764000},"page":"2329-2340","source":"Crossref","is-referenced-by-count":48,"title":["Generating Human-Like Behaviors Using Joint, Speech-Driven Models for Conversational Agents"],"prefix":"10.1109","volume":"20","author":[{"given":"Soroosh","family":"Mariooryad","sequence":"first","affiliation":[]},{"given":"Carlos","family":"Busso","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2002.804888"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.1987.4767965"},{"key":"ref33","doi-asserted-by":"crossref","first-page":"1274","DOI":"10.1155\/S1110865702206083","article-title":"Dynamic Bayesian networks for audio-visual speech recognition","volume":"2002","author":"nefian","year":"2002","journal-title":"EURASIP J Appl Signal Process"},{"key":"ref32","author":"zoric","year":"2010","journal-title":"Hybrid Approach to Real-Time Speech Driven Facial Gesturing of Virtual Characters"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1023\/A:1011171430700"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/41.661300"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1002\/0470854626"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1007\/s10579-008-9076-6"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1145\/1095878.1095881"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/ICME.2006.262743"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.3115\/1610065.1610066"},{"key":"ref40","author":"boersma","year":"1996","journal-title":"Praat A System for Doing Phonetics by Computer"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/TASL.2006.885910"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.3115\/1073012.1073028"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1145\/1015706.1015753"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1002\/cav.80"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-84628-907-1_6"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/TASL.2007.905145"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/ICSLP.1996.607897"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1016\/S0167-6393(98)00048-X"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1016\/j.specom.2005.02.017"},{"key":"ref28","first-page":"265","article-title":"Facial animation and head motion driven by speech acoustics","author":"yehia","year":"2000","journal-title":"Proc 5th Seminar Speech Production Models and Data"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1145\/192161.192272"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/CA.2002.1017501"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1038\/264746a0"},{"key":"ref6","first-page":"75","article-title":"Multimodal signal analysis of prosody and hand motion: Temporal correlation of speech and gestures","author":"valbonesi","year":"2002","journal-title":"Proc Eur Signal Process Conf (EUSIPCO 02)"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/49.81953"},{"key":"ref5","author":"mcneill","year":"1992","journal-title":"Hand and Mind What Gestures Reveal about Thought"},{"key":"ref8","first-page":"105","article-title":"Expressing uncertainty with a talking head","author":"marsi","year":"2007","journal-title":"Proc Workshop Multimodal Output Generation (MOG 2007)"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1016\/S0950-7051(00)00102-7"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1016\/S0167-6393(02)00084-5"},{"key":"ref9","first-page":"154188","author":"poggi","year":"2000","journal-title":"Embodied Conversational Agents"},{"key":"ref1","first-page":"43","article-title":"Joint analysis of the emotional fingerprint the face and speech: A single subject study","author":"busso","year":"2007","journal-title":"Int Workshop Multimedia Signal Process (MMSP 2007)"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-59789-3_51"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/ICSLP.1996.607235"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1145\/989863.989935"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/AFGR.2002.1004186"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1109\/TASL.2008.2009578"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1016\/j.specom.2009.12.003"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1023\/A:1007425814087"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1145\/302979.303150"},{"key":"ref41","author":"jensen","year":"2010","journal-title":"Bayesian Networks and Decision Graphs"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1111\/j.0963-7214.2004.01502010.x"},{"key":"ref44","author":"eberly","year":"2000","journal-title":"3D Game Engine Design A Practical Approach to Real-Time Computer Graphics"},{"key":"ref26","first-page":"208","article-title":"An expressive ECA showing complex emotions","author":"bevacqua","year":"2007","journal-title":"Proc Artif Intell and Simulation of Behaviour (AISB 2007) Annu Conv"},{"key":"ref43","author":"murphy","year":"2002","journal-title":"Dynamic Bayesian Networks Representation Inference and Learning"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1207\/s15516709cog2001_1"}],"container-title":["IEEE Transactions on Audio, Speech, and Language Processing"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx5\/10376\/6263279\/06205334.pdf?arnumber=6205334","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2021,10,10]],"date-time":"2021-10-10T23:54:13Z","timestamp":1633910053000},"score":1,"resource":{"primary":{"URL":"http:\/\/ieeexplore.ieee.org\/document\/6205334\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2012,10]]},"references-count":47,"journal-issue":{"issue":"8"},"URL":"https:\/\/doi.org\/10.1109\/tasl.2012.2201476","relation":{},"ISSN":["1558-7916","1558-7924"],"issn-type":[{"value":"1558-7916","type":"print"},{"value":"1558-7924","type":"electronic"}],"subject":[],"published":{"date-parts":[[2012,10]]}}}