{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,5]],"date-time":"2026-05-05T17:39:43Z","timestamp":1778002783475,"version":"3.51.4"},"reference-count":71,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"4","license":[{"start":{"date-parts":[[2022,10,1]],"date-time":"2022-10-01T00:00:00Z","timestamp":1664582400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0\/"},{"start":{"date-parts":[[2022,10,1]],"date-time":"2022-10-01T00:00:00Z","timestamp":1664582400000},"content-version":"am","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0\/"}],"funder":[{"DOI":"10.13039\/501100008982","name":"National Science Foundation","doi-asserted-by":"publisher","award":["IIS-1718944"],"award-info":[{"award-number":["IIS-1718944"]}],"id":[{"id":"10.13039\/501100008982","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Affective Comput."],"published-print":{"date-parts":[[2022,10,1]]},"DOI":"10.1109\/taffc.2022.3216993","type":"journal-article","created":{"date-parts":[[2022,11,3]],"date-time":"2022-11-03T22:41:43Z","timestamp":1667515303000},"page":"2156-2170","source":"Crossref","is-referenced-by-count":46,"title":["Robust Audiovisual Emotion Recognition: Aligning Modalities, Capturing Temporal Information, and Handling Missing Features"],"prefix":"10.1109","volume":"13","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-9613-1002","authenticated-orcid":false,"given":"Lucas","family":"Goncalves","sequence":"first","affiliation":[{"name":"Erik Jonsson School of Engineering &#x0026; Commputer Science, The University of Texas at Dallas, Richardson, TX, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4075-4072","authenticated-orcid":false,"given":"Carlos","family":"Busso","sequence":"additional","affiliation":[{"name":"Erik Jonsson School of Engineering &#x0026; Commputer Science, The University of Texas at Dallas, Richardson, TX, USA"}]}],"member":"263","reference":[{"key":"ref71","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2020.2975922"},{"key":"ref70","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP43922.2022.9747705"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2007.906583"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1155\/2007\/70186"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1016\/j.knosys.2018.07.041"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/ICCVW.2017.296"},{"key":"ref31","first-page":"27","article-title":"Learning the kernel matrix with semidefinite programming","volume":"5","author":"lanckriet","year":"2004","journal-title":"J Mach Learn Res"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1145\/2663204.2666277"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/ICME.2018.8486455"},{"key":"ref36","first-page":"5998","article-title":"Attention is all you need","author":"vaswani","year":"2017","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2019-2594"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.acl-main.214"},{"key":"ref60","doi-asserted-by":"publisher","DOI":"10.1109\/LSP.2016.2603342"},{"key":"ref62","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2013-56"},{"key":"ref61","doi-asserted-by":"publisher","DOI":"10.1145\/1873951.1874246"},{"key":"ref63","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2020.3023632"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1007\/s12193-015-0203-6"},{"key":"ref64","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.acl-main.216"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1016\/j.specom.2006.11.004"},{"key":"ref65","first-page":"1","article-title":"An image is worth 16x16 words: Transformers for image recognition at scale","author":"dosovitskiy","year":"2021","journal-title":"Proc Int Conf Learn Representations"},{"key":"ref66","first-page":"1","article-title":"Stand-alone self-attention in vision models","author":"ramachandran","year":"2019","journal-title":"Proc Conf Neural Inf Process Syst"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2008.921737"},{"key":"ref67","first-page":"4171","article-title":"BERT: Pre-training of deep bidirectional transformers for language understanding","author":"devlin","year":"2019","journal-title":"Proc North Amer Chapter Assoc Comput Linguistics Hum Lang Technol"},{"key":"ref68","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2022-11012"},{"key":"ref69","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2021-1438"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v35i12.17289"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2008.52"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/ISM.2008.40"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1145\/2818346.2830596"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1007\/s12193-009-0025-5"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1145\/2663204.2666274"},{"key":"ref23","first-page":"543","article-title":"Combining modality specific deep neural networks for emotion recognition in video","author":"kahou","year":"2013","journal-title":"Proc Int Conf Multimodal Interact"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1023\/A:1021709817809"},{"key":"ref25","doi-asserted-by":"crossref","first-page":"10","DOI":"10.1109\/T-AFFC.2010.16","article-title":"Emotion recognition of affective speech based on multiple classifiers using acoustic-prosodic information and semantic labels","volume":"2","author":"wu","year":"2011","journal-title":"IEEE Trans Affect Comput"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00021"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298594"},{"key":"ref59","doi-asserted-by":"publisher","DOI":"10.1109\/FG.2018.00019"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.1109\/TAFFC.2015.2493525"},{"key":"ref57","doi-asserted-by":"publisher","DOI":"10.1109\/TAFFC.2015.2407898"},{"key":"ref56","doi-asserted-by":"publisher","DOI":"10.1109\/TAFFC.2016.2515617"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.1109\/TAFFC.2014.2336244"},{"key":"ref54","first-page":"1","article-title":"Adaptive input representations for neural language modeling","author":"baevski","year":"2019","journal-title":"Proc Int Conf Learn Representations"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P19-1176"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.5244\/C.29.41"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/JPROC.2003.817150"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.1994.389567"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2019.8682863"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1016\/j.neuropsychologia.2006.01.001"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1037\/a0019952"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/T-AFFC.2013.11"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2009-480"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1017\/ATSIP.2014.11"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P19-1656"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1145\/3395035.3425202"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1016\/j.imavis.2012.03.001"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/TCE.2021.3056421"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/JEEIT.2019.8717422"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/ICICS.1997.647126"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/TAFFC.2017.2740923"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/TSA.2005.857572"},{"key":"ref7","first-page":"366","article-title":"Multimodal human emotion \/ expression recognition","author":"chen","year":"1999","journal-title":"Proc IEEE 3rd Int Conf Autom Face Gesture Recognit"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP43922.2022.9747157"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1075\/z.99.32ben"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1109\/ICME51207.2021.9428219"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1145\/3394486.3403182"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1145\/3240508.3240528"},{"key":"ref47","article-title":"Maximum likelihood estimation for multimodal learning with missing modality","author":"ma","year":"2021"},{"key":"ref42","article-title":"Audio visual emotion recognition with temporal alignment and perception attention","author":"chao","year":"2016"},{"key":"ref41","first-page":"251","article-title":"Out of time: Automated lip sync in the wild","author":"chung","year":"2016","journal-title":"Proc Asian Conf Comput Vis"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i02.5492"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1109\/WACV45572.2020.9093345"}],"container-title":["IEEE Transactions on Affective Computing"],"original-title":[],"link":[{"URL":"https:\/\/ieeexplore.ieee.org\/ielam\/5165369\/9964459\/9928357-aam.pdf","content-type":"application\/pdf","content-version":"am","intended-application":"syndication"},{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/5165369\/9964459\/09928357.pdf?arnumber=9928357","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,12,19]],"date-time":"2022-12-19T19:39:40Z","timestamp":1671478780000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9928357\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,10,1]]},"references-count":71,"journal-issue":{"issue":"4"},"URL":"https:\/\/doi.org\/10.1109\/taffc.2022.3216993","relation":{},"ISSN":["1949-3045","2371-9850"],"issn-type":[{"value":"1949-3045","type":"electronic"},{"value":"2371-9850","type":"electronic"}],"subject":[],"published":{"date-parts":[[2022,10,1]]}}}