{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,22]],"date-time":"2026-04-22T20:23:43Z","timestamp":1776889423414,"version":"3.51.2"},"publisher-location":"New York, NY, USA","reference-count":54,"publisher":"ACM","license":[{"start":{"date-parts":[[2020,10,21]],"date-time":"2020-10-21T00:00:00Z","timestamp":1603238400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"NSF","award":["IIS:1718944"],"award-info":[{"award-number":["IIS:1718944"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2020,10,21]]},"DOI":"10.1145\/3382507.3418872","type":"proceedings-article","created":{"date-parts":[[2020,10,22]],"date-time":"2020-10-22T10:04:34Z","timestamp":1603361074000},"page":"397-405","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":14,"title":["MSP-Face Corpus"],"prefix":"10.1145","author":[{"given":"Andrea","family":"Vidal","sequence":"first","affiliation":[{"name":"University of Texas at Dallas, Richardson, TX, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ali","family":"Salman","sequence":"additional","affiliation":[{"name":"University of Texas at Dallas, Richardson, TX, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Wei-Cheng","family":"Lin","sequence":"additional","affiliation":[{"name":"University of Texas at Dallas, Richardson, TX, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Carlos","family":"Busso","sequence":"additional","affiliation":[{"name":"University of Texas at Dallas, Richardson, TX, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2020,10,22]]},"reference":[{"key":"e_1_3_2_2_1_1","volume-title":"Study Of Dense Network Approaches For Speech Emotion Recognition. In IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP 2018","author":"Abdelwahab M.","year":"2018","unstructured":"M. Abdelwahab and C. Busso . 2018 . Study Of Dense Network Approaches For Speech Emotion Recognition. In IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP 2018 ). Calgary, AB, Canada, 5084--5088. https:\/\/doi.org\/10.1109\/ICASSP. 2018 .8461866 10.1109\/ICASSP.2018.8461866 M. Abdelwahab and C. Busso. 2018. Study Of Dense Network Approaches For Speech Emotion Recognition. In IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP 2018). Calgary, AB, Canada, 5084--5088. https:\/\/doi.org\/10.1109\/ICASSP.2018.8461866"},{"key":"e_1_3_2_2_2_1","volume-title":"The OpenCV Library. Dr. Dobb's Journal of Software Tools","author":"Bradski G.","year":"2000","unstructured":"G. Bradski . 2000. The OpenCV Library. Dr. Dobb's Journal of Software Tools ( 2000 ). G. Bradski. 2000. The OpenCV Library. Dr. Dobb's Journal of Software Tools (2000)."},{"key":"#cr-split#-e_1_3_2_2_3_1.1","doi-asserted-by":"crossref","unstructured":"A. Burmania and C. Busso. 2017. A Stepwise Analysis of Aggregated Crowdsourced Labels Describing Multimodal Emotional Behaviors. In Interspeech 2017. Stockholm Sweden 152--157. https:\/\/doi.org\/10.21437\/Interspeech.2017--1278 10.21437\/Interspeech.2017--1278","DOI":"10.21437\/Interspeech.2017-1278"},{"key":"#cr-split#-e_1_3_2_2_3_1.2","doi-asserted-by":"crossref","unstructured":"A. Burmania and C. Busso. 2017. A Stepwise Analysis of Aggregated Crowdsourced Labels Describing Multimodal Emotional Behaviors. In Interspeech 2017. Stockholm Sweden 152--157. https:\/\/doi.org\/10.21437\/Interspeech.2017--1278","DOI":"10.21437\/Interspeech.2017-1278"},{"key":"e_1_3_2_2_4_1","doi-asserted-by":"publisher","DOI":"10.1109\/TAFFC.2015.2493525"},{"key":"e_1_3_2_2_5_1","doi-asserted-by":"publisher","DOI":"10.1007\/s10579-008-9076-6"},{"key":"#cr-split#-e_1_3_2_2_6_1.1","doi-asserted-by":"crossref","unstructured":"C. Busso M. Bulut and S.S. Narayanan. 2013. Toward effective automatic recognition systems of emotion in speech. In Social emotions in nature and 1https:\/\/ecs.utdallas.edu\/research\/researchlabs\/msp-lab\/MSP-Face.html artifact: emotions in human and human-computer interaction J. Gratch and S. Marsella (Eds.). Oxford University Press New York NY USA 110--127. https:\/\/doi.org\/10.1093\/acprof:oso\/9780195387643.003.0008 10.1093\/acprof:oso","DOI":"10.1093\/acprof:oso\/9780195387643.003.0008"},{"key":"#cr-split#-e_1_3_2_2_6_1.2","doi-asserted-by":"crossref","unstructured":"C. Busso M. Bulut and S.S. Narayanan. 2013. Toward effective automatic recognition systems of emotion in speech. In Social emotions in nature and 1https:\/\/ecs.utdallas.edu\/research\/researchlabs\/msp-lab\/MSP-Face.html artifact: emotions in human and human-computer interaction J. Gratch and S. Marsella (Eds.). Oxford University Press New York NY USA 110--127. https:\/\/doi.org\/10.1093\/acprof:oso\/9780195387643.003.0008","DOI":"10.1093\/acprof:oso\/9780195387643.003.0008"},{"key":"e_1_3_2_2_7_1","volume-title":"Speech and Multimodal Information. In Sixth International Conference on Multimodal Interfaces ICMI","author":"Busso C.","year":"2004","unstructured":"C. Busso , Z. Deng , S. Yildirim , M. Bulut , C.M. Lee , A. Kazemzadeh , S. Lee , U. Neumann , and S. Narayanan . 2004. Analysis of Emotion Recognition using Facial Expressions , Speech and Multimodal Information. In Sixth International Conference on Multimodal Interfaces ICMI 2004 . ACM Press, State College, PA, 205--211. https:\/\/doi.org\/10.1145\/1027933.1027968 10.1145\/1027933.1027968 C. Busso, Z. Deng, S. Yildirim, M. Bulut, C.M. Lee, A. Kazemzadeh, S. Lee, U. Neumann, and S. Narayanan. 2004. Analysis of Emotion Recognition using Facial Expressions, Speech and Multimodal Information. In Sixth International Conference on Multimodal Interfaces ICMI 2004. ACM Press, State College, PA, 205--211. https:\/\/doi.org\/10.1145\/1027933.1027968"},{"key":"e_1_3_2_2_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/TAFFC.2016.2515617"},{"key":"e_1_3_2_2_9_1","doi-asserted-by":"crossref","unstructured":"C. Busso and T. Rahman. 2012. Unveiling the Acoustic Properties that Describe the Valence Dimension. In Interspeech 2012. Portland OR USA 1179--1182.  C. Busso and T. Rahman. 2012. Unveiling the Acoustic Properties that Describe the Valence Dimension. In Interspeech 2012. Portland OR USA 1179--1182.","DOI":"10.21437\/Interspeech.2012-124"},{"key":"e_1_3_2_2_10_1","doi-asserted-by":"publisher","DOI":"10.1109\/TAFFC.2014.2336244"},{"key":"e_1_3_2_2_11_1","volume-title":"ArcFace: Additive Angular Margin Loss for Deep Face Recognition. In IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR 2019","author":"Deng J.","year":"2019","unstructured":"J. Deng , J. Guo , N. Xue , and S. Zafeiriou . 2019 . ArcFace: Additive Angular Margin Loss for Deep Face Recognition. In IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR 2019 ). Long Beach, CA, USA, 4685--4694. https:\/\/doi.org\/10.1109\/CVPR. 2019 .00482 10.1109\/CVPR.2019.00482 J. Deng, J. Guo, N. Xue, and S. Zafeiriou. 2019. ArcFace: Additive Angular Margin Loss for Deep Face Recognition. In IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR 2019). Long Beach, CA, USA, 4685--4694. https:\/\/doi.org\/10.1109\/CVPR.2019.00482"},{"key":"e_1_3_2_2_12_1","volume-title":"Multi-task Learning of Facial Landmarks and Expression. In Canadian Conference on Computer and Robot Vision. Montreal, QC, Canada, 98--103","author":"Devries T.","year":"2014","unstructured":"T. Devries , K. Biswaranjan , and G. W. Taylor . 2014 . Multi-task Learning of Facial Landmarks and Expression. In Canadian Conference on Computer and Robot Vision. Montreal, QC, Canada, 98--103 . https:\/\/doi.org\/10.1109\/CRV. 2014 .21 10.1109\/CRV.2014.21 T. Devries, K. Biswaranjan, and G. W. Taylor. 2014. Multi-task Learning of Facial Landmarks and Expression. In Canadian Conference on Computer and Robot Vision. Montreal, QC, Canada, 98--103. https:\/\/doi.org\/10.1109\/CRV.2014.21"},{"key":"e_1_3_2_2_13_1","doi-asserted-by":"publisher","DOI":"10.1080\/02699939208411068"},{"key":"e_1_3_2_2_14_1","volume-title":"ACM International conference on Multimedia (MM","author":"Eyben F.","year":"2010","unstructured":"F. Eyben , M. W\u00f6llmer , and B. Schuller . 2010. OpenSMILE: the Munich versatile and fast open-source audio feature extractor . In ACM International conference on Multimedia (MM 2010 ). Florence, Italy, 1459--1462. F. Eyben, M. W\u00f6llmer, and B. Schuller. 2010. OpenSMILE: the Munich versatile and fast open-source audio feature extractor. In ACM International conference on Multimedia (MM 2010). Florence, Italy, 1459--1462."},{"key":"e_1_3_2_2_15_1","unstructured":"L. Fischer D. Brauns and F. Belschak. 2002. Zur Messung von Emotionen in der angewandten Forschung. Pabst Science Publishers Lengerich.  L. Fischer D. Brauns and F. Belschak. 2002. Zur Messung von Emotionen in der angewandten Forschung. Pabst Science Publishers Lengerich."},{"key":"e_1_3_2_2_16_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.specom.2007.01.010"},{"key":"e_1_3_2_2_17_1","volume-title":"IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP 2019","author":"Harvill J.","year":"2019","unstructured":"J. Harvill , M. AbdelWahab , R. Lotfian , and C. Busso . 2019. Retrieving Speech Samples with Similar Emotional Content Using a Triplet Loss Function . In IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP 2019 ). Brighton, UK, 7400--7404. https:\/\/doi.org\/10.1109\/ICASSP. 2019 .8683273 10.1109\/ICASSP.2019.8683273 J. Harvill, M. AbdelWahab, R. Lotfian, and C. Busso. 2019. Retrieving Speech Samples with Similar Emotional Content Using a Triplet Loss Function. In IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP 2019). Brighton, UK, 7400--7404. https:\/\/doi.org\/10.1109\/ICASSP.2019.8683273"},{"key":"e_1_3_2_2_18_1","volume-title":"SEWA DB: A Rich Database for Audio-Visual Emotion and Sentiment Research in the Wild","author":"Kossaifi J.","year":"2020","unstructured":"J. Kossaifi , R. Walecki , Y. Panagakis , J. Shen , M. Schmitt , F. Ringeval , J. Han , V. Pandit , A. Toisoul , B. Schuller , K. Star , E. Hajiyev , and M. Pantic . 2020 . SEWA DB: A Rich Database for Audio-Visual Emotion and Sentiment Research in the Wild . IEEE Transactions on Pattern Analysis and Machine Intelligence ( 2020). https:\/\/doi.org\/10.1109\/TPAMI.2019.2944808 10.1109\/TPAMI.2019.2944808 J. Kossaifi, R. Walecki, Y. Panagakis, J. Shen, M. Schmitt, F. Ringeval, J. Han, V. Pandit, A. Toisoul, B. Schuller, K. Star, E. Hajiyev, and M. Pantic. 2020. SEWA DB: A Rich Database for Audio-Visual Emotion and Sentiment Research in the Wild. IEEE Transactions on Pattern Analysis and Machine Intelligence (2020). https:\/\/doi.org\/10.1109\/TPAMI.2019.2944808"},{"key":"e_1_3_2_2_19_1","doi-asserted-by":"publisher","DOI":"10.1371\/journal.pone.0196391"},{"key":"e_1_3_2_2_20_1","volume-title":"International Conference on Affective Computing and Intelligent Interaction (ACII 2017","author":"Lotfian R.","year":"2017","unstructured":"R. Lotfian and C. Busso . 2017. Formulating Emotion Perception as a Probabilistic Model with Application to Categorical Emotion Classification . In International Conference on Affective Computing and Intelligent Interaction (ACII 2017 ). San Antonio, TX, USA, 415--420. https:\/\/doi.org\/10.1109\/ACII. 2017 .8273633 10.1109\/ACII.2017.8273633 R. Lotfian and C. Busso. 2017. Formulating Emotion Perception as a Probabilistic Model with Application to Categorical Emotion Classification. In International Conference on Affective Computing and Intelligent Interaction (ACII 2017). San Antonio, TX, USA, 415--420. https:\/\/doi.org\/10.1109\/ACII.2017.8273633"},{"key":"#cr-split#-e_1_3_2_2_21_1.1","doi-asserted-by":"crossref","unstructured":"R. Lotfian and C. Busso. 2018. Predicting Categorical Emotions by Jointly Learning Primary and Secondary Emotions Through Multitask Learning. In Interspeech 2018. Hyderabad India 951--955. https:\/\/doi.org\/10.21437\/Interspeech.2018--2464 10.21437\/Interspeech.2018--2464","DOI":"10.21437\/Interspeech.2018-2464"},{"key":"#cr-split#-e_1_3_2_2_21_1.2","doi-asserted-by":"crossref","unstructured":"R. Lotfian and C. Busso. 2018. Predicting Categorical Emotions by Jointly Learning Primary and Secondary Emotions Through Multitask Learning. In Interspeech 2018. Hyderabad India 951--955. https:\/\/doi.org\/10.21437\/Interspeech.2018--2464","DOI":"10.21437\/Interspeech.2018-2464"},{"key":"e_1_3_2_2_22_1","doi-asserted-by":"publisher","DOI":"10.1109\/TAFFC.2017.2736999"},{"key":"e_1_3_2_2_23_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICDEW.2006.145"},{"key":"e_1_3_2_2_24_1","doi-asserted-by":"publisher","DOI":"10.1109\/T-AFFC.2011.20"},{"key":"e_1_3_2_2_25_1","volume-title":"The USC CreativeIT Database: A Multimodal Database of Theatrical Improvisation. In Workshop on Multimodal Corpora: Advances in Capturing, Coding and Analyzing Multimodality (MMC","author":"Metallinou A.","year":"2010","unstructured":"A. Metallinou , C.-C. Lee , C. Busso , S. Carnicke , and S. Narayanan . 2010 . The USC CreativeIT Database: A Multimodal Database of Theatrical Improvisation. In Workshop on Multimodal Corpora: Advances in Capturing, Coding and Analyzing Multimodality (MMC 2010 ). Valletta, Malta. A. Metallinou, C.-C. Lee, C. Busso, S. Carnicke, and S. Narayanan. 2010. The USC CreativeIT Database: A Multimodal Database of Theatrical Improvisation. In Workshop on Multimodal Corpora: Advances in Capturing, Coding and Analyzing Multimodality (MMC 2010). Valletta, Malta."},{"key":"e_1_3_2_2_26_1","first-page":"3","article-title":"The USC CreativeIT Database of Multimodal Dyadic Interactions: From Speech and Full Body Motion Capture to Continuous Emotional Annotations","volume":"50","author":"Metallinou A.","year":"2016","unstructured":"A. Metallinou , Z. Yang , C.-C. Lee , C. Busso , S. Carnicke , and S. Narayanan . 2016 . The USC CreativeIT Database of Multimodal Dyadic Interactions: From Speech and Full Body Motion Capture to Continuous Emotional Annotations . Journal of Language Resources and Evaluation 50 , 3 (September 2016), 497--521. https:\/\/doi.org\/10.1007\/s10579-015--9300-0 10.1007\/s10579-015--9300-0 A. Metallinou, Z. Yang, C.-C. Lee, C. Busso, S. Carnicke, and S. Narayanan. 2016. The USC CreativeIT Database of Multimodal Dyadic Interactions: From Speech and Full Body Motion Capture to Continuous Emotional Annotations. Journal of Language Resources and Evaluation 50, 3 (September 2016), 497--521. https:\/\/doi.org\/10.1007\/s10579-015--9300-0","journal-title":"Journal of Language Resources and Evaluation"},{"key":"e_1_3_2_2_27_1","doi-asserted-by":"publisher","DOI":"10.1109\/TAFFC.2017.2740923"},{"key":"e_1_3_2_2_28_1","volume-title":"Interpreting Ambiguous Emotional Expressions. In International Conference on Affective Computing and Intelligent Interaction (ACII 2009","author":"Mower E.","year":"2009","unstructured":"E. Mower , A. Metallinou , C.-C. Lee , A. Kazemzadeh , C. Busso , S. Lee , and S.S. Narayanan . 2009 . Interpreting Ambiguous Emotional Expressions. In International Conference on Affective Computing and Intelligent Interaction (ACII 2009 ). Amsterdam, The Netherlands, 1--8. https:\/\/doi.org\/10.1109\/ACII. 2009 .5349500 10.1109\/ACII.2009.5349500 E. Mower, A. Metallinou, C.-C. Lee, A. Kazemzadeh, C. Busso, S. Lee, and S.S. Narayanan. 2009. Interpreting Ambiguous Emotional Expressions. In International Conference on Affective Computing and Intelligent Interaction (ACII 2009). Amsterdam, The Netherlands, 1--8. https:\/\/doi.org\/10.1109\/ACII.2009.5349500"},{"key":"e_1_3_2_2_29_1","volume-title":"EmoReact: A Multimodal Approach and Dataset for Recognizing Emotional Responses in Children. In ACM International Conference on Multimodal Interaction","author":"Nojavanasghari B.","year":"2016","unstructured":"B. Nojavanasghari , T. Baltru\u0161aitis , C.E. Hughes , and L.-P. Morency . 2016 . EmoReact: A Multimodal Approach and Dataset for Recognizing Emotional Responses in Children. In ACM International Conference on Multimodal Interaction . Tokyo, Japan, 137--144. https:\/\/doi.org\/10.1145\/2993148.2993168 10.1145\/2993148.2993168 B. Nojavanasghari, T. Baltru\u0161aitis, C.E. Hughes, and L.-P. Morency. 2016. EmoReact: A Multimodal Approach and Dataset for Recognizing Emotional Responses in Children. In ACM International Conference on Multimodal Interaction. Tokyo, Japan, 137--144. https:\/\/doi.org\/10.1145\/2993148.2993168"},{"key":"e_1_3_2_2_30_1","volume-title":"Deep Face Recognition. In British Machine Vision Conference (BMVC","author":"Parkhi O.M.","year":"2015","unstructured":"O.M. Parkhi , A. Vedaldi , and A. Zisserman . 2015 . Deep Face Recognition. In British Machine Vision Conference (BMVC 2015 ). Swansea,UK, 1--12. https:\/\/doi.org\/10.5244\/c.29.41 10.5244\/c.29.41 O.M. Parkhi, A. Vedaldi, and A. Zisserman. 2015. Deep Face Recognition. In British Machine Vision Conference (BMVC 2015). Swansea,UK, 1--12. https:\/\/doi.org\/10.5244\/c.29.41"},{"key":"#cr-split#-e_1_3_2_2_31_1.1","doi-asserted-by":"crossref","unstructured":"S. Parthasarathy and C. Busso. 2017. Jointly Predicting Arousal Valence and Dominance with Multi-Task Learning. In Interspeech 2017. Stockholm Sweden 1103--1107. https:\/\/doi.org\/10.21437\/Interspeech.2017--1494 10.21437\/Interspeech.2017--1494","DOI":"10.21437\/Interspeech.2017-1494"},{"key":"#cr-split#-e_1_3_2_2_31_1.2","doi-asserted-by":"crossref","unstructured":"S. Parthasarathy and C. Busso. 2017. Jointly Predicting Arousal Valence and Dominance with Multi-Task Learning. In Interspeech 2017. Stockholm Sweden 1103--1107. https:\/\/doi.org\/10.21437\/Interspeech.2017--1494","DOI":"10.21437\/Interspeech.2017-1494"},{"key":"#cr-split#-e_1_3_2_2_32_1.1","doi-asserted-by":"crossref","unstructured":"S. Parthasarathy and C. Busso. 2018. Ladder Networks for Emotion Recognition: Using Unsupervised Auxiliary Tasks to Improve Predictions of Emotional Attributes. In Interspeech 2018. Hyderabad India 3698--3702. https:\/\/doi.org\/10.21437\/Interspeech.2018--1391 10.21437\/Interspeech.2018--1391","DOI":"10.21437\/Interspeech.2018-1391"},{"key":"#cr-split#-e_1_3_2_2_32_1.2","doi-asserted-by":"crossref","unstructured":"S. Parthasarathy and C. Busso. 2018. Ladder Networks for Emotion Recognition: Using Unsupervised Auxiliary Tasks to Improve Predictions of Emotional Attributes. In Interspeech 2018. Hyderabad India 3698--3702. https:\/\/doi.org\/10.21437\/Interspeech.2018--1391","DOI":"10.21437\/Interspeech.2018-1391"},{"key":"e_1_3_2_2_33_1","volume-title":"Real-Time Object Detection. In IEEE Conference on Computer Vision and Pattern Recognition (CVPR 2016","author":"Redmon J.","year":"2016","unstructured":"J. Redmon , S. Divvala , R. Girshick , and A. Farhadi . 2016. You Only Look Once: Unified , Real-Time Object Detection. In IEEE Conference on Computer Vision and Pattern Recognition (CVPR 2016 ). Las Vegas, NV, USA, 779--788. https:\/\/doi.org\/10.1109\/CVPR. 2016 .91 10.1109\/CVPR.2016.91 J. Redmon, S. Divvala, R. Girshick, and A. Farhadi. 2016. You Only Look Once: Unified, Real-Time Object Detection. In IEEE Conference on Computer Vision and Pattern Recognition (CVPR 2016). Las Vegas, NV, USA, 779--788. https:\/\/doi.org\/10.1109\/CVPR.2016.91"},{"key":"e_1_3_2_2_34_1","unstructured":"S. Ren K. He R. Girshick and J. Sun. 2015. Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks. In Advances in neural information processing systems (NIPS 2015). Montreal Canada 91--99.  S. Ren K. He R. Girshick and J. Sun. 2015. Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks. In Advances in neural information processing systems (NIPS 2015). Montreal Canada 91--99."},{"key":"#cr-split#-e_1_3_2_2_35_1.1","doi-asserted-by":"crossref","unstructured":"F. Ringeval A. Sonderegger J. Sauer and D. Lalanne. 2013. Introducing the RECOLA Multimodal Corpus of Remote Collaborative and Affective Interactions. In 2nd International Workshop on Emotion Representation Analysis and Synthesis in Continuous Time and Space (EmoSPACE 2013). Shanghai China 1--8. https:\/\/doi.org\/10.1109\/FG.2013.6553805 10.1109\/FG.2013.6553805","DOI":"10.1109\/FG.2013.6553805"},{"key":"#cr-split#-e_1_3_2_2_35_1.2","doi-asserted-by":"crossref","unstructured":"F. Ringeval A. Sonderegger J. Sauer and D. Lalanne. 2013. Introducing the RECOLA Multimodal Corpus of Remote Collaborative and Affective Interactions. In 2nd International Workshop on Emotion Representation Analysis and Synthesis in Continuous Time and Space (EmoSPACE 2013). Shanghai China 1--8. https:\/\/doi.org\/10.1109\/FG.2013.6553805","DOI":"10.1109\/FG.2013.6553805"},{"key":"e_1_3_2_2_36_1","doi-asserted-by":"publisher","DOI":"10.1037\/0022-3514.76.5.805"},{"key":"e_1_3_2_2_37_1","doi-asserted-by":"publisher","DOI":"10.1007\/BF00995206"},{"key":"#cr-split#-e_1_3_2_2_38_1.1","unstructured":"N. Sadoughi and C. Busso. 2020. Speech-Driven Expressive Talking Lips with Conditional Sequential Generative Adversarial Networks. IEEE Transactions on Affective Computing To appear (2020). https:\/\/doi.org\/10.1109\/TAFFC.2019. 10.1109\/TAFFC.2019"},{"key":"#cr-split#-e_1_3_2_2_38_1.2","doi-asserted-by":"crossref","unstructured":"N. Sadoughi and C. Busso. 2020. Speech-Driven Expressive Talking Lips with Conditional Sequential Generative Adversarial Networks. IEEE Transactions on Affective Computing To appear (2020). https:\/\/doi.org\/10.1109\/TAFFC.2019.","DOI":"10.1109\/TAFFC.2019.2916031"},{"key":"e_1_3_2_2_39_1","volume-title":"The INTERSPEECH 2013 Computational Paralinguistics Challenge: Social Signals, Conflict, Emotion, Autism. In Interspeech","author":"Schuller B.","year":"2013","unstructured":"B. Schuller , S. Steidl , A. Batliner , A. Vinciarelli , K. Scherer , F. Ringeval , M. Chetouani , F. Weninger , F. Eyben , E. Marchi , M. Mortillaro , H. Salamin , A. Polychroniou , F. Valente , and S. Kim . 2013 . The INTERSPEECH 2013 Computational Paralinguistics Challenge: Social Signals, Conflict, Emotion, Autism. In Interspeech 2013 . Lyon, France, 148--152. B. Schuller, S. Steidl, A. Batliner, A. Vinciarelli, K. Scherer, F. Ringeval, M. Chetouani, F. Weninger, F. Eyben, E. Marchi, M. Mortillaro, H. Salamin, A. Polychroniou, F. Valente, and S. Kim. 2013. The INTERSPEECH 2013 Computational Paralinguistics Challenge: Social Signals, Conflict, Emotion, Autism. In Interspeech 2013. Lyon, France, 148--152."},{"key":"e_1_3_2_2_40_1","unstructured":"V. Sethu E. Mower Provost J. Epps C. Busso N. Cummins and S. Narayanan. 2019. The ambiguous world of emotion representation. ArXiv e-prints (arXiv:1909.00360) (May 2019) 1--19. arXiv:cs.HC\/1909.00360  V. Sethu E. Mower Provost J. Epps C. Busso N. Cummins and S. Narayanan. 2019. The ambiguous world of emotion representation. ArXiv e-prints (arXiv:1909.00360) (May 2019) 1--19. arXiv:cs.HC\/1909.00360"},{"key":"e_1_3_2_2_41_1","doi-asserted-by":"publisher","DOI":"10.1109\/T-AFFC.2011.25"},{"key":"#cr-split#-e_1_3_2_2_42_1.1","doi-asserted-by":"crossref","unstructured":"K. Sridhar S. Parthasarathy and C. Busso. 2018. Role of Regularization in the Prediction of Valence from Speech. In Interspeech 2018. Hyderabad India 941--945. https:\/\/doi.org\/10.21437\/Interspeech.2018--2508 10.21437\/Interspeech.2018--2508","DOI":"10.21437\/Interspeech.2018-2508"},{"key":"#cr-split#-e_1_3_2_2_42_1.2","doi-asserted-by":"crossref","unstructured":"K. Sridhar S. Parthasarathy and C. Busso. 2018. Role of Regularization in the Prediction of Valence from Speech. In Interspeech 2018. Hyderabad India 941--945. https:\/\/doi.org\/10.21437\/Interspeech.2018--2508","DOI":"10.21437\/Interspeech.2018-2508"},{"key":"e_1_3_2_2_43_1","volume-title":"IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP 2019","author":"Thomas S.","year":"2019","unstructured":"S. Thomas , M. Suzuki , Y. Huang , G. Kurata , Z. Tuske , G. Saon , B. Kingsbury , M. Picheny , T. Dibert , A. Kaiser-Schatzlein , and B. Samko . 2019. English Broadcast News Speech Recognition by Humans and Machines . In IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP 2019 ). Brighton, United Kingdom, 6455--6459. https:\/\/doi.org\/10.1109\/ICASSP. 2019 .8683211 10.1109\/ICASSP.2019.8683211 S. Thomas, M. Suzuki, Y. Huang, G. Kurata, Z. Tuske, G. Saon, B. Kingsbury, M. Picheny, T. Dibert, A. Kaiser-Schatzlein, and B. Samko. 2019. English Broadcast News Speech Recognition by Humans and Machines. In IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP 2019). Brighton, United Kingdom, 6455--6459. https:\/\/doi.org\/10.1109\/ICASSP.2019.8683211"},{"key":"e_1_3_2_2_44_1","volume-title":"European Conference on Computer Vision (ECCV","volume":"9911","author":"Wen Y.","year":"2016","unstructured":"Y. Wen , K. Zhang , Z. Li , and Y. Qiao . 2016. A Discriminative Feature Learning Approach for Deep Face Recognition . In European Conference on Computer Vision (ECCV 2016 ), B. Leibe, J. Matas, N. Sebe, and M. Welling (Eds.). Lecture Notes in Computer Science , Vol. 9911 . Springer Berlin Heidelberg, Amsterdam, The Netherlands, 499--515. https:\/\/doi.org\/10.1007\/978--3--319--46478--7_31 10.1007\/978--3--319--46478--7_31 Y. Wen, K. Zhang, Z. Li, and Y. Qiao. 2016. A Discriminative Feature Learning Approach for Deep Face Recognition. In European Conference on Computer Vision (ECCV 2016), B. Leibe, J. Matas, N. Sebe, and M. Welling (Eds.). Lecture Notes in Computer Science, Vol. 9911. Springer Berlin Heidelberg, Amsterdam, The Netherlands, 499--515. https:\/\/doi.org\/10.1007\/978--3--319--46478--7_31"},{"key":"e_1_3_2_2_45_1","volume-title":"ACM Association for Computational Linguistics (ACL 2004","volume":"1","author":"Zadeh A.","year":"2018","unstructured":"A. Zadeh , P.P. Liang , J. Vanbriesen , S. Poria , E. Tong , E. Cambria , M. Chen , and L.-P. Morency . 2018 . Multimodal Language Analysis in the Wild: Carnegie Mellon UniversityMOSEI Dataset and Interpretable Dynamic Fusion Graph . In ACM Association for Computational Linguistics (ACL 2004 ), Vol. 1 . Melbourne, Australia, 2236--2246. https:\/\/doi.org\/10. 18653\/v1\/P18--1208 10.18653\/v1 A. Zadeh, P.P. Liang, J. Vanbriesen, S. Poria, E. Tong, E. Cambria, M. Chen, and L.-P. Morency. 2018. Multimodal Language Analysis in the Wild: Carnegie Mellon UniversityMOSEI Dataset and Interpretable Dynamic Fusion Graph. In ACM Association for Computational Linguistics (ACL 2004), Vol. 1. Melbourne, Australia, 2236--2246. https:\/\/doi.org\/10.18653\/v1\/P18--1208"},{"key":"e_1_3_2_2_46_1","volume-title":"IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP 2017","author":"Zhang Y.","year":"2017","unstructured":"Y. Zhang , W. Chan , and N. Jaitly . 2017. Very deep convolutional networks for end-to-end speech recognition . In IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP 2017 ). New Orleans, LA, USA, 4845--4849. https:\/\/doi.org\/10.1109\/ICASSP. 2017 .7953077 10.1109\/ICASSP.2017.7953077 Y. Zhang, W. Chan, and N. Jaitly. 2017. Very deep convolutional networks for end-to-end speech recognition. In IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP 2017). New Orleans, LA, USA, 4845--4849. https:\/\/doi.org\/10.1109\/ICASSP.2017.7953077"}],"event":{"name":"ICMI '20: INTERNATIONAL CONFERENCE ON MULTIMODAL INTERACTION","location":"Virtual Event Netherlands","acronym":"ICMI '20","sponsor":["SIGCHI ACM Special Interest Group on Computer-Human Interaction"]},"container-title":["Proceedings of the 2020 International Conference on Multimodal Interaction"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3382507.3418872","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3382507.3418872","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T22:02:49Z","timestamp":1750197769000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3382507.3418872"}},"subtitle":["A Natural Audiovisual Emotional Database"],"short-title":[],"issued":{"date-parts":[[2020,10,21]]},"references-count":54,"alternative-id":["10.1145\/3382507.3418872","10.1145\/3382507"],"URL":"https:\/\/doi.org\/10.1145\/3382507.3418872","relation":{},"subject":[],"published":{"date-parts":[[2020,10,21]]},"assertion":[{"value":"2020-10-22","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}