{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,6]],"date-time":"2026-05-06T16:13:20Z","timestamp":1778084000891,"version":"3.51.4"},"reference-count":33,"publisher":"IEEE","license":[{"start":{"date-parts":[[2022,5,23]],"date-time":"2022-05-23T00:00:00Z","timestamp":1653264000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2022,5,23]],"date-time":"2022-05-23T00:00:00Z","timestamp":1653264000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2022,5,23]]},"DOI":"10.1109\/icassp43922.2022.9747754","type":"proceedings-article","created":{"date-parts":[[2022,4,27]],"date-time":"2022-04-27T15:50:34Z","timestamp":1651074634000},"page":"6442-6446","source":"Crossref","is-referenced-by-count":31,"title":["Representation Learning Through Cross-Modal Conditional Teacher-Student Training For Speech Emotion Recognition"],"prefix":"10.1109","author":[{"given":"Sundararajan","family":"Srinivasan","sequence":"first","affiliation":[{"name":"Amazon AWS AI"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zhaocheng","family":"Huang","sequence":"additional","affiliation":[{"name":"Amazon AWS AI"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Katrin","family":"Kirchhoff","sequence":"additional","affiliation":[{"name":"Amazon AWS AI"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2020-2636"},{"key":"ref32","first-page":"1897","article-title":"SPLAT?: Speech-Language Joint Pre-Training for Spoken Language Understanding","author":"chung","year":"2021"},{"key":"ref31","doi-asserted-by":"crossref","first-page":"776","DOI":"10.1007\/978-3-030-58621-8_45","article-title":"Contrastive Multiview Coding","author":"tian","year":"2020","journal-title":"Lecture Notes in Computer Science"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/ASRU51503.2021.9688093"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2021-1775"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP39728.2021.9413910"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP40776.2020.9054548"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP39728.2021.9414996"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2021-703"},{"key":"ref15","doi-asserted-by":"crossref","first-page":"1301","DOI":"10.1109\/JSTSP.2017.2764438","article-title":"End-to-End Multimodal Emotion Recognition Using Deep Neural Networks","volume":"11","author":"trigeorgis","year":"2017","journal-title":"IEEE Journal of Selected Topics in Signal Processing"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1017\/ATSIP.2020.14"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2019.8683190"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2021.3122291"},{"key":"ref19","article-title":"Representation Learning with Contrastive Predictive Coding","author":"van den oord","year":"2018"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2019.8683438"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/FG.2011.5771357"},{"key":"ref27","doi-asserted-by":"crossref","first-page":"255","DOI":"10.2307\/2532051","article-title":"A Concordance Correlation Coefficient to Evaluate Reproducibility","volume":"45","author":"lawrence","year":"1989","journal-title":"Biometrics"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1016\/j.imavis.2012.06.016"},{"key":"ref6","first-page":"3","article-title":"AVEC 2016 - depression, mood, and emotion recognition workshop and challenge","author":"valstar","year":"2016","journal-title":"Proceedings of the 6th International Workshop on AVEC ACM MM"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/TAFFC.2017.2736999"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/TAFFC.2015.2457417"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2019.2936124"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2016.7472669"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1145\/3129340"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.167"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2008.52"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1007\/s10579-008-9076-6"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2021-1723"},{"key":"ref21","first-page":"1","article-title":"wav2vec 2.0: A framework for self-supervised learning of speech representations","author":"baevski","year":"2020","journal-title":"Advances in neural information processing systems"},{"key":"ref24","article-title":"Using Large Pre-Trained Models with Cross-Modal Attention for Multi-Modal Emotion Recognition","author":"n","year":"2021"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2020-1212"},{"key":"ref26","first-page":"2196","article-title":"Discriminatively trained recurrent neural networks for continuous dimensional emotion recognition from audio","author":"weninger","year":"2016","journal-title":"IJCAI International Joint Conference on Artificial Intelligence"},{"key":"ref25","article-title":"BERT: Pre-training of deep bidirectional transformers for language understanding","author":"devlin","year":"2018"}],"event":{"name":"ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","location":"Singapore, Singapore","start":{"date-parts":[[2022,5,23]]},"end":{"date-parts":[[2022,5,27]]}},"container-title":["ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9745891\/9746004\/09747754.pdf?arnumber=9747754","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,8,15]],"date-time":"2022-08-15T16:11:23Z","timestamp":1660579883000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9747754\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,5,23]]},"references-count":33,"URL":"https:\/\/doi.org\/10.1109\/icassp43922.2022.9747754","relation":{},"subject":[],"published":{"date-parts":[[2022,5,23]]}}}