{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,26]],"date-time":"2025-03-26T09:51:05Z","timestamp":1742982665224,"version":"3.40.3"},"publisher-location":"Cham","reference-count":25,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783030602758"},{"type":"electronic","value":"9783030602765"}],"license":[{"start":{"date-parts":[[2020,1,1]],"date-time":"2020-01-01T00:00:00Z","timestamp":1577836800000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2020,1,1]],"date-time":"2020-01-01T00:00:00Z","timestamp":1577836800000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2020]]},"DOI":"10.1007\/978-3-030-60276-5_61","type":"book-chapter","created":{"date-parts":[[2020,10,4]],"date-time":"2020-10-04T07:02:44Z","timestamp":1601794964000},"page":"636-645","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Learning an Unsupervised and Interpretable Representation of Emotion from Speech"],"prefix":"10.1007","author":[{"given":"Siwei","family":"Wang","sequence":"first","affiliation":[]},{"given":"Catherine","family":"Soladi\u00e9","sequence":"additional","affiliation":[]},{"given":"Renaud","family":"S\u00e9guier","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2020,9,29]]},"reference":[{"key":"61_CR1","doi-asserted-by":"crossref","unstructured":"Burkhardt, F., Paeschke, A., Rolfes, M., Sendlmeier, W.F., Weiss, B.: A database of German emotional speech. In: Ninth European Conference on Speech Communication and Technology (2005)","DOI":"10.21437\/Interspeech.2005-446"},{"issue":"4","key":"61_CR2","doi-asserted-by":"publisher","first-page":"335","DOI":"10.1007\/s10579-008-9076-6","volume":"42","author":"C Busso","year":"2008","unstructured":"Busso, C., et al.: IEMOCAP: interactive emotional dyadic motion capture database. Lang. Resour. Eval. 42(4), 335 (2008)","journal-title":"Lang. Resour. Eval."},{"key":"61_CR3","doi-asserted-by":"crossref","unstructured":"Caron, M., Bojanowski, P., Joulin, A., Douze, M.: Deep clustering for unsupervised learning of visual features. In: Proceedings of the European Conference on Computer Vision (ECCV), pp. 132\u2013149 (2018)","DOI":"10.1007\/978-3-030-01264-9_9"},{"key":"61_CR4","volume-title":"Applied Nonparametric Statistics","author":"WW Daniel","year":"1978","unstructured":"Daniel, W.W.: Applied Nonparametric Statistics. Houghton Mifflin, Boston (1978)"},{"key":"61_CR5","doi-asserted-by":"crossref","unstructured":"Eskimez, S.E., Duan, Z., Heinzelman, W.: Unsupervised learning approach to feature analysis for automatic speech emotion recognition. In: IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 5099\u20135103 (2018)","DOI":"10.1109\/ICASSP.2018.8462685"},{"key":"61_CR6","doi-asserted-by":"crossref","unstructured":"Eyben, F., Weninger, F., Gross, F., Schuller, B.: Recent developments in openSMILE, the munich open-source multimedia feature extractor. In: Proceedings of the 21st ACM international conference on Multimedia, pp. 835\u2013838 (2013)","DOI":"10.1145\/2502081.2502224"},{"key":"61_CR7","doi-asserted-by":"crossref","unstructured":"Ghosh, S., Laksana, E., Morency, L.P., Scherer, S.: Representation learning for speech emotion recognition. In: Interspeech, pp. 3603\u20133607 (2016)","DOI":"10.21437\/Interspeech.2016-692"},{"key":"61_CR8","doi-asserted-by":"crossref","unstructured":"Han, K., Yu, D., Tashev, I.: Speech emotion recognition using deep neural network and extreme learning machine. In: Fifteenth Annual Conference of the International Speech Communication Association (2014)","DOI":"10.21437\/Interspeech.2014-57"},{"issue":"5786","key":"61_CR9","doi-asserted-by":"publisher","first-page":"504","DOI":"10.1126\/science.1127647","volume":"313","author":"GE Hinton","year":"2006","unstructured":"Hinton, G.E., Salakhutdinov, R.R.: Reducing the dimensionality of data with neural networks. Science 313(5786), 504\u2013507 (2006)","journal-title":"Science"},{"key":"61_CR10","doi-asserted-by":"crossref","unstructured":"Kaya, H., Karpov, A.A., Salah, A.A.: Fisher vectors with cascaded normalization for paralinguistic analysis. In: Sixteenth Annual Conference of the International Speech Communication Association (2015)","DOI":"10.21437\/Interspeech.2015-193"},{"key":"61_CR11","doi-asserted-by":"crossref","unstructured":"Kim, Y., Provost, E.M.: Emotion classification via utterance-level dynamics: a pattern-based approach to characterizing affective expressions. In: IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 3677\u20133681 (2013)","DOI":"10.1109\/ICASSP.2013.6638344"},{"key":"61_CR12","doi-asserted-by":"crossref","unstructured":"Latif, S., Rana, R., Qadir, J., Epps, J.: Variational autoencoders for learning latent representations of speech emotion: a preliminary study. In: Interspeech, International Speech Communication Association (ISCA), pp. 3107\u20133111 (2018)","DOI":"10.21437\/Interspeech.2018-1568"},{"issue":"4","key":"61_CR13","doi-asserted-by":"publisher","first-page":"815","DOI":"10.1109\/TASLP.2019.2898816","volume":"27","author":"R Lotfian","year":"2019","unstructured":"Lotfian, R., Busso, C.: Curriculum learning for speech emotion recognition from crowdsourced labels. IEEE\/ACM Trans. Audio, Speech Lang. Process. 27(4), 815\u2013826 (2019)","journal-title":"IEEE\/ACM Trans. Audio, Speech Lang. Process."},{"issue":"1","key":"61_CR14","doi-asserted-by":"publisher","first-page":"5","DOI":"10.1109\/T-AFFC.2011.20","volume":"3","author":"G McKeown","year":"2011","unstructured":"McKeown, G., Valstar, M., Cowie, R., Pantic, M., Schroder, M.: The SEMAINE database: annotated multimodal records of emotionally colored conversations between a person and a limited agent. IEEE Trans. Affect. Comput. 3(1), 5\u201317 (2011)","journal-title":"IEEE Trans. Affect. Comput."},{"issue":"2","key":"61_CR15","doi-asserted-by":"publisher","first-page":"193","DOI":"10.1007\/s10649-006-9034-4","volume":"63","author":"P Op\u2019t Eynde","year":"2006","unstructured":"Op\u2019t Eynde, P., De Corte, E., Verschaffel, L.: Accepting emotional complexity: a socio-constructivist perspective on the role of emotions in the mathematics classroom. Educ. Stud. Math. 63(2), 193\u2013207 (2006)","journal-title":"Educ. Stud. Math."},{"key":"61_CR16","unstructured":"Pearson, K.: LIII on lines and planes of closest fit to systems of points in space. London Edinb. Dublin Philos. Mag. J. Sci. 2(11), 559\u2013572 (1901)"},{"key":"61_CR17","doi-asserted-by":"publisher","first-page":"98","DOI":"10.1016\/j.inffus.2017.02.003","volume":"37","author":"S Poria","year":"2017","unstructured":"Poria, S., Cambria, E., Bajpai, R., Hussain, A.: A review of affective computing: from unimodal analysis to multimodal fusion. Inform. Fusion 37, 98\u2013125 (2017)","journal-title":"Inform. Fusion"},{"issue":"6","key":"61_CR18","doi-asserted-by":"publisher","first-page":"1161","DOI":"10.1037\/h0077714","volume":"39","author":"JA Russell","year":"1980","unstructured":"Russell, J.A.: A circumplex model of affect. J. Pers. Soc. Psychol. 39(6), 1161 (1980)","journal-title":"J. Pers. Soc. Psychol."},{"issue":"11","key":"61_CR19","doi-asserted-by":"publisher","first-page":"2660","DOI":"10.1109\/TNNLS.2016.2599820","volume":"28","author":"W Samek","year":"2016","unstructured":"Samek, W., Binder, A., Montavon, G., Lapuschkin, S., M\u00fcller, K.R.: Evaluating the visualization of what a deep neural network has learned. IEEE Trans. Neural Netw. Learn. Syst. 28(11), 2660\u20132673 (2016)","journal-title":"IEEE Trans. Neural Netw. Learn. Syst."},{"key":"61_CR20","doi-asserted-by":"crossref","unstructured":"Schuller, B., Rigoll, G., Lang, M.: Hidden Markov model-based speech emotion recognition. In: IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), vol. 2, pp. II-1 (2003)","DOI":"10.1109\/ICME.2003.1220939"},{"key":"61_CR21","doi-asserted-by":"crossref","unstructured":"Schuller, B., Steidl, S., Batliner, A.: The interspeech 2009 emotion challenge. In: Tenth Annual Conference of the International Speech Communication Association (2009)","DOI":"10.21437\/Interspeech.2009-103"},{"issue":"11","key":"61_CR22","doi-asserted-by":"publisher","first-page":"1598","DOI":"10.1016\/j.cviu.2013.07.005","volume":"117","author":"C Soladi\u00e9","year":"2013","unstructured":"Soladi\u00e9, C., Stoiber, N., S\u00e9guier, R.: Invariant representation of facial expressions for blended expression recognition on unknown subjects. Comput. Vis. Image Underst. 117(11), 1598\u20131609 (2013)","journal-title":"Comput. Vis. Image Underst."},{"key":"61_CR23","doi-asserted-by":"crossref","unstructured":"Wang, S., Soladi\u00e9, C., S\u00e9guier, R.: OCAE: Organization-controlled autoencoder for unsupervised speech emotion analysis. In: 5th International Conference on Frontiers of Signal Processing (ICFSP), pp. 72\u201376. IEEE (2019)","DOI":"10.1109\/ICFSP48124.2019.8938073"},{"issue":"5","key":"61_CR24","doi-asserted-by":"publisher","first-page":"768","DOI":"10.1016\/j.specom.2010.08.013","volume":"53","author":"S Wu","year":"2011","unstructured":"Wu, S., Falk, T.H., Chan, W.Y.: Automatic speech emotion recognition using modulation spectral features. Speech Commun. 53(5), 768\u2013785 (2011)","journal-title":"Speech Commun."},{"key":"61_CR25","doi-asserted-by":"crossref","unstructured":"Zhao, S., Ding, G., Han, J., Gao, Y.: Personality-aware personalized emotion recognition from physiological signals. In: IJCAI, pp. 1660\u20131667 (2018)","DOI":"10.24963\/ijcai.2018\/230"}],"container-title":["Lecture Notes in Computer Science","Speech and Computer"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-60276-5_61","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,11,22]],"date-time":"2022-11-22T02:43:26Z","timestamp":1669085006000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-030-60276-5_61"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020]]},"ISBN":["9783030602758","9783030602765"],"references-count":25,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-60276-5_61","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2020]]},"assertion":[{"value":"29 September 2020","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"SPECOM","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Speech and Computer","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"St. Petersburg","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Russia","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2020","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"7 October 2020","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"9 October 2020","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"22","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"specom2020","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/specom.nw.ru\/2020\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Single-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"easychair","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"160","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"65","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"41% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"5","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Due to the Corona pandemic SPECOM 2020 was held as a virtual event","order":10,"name":"additional_info_on_review_process","label":"Additional Info on Review Process","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}