{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,21]],"date-time":"2026-02-21T09:18:27Z","timestamp":1771665507532,"version":"3.50.1"},"publisher-location":"Cham","reference-count":41,"publisher":"Springer International Publishing","isbn-type":[{"value":"9783030687892","type":"print"},{"value":"9783030687908","type":"electronic"}],"license":[{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021]]},"DOI":"10.1007\/978-3-030-68790-8_10","type":"book-chapter","created":{"date-parts":[[2021,2,22]],"date-time":"2021-02-22T13:13:24Z","timestamp":1613999604000},"page":"114-128","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":8,"title":["Combining Deep and Unsupervised Features for Multilingual Speech Emotion Recognition"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-8765-604X","authenticated-orcid":false,"given":"Vincenzo","family":"Scotti","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Federico","family":"Galati","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5344-5976","authenticated-orcid":false,"given":"Licia","family":"Sbattella","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2830-4247","authenticated-orcid":false,"given":"Roberto","family":"Tedesco","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2021,2,23]]},"reference":[{"key":"10_CR1","doi-asserted-by":"publisher","first-page":"56","DOI":"10.1016\/j.specom.2019.12.001","volume":"116","author":"MB Ak\u00e7ay","year":"2020","unstructured":"Ak\u00e7ay, M.B., O\u011fuz, K.: Speech emotion recognition: emotional models, databases, features, preprocessing methods, supporting modalities, and classifiers. Speech Commun. 116, 56\u201376 (2020)","journal-title":"Speech Commun."},{"key":"10_CR2","doi-asserted-by":"crossref","unstructured":"Atmaja, B.T., Shirai, K., Akagi, M.: Speech emotion recognition using speech feature and word embedding. In: 2019 Asia-Pacific Signal and Information Processing Association Annual Summit and Conference (APSIPA ASC), pp. 519\u2013523 (2019)","DOI":"10.1109\/APSIPAASC47483.2019.9023098"},{"key":"10_CR3","unstructured":"Bahdanau, D., Cho, K., Bengio, Y.: Neural machine translation by jointly learning to align and translate (2016)"},{"issue":"4","key":"10_CR4","doi-asserted-by":"publisher","first-page":"335","DOI":"10.1007\/s10579-008-9076-6","volume":"42","author":"C Busso","year":"2008","unstructured":"Busso, C., et al.: IEMOCAP: interactive emotional dyadic motion capture database. Lang. Resour. Eval. 42(4), 335 (2008)","journal-title":"Lang. Resour. Eval."},{"issue":"5","key":"10_CR5","doi-asserted-by":"publisher","first-page":"312","DOI":"10.3109\/15622975.2015.1012228","volume":"16","author":"T Chaspari","year":"2015","unstructured":"Chaspari, T., Soldatos, C., Maragos, P.: The development of the Athens emotional states inventory (AESI): collection, validation and automatic processing of emotionally loaded sentences. World J. Biol. Psychiatry 16(5), 312\u2013322 (2015)","journal-title":"World J. Biol. Psychiatry"},{"key":"10_CR6","unstructured":"Chernykh, V., Sterling, G., Prihodko, P.: Emotion recognition from speech with recurrent neural networks. CoRR abs\/1701.08071 (2017)"},{"key":"10_CR7","unstructured":"Conneau, A., Lample, G.: Cross-lingual language model pretraining. In: Advances in Neural Information Processing Systems, pp. 7059\u20137069 (2019)"},{"key":"10_CR8","unstructured":"Conneau, A., Lample, G., Ranzato, M., Denoyer, L., J\u00e9gou, H.: Word translation without parallel data (2018)"},{"key":"10_CR9","doi-asserted-by":"crossref","unstructured":"Deng, J., Dong, W., Socher, R., Li, L.J., Li, K., Fei-Fei, L.: ImageNet: a large-scale hierarchical image database. In: CVPR09 (2009)","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"10_CR10","doi-asserted-by":"publisher","unstructured":"Devlin, J., Chang, M.W., Lee, K., Toutanova, K.: BERT: pre-training of deep bidirectional transformers for language understanding. In: Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long and Short Papers), Minneapolis, Minnesota, pp. 4171\u20134186. Association for Computational Linguistics, June 2019. https:\/\/doi.org\/10.18653\/v1\/N19-1423. https:\/\/www.aclweb.org\/anthology\/N19-1423","DOI":"10.18653\/v1\/N19-1423"},{"key":"10_CR11","doi-asserted-by":"publisher","first-page":"169","DOI":"10.1080\/02699939208411068","volume":"6","author":"P Ekman","year":"1992","unstructured":"Ekman, P.: An argument for basic emotions. Cogn. Emotion 6, 169\u2013200 (1992)","journal-title":"Cogn. Emotion"},{"issue":"2","key":"10_CR12","doi-asserted-by":"publisher","first-page":"179","DOI":"10.1207\/s15516709cog1402_1","volume":"14","author":"JL Elman","year":"1990","unstructured":"Elman, J.L.: Finding structure in time. Cogn. Sci. 14(2), 179\u2013211 (1990)","journal-title":"Cogn. Sci."},{"key":"10_CR13","volume-title":"Human Facial Expression: An Evolutionary View","author":"AJ Fridlund","year":"1994","unstructured":"Fridlund, A.J.: Human Facial Expression: An Evolutionary View. Academic Press, San Diego (1994)"},{"key":"10_CR14","doi-asserted-by":"crossref","unstructured":"Gemmeke, J.F., et al.: Audio set: an ontology and human-labeled dataset for audio events. In: Proceedings of IEEE ICASSP 2017, New Orleans, LA (2017)","DOI":"10.1109\/ICASSP.2017.7952261"},{"key":"10_CR15","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition (2015)","DOI":"10.1109\/CVPR.2016.90"},{"key":"10_CR16","doi-asserted-by":"publisher","unstructured":"Hershey, S., et al.: CNN architectures for large-scale audio classification. In: 2017 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 131\u2013135, March 2017. https:\/\/doi.org\/10.1109\/ICASSP.2017.7952132","DOI":"10.1109\/ICASSP.2017.7952132"},{"key":"10_CR17","doi-asserted-by":"crossref","unstructured":"Hochreiter, S., Schmidhuber, J.: Long short-term memory. Neural Comput. 9, 1735\u201380 (1997)","DOI":"10.1162\/neco.1997.9.8.1735"},{"key":"10_CR18","unstructured":"Ioffe, S., Szegedy, C.: Batch normalization: accelerating deep network training by reducing internal covariate shift (2015)"},{"key":"10_CR19","unstructured":"Jurafsky, D., Martin, J.H.: Speech and Language Processing, chap. 6: Vector Semantics and Embeddings. Prentice-Hall, 3rd edn., August 2020, draft of August 2020. https:\/\/web.stanford.edu\/~jurafsky\/slp3\/"},{"key":"10_CR20","unstructured":"Lample, G., Conneau, A., Denoyer, L., Ranzato, M.: Unsupervised machine translation using monolingual corpora only (2018)"},{"key":"10_CR21","doi-asserted-by":"crossref","unstructured":"Lazarus, R.S.: Emotion and adaptation. Oxford University Press on Demand 1, 35\u201354, May 1991","DOI":"10.1093\/oso\/9780195069945.001.0001"},{"key":"10_CR22","unstructured":"Lin, M., Chen, Q., Yan, S.: Network in network (2014)"},{"key":"10_CR23","unstructured":"Liu, Q., Kusner, M.J., Blunsom, P.: A survey on contextual embeddings (2020)"},{"key":"10_CR24","doi-asserted-by":"publisher","first-page":"124","DOI":"10.1016\/j.knosys.2018.07.041","volume":"161","author":"N Majumder","year":"2018","unstructured":"Majumder, N., Hazarika, D., Gelbukh, A., Cambria, E., Poria, S.: Multimodal sentiment analysis using hierarchical fusion with context modeling. Knowl. Based Syst. 161, 124\u2013133 (2018)","journal-title":"Knowl. Based Syst."},{"key":"10_CR25","first-page":"35","volume":"1","author":"AS Manstead","year":"1992","unstructured":"Manstead, A.S., Wagner, H.L.: Arousal, cognition and emotion: an appraisal of two-factor theory. Cogn. Emotion 1, 35\u201354 (1992)","journal-title":"Cogn. Emotion"},{"key":"10_CR26","unstructured":"Mikolov, T., Grave, E., Bojanowski, P., Puhrsch, C., Joulin, A.: Advances in pre-training distributed word representations. In: Proceedings of the International Conference on Language Resources and Evaluation (LREC 2018) (2018)"},{"key":"10_CR27","doi-asserted-by":"crossref","unstructured":"Mirsamadi, S., Barsoum, E., Zhang, C.: Automatic speech emotion recognition using recurrent neural networks with local attention. In: 2017 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 2227\u20132231, March 2017","DOI":"10.1109\/ICASSP.2017.7952552"},{"key":"10_CR28","doi-asserted-by":"crossref","unstructured":"Montero, J.M., Guti\u00e9rrez-Arriola, J., Col\u00e1s, J., Mac\u00edas-Guarasa, J., Enr\u00edquez, E., Pardo, J.M.: Development of an emotional speech synthesiser in Spanish. In: Sixth European Conference on Speech Communication and Technology (1999)","DOI":"10.21437\/Eurospeech.1999-466"},{"key":"10_CR29","doi-asserted-by":"crossref","unstructured":"Parada-Cabaleiro, E., Costantini, G., Batliner, A., Baird, A., Schuller, B.W.: Categorical vs dimensional perception of Italian emotional speech. In: INTERSPEECH, pp. 3638\u20133642 (2018)","DOI":"10.21437\/Interspeech.2018-47"},{"key":"10_CR30","unstructured":"Pennington, J., Socher, R., Manning, C.D.: Glove: global vectors for word representation. In: Empirical Methods in Natural Language Processing (EMNLP), pp. 1532\u20131543 (2014). http:\/\/www.aclweb.org\/anthology\/D14-1162"},{"issue":"4","key":"10_CR31","doi-asserted-by":"publisher","first-page":"344","DOI":"10.1511\/2001.4.344","volume":"89","author":"R Plutchik","year":"2001","unstructured":"Plutchik, R.: The nature of emotions: human emotions have deep evolutionary roots, a fact that may explain their complexity and provide tools for clinical practice. Am. Sci. 89(4), 344\u2013350 (2001)","journal-title":"Am. Sci."},{"issue":"11","key":"10_CR32","doi-asserted-by":"publisher","first-page":"2673","DOI":"10.1109\/78.650093","volume":"45","author":"M Schuster","year":"1997","unstructured":"Schuster, M., Paliwal, K.K.: Bidirectional recurrent neural networks. Trans. Sig. Proc. 45(11), 2673\u20132681 (1997)","journal-title":"Trans. Sig. Proc."},{"key":"10_CR33","unstructured":"Simonyan, K., Zisserman, A.: Very deep convolutional networks for large-scale image recognition (2015)"},{"key":"10_CR34","unstructured":"Tieleman, T., Hinton, G.: Lecture 6.5-rmsprop: divide the gradient by a running average of its recent magnitude. COURSERA Neural Netw. Machine Learn. 4(2), 26\u201331 (2012)"},{"key":"10_CR35","doi-asserted-by":"crossref","unstructured":"Tompson, J., Goroshin, R., Jain, A., LeCun, Y., Bregler, C.: Efficient object localization using convolutional networks (2015)","DOI":"10.1109\/CVPR.2015.7298664"},{"key":"10_CR36","unstructured":"Tripathi, S., Tripathi, S., Beigi, H.: Multi-modal emotion recognition on IEMOCAP dataset using deep learning (2019)"},{"key":"10_CR37","unstructured":"Vaswani, A., et al.: Attention is all you need. In: Advances in Neural Information Processing Systems, pp. 5998\u20136008 (2017)"},{"key":"10_CR38","unstructured":"Ventura, D., Warnick, S.: A theoretical foundation for inductive transfer. Brigham Young University, College of Physical and Mathematical Sciences (2007)"},{"key":"10_CR39","unstructured":"Wang, A., et al.: SuperGLUE: a stickier benchmark for general-purpose language understanding systems (2020)"},{"key":"10_CR40","doi-asserted-by":"crossref","unstructured":"Wang, A., Singh, A., Michael, J., Hill, F., Levy, O., Bowman, S.R.: Glue: a multi-task benchmark and analysis platform for natural language understanding (2019)","DOI":"10.18653\/v1\/W18-5446"},{"key":"10_CR41","doi-asserted-by":"crossref","unstructured":"Xie, W., Nagrani, A., Chung, J.S., Zisserman, A.: Utterance-level aggregation for speaker recognition in the wild. In: ICASSP 2019\u20132019 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 5791\u20135795. IEEE (2019)","DOI":"10.1109\/ICASSP.2019.8683120"}],"container-title":["Lecture Notes in Computer Science","Pattern Recognition. ICPR International Workshops and Challenges"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-68790-8_10","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,8,24]],"date-time":"2024-08-24T17:08:22Z","timestamp":1724519302000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-030-68790-8_10"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021]]},"ISBN":["9783030687892","9783030687908"],"references-count":41,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-68790-8_10","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2021]]},"assertion":[{"value":"23 February 2021","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ICPR","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Pattern Recognition","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2021","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"10 January 2021","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"11 January 2021","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ICPR2020","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/www.icpr2020.it\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}