{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,9]],"date-time":"2026-01-09T13:14:58Z","timestamp":1767964498137,"version":"3.49.0"},"publisher-location":"Cham","reference-count":28,"publisher":"Springer International Publishing","isbn-type":[{"value":"9783030687793","type":"print"},{"value":"9783030687809","type":"electronic"}],"license":[{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021]]},"DOI":"10.1007\/978-3-030-68780-9_25","type":"book-chapter","created":{"date-parts":[[2021,2,24]],"date-time":"2021-02-24T17:04:13Z","timestamp":1614186253000},"page":"289-300","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":7,"title":["Multimodal Emotion Recognition Based on Speech and Physiological Signals Using Deep Neural Networks"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-5408-0777","authenticated-orcid":false,"given":"Ali","family":"Bakhshi","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7886-3653","authenticated-orcid":false,"given":"Stephan","family":"Chalup","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2021,2,25]]},"reference":[{"key":"25_CR1","doi-asserted-by":"crossref","unstructured":"Cho, K., et al.: Learning phrase representations using RNN encoder-decoder for statistical machine 
translation. In: EMNLP 2014: Conference on Empirical Methods in Natural Language Processing (2014)","DOI":"10.3115\/v1\/D14-1179"},{"key":"25_CR2","doi-asserted-by":"publisher","first-page":"35","DOI":"10.1016\/j.entcs.2019.04.009","volume":"343","author":"M Egger","year":"2019","unstructured":"Egger, M., Ley, M., Hanke, S.: Emotion recognition from physiological signal analysis: a review. Electron. Notes Theoret. Comput. Sci. 343, 35\u201355 (2019)","journal-title":"Electron. Notes Theoret. Comput. Sci."},{"issue":"3","key":"25_CR3","doi-asserted-by":"publisher","first-page":"572","DOI":"10.1016\/j.patcog.2010.09.020","volume":"44","author":"M El Ayadi","year":"2011","unstructured":"El Ayadi, M., Kamel, M.S., Karray, F.: Survey on speech emotion recognition: features, classification schemes, and databases. Pattern Recogn. 44(3), 572\u2013587 (2011)","journal-title":"Pattern Recogn."},{"key":"25_CR4","unstructured":"Goodfellow, I., et al.: Generative adversarial nets. In: Advances in Neural Information Processing Systems, pp. 2672\u20132680 (2014)"},{"key":"25_CR5","doi-asserted-by":"publisher","first-page":"76","DOI":"10.1016\/j.imavis.2016.11.020","volume":"65","author":"J Han","year":"2017","unstructured":"Han, J., Zhang, Z., Cummins, N., Ringeval, F., Schuller, B.: Strength modelling for real-world automatic continuous affect recognition from audiovisual signals. Image Vis. Comput. 65, 76\u201386 (2017)","journal-title":"Image Vis. Comput."},{"issue":"7","key":"25_CR6","doi-asserted-by":"publisher","first-page":"1527","DOI":"10.1162\/neco.2006.18.7.1527","volume":"18","author":"GE Hinton","year":"2006","unstructured":"Hinton, G.E., Osindero, S., Teh, Y.W.: A fast learning algorithm for deep belief nets. Neural Comput. 18(7), 1527\u20131554 (2006)","journal-title":"Neural Comput."},{"key":"25_CR7","unstructured":"Hinton, G.E., Zemel, R.S.: Autoencoders, minimum description length and Helmholtz free energy. 
In: Advances in Neural Information Processing Systems, pp. 3\u201310 (1994)"},{"issue":"8","key":"25_CR8","doi-asserted-by":"publisher","first-page":"1735","DOI":"10.1162\/neco.1997.9.8.1735","volume":"9","author":"S Hochreiter","year":"1997","unstructured":"Hochreiter, S., Schmidhuber, J.: Long short-term memory. Neural Comput. 9(8), 1735\u20131780 (1997)","journal-title":"Neural Comput."},{"key":"25_CR9","doi-asserted-by":"crossref","unstructured":"Huang, Z., et al.: Staircase regression in OA RVM, data selection and gender dependency in AVEC 2016. In: Proceedings of the 6th International Workshop on Audio\/Visual Emotion Challenge, pp. 19\u201326 (2016)","DOI":"10.1145\/2988257.2988265"},{"key":"25_CR10","unstructured":"Kingma, D.P., Ba, J.: Adam: a method for stochastic optimization. arXiv preprint arXiv:1412.6980 (2014)"},{"issue":"3","key":"25_CR11","doi-asserted-by":"publisher","first-page":"497","DOI":"10.1007\/s10772-018-09572-8","volume":"22","author":"S Lalitha","year":"2018","unstructured":"Lalitha, S., Tripathi, S., Gupta, D.: Enhanced speech emotion detection using deep neural networks. Int. J. Speech Technol. 22(3), 497\u2013510 (2018). https:\/\/doi.org\/10.1007\/s10772-018-09572-8","journal-title":"Int. J. Speech Technol."},{"key":"25_CR12","doi-asserted-by":"publisher","first-page":"255","DOI":"10.2307\/2532051","volume":"45","author":"I Lawrence","year":"1989","unstructured":"Lawrence, I., Lin, K.: A concordance correlation coefficient to evaluate reproducibility. Biometrics 45, 255\u2013268 (1989)","journal-title":"Biometrics"},{"key":"25_CR13","first-page":"143","volume":"19","author":"Y LeCun","year":"1989","unstructured":"LeCun, Y.: Generalization and network design strategies. Connect. Pers. 19, 143\u2013155 (1989)","journal-title":"Connect. 
Pers."},{"issue":"3","key":"25_CR14","doi-asserted-by":"publisher","first-page":"102185","DOI":"10.1016\/j.ipm.2019.102185","volume":"57","author":"C Li","year":"2020","unstructured":"Li, C., Bao, Z., Li, L., Zhao, Z.: Exploring temporal representations by leveraging attention-based bidirectional LSTM-RNNs for multi-modal emotion recognition. Inf. Process. Manage. 57(3), 102185 (2020)","journal-title":"Inf. Process. Manage."},{"key":"25_CR15","doi-asserted-by":"crossref","unstructured":"Matsuda, Y., Fedotov, D., Takahashi, Y., Arakawa, Y., Yasumoto, K., Minker, W.: EmoTour: multimodal emotion recognition using physiological and audio-visual features. In: Proceedings of the 2018 ACM International Joint Conference and 2018 International Symposium on Pervasive and Ubiquitous Computing and Wearable Computers, pp. 946\u2013951 (2018)","DOI":"10.1145\/3267305.3267687"},{"key":"25_CR16","doi-asserted-by":"crossref","unstructured":"Ranganathan, H., Chakraborty, S., Panchanathan, S.: Multimodal emotion recognition using deep learning architectures. In: 2016 IEEE Winter Conference on Applications of Computer Vision (WACV), pp. 1\u20139. IEEE (2016)","DOI":"10.1109\/WACV.2016.7477679"},{"key":"25_CR17","doi-asserted-by":"publisher","first-page":"22","DOI":"10.1016\/j.patrec.2014.11.007","volume":"66","author":"F Ringeval","year":"2015","unstructured":"Ringeval, F., et al.: Prediction of asynchronous dimensional emotion ratings from audiovisual and physiological data. Pattern Recogn. Lett. 66, 22\u201330 (2015)","journal-title":"Pattern Recogn. Lett."},{"key":"25_CR18","doi-asserted-by":"crossref","unstructured":"Ringeval, F., et al.: Av+ EC 2015: the first affect recognition challenge bridging across audio, video, and physiological data. In: Proceedings of the 5th International Workshop on Audio\/Visual Emotion Challenge, pp. 3\u20138. 
ACM (2015)","DOI":"10.1145\/2808196.2811642"},{"key":"25_CR19","doi-asserted-by":"crossref","unstructured":"Ringeval, F., Sonderegger, A., Sauer, J., Lalanne, D.: Introducing the RECOLA multimodal corpus of remote collaborative and affective interactions. In: 2013 10th IEEE International Conference and Workshops on Automatic Face and Gesture Recognition (FG), pp. 1\u20138. IEEE (2013)","DOI":"10.1109\/FG.2013.6553805"},{"issue":"7","key":"25_CR20","doi-asserted-by":"publisher","first-page":"2074","DOI":"10.3390\/s18072074","volume":"18","author":"L Shu","year":"2018","unstructured":"Shu, L., et al.: A review of emotion recognition using physiological signals. Sensors 18(7), 2074 (2018)","journal-title":"Sensors"},{"issue":"1","key":"25_CR21","first-page":"1929","volume":"15","author":"N Srivastava","year":"2014","unstructured":"Srivastava, N., Hinton, G., Krizhevsky, A., Sutskever, I., Salakhutdinov, R.: Dropout: a simple way to prevent neural networks from overfitting. J. Machine Learn. Res. 15(1), 1929\u20131958 (2014)","journal-title":"J. Machine Learn. Res."},{"key":"25_CR22","doi-asserted-by":"crossref","unstructured":"Trigeorgis, G., et al.: Adieu features? End-to-end speech emotion recognition using a deep convolutional recurrent network. In: 2016 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 5200\u20135204. IEEE (2016)","DOI":"10.1109\/ICASSP.2016.7472669"},{"issue":"8","key":"25_CR23","doi-asserted-by":"publisher","first-page":"1301","DOI":"10.1109\/JSTSP.2017.2764438","volume":"11","author":"P Tzirakis","year":"2017","unstructured":"Tzirakis, P., Trigeorgis, G., Nicolaou, M.A., Schuller, B.W., Zafeiriou, S.: End-to-end multimodal emotion recognition using deep neural networks. IEEE J. Sel. Top. Sig. Process. 11(8), 1301\u20131309 (2017)","journal-title":"IEEE J. Sel. Top. Sig. 
Process."},{"key":"25_CR24","doi-asserted-by":"crossref","unstructured":"Tzirakis, P., Zhang, J., Schuller, B.W.: End-to-end speech emotion recognition using deep neural networks. In: 2018 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 5089\u20135093. IEEE (2018)","DOI":"10.1109\/ICASSP.2018.8462677"},{"key":"25_CR25","doi-asserted-by":"crossref","unstructured":"Yang, Z., Hirschberg, J.: Predicting arousal and valence from waveforms and spectrograms using deep neural networks. In: INTERSPEECH, pp. 3092\u20133096 (2018)","DOI":"10.21437\/Interspeech.2018-2397"},{"key":"25_CR26","doi-asserted-by":"publisher","first-page":"93","DOI":"10.1016\/j.cmpb.2016.12.005","volume":"140","author":"Z Yin","year":"2017","unstructured":"Yin, Z., Zhao, M., Wang, Y., Yang, J., Zhang, J.: Recognition of emotions using multimodal physiological signals and an ensemble deep learning model. Comput. Methods Programs Biomed. 140, 93\u2013110 (2017)","journal-title":"Comput. Methods Programs Biomed."},{"issue":"6","key":"25_CR27","doi-asserted-by":"publisher","first-page":"1576","DOI":"10.1109\/TMM.2017.2766843","volume":"20","author":"S Zhang","year":"2017","unstructured":"Zhang, S., Zhang, S., Huang, T., Gao, W.: Speech emotion recognition using deep convolutional neural network and discriminant temporal pyramid matching. IEEE Trans. Multimedia 20(6), 1576\u20131590 (2017)","journal-title":"IEEE Trans. Multimedia"},{"key":"25_CR28","doi-asserted-by":"publisher","first-page":"312","DOI":"10.1016\/j.bspc.2018.08.035","volume":"47","author":"J Zhao","year":"2019","unstructured":"Zhao, J., Mao, X., Chen, L.: Speech emotion recognition using deep 1D & 2D CNN LSTM networks. Biomed. Signal Process. Control 47, 312\u2013323 (2019)","journal-title":"Biomed. Signal Process. Control"}],"container-title":["Lecture Notes in Computer Science","Pattern Recognition. 
ICPR International Workshops and Challenges"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-68780-9_25","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2021,2,24]],"date-time":"2021-02-24T17:36:22Z","timestamp":1614188182000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-030-68780-9_25"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021]]},"ISBN":["9783030687793","9783030687809"],"references-count":28,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-68780-9_25","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2021]]},"assertion":[{"value":"25 February 2021","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ICPR","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Pattern Recognition","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2021","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"10 January 2021","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"11 January 2021","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ICPR2020","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference 
Information"}},{"value":"http:\/\/www.icpr2020.it\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}