{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,14]],"date-time":"2026-06-14T23:17:30Z","timestamp":1781479050712,"version":"3.54.1"},"reference-count":43,"publisher":"Springer Science and Business Media LLC","issue":"13","license":[{"start":{"date-parts":[[2023,10,2]],"date-time":"2023-10-02T00:00:00Z","timestamp":1696204800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,10,2]],"date-time":"2023-10-02T00:00:00Z","timestamp":1696204800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Multimed Tools Appl"],"DOI":"10.1007\/s11042-023-16849-x","type":"journal-article","created":{"date-parts":[[2023,10,2]],"date-time":"2023-10-02T04:01:35Z","timestamp":1696219295000},"page":"37603-37620","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":29,"title":["Speech emotion recognition and classification using hybrid deep CNN and BiLSTM model"],"prefix":"10.1007","volume":"83","author":[{"given":"Swami","family":"Mishra","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Nehal","family":"Bhatnagar","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2471-6375","authenticated-orcid":false,"given":"Prakasam","family":"P","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Sureshkumar","family":"T. R","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"297","published-online":{"date-parts":[[2023,10,2]]},"reference":[{"key":"16849_CR1","doi-asserted-by":"publisher","first-page":"8669","DOI":"10.1007\/s00521-020-05616-w","volume":"33","author":"J Chen","year":"2021","unstructured":"Chen J, Wang C, Wang K et al (2021) HEU Emotion: a large-scale database for multimodal emotion recognition in the wild. Neural Comput Appl 33:8669\u20138685. https:\/\/doi.org\/10.1007\/s00521-020-05616-w","journal-title":"Neural Comput Appl"},{"key":"16849_CR2","doi-asserted-by":"publisher","first-page":"3705","DOI":"10.1007\/s11042-017-5539-3","volume":"78","author":"Y Zeng","year":"2019","unstructured":"Zeng Y, Mao H, Peng D (2019) Spectrogram-based multi-task audio classification. Multimed Tools Appl 78:3705\u20133722. https:\/\/doi.org\/10.1007\/s11042-017-5539-3","journal-title":"Multimed Tools Appl"},{"key":"16849_CR3","doi-asserted-by":"publisher","first-page":"23745","DOI":"10.1007\/s11042-020-09874-7","volume":"80","author":"R Jahangir","year":"2021","unstructured":"Jahangir R, Teh YW, Hanif F et al (2021) Deep learning approaches for speech emotion recognition: state of the art and research challenges. Multimed Tools Appl 80:23745\u201323812. https:\/\/doi.org\/10.1007\/s11042-020-09874-7","journal-title":"Multimed Tools Appl"},{"key":"16849_CR4","doi-asserted-by":"publisher","first-page":"11253","DOI":"10.1007\/s00521-019-04564-4","volume":"32","author":"S Jaiswal","year":"2020","unstructured":"Jaiswal S, Nandi GC (2020) Robust real-time emotion detection system using CNN architecture. Neural Comput Appl 32:11253\u201311262. https:\/\/doi.org\/10.1007\/s00521-019-04564-4","journal-title":"Neural Comput Appl"},{"key":"16849_CR5","doi-asserted-by":"publisher","unstructured":"Likitha MS, Gupta SRR, Hasitha K, Raju AU (2017) Speech-based human emotion recognition using MFCC. In: 2017 international conference on wireless communications, signal processing and networking (WiSPNET), pp 2257\u20132260. https:\/\/doi.org\/10.1109\/WiSPNET.2017.8300161","DOI":"10.1109\/WiSPNET.2017.8300161"},{"key":"16849_CR6","doi-asserted-by":"publisher","unstructured":"Atmaja BT, Sasou A, Akagi M (2022) Survey on bimodal speech emotion recognition from acoustic and linguistic information fusion. Speech Commun 140:11\u201328. https:\/\/doi.org\/10.1016\/j.specom.2022.03.002","DOI":"10.1016\/j.specom.2022.03.002"},{"key":"16849_CR7","doi-asserted-by":"publisher","first-page":"9602429","DOI":"10.1155\/2022\/9602429","volume":"2022","author":"A Monisha","year":"2022","unstructured":"Monisha A, Tamanna S, Sadia S (2022) A review of the advancement in speech emotion recognition for Indo-Aryan and Dravidian Languages. Adv Hum-Comput Interact 2022:9602429. https:\/\/doi.org\/10.1155\/2022\/9602429","journal-title":"Adv Hum-Comput Interact"},{"key":"16849_CR8","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1016\/j.neucom.2023.01.002","volume":"528","author":"JD Lope","year":"2023","unstructured":"Lope JD, Gra\u00f1a M (2023) An ongoing review of speech emotion recognition. Neurocomputing 528:1\u201311. https:\/\/doi.org\/10.1016\/j.neucom.2023.01.002","journal-title":"Neurocomputing"},{"key":"16849_CR9","doi-asserted-by":"publisher","unstructured":"Luvembe AM, Li W, Li S, Liu F, Xu G (2023) Dual emotion based fake news detection: a deep attention-weight update approach. Inf Process Manag 60(4):103354. https:\/\/doi.org\/10.1016\/j.ipm.2023.103354","DOI":"10.1016\/j.ipm.2023.103354"},{"key":"16849_CR10","doi-asserted-by":"publisher","first-page":"18503","DOI":"10.1007\/s11042-022-12764-9","volume":"81","author":"A Mohapatra","year":"2022","unstructured":"Mohapatra A, Thota N, Prakasam P (2022) Fake news detection and classification using hybrid BiLSTM and self-attention model. Multimed Tools Appl 81:18503\u201318519. https:\/\/doi.org\/10.1007\/s11042-022-12764-9","journal-title":"Multimed Tools Appl"},{"key":"16849_CR11","doi-asserted-by":"publisher","unstructured":"Kumbhar HS, Bhandari SU (2019) Speech emotion recognition using MFCC features and LSTM network. In: 2019 5th international conference on computing, communication, control and automation (ICCUBEA), pp 1\u20133. https:\/\/doi.org\/10.1109\/ICCUBEA47591.2019.9129067","DOI":"10.1109\/ICCUBEA47591.2019.9129067"},{"key":"16849_CR12","doi-asserted-by":"publisher","DOI":"10.1007\/s40747-020-00250-4","author":"W Zehra","year":"2021","unstructured":"Zehra W, Javed AR, Jalil Z (2021) Cross corpus multi-lingual speech emotion recognition using ensemble learning. Complex Intell Syst. https:\/\/doi.org\/10.1007\/s40747-020-00250-4","journal-title":"Complex Intell Syst"},{"key":"16849_CR13","doi-asserted-by":"publisher","first-page":"1249","DOI":"10.3390\/s21041249","volume":"21","author":"BJ Abbaschian","year":"2021","unstructured":"Abbaschian BJ, Sierra-Sosa D, Elmaghraby A (2021) Deep learning techniques for speech emotion recognition, from databases to models. Sensors 21:1249. https:\/\/doi.org\/10.3390\/s21041249","journal-title":"Sensors"},{"key":"16849_CR14","doi-asserted-by":"publisher","first-page":"205","DOI":"10.3390\/app10010205","volume":"10","author":"C Zheng","year":"2020","unstructured":"Zheng C, Wang C, Jia N (2020) An ensemble model for multi-level speech emotion recognition. Appl Sci 10:205. https:\/\/doi.org\/10.3390\/app10010205","journal-title":"Appl Sci"},{"key":"16849_CR15","doi-asserted-by":"publisher","first-page":"125868","DOI":"10.1109\/ACCESS.2019.2938007","volume":"7","author":"H Meng","year":"2019","unstructured":"Meng H, Yan T, Yuan F, Wei H (2019) Speech emotion recognition from 3D Log-Mel spectrograms with deep learning network. IEEE Access 7:125868\u2013125881. https:\/\/doi.org\/10.1109\/ACCESS.2019.2938007","journal-title":"IEEE Access"},{"key":"16849_CR16","doi-asserted-by":"publisher","first-page":"73","DOI":"10.1016\/j.specom.2020.12.009","volume":"127","author":"S Zhang","year":"2021","unstructured":"Zhang S, Tao X, Chuang Y, Zhao X (2021) Learning deep multimodal affective features for spontaneous speech emotion recognition. Speech Commun 127:73\u201381. https:\/\/doi.org\/10.1016\/j.specom.2020.12.009","journal-title":"Speech Commun"},{"key":"16849_CR17","doi-asserted-by":"publisher","unstructured":"Mustaqeem, Sajjad M, Kwon S (2020) Clustering-based speech emotion recognition by incorporating learned features and deep BiLSTM. IEEE Access 8:79861-79875. https:\/\/doi.org\/10.1109\/ACCESS.2020.2990405","DOI":"10.1109\/ACCESS.2020.2990405"},{"key":"16849_CR18","doi-asserted-by":"publisher","first-page":"312","DOI":"10.1016\/j.bspc.2018.08.035","volume":"47","author":"J Zhao","year":"2019","unstructured":"Zhao J, Mao X, Chen L (2019) Speech emotion recognition using deep 1D & 2D CNN LSTM networks. Biomed Signal Process Control 47:312\u2013323. https:\/\/doi.org\/10.1016\/j.bspc.2018.08.035","journal-title":"Biomed Signal Process Control"},{"key":"16849_CR19","doi-asserted-by":"publisher","first-page":"2578","DOI":"10.21437\/Interspeech.2019-2822","volume":"2019","author":"L Tarantino","year":"2019","unstructured":"Tarantino L, Garner PN, Lazaridis A (2019) Self-attention for speech emotion recognition. Proc Interspeech 2019:2578\u20132582. https:\/\/doi.org\/10.21437\/Interspeech.2019-2822","journal-title":"Proc Interspeech"},{"key":"16849_CR20","doi-asserted-by":"publisher","unstructured":"Yoon S, Byun S, Jung K (2018) Multimodal speech emotion recognition using audio and text. 2018 IEEE spoken language technology workshop (SLT), 112\u2013118. https:\/\/doi.org\/10.1109\/SLT.2018.8639583","DOI":"10.1109\/SLT.2018.8639583"},{"issue":"5","key":"16849_CR21","doi-asserted-by":"publisher","first-page":"90","DOI":"10.1145\/3129340","volume":"61","author":"BW Schuller","year":"2018","unstructured":"Schuller BW (2018) Speech emotion recognition: two decades in a nutshell, benchmarks, and ongoing trends. Commun ACM 61(5):90\u201399. https:\/\/doi.org\/10.1145\/3129340","journal-title":"Commun ACM"},{"key":"16849_CR22","doi-asserted-by":"publisher","unstructured":"Tzirakis P, Zhang J, Schuller BW (2018) End-to-end speech emotion recognition using deep neural networks. In: 2018 IEEE international conference on acoustics, speech and signal processing (ICASSP), pp 5089\u20135093. https:\/\/doi.org\/10.1109\/ICASSP.2018.8462677","DOI":"10.1109\/ICASSP.2018.8462677"},{"key":"16849_CR23","doi-asserted-by":"publisher","unstructured":"Mirsamadi S, Barsoum E, Zhang C (2017)Automatic speech emotion recognition using recurrent neural networks with local attention. In: 2017 IEEE international conference on acoustics, speech and signal processing (ICASSP), pp 2227\u20132231. https:\/\/doi.org\/10.1109\/ICASSP.2017.7952552","DOI":"10.1109\/ICASSP.2017.7952552"},{"key":"16849_CR24","doi-asserted-by":"publisher","unstructured":"Abdelwahab M, Busso C (2018) Study of dense network approaches for speech emotion recognition. In: 2018 IEEE international conference on acoustics, speech and signal processing (ICASSP), pp 5084\u20135088. https:\/\/doi.org\/10.1109\/ICASSP.2018.8461866","DOI":"10.1109\/ICASSP.2018.8461866"},{"key":"16849_CR25","doi-asserted-by":"publisher","unstructured":"Neumann M, Vu NT (2017) Attentive convolutional neural network based speech emotion recognition: a study on the impact of input features, signal length, and acted speech. In: Proceedings of the Annual Conference of the International Speech Communication Association (INTERSPEECH), pp 1263\u20131267. https:\/\/doi.org\/10.21437\/Interspeech.2017-917","DOI":"10.21437\/Interspeech.2017-917"},{"key":"16849_CR26","doi-asserted-by":"publisher","unstructured":"Har\u00e1r P, Burget R, Dutta MK (2017) Speech emotion recognition with deep learning. In: 2017 4th international conference on signal processing and integrated networks (SPIN), pp 137\u2013140. https:\/\/doi.org\/10.1109\/SPIN.2017.8049931","DOI":"10.1109\/SPIN.2017.8049931"},{"key":"16849_CR27","doi-asserted-by":"publisher","unstructured":"Lotfidereshgi R, Gournay P (2017) Biologically inspired speech emotion recognition. In: 2017 IEEE international conference on acoustics, speech and signal processing (ICASSP), pp 5135\u20135139. https:\/\/doi.org\/10.1109\/ICASSP.2017.7953135","DOI":"10.1109\/ICASSP.2017.7953135"},{"key":"16849_CR28","doi-asserted-by":"publisher","unstructured":"Tzinis E, Potamianos A (2017) Segment-based speech emotion recognition using recurrent neural networks. In: 2017 seventh international conference on affective computing and intelligent interaction (ACII), pp 190\u2013195. https:\/\/doi.org\/10.1109\/ACII.2017.8273599","DOI":"10.1109\/ACII.2017.8273599"},{"key":"16849_CR29","doi-asserted-by":"publisher","unstructured":"Shegokar P, Sircar P (2016) Continuous wavelet transform based speech emotion recognition. In: 10th international conference on signal processing and communication systems (ICSPCS), pp 1\u20138. https:\/\/doi.org\/10.1109\/ICSPCS.2016.7843306","DOI":"10.1109\/ICSPCS.2016.7843306"},{"key":"16849_CR30","doi-asserted-by":"publisher","first-page":"32917","DOI":"10.1007\/s11042-020-09693-w","volume":"79","author":"R Dangol","year":"2020","unstructured":"Dangol R, Alsadoon A, Prasad PWC et al (2020) Speech emotion recognition using convolutional neural network and long-short TermMemory. Multimed Tools Appl 79:32917\u201332934. https:\/\/doi.org\/10.1007\/s11042-020-09693-w","journal-title":"Multimed Tools Appl"},{"key":"16849_CR31","doi-asserted-by":"publisher","first-page":"3111","DOI":"10.1007\/s13369-019-04293-9","volume":"45","author":"R Singh","year":"2020","unstructured":"Singh R, Puri H, Aggarwal N, Gupta V (2020) An efficient language-independent acoustic emotion classification system. Arab J Sci Eng 45:3111\u20133121","journal-title":"Arab J Sci Eng"},{"issue":"5","key":"16849_CR32","doi-asserted-by":"publisher","first-page":"585","DOI":"10.1109\/LSP.2016.2537926","volume":"23","author":"Y Zong","year":"2016","unstructured":"Zong Y, Zheng W, Zhang T, Huang X (2016) Cross-corpus speech emotion recognition based on domain-adaptive least-squares regression. IEEE Signal Process Lett 23(5):585\u2013589. https:\/\/doi.org\/10.1109\/LSP.2016.2537926","journal-title":"IEEE Signal Process Lett"},{"key":"16849_CR33","doi-asserted-by":"publisher","first-page":"3705","DOI":"10.1007\/s11042-017-5539-3","volume":"78","author":"Y Zeng","year":"2017","unstructured":"Zeng Y, Mao H, Peng D, Yi Z (2017) Spectrogram based multi-task audio classification. Multimed Tools Appl 78:3705\u20133722","journal-title":"Multimed Tools Appl"},{"key":"16849_CR34","doi-asserted-by":"publisher","unstructured":"Yadav A, Vishwakarma DK (2020) A comparative study on bio-inspired algorithms for sentiment analysis. Clust Comput 23:2969\u20132989. https:\/\/doi.org\/10.1007\/s10586-020-03062-w","DOI":"10.1007\/s10586-020-03062-w"},{"key":"16849_CR35","doi-asserted-by":"publisher","unstructured":"Mohan BJ, Ramesh Babu N (2014) Speech Recognition using MFCC and DTW. In: International conference on advances in electrical engineering (ICAEE), pp 1\u20134. https:\/\/doi.org\/10.1109\/ICAEE.2014.6838564","DOI":"10.1109\/ICAEE.2014.6838564"},{"key":"16849_CR36","doi-asserted-by":"publisher","first-page":"3688","DOI":"10.21437\/Interspeech.2018-1811","volume":"2018","author":"P Yenigalla","year":"2018","unstructured":"Yenigalla P, Kumar A, Tripathi S, Singh C, Kar S, Vepa J (2018) Speech emotion recognition using spectrogram & phoneme embedding. Proc Interspeech 2018:3688\u20133692. https:\/\/doi.org\/10.21437\/Interspeech.2018-1811","journal-title":"Proc Interspeech"},{"key":"16849_CR37","doi-asserted-by":"publisher","unstructured":"Anvarjon T, Mustaqeem, Kwon S (2020) Deep-Net: a lightweight CNN-based speech emotion recognition system using deep frequency features. Sensors 20:5212. https:\/\/doi.org\/10.3390\/s20185212","DOI":"10.3390\/s20185212"},{"key":"16849_CR38","doi-asserted-by":"publisher","unstructured":"Lech M, Stolar M, Best C, Bolia R (2020) Real-time speech emotion recognition using a pre-trained image classification network: effects of bandwidth reduction and companding. Front Comput Sci 2(14). https:\/\/doi.org\/10.3389\/fcomp.2020.00014","DOI":"10.3389\/fcomp.2020.00014"},{"key":"16849_CR39","doi-asserted-by":"publisher","unstructured":"Yadav A, Vishwakarma DK (2020) A Multi-lingual Framework of CNN and Bi-LSTM for Emotion Classification. In: 2020 11th international conference on computing, communication and networking technologies (ICCCNT), pp 1\u20136. https:\/\/doi.org\/10.1109\/ICCCNT49239.2020.9225614","DOI":"10.1109\/ICCCNT49239.2020.9225614"},{"key":"16849_CR40","doi-asserted-by":"publisher","unstructured":"Singh J, Saheer LB, Faust O (2023) Speech emotion recognition using attention model. Int J Environ Res Public Health 20(6):5140. https:\/\/doi.org\/10.3390\/ijerph20065140","DOI":"10.3390\/ijerph20065140"},{"key":"16849_CR41","doi-asserted-by":"publisher","first-page":"4237","DOI":"10.1007\/s40747-022-00713-w","volume":"8","author":"M Swain","year":"2022","unstructured":"Swain M, Maji B, Kabisatpathy P et al (2022) A DCRNN-based ensemble classifier for speech emotion recognition in Odia language. Complex Intell Syst 8:4237\u20134249. https:\/\/doi.org\/10.1007\/s40747-022-00713-w","journal-title":"Complex Intell Syst"},{"key":"16849_CR42","doi-asserted-by":"publisher","unstructured":"Sun C, Li H, Ma L (2023) Speech emotion recognition based on improved masking EMD and convolutional recurrent neural network. Front Psychol 13:2022. https:\/\/doi.org\/10.3389\/fpsyg.2022.1075624","DOI":"10.3389\/fpsyg.2022.1075624"},{"key":"16849_CR43","doi-asserted-by":"publisher","unstructured":"Ullah S, Sahib QA, Faizullah, Ullah S, Haq IU, Ullah I (2022) Speech emotion recognition using deep neural networks. In: Proceedings of the IEEE international conference on IT and industrial technologies (ICIT), pp 01\u201306. https:\/\/doi.org\/10.1109\/ICIT56493.2022.9989197","DOI":"10.1109\/ICIT56493.2022.9989197"}],"container-title":["Multimedia Tools and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11042-023-16849-x.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11042-023-16849-x\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11042-023-16849-x.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,4,3]],"date-time":"2024-04-03T10:17:35Z","timestamp":1712139455000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11042-023-16849-x"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,10,2]]},"references-count":43,"journal-issue":{"issue":"13","published-online":{"date-parts":[[2024,4]]}},"alternative-id":["16849"],"URL":"https:\/\/doi.org\/10.1007\/s11042-023-16849-x","relation":{},"ISSN":["1573-7721"],"issn-type":[{"value":"1573-7721","type":"electronic"}],"subject":[],"published":{"date-parts":[[2023,10,2]]},"assertion":[{"value":"9 January 2022","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"16 August 2023","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"4 September 2023","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"2 October 2023","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"We hereby declare that there is no conflict of interest in this research work\/paper.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflicts of interest"}}]}}