{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,15]],"date-time":"2026-04-15T02:14:58Z","timestamp":1776219298426,"version":"3.50.1"},"reference-count":49,"publisher":"Springer Science and Business Media LLC","issue":"31","license":[{"start":{"date-parts":[[2024,2,19]],"date-time":"2024-02-19T00:00:00Z","timestamp":1708300800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,2,19]],"date-time":"2024-02-19T00:00:00Z","timestamp":1708300800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Multimed Tools Appl"],"DOI":"10.1007\/s11042-024-18316-7","type":"journal-article","created":{"date-parts":[[2024,2,19]],"date-time":"2024-02-19T08:02:40Z","timestamp":1708329760000},"page":"75557-75584","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":11,"title":["Speech emotion recognition using feature fusion: a hybrid approach to deep learning"],"prefix":"10.1007","volume":"83","author":[{"given":"Waleed Akram","family":"Khan","sequence":"first","affiliation":[]},{"given":"Hamad","family":"ul Qudous","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0009-0004-4267-0253","authenticated-orcid":false,"given":"Asma Ahmad","family":"Farhan","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,2,19]]},"reference":[{"key":"18316_CR1","unstructured":"Mental health in the workplace. https:\/\/www.who.int\/teams\/mental-health-and-substance-use\/promotion-prevention\/mental-health-in-the-workplace. Accessed 25 Mar 2023"},{"key":"18316_CR2","doi-asserted-by":"publisher","first-page":"519","DOI":"10.1016\/j.jad.2018.08.073","volume":"241","author":"Y Lee","year":"2018","unstructured":"Lee Y, Ragguett RM, Mansur RB, Boutilier JJ, Rosenblat JD, Trevizol A, Brietzke E, Lin K, Pan Z, Subramaniapillai M et al (2018) Applications of machine learning algorithms to predict therapeutic outcomes in depression: a meta-analysis and systematic review. J Affect Disord 241:519\u2013532","journal-title":"J Affect Disord"},{"key":"18316_CR3","doi-asserted-by":"crossref","unstructured":"Boumans R, van de Sande Y, Thill S, Bosse T (2022) Voice-enabled intelligent virtual agents for people with amnesia: systematic review. JMIR Aging 5(2):e32473","DOI":"10.2196\/32473"},{"key":"18316_CR4","first-page":"119","volume-title":"Enabling speech emotional intelligence as a service in homecare platforms","author":"P Stavrianos","year":"2022","unstructured":"Stavrianos P, Pavlopoulos A, Maglogiannis I (2022) Enabling speech emotional intelligence as a service in homecare platforms. A Compendium of Critical Factors for Success, Pervasive Healthcare, pp 119\u2013144"},{"key":"18316_CR5","doi-asserted-by":"publisher","first-page":"79861","DOI":"10.1109\/ACCESS.2020.2990405","volume":"8","author":"M Sajjad","year":"2020","unstructured":"Sajjad M, Kwon S et al (2020) Clustering-based speech emotion recognition by incorporating learned features and deep BiLSTM. IEEE Access 8:79861\u201379875","journal-title":"IEEE Access"},{"issue":"3","key":"18316_CR6","doi-asserted-by":"publisher","first-page":"832","DOI":"10.3390\/make1030048","volume":"1","author":"M Rhanoui","year":"2019","unstructured":"Rhanoui M, Mikram M, Yousfi S, Barzali S (2019) A CNN-BiLSTM model for document-level sentiment analysis. Mach learn knowl 1(3):832\u2013847","journal-title":"Mach learn knowl"},{"issue":"1","key":"18316_CR7","first-page":"183","volume":"20","author":"S Kwon","year":"2020","unstructured":"Kwon S et al (2020) A CNN-assisted enhanced audio signal processing for speech emotion recognition. Sensors 20(1):183","journal-title":"Sensors"},{"key":"18316_CR8","doi-asserted-by":"crossref","unstructured":"Zhang W, Zhao D, Chai Z, Yang LT, Liu X, Gong F, Yang S (2017) Deep learning and SVM-based emotion recognition from Chinese speech for smart affective services. Software: Pract Experience 47(8):1127\u20131138","DOI":"10.1002\/spe.2487"},{"key":"18316_CR9","doi-asserted-by":"crossref","unstructured":"Dissanayake V, Zhang H, Billinghurst M, Nanayakkara S (2020) Speech emotion recognition \u2018in the wild\u2019using an autoencoder. Interspeech 2020","DOI":"10.21437\/Interspeech.2020-1356"},{"key":"18316_CR10","doi-asserted-by":"crossref","unstructured":"Lakomkin E, Zamani MA, Weber C, Magg S, Wermter S (2018) On the robustness of speech emotion recognition for human-robot interaction with deep neural networks. In: 2018 IEEE\/RSJ international conference on intelligent robots and systems (IROS), pp 854\u2013860","DOI":"10.1109\/IROS.2018.8593571"},{"issue":"2","key":"18316_CR11","doi-asserted-by":"publisher","first-page":"99","DOI":"10.1007\/s10772-011-9125-1","volume":"15","author":"SG Koolagudi","year":"2012","unstructured":"Koolagudi SG, Rao KS (2012) Emotion recognition from speech: a review. Int J Speech Technol 15(2):99\u2013117","journal-title":"Int J Speech Technol"},{"key":"18316_CR12","doi-asserted-by":"publisher","first-page":"79861","DOI":"10.1109\/ACCESS.2020.2990405","volume":"8","author":"M Sajjad","year":"2020","unstructured":"Sajjad M, Kwon S et al (2020) Clustering-based speech emotion recognition by incorporating learned features and deep BiLSTM. IEEE Access 8:79861\u201379875","journal-title":"IEEE Access"},{"key":"18316_CR13","first-page":"3688","volume":"2018","author":"P Yenigalla","year":"2018","unstructured":"Yenigalla P, Kumar A, Tripathi S, Singh C, Kar S, Vepa J (2018) Speech Emotion Recognition Using Spectrogram & Phoneme Embedding. Interspeech 2018:3688\u20133692","journal-title":"Interspeech"},{"key":"18316_CR14","doi-asserted-by":"crossref","unstructured":"Nair MS, Gopinath DP (2022) Transfer learning for speech based emotion recognition. In: 2022 IEEE international conference on signal processing, informatics, communication and energy systems (SPICES), vol 1. IEEE, pp 559\u2013564","DOI":"10.1109\/SPICES52834.2022.9774103"},{"issue":"1","key":"18316_CR15","doi-asserted-by":"publisher","first-page":"183","DOI":"10.3390\/s20010183","volume":"20","author":"S Kwon","year":"2019","unstructured":"Kwon S (2019) A CNN-assisted enhanced audio signal processing for speech emotion recognition. Sensors 20(1):183","journal-title":"Sensors"},{"key":"18316_CR16","doi-asserted-by":"publisher","first-page":"312","DOI":"10.1016\/j.bspc.2018.08.035","volume":"47","author":"J Zhao","year":"2019","unstructured":"Zhao J, Mao X, Chen L (2019) Speech emotion recognition using deep 1D & 2D CNN LSTM networks. Biomed Signal Process Control 47:312\u2013323","journal-title":"Biomed Signal Process Control"},{"key":"18316_CR17","doi-asserted-by":"crossref","unstructured":"Satt A, Rozenberg S, Hoory R (2017) Efficient emotion recognition from speech using deep learning on spectrograms. In: Interspeech, pp 1089\u20131093","DOI":"10.21437\/Interspeech.2017-200"},{"key":"18316_CR18","doi-asserted-by":"crossref","unstructured":"Kerkeni, L, Serrestou Y, Mbarki M, Raoof K, Mahjoub MA, Cleder C (2019) Automatic speech emotion recognition using machine learning. In: Social media and machine learning. IntechOpen","DOI":"10.5772\/intechopen.84856"},{"key":"18316_CR19","doi-asserted-by":"crossref","unstructured":"Latif S, Qayyum A, Usman M, Qadir J (2018) Cross lingual speech emotion recognition: Urdu vs. western languages. In: 2018 international conference on frontiers of information technology (FIT). IEEE, pp 88\u201393","DOI":"10.1109\/FIT.2018.00023"},{"key":"18316_CR20","doi-asserted-by":"crossref","unstructured":"Peddinti V, Povey D, Khudanpur S (2015) A time delay neural network architecture for efficient modeling of long temporal contexts. In: Sixteenth annual conference of the international speech communication association","DOI":"10.21437\/Interspeech.2015-647"},{"issue":"10","key":"18316_CR21","doi-asserted-by":"publisher","first-page":"1440","DOI":"10.1109\/LSP.2018.2860246","volume":"25","author":"M Chen","year":"2018","unstructured":"Chen M, He X, Yang J, Zhang H (2018) 3-D convolutional recurrent neural networks with attention model for speech emotion recognition. IEEE Signal Process Lett 25(10):1440\u20131444","journal-title":"IEEE Signal Process Lett"},{"key":"18316_CR22","unstructured":"Tripathi S, Kumar A, Ramesh A, Singh C, Yenigalla P (2019) Deep learning based emotion recognition system using speech features and transcriptions"},{"key":"18316_CR23","doi-asserted-by":"crossref","unstructured":"Lee J, Tashev I (2015) High-level feature representation using recurrent neural network for speech emotion recognition. In: Sixteenth annual conference of the international speech communication association","DOI":"10.21437\/Interspeech.2015-336"},{"key":"18316_CR24","doi-asserted-by":"publisher","first-page":"117327","DOI":"10.1109\/ACCESS.2019.2936124","volume":"7","author":"RA Khalil","year":"2019","unstructured":"Khalil RA, Jones E, Babar MI, Jan T, Zafar MH, Alhussain T (2019) Speech emotion recognition using deep learning techniques: a review. IEEE Access 7:117327\u2013117345","journal-title":"IEEE Access"},{"key":"18316_CR25","doi-asserted-by":"publisher","first-page":"85","DOI":"10.1016\/j.neunet.2014.09.003","volume":"61","author":"J Schmidhuber","year":"2015","unstructured":"Schmidhuber J (2015) Deep learning in neural networks: an overview. Neural Networks 61:85\u2013117","journal-title":"Neural Networks"},{"issue":"18","key":"18316_CR26","doi-asserted-by":"publisher","first-page":"5212","DOI":"10.3390\/s20185212","volume":"20","author":"T Anvarjon","year":"2020","unstructured":"Anvarjon T, Kwon S (2020) Deep-net: a lightweight CNN-based speech emotion recognition system using deep frequency features. Sensors 20(18):5212","journal-title":"Sensors"},{"key":"18316_CR27","doi-asserted-by":"publisher","first-page":"125868","DOI":"10.1109\/ACCESS.2019.2938007","volume":"7","author":"H Meng","year":"2019","unstructured":"Meng H, Yan T, Yuan F, Wei H (2019) Speech emotion recognition from 3D log-mel spectrograms with deep learning network. IEEE Access 7:125868\u2013125881","journal-title":"IEEE Access"},{"key":"18316_CR28","unstructured":"Chernykh V, Prikhodko P (2017) Emotion recognition from speech with recurrent neural networks. arXiv:1701.08071"},{"key":"18316_CR29","doi-asserted-by":"crossref","unstructured":"Wen G, Li H, Huang J, Li D, Xun E (2017) Random deep belief networks for recognizing emotions from speech signals. Comput Intell Neurosci 2017","DOI":"10.1155\/2017\/1945630"},{"issue":"8","key":"18316_CR30","doi-asserted-by":"publisher","first-page":"1301","DOI":"10.1109\/JSTSP.2017.2764438","volume":"11","author":"P Tzirakis","year":"2017","unstructured":"Tzirakis P, Trigeorgis G, Nicolaou MA, Schuller BW, Zafeiriou S (2017) End-to-end multimodal emotion recognition using deep neural networks. IEEE J Sel Top Sign Process 11(8):1301\u20131309","journal-title":"IEEE J Sel Top Sign Process"},{"key":"18316_CR31","unstructured":"Tripathi S, Tripathi S, Beigi H (2018) Multi-modal emotion recognition on iemocap dataset using deep learning. arXiv:1804.05788"},{"key":"18316_CR32","doi-asserted-by":"crossref","unstructured":"Choi WY, Song KY, Lee CW (2018) Convolutional attention networks for multimodal emotion recognition from speech and text data. In: Proceedings of grand challenge and workshop on human multimodal language (Challenge-HML), pp 28\u201334","DOI":"10.18653\/v1\/W18-3304"},{"issue":"6","key":"18316_CR33","doi-asserted-by":"publisher","first-page":"713","DOI":"10.1049\/iet-spr.2017.0320","volume":"12","author":"J Zhao","year":"2018","unstructured":"Zhao J, Mao X, Chen L (2018) Learning deep features to recognise speech emotion using merged deep CNN. IET Signal Process 12(6):713\u2013721","journal-title":"IET Signal Process"},{"key":"18316_CR34","doi-asserted-by":"crossref","unstructured":"Tang, Dengke and Zeng, Junlin and Li, Ming (2018) An end-to-end deep learning framework for speech emotion recognition of atypical individuals. In: Interspeech, pp 162\u2013166","DOI":"10.21437\/Interspeech.2018-2581"},{"key":"18316_CR35","doi-asserted-by":"publisher","first-page":"7717","DOI":"10.1109\/ACCESS.2018.2888882","volume":"7","author":"A Khamparia","year":"2019","unstructured":"Khamparia A, Gupta D, Nguyen NG, Khanna A, Pandey B, Tiwari P (2019) Sound classification using convolutional neural network and tensor deep stacking network. IEEE Access 7:7717\u20137727","journal-title":"IEEE Access"},{"issue":"11","key":"18316_CR36","doi-asserted-by":"publisher","first-page":"1675","DOI":"10.1109\/TASLP.2019.2925934","volume":"27","author":"Y Xie","year":"2019","unstructured":"Xie Y, Liang R, Liang Z, Huang C, Zou C, Schuller B (2019) Speech emotion classification using attention-based LSTM. IEEE\/ACM Trans Audio Speech Language Process 27(11):1675\u20131685","journal-title":"IEEE\/ACM Trans Audio Speech Language Process"},{"key":"18316_CR37","doi-asserted-by":"crossref","unstructured":"Aguilar G, Rozgi\u0107 V, Wang W, Wang C (2019) Multimodal and multi-view models for emotion recognition. arXiv:1906.10198","DOI":"10.18653\/v1\/P19-1095"},{"key":"18316_CR38","doi-asserted-by":"crossref","unstructured":"Li Y, Zhao T, Kawahara T (2019) Improved end-to-end speech emotion recognition using self attention mechanism and multitask learning. In: Interspeech, pp 2803\u20132807","DOI":"10.21437\/Interspeech.2019-2594"},{"key":"18316_CR39","doi-asserted-by":"crossref","unstructured":"Kwon S et al (2021) MLT-DNet: speech emotion recognition using 1D dilated CNN based on multi-learning trick approach. Expert Syst Appl 167:114177","DOI":"10.1016\/j.eswa.2020.114177"},{"key":"18316_CR40","doi-asserted-by":"crossref","unstructured":"Livingstone SR, Russo FA (2018) The Ryerson audio-visual database of emotional speech and song (RAVDESS): a dynamic, multimodal set of facial and vocal expressions in North American English. PloS one 13(5):e0196391","DOI":"10.1371\/journal.pone.0196391"},{"issue":"3","key":"18316_CR41","doi-asserted-by":"publisher","first-page":"303","DOI":"10.1121\/1.1906762","volume":"23","author":"JL Flanagan","year":"1951","unstructured":"Flanagan JL (1951) Effect of delay distortion upon the intelligibility and quality of speech. J Acoust Soc Am 23(3):303\u2013307","journal-title":"J Acoust Soc Am"},{"key":"18316_CR42","unstructured":"Bachu RG, Kopparthi S, Adapa B, Barkana BD (2008) Separation of voiced and unvoiced using zero crossing rate and energy of the speech signal. In: American society for engineering education (ASEE) zone conference proceedings, pp 1\u20137"},{"key":"18316_CR43","unstructured":"Wu J (2017) Introduction to convolutional neural networks. National Key Lab for Novel Software Technology. Nanjing University. China 5(23):495"},{"key":"18316_CR44","doi-asserted-by":"crossref","unstructured":"Scherer D, M\u00fcller A, Behnke S (2010) Evaluation of pooling operations in convolutional architectures for object recognition. In: International conference on artificial neural networks. Springer, pp 92\u2013101","DOI":"10.1007\/978-3-642-15825-4_10"},{"issue":"8","key":"18316_CR45","doi-asserted-by":"publisher","first-page":"1735","DOI":"10.1162\/neco.1997.9.8.1735","volume":"9","author":"S Hochreiter","year":"1997","unstructured":"Hochreiter S, Schmidhuber J (1997) Long short-term memory. Neural Comput 9(8):1735\u20131780","journal-title":"Neural Comput"},{"key":"18316_CR46","doi-asserted-by":"crossref","unstructured":"Livingstone SR, Russo FA (2018) The Ryerson audio-visual database of emotional speech and song (RAVDESS): a dynamic, multimodal set of facial and vocal expressions in North American English. PloS one 13(5):e0196391","DOI":"10.1371\/journal.pone.0196391"},{"key":"18316_CR47","unstructured":"Chollet F (2015) keras. GitHub. https:\/\/keras.io"},{"key":"18316_CR48","unstructured":"Diederik PK, Ba J (2017) Adam: a method for stochastic optimization"},{"issue":"978","key":"18316_CR49","first-page":"3","volume":"10","author":"Charu C Aggarwal","year":"2018","unstructured":"Aggarwal Charu C et al (2018) Neural networks and deep learning. Springer 10(978):3","journal-title":"Springer"}],"container-title":["Multimedia Tools and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11042-024-18316-7.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11042-024-18316-7\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11042-024-18316-7.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,9,3]],"date-time":"2024-09-03T08:28:55Z","timestamp":1725352135000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11042-024-18316-7"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,2,19]]},"references-count":49,"journal-issue":{"issue":"31","published-online":{"date-parts":[[2024,9]]}},"alternative-id":["18316"],"URL":"https:\/\/doi.org\/10.1007\/s11042-024-18316-7","relation":{},"ISSN":["1573-7721"],"issn-type":[{"value":"1573-7721","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,2,19]]},"assertion":[{"value":"22 June 2023","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"22 September 2023","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"19 January 2024","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"19 February 2024","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare that they have no conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of Interest"}}]}}