{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,3]],"date-time":"2026-02-03T08:46:11Z","timestamp":1770108371682,"version":"3.49.0"},"reference-count":42,"publisher":"Springer Science and Business Media LLC","issue":"2","license":[{"start":{"date-parts":[[2026,2,2]],"date-time":"2026-02-02T00:00:00Z","timestamp":1769990400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2026,2,2]],"date-time":"2026-02-02T00:00:00Z","timestamp":1769990400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Multimed Tools Appl"],"DOI":"10.1007\/s11042-026-21244-3","type":"journal-article","created":{"date-parts":[[2026,2,2]],"date-time":"2026-02-02T19:21:33Z","timestamp":1770060093000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Hybrid acoustic-deep features with auto encoders for speech emotion recognition"],"prefix":"10.1007","volume":"85","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-8302-896X","authenticated-orcid":false,"given":"Kogila","family":"Raghu","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Manchala","family":"Sadanandam","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Bh","family":"Hanumanthu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2026,2,2]]},"reference":[{"issue":"1","key":"21244_CR1","doi-asserted-by":"publisher","first-page":"10517","DOI":"10.1038\/s41598-020-66405-y","volume":"10","author":"P Schlegel","year":"2020","unstructured":"Schlegel P, Kniesburges S, D\u00fcrr S, Sch\u00fctzenberger A, D\u00f6llinger M (2020) Machine learning based identification of relevant parameters for functional voice disorders derived from endoscopic high-speed recordings. Sci Rep 10(1):10517. https:\/\/doi.org\/10.1038\/s41598-020-66405-y","journal-title":"Sci Rep"},{"issue":"4","key":"21244_CR2","doi-asserted-by":"publisher","first-page":"385","DOI":"10.1109\/TAFFC.2015.2432810","volume":"6","author":"M Nardelli","year":"2015","unstructured":"Nardelli M, Valenza G, Greco A, Lanata A, Scilingo EP (2015) Recognizing emotions induced by affective sounds through heart rate variability. IEEE Trans Affect Comput 6(4):385\u2013394. https:\/\/doi.org\/10.1109\/TAFFC.2015.2432810","journal-title":"IEEE Trans Affect Comput"},{"issue":"4","key":"21244_CR3","doi-asserted-by":"publisher","first-page":"1334","DOI":"10.1016\/j.jnca.2006.09.007","volume":"30","author":"H Gunes","year":"2007","unstructured":"Gunes H, Piccardi M (2007) Bi-modal emotion recognition from expressive face and body gestures. J Netw Comput Appl 30(4):1334\u20131345. https:\/\/doi.org\/10.1016\/j.jnca.2006.09.007","journal-title":"J Netw Comput Appl"},{"key":"21244_CR4","first-page":"460","volume":"24","author":"D Po\u0142ap","year":"2018","unstructured":"Po\u0142ap D (2018) Model of identity verification support system based on voice and image samples. J Univ Comput Sci 24:460\u2013474","journal-title":"J Univ Comput Sci"},{"issue":"5","key":"21244_CR5","doi-asserted-by":"publisher","first-page":"63","DOI":"10.14132\/j.cnki.1673-5439.2018.05.009","volume":"38","author":"G Lu","year":"2018","unstructured":"Lu G, Yuan L, Yang W, Yan J, Li H (2018) Speech emotion recognition based on long short-term memory and convolutional neural networks. J Nanjing Univ Posts Telecommun 38(5):63\u201369. https:\/\/doi.org\/10.14132\/j.cnki.1673-5439.2018.05.009","journal-title":"J Nanjing Univ Posts Telecommun"},{"key":"21244_CR6","doi-asserted-by":"publisher","unstructured":"Garg V, Kumar H, Sinha R (2013) \u2018Speech based emotion recognition based on hierarchical decision tree with SVM, BLG and SVR classifiers. in Proc Nat Conf Commun (NCC) pp. 1\u20135. https:\/\/doi.org\/10.1109\/ncc.2013.6487987","DOI":"10.1109\/ncc.2013.6487987"},{"key":"21244_CR7","doi-asserted-by":"crossref","unstructured":"Han K, Yu D, Tashev I (2014) Speech emotion recognition using deep neural network and extreme learning machine. in Proc Annu Conf Int Speech Commun. Assoc pp. 1\u20135","DOI":"10.21437\/Interspeech.2014-57"},{"key":"21244_CR8","doi-asserted-by":"publisher","unstructured":"Mittal S, Agarwal S, Nigam MJ (2018) Real time multiple face recognition: a deep learning approach. in Proc Int Conf Digit Med.Image Process (DMIP) 70\u201376. https:\/\/doi.org\/10.1145\/3299852.3299853","DOI":"10.1145\/3299852.3299853"},{"key":"21244_CR9","doi-asserted-by":"publisher","unstructured":"Bae H-S, Lee H-J, Lee S-G (2016) Voice recognition based on adaptive MFCC and deep learning. in Proc. IEEE 11th Conf Ind Electron Appl (ICIEA) pp. 1542\u20131546. https:\/\/doi.org\/10.1109\/iciea.2016.7603830","DOI":"10.1109\/iciea.2016.7603830"},{"issue":"5","key":"21244_CR10","doi-asserted-by":"publisher","DOI":"10.1002\/ett.3774","volume":"31","author":"KR Malik","year":"2019","unstructured":"Malik KR, Ahmad M, Khalid S, Ahmad H, Al-Turjman F, Jabbar S (2019) Image and command hybrid model for vehicle control using Internet of Vehicles. Trans Emerging Telecommun Technol 31(5):e3774. https:\/\/doi.org\/10.1002\/ett.3774","journal-title":"Trans Emerging Telecommun Technol"},{"key":"21244_CR11","doi-asserted-by":"publisher","DOI":"10.1016\/j.bspc.2020.101894","volume":"59","author":"D Issa","year":"2020","unstructured":"Issa D, Demirci MF, Yazici A (2020) Speech emotion recognition with deep convolutional neural networks. Biomed Signal Process Control 59:101894","journal-title":"Biomed Signal Process Control"},{"issue":"7\u20138","key":"21244_CR12","doi-asserted-by":"publisher","first-page":"1539","DOI":"10.1007\/s00521-013-1377-z","volume":"24","author":"X Zhao","year":"2014","unstructured":"Zhao X, Zhang S, Lei B (2014) Robust emotion recognition in noisy speech via sparse representation. Neural Comput Appl 24(7\u20138):1539\u20131553. https:\/\/doi.org\/10.1007\/s00521-013-1377-z","journal-title":"Neural Comput Appl"},{"key":"21244_CR13","doi-asserted-by":"publisher","first-page":"320","DOI":"10.1016\/j.apacoust.2018.11.028","volume":"146","author":"T \u00d6zseven","year":"2019","unstructured":"\u00d6zseven T (2019) A novel feature selection method for speech emotion recognition. Appl Acoust 146:320\u2013326. https:\/\/doi.org\/10.1016\/j.apacoust.2018.11.028","journal-title":"Appl Acoust"},{"key":"21244_CR14","doi-asserted-by":"publisher","unstructured":"Bhavan A, Chauhan P, Hitkul, Shah RR (2019) Bagged support vector machines for emotion recognition from speech. Knowl.-Based Syst 184: 104886. https:\/\/doi.org\/10.1016\/j.knosys.2019. 104886","DOI":"10.1016\/j.knosys.2019"},{"issue":"5","key":"21244_CR15","first-page":"82","volume":"27","author":"S Li","year":"2017","unstructured":"Li S, Xu L (2017) Research on emotion recognition algorithm based on spectrogram feature extraction of bottleneck feature. Comput Technol Dev 27(5):82\u201386","journal-title":"Comput Technol Dev"},{"key":"21244_CR16","doi-asserted-by":"publisher","unstructured":"Tzirakis P, Zhang J, Schuller BW (2018) End-to-end speech emotion recognition using deep neural networks. in Proc. IEEE Int Conf Acoust Speech Signal Process (ICASSP) 5089\u20135093. https:\/\/doi.org\/10.1109\/icassp.2018.8462677","DOI":"10.1109\/icassp.2018.8462677"},{"key":"21244_CR17","first-page":"367","volume":"68","author":"K Khanchandani","year":"2009","unstructured":"Khanchandani K, Hussain M (2009) Emotion recognition using multilayer perceptron and generalized feed forward neural network. J Sci Ind Res 68:367\u2013371","journal-title":"J Sci Ind Res"},{"issue":"6","key":"21244_CR18","doi-asserted-by":"publisher","first-page":"1181","DOI":"10.1007\/s00521-012-0884-7","volume":"22","author":"D Gharavian","year":"2013","unstructured":"Gharavian D, Sheikhan M, Ashoftedel F (2013) Emotion recognition improvement using normalized formant supplementary features by hybrid of DTW-MLP-GMM model. Neural Comput Appl 22(6):1181\u20131191. https:\/\/doi.org\/10.1007\/s00521-012-0884-7","journal-title":"Neural Comput Appl"},{"key":"21244_CR19","doi-asserted-by":"publisher","first-page":"221640","DOI":"10.1109\/ACCESS.2020.3043201","volume":"8","author":"MB Er","year":"2020","unstructured":"Er MB (2020) A novel approach for classification of speech emotions based on deep and acoustic features. IEEE Access 8:221640\u2013221653","journal-title":"IEEE Access"},{"key":"21244_CR20","doi-asserted-by":"publisher","first-page":"152423","DOI":"10.1109\/ACCESS.2020.3017462","volume":"8","author":"TW Sun","year":"2020","unstructured":"Sun TW (2020) End-to-end speech emotion recognition with gender information. IEEE Access 8:152423\u2013152438","journal-title":"IEEE Access"},{"key":"21244_CR21","doi-asserted-by":"publisher","first-page":"79861","DOI":"10.1109\/ACCESS.2020.2990405","volume":"8","author":"M Sajjad","year":"2020","unstructured":"Sajjad M, Kwon S (2020) Clustering-based speech emotion recognition by incorporating learned features and deep BiLSTM. IEEE Access 8:79861\u201379875","journal-title":"IEEE Access"},{"key":"21244_CR22","doi-asserted-by":"publisher","DOI":"10.1016\/j.asoc.2021.107101","author":"S Kwon","year":"2021","unstructured":"Kwon S (2021) Att-net: enhanced emotion recognition system using lightweight self-attention module. Appl Soft Comput. https:\/\/doi.org\/10.1016\/j.asoc.2021.107101","journal-title":"Appl Soft Comput"},{"key":"21244_CR23","doi-asserted-by":"publisher","first-page":"106889","DOI":"10.1109\/ACCESS.2020.3000751","volume":"8","author":"H Zhao","year":"2020","unstructured":"Zhao H, Xiao Y, Zhang Z (2020) Robust semisupervised generative adversarial networks for speech emotion recognition via distribution smoothness. IEEE Access 8:106889\u2013106900","journal-title":"IEEE Access"},{"key":"21244_CR24","unstructured":"Yi L, Mak MW (2020) Improving speech emotion recognition with adversarial data augmentation network. IEEE Transactions on Neural Networks and Learning Systems"},{"key":"21244_CR25","doi-asserted-by":"crossref","unstructured":"Latif S, Rana R, Khalifa S, Jurdak R, Epps J, Schuller BW (2020) Multi-task semi-supervised adversarial autoencoding for speech emotion recognition. IEEE Transactions on Affective Computing","DOI":"10.36227\/techrxiv.16689484"},{"key":"21244_CR26","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2020.02.085","author":"K Wang","year":"2020","unstructured":"Wang K, Su G, Liu L, Wang S (2020) Wavelet packet analysis for speaker-independent emotion recognition. Neurocomputing. https:\/\/doi.org\/10.1016\/j.neucom.2020.02.085","journal-title":"Neurocomputing"},{"key":"21244_CR27","doi-asserted-by":"publisher","DOI":"10.1016\/j.knosys.2020.106547","volume":"211","author":"T Tuncer","year":"2021","unstructured":"Tuncer T, Dogan S, Acharya UR (2021) Automated accurate speech emotion recognition system using twine shuffle pattern and iterative neighborhood component analysis techniques. Knowledge Based Syst 211:106547","journal-title":"Knowledge Based Syst"},{"key":"21244_CR28","doi-asserted-by":"publisher","DOI":"10.1007\/s10772-020-09689-9","author":"AP Reddy","year":"2020","unstructured":"Reddy AP, Vijayarajan V (2020) Audio compression with multi-algorithm fusion and its impact in speech emotion recognition. Int J Speech Technol. https:\/\/doi.org\/10.1007\/s10772-020-09689-9","journal-title":"Int J Speech Technol"},{"issue":"47","key":"21244_CR29","doi-asserted-by":"publisher","first-page":"35739","DOI":"10.1007\/s11042-020-09591-1","volume":"79","author":"A Bakhshi","year":"2020","unstructured":"Bakhshi A, Chalup S, Harimi A, Mirhassani SM (2020) Recognition of emotion from speech using evolutionary cepstral coefficients. Multimedia Tools Appl 79(47):35739\u201335759","journal-title":"Multimedia Tools Appl"},{"key":"21244_CR30","doi-asserted-by":"crossref","unstructured":"Sugan N, Srinivas NSS, Kumar LS, Nath MK, Kanhe A (2020) Speech emotion recognition using cepstral features extracted with novel triangular filter banks based on bark and ERB frequency scales. Digital Signal Processing. 102763","DOI":"10.1016\/j.dsp.2020.102763"},{"key":"21244_CR31","doi-asserted-by":"publisher","first-page":"328","DOI":"10.1016\/j.ins.2020.09.047","volume":"548","author":"D Li","year":"2021","unstructured":"Li D, Zhou Y, Wang Z, Gao D (2021) Exploiting the potentialities of features for speech emotion recognition. Inf Sci 548:328\u2013343","journal-title":"Inf Sci"},{"key":"21244_CR32","doi-asserted-by":"publisher","first-page":"150","DOI":"10.1016\/j.ins.2019.09.005","volume":"509","author":"L Chen","year":"2020","unstructured":"Chen L, Su W, Feng Y, Wu M, She J, Hirota K (2020) Two-layer fuzzy multiple random forest for speech emotion recognition in human-robot interaction. Inf Sci 509:150\u2013163","journal-title":"Inf Sci"},{"key":"21244_CR33","doi-asserted-by":"crossref","unstructured":"Song P, Zheng W, Yu Y, Ou S (2020) Speech emotion recognition based on robust discriminative sparse regression. IEEE Transactions on Cognitive and Developmental Systems","DOI":"10.1109\/TCDS.2020.2990928"},{"issue":"4","key":"21244_CR34","doi-asserted-by":"publisher","first-page":"3111","DOI":"10.1007\/s13369-019-04293-9","volume":"45","author":"R Singh","year":"2020","unstructured":"Singh R, Puri H, Aggarwal N, Gupta V (2020) An efficient language-independent acoustic emotion classification system. Arab J Sci Eng 45(4):3111\u20133121","journal-title":"Arab J Sci Eng"},{"issue":"1","key":"21244_CR35","doi-asserted-by":"publisher","first-page":"1499","DOI":"10.1016\/j.aej.2020.11.004","volume":"60","author":"Z Zhang","year":"2021","unstructured":"Zhang Z (2021) Speech feature selection and emotion recognition based on weighted binary cuckoo search. Alexandria Eng J 60(1):1499\u20131507","journal-title":"Alexandria Eng J"},{"key":"21244_CR36","doi-asserted-by":"publisher","first-page":"13","DOI":"10.1016\/j.specom.2020.08.002","volume":"124","author":"M Hou","year":"2020","unstructured":"Hou M, Li J, Lu G (2020) A supervised non-negative matrix factorization model for speech emotion recognition. Speech Commun 124:13\u201320","journal-title":"Speech Commun"},{"key":"21244_CR37","doi-asserted-by":"publisher","DOI":"10.1016\/j.compeleceng.2021.107172","volume":"93","author":"a Husam Ali Abdulmohsin","year":"2021","unstructured":"Husam Ali Abdulmohsin a, Hala Bahjat Abdul wahab b, Abdul Mohssen Jaber Abdul hossen c (2021) A new proposed statistical feature extraction method in speech emotion recognition. Comput Electr Eng 93:107172","journal-title":"Comput Electr Eng"},{"key":"21244_CR38","doi-asserted-by":"publisher","first-page":"4750","DOI":"10.3390\/app13084750","volume":"13","author":"AS Alluhaidan","year":"2023","unstructured":"Alluhaidan AS, Saidani O, Jahangir R, Nauman MA, Neffati OS (2023) Speech emotion recognition through hybrid features and convolutional neural network. Appl Sci 13:4750. https:\/\/doi.org\/10.3390\/app13084750","journal-title":"Appl Sci"},{"key":"21244_CR39","doi-asserted-by":"crossref","unstructured":"Shaila SG, Sindhu A, Monish L,\u00a0Shivamma D, Vaishali B (2023) Speech emotion recognition using machine learning approach, ICAMIDA 2022, ACSR 105592\u2013599","DOI":"10.2991\/978-94-6463-136-4_50"},{"key":"21244_CR40","doi-asserted-by":"publisher","first-page":"80497","DOI":"10.1007\/s11042-023-17915-0","volume":"83","author":"A Tripathi","year":"2024","unstructured":"Tripathi A, Rani P (2024) An improved MSER using grid search based PCA and ensemble voting technique. Multimed Tools Appl 83:80497\u201380522. https:\/\/doi.org\/10.1007\/s11042-023-17915-0","journal-title":"Multimed Tools Appl"},{"issue":"5","key":"21244_CR41","doi-asserted-by":"publisher","DOI":"10.1371\/journal.pone.0196391","volume":"13","author":"SR Livingstone","year":"2018","unstructured":"Livingstone SR, Russo FA (2018) The Ryerson audio-visual database of emotional speech and song (RAVDESS): a dynamic, multimodal set of facial and vocal expressions in North American English. PLoS One 13(5):e0196391. https:\/\/doi.org\/10.1371\/journal.pone.0196391","journal-title":"PLoS One"},{"key":"21244_CR42","doi-asserted-by":"publisher","first-page":"1861","DOI":"10.18280\/ts.380631","volume":"38","author":"K Raghu","year":"2021","unstructured":"Raghu K, Sadanandam M (2021) A perspective study on speech emotion recognition: databases, features and classification models. Traitement Du Signal 38:1861\u20131873. https:\/\/doi.org\/10.18280\/ts.380631","journal-title":"Traitement Du Signal"}],"container-title":["Multimedia Tools and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11042-026-21244-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11042-026-21244-3","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11042-026-21244-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,2,2]],"date-time":"2026-02-02T19:21:35Z","timestamp":1770060095000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11042-026-21244-3"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,2,2]]},"references-count":42,"journal-issue":{"issue":"2","published-online":{"date-parts":[[2026,2]]}},"alternative-id":["21244"],"URL":"https:\/\/doi.org\/10.1007\/s11042-026-21244-3","relation":{},"ISSN":["1573-7721"],"issn-type":[{"value":"1573-7721","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026,2,2]]},"assertion":[{"value":"2 January 2024","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"8 October 2025","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"28 October 2025","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"2 February 2026","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"Ethical approval for the research conducted in this study has been obtained from the relevant ethics committee or review board.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethical approval"}},{"value":"Not Applicable.","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Consent to publish"}},{"value":"Not Applicable.","order":4,"name":"Ethics","group":{"name":"EthicsHeading","label":"Consent to participate"}},{"value":"Authors declare there is no conflict of interest.","order":5,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}],"article-number":"103"}}