{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,12]],"date-time":"2026-05-12T05:18:25Z","timestamp":1778563105640,"version":"3.51.4"},"reference-count":34,"publisher":"Springer Science and Business Media LLC","issue":"3","license":[{"start":{"date-parts":[[2023,9,1]],"date-time":"2023-09-01T00:00:00Z","timestamp":1693526400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,9,1]],"date-time":"2023-09-01T00:00:00Z","timestamp":1693526400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int J Speech Technol"],"published-print":{"date-parts":[[2023,9]]},"DOI":"10.1007\/s10772-023-10047-8","type":"journal-article","created":{"date-parts":[[2023,10,5]],"date-time":"2023-10-05T05:01:46Z","timestamp":1696482106000},"page":"721-733","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":7,"title":["Deep learning structure for emotion prediction using MFCC from native languages"],"prefix":"10.1007","volume":"26","author":[{"given":"A. Suresh","family":"Rao","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3912-3302","authenticated-orcid":false,"given":"A. Pramod","family":"Reddy","sequence":"additional","affiliation":[]},{"given":"Pragathi","family":"Vulpala","sequence":"additional","affiliation":[]},{"given":"K. Shwetha","family":"Rani","sequence":"additional","affiliation":[]},{"given":"P.","family":"Hemalatha","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2023,10,5]]},"reference":[{"key":"10047_CR1","doi-asserted-by":"publisher","first-page":"15400","DOI":"10.1109\/ACCESS.2017.2728801","volume":"5","author":"AKH Al-Ali","year":"2017","unstructured":"Al-Ali, A. K. H., Dean, D., Senadji, B., Chandran, V., & Naik, G. R. (2017). Enhanced forensic speaker verification using a combination of dwt and mfcc feature warping in the presence of noise and reverberation conditions. IEEE Access, 5, 15400\u201315413.","journal-title":"IEEE Access"},{"issue":"9","key":"10047_CR2","doi-asserted-by":"publisher","first-page":"525","DOI":"10.1177\/070674370505000905","volume":"50","author":"B Bediou","year":"2005","unstructured":"Bediou, B., Krolak-Salmon, P., Saoud, M., Henaff, M.-A., Burt, M., Dalery, J., & D\u2019Amato, T. (2005). Facial expression and sex recognition in schizophrenia and depression. The Canadian Journal of Psychiatry, 50(9), 525\u2013533.","journal-title":"The Canadian Journal of Psychiatry"},{"key":"10047_CR3","unstructured":"Boersma, P. (2011). Praat: Doing phonetics by computer [computer program]. http:\/\/www.praat.org\/"},{"key":"10047_CR4","doi-asserted-by":"publisher","first-page":"8869","DOI":"10.1109\/ACCESS.2017.2694446","volume":"5","author":"M Chen","year":"2017","unstructured":"Chen, M., Hao, Y., Hwang, K., Wang, L., & Wang, L. (2017). Disease prediction by machine learning over big data from healthcare communities. IEEE Access, 5, 8869\u20138879.","journal-title":"Ieee Access"},{"issue":"3","key":"10047_CR5","doi-asserted-by":"publisher","first-page":"164","DOI":"10.1109\/MCOM.2018.1700274","volume":"56","author":"M Chen","year":"2018","unstructured":"Chen, M., Zhang, Y., Qiu, M., Guizani, N., & Hao, Y. (2018). Spha: Smart personal health advisor based on deep analytics. IEEE Communications Magazine, 56(3), 164\u2013169.","journal-title":"IEEE Communications Magazine"},{"issue":"4","key":"10047_CR6","doi-asserted-by":"publisher","first-page":"357","DOI":"10.1109\/TASSP.1980.1163420","volume":"28","author":"S Davis","year":"1980","unstructured":"Davis, S., & Mermelstein, P. (1980). Comparison of parametric representations for monosyllabic word recognition in continuously spoken sentences. IEEE Transactions on Acoustics, Speech, and Signal Processing, 28(4), 357\u2013366.","journal-title":"IEEE Transactions on Acoustics, Speech, and Signal Processing"},{"key":"10047_CR7","doi-asserted-by":"crossref","unstructured":"Deshmukh, G., Gaonkar, A., Golwalkar, G., & Kulkarni, S. (2019). Speech based emotion recognition using machine learning. In 2019 3rd International conference on computing methodologies and communication (ICCMC) (pp. 812\u2013817). IEEE.","DOI":"10.1109\/ICCMC.2019.8819858"},{"key":"10047_CR8","first-page":"46","volume-title":"Nonverbal communication: Where nature meets culture","author":"P Ekman","year":"1997","unstructured":"Ekman, P., & Keltner, D. (1997). Universal facial expressions of emotion. In U. Segerstrale & P. Molnar (Eds.), Nonverbal communication: Where nature meets culture (Vol. 27, p. 46). Springer."},{"issue":"3","key":"10047_CR9","doi-asserted-by":"publisher","first-page":"572","DOI":"10.1016\/j.patcog.2010.09.020","volume":"44","author":"M El Ayadi","year":"2011","unstructured":"El Ayadi, M., Kamel, M. S., & Karray, F. (2011). Survey on speech emotion recognition: Features, classification schemes, and databases. Pattern Recognition, 44(3), 572\u2013587.","journal-title":"Pattern Recognition"},{"key":"10047_CR10","doi-asserted-by":"crossref","unstructured":"Gao, Y., Li, B., Wang, N., & Zhu, T. (2017). Speech emotion recognition using local and global features. In International conference on brain informatics (pp. 3\u201313). Springer.","DOI":"10.1007\/978-3-319-70772-3_1"},{"key":"10047_CR11","doi-asserted-by":"crossref","unstructured":"Geethashree, A., & Ravi, D. (2018). Kannada emotional speech database: Design, development and evaluation. In Proceedings of international conference on cognition and recognition (pp. 135\u2013143). Springer.","DOI":"10.1007\/978-981-10-5146-3_14"},{"key":"10047_CR12","unstructured":"Global Health Data Exchange (GHDx)., Institute Of Health Metrics And Evaluation. \u201cGBD Results Tool | GHDx.\u201d GBD Results Tool | GHDx. ghdx.healthdata.org, 2019. http:\/\/ghdx.healthdata.org\/gbd-results-tool?params=gbd-api-2019-permalink\/d780dffbe8a381b25e1416884959e88b"},{"key":"10047_CR13","volume-title":"Deep learning","author":"I Goodfellow","year":"2016","unstructured":"Goodfellow, I., Bengio, Y., Courville, A., & Bengio, Y. (2016). Deep learning (Vol. 1). MIT Press."},{"key":"10047_CR14","doi-asserted-by":"crossref","unstructured":"Huang, Z., Dong, M., Mao, Q., & Zhan, Y. (2014). Speech emotion recognition using cnn. In Proceedings of the 22nd ACM international conference on multimedia (pp. 801\u2013804).","DOI":"10.1145\/2647868.2654984"},{"key":"10047_CR15","volume-title":"Spoken language processing: A guide to theory, algorithm, and system development","author":"X Huang","year":"2001","unstructured":"Huang, X., Acero, A., Hon, H.-W., & Reddy, R. (2001). Spoken language processing: A guide to theory, algorithm, and system development. Prentice Hall PTR."},{"key":"10047_CR16","doi-asserted-by":"crossref","unstructured":"Iqbal, A., Barua, K. (2019). A real-time emotion recognition from speech using gradient boosting. In 2019 International conference on electrical, computer and communication engineering (ECCE) (pp. 1\u20135). IEEE","DOI":"10.1109\/ECACE.2019.8679271"},{"key":"10047_CR17","doi-asserted-by":"crossref","unstructured":"Jannat, R., Tynes, I., Lime, L. L., Adorno, J., & Canavan, S. (2018). Ubiquitous emotion recognition using audio and video data. In Proceedings of the 2018 ACM international joint conference and 2018 International symposium on pervasive and ubiquitous computing and wearable computers (pp. 956\u2013959).","DOI":"10.1145\/3267305.3267689"},{"key":"10047_CR18","doi-asserted-by":"crossref","unstructured":"Jin, B., & Liu, G. (2017). Speech emotion recognition based on hyper-prosodic features. In 2017 International conference on computer technology, electronics and communication (ICCTEC) (pp. 82\u201387). IEEE.","DOI":"10.1109\/ICCTEC.2017.00027"},{"issue":"1","key":"10047_CR19","doi-asserted-by":"publisher","first-page":"28","DOI":"10.1016\/j.inffus.2011.08.001","volume":"14","author":"B Khaleghi","year":"2013","unstructured":"Khaleghi, B., Khamis, A., Karray, F. O., & Razavi, S. N. (2013). Multisensor data fusion: A review of the state-of-the-art. Information Fusion, 14(1), 28\u201344.","journal-title":"Information Fusion"},{"key":"10047_CR20","doi-asserted-by":"crossref","unstructured":"Koolagudi, S. G., Maity, S., Kumar, V. A., Chakrabarti, S., & Rao, K. S. (2009). Iitkgp-sesc: Speech database for emotion analysis. In International conference on contemporary computing (pp. 485\u2013492). Springer.","DOI":"10.1007\/978-3-642-03547-0_46"},{"key":"10047_CR21","doi-asserted-by":"crossref","unstructured":"Kwon, O.-W., Chan, K., Hao, J., & Lee, T.-W. (2003). Emotion recognition by speech signals. In Eighth European conference on speech communication and technology.","DOI":"10.21437\/Eurospeech.2003-80"},{"issue":"10","key":"10047_CR22","first-page":"1995","volume":"3361","author":"Y LeCun","year":"1995","unstructured":"LeCun, Y., Bengio, Y., et al. (1995). Convolutional networks for images, speech, and time series. The Handbook of Brain Theory and Neural Networks, 3361(10), 1995.","journal-title":"The Handbook of Brain Theory and Neural Networks"},{"issue":"5","key":"10047_CR24","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1371\/journal.pone.0196391","volume":"13","author":"SR Livingstone","year":"2018","unstructured":"Livingstone, S. R., & Russo, F. A. (2018). The Ryerson Audio-Visual Database of Emotional Speech and Song (RAVDESS): A dynamic, multimodal set of facial and vocal expressions in north American English. PLoS ONE, 13(5), 1\u201335. https:\/\/doi.org\/10.1371\/journal.pone.0196391","journal-title":"PLoS ONE"},{"key":"10047_CR25","doi-asserted-by":"crossref","unstructured":"Pinto, M. G., Polignano, M., Lops, P., Semeraro, G. (2020). Emotions understanding model from spoken language using deep neural networks and mel-frequency cepstral coefficients. In 2020 IEEE conference on evolving and adaptive intelligent systems (EAIS) (pp. 1\u20135). IEEE.","DOI":"10.1109\/EAIS48028.2020.9122698"},{"key":"10047_CR26","doi-asserted-by":"publisher","first-page":"1097","DOI":"10.1016\/j.protcy.2016.05.242","volume":"24","author":"T Rajisha","year":"2016","unstructured":"Rajisha, T., Sunija, A., & Riyas, K. (2016). Performance analysis of Malayalam language speech emotion recognition system using ANN\/SVM. Procedia Technology, 24, 1097\u20131104.","journal-title":"Procedia Technology"},{"issue":"16","key":"10047_CR27","first-page":"5760","volume":"12","author":"AP Reddy","year":"2017","unstructured":"Reddy, A. P., & Vijayarajan, V. (2017). Extraction of emotions from speech-a survey. International Journal of Applied Engineering Research, 12(16), 5760\u20135767.","journal-title":"International Journal of Applied Engineering Research"},{"issue":"2","key":"10047_CR28","doi-asserted-by":"publisher","first-page":"165","DOI":"10.1109\/T-AFFC.2011.34","volume":"3","author":"M Schroder","year":"2011","unstructured":"Schroder, M., Bevacqua, E., Cowie, R., Eyben, F., Gunes, H., Heylen, D., Ter Maat, M., McKeown, G., Pammi, S., Pantic, M., et al. (2011). Building autonomous sensitive artificial listeners. IEEE Transactions on Affective Computing, 3(2), 165\u2013183.","journal-title":"IEEE Transactions on Affective Computing"},{"issue":"4","key":"10047_CR29","doi-asserted-by":"publisher","first-page":"1","DOI":"10.14569\/IJACSA.2020.01104104","volume":"11","author":"ZS Syed","year":"2020","unstructured":"Syed, Z. S., Memon, S. A., Shah, M. S., & Syed, A. S. (2020). Introducing the Urdu-Sindhi speech emotion corpus: A novel dataset of speech recordings for emotion recognition for two low-resource languages. International Journal of Advanced Computer Science and Applications, 11(4), 1\u20136.","journal-title":"International Journal of Advanced Computer Science and Applications"},{"key":"10047_CR30","doi-asserted-by":"crossref","unstructured":"Trigeorgis, G., Ringeval, F., Brueckner, R., Marchi, E., Nicolaou, M. A., Schuller, B., & Zafeiriou, S. (2016). Adieu features? End-to-end speech emotion recognition using a deep convolutional recurrent network. In 2016 IEEE international conference on acoustics, speech and signal processing (ICASSP) (pp. 5200\u20135204). IEEE.","DOI":"10.1109\/ICASSP.2016.7472669"},{"key":"10047_CR31","unstructured":"Vasquez-Correa, J. C., Arias-Vergara, T., Orozco-Arroyave, J. R., Vargas-Bonilla, J. F., & Noeth, E. (2016). Wavelet-based time-frequency representations for automatic recognition of emotions from speech. In Speech communication; 12. ITG symposium (pp. 1\u20135). VDE."},{"key":"10047_CR32","doi-asserted-by":"crossref","unstructured":"Wang, S., Soladie, C., & Seguier, R. (2019). Ocae: Organization-controlled autoencoder for unsupervised speech emotion analysis. In 2019 5th International conference on frontiers of signal processing (ICFSP) (pp. 72\u201376). IEEE","DOI":"10.1109\/ICFSP48124.2019.8938073"},{"key":"10047_CR33","doi-asserted-by":"crossref","unstructured":"Yenigalla, P., Kumar, A., Tripathi, S., Singh, C., Kar, S., & Vepa, J. (2018). Speech emotion recognition using spectrogram & phoneme embedding. In Interspeech (pp. 3688\u20133692).","DOI":"10.21437\/Interspeech.2018-1811"},{"key":"10047_CR34","doi-asserted-by":"crossref","unstructured":"Zhang, Q., An, N., Wang, K., Ren, F., & Li, L. (2013). Speech emotion recognition using combination of features. In 2013 Fourth International Conference on intelligent control and information processing (ICICIP) (pp. 523\u2013528). IEEE","DOI":"10.1109\/ICICIP.2013.6568131"},{"key":"10047_CR35","doi-asserted-by":"crossref","unstructured":"Zhang, B., Essl, G., & Provost, E. M. (2015). Recognizing emotion from singing and speaking using shared models. In 2015 International conference on affective computing and intelligent interaction (ACII) (pp. 139\u2013145). IEEE.","DOI":"10.1109\/ACII.2015.7344563"}],"container-title":["International Journal of Speech Technology"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10772-023-10047-8.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10772-023-10047-8\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10772-023-10047-8.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,11,10]],"date-time":"2023-11-10T14:11:12Z","timestamp":1699625472000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10772-023-10047-8"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,9]]},"references-count":34,"journal-issue":{"issue":"3","published-print":{"date-parts":[[2023,9]]}},"alternative-id":["10047"],"URL":"https:\/\/doi.org\/10.1007\/s10772-023-10047-8","relation":{},"ISSN":["1381-2416","1572-8110"],"issn-type":[{"value":"1381-2416","type":"print"},{"value":"1572-8110","type":"electronic"}],"subject":[],"published":{"date-parts":[[2023,9]]},"assertion":[{"value":"23 April 2023","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"7 September 2023","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"5 October 2023","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors have no conflicts of interest to declare. All co-authors have seen and agree with the contents of the manuscript and there is no financial interest to report. We certify that the submission is original work and is not under review at any other publication.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}]}}