{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,26]],"date-time":"2026-02-26T23:04:44Z","timestamp":1772147084227,"version":"3.50.1"},"reference-count":40,"publisher":"Springer Science and Business Media LLC","issue":"4","license":[{"start":{"date-parts":[[2023,11,15]],"date-time":"2023-11-15T00:00:00Z","timestamp":1700006400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,11,15]],"date-time":"2023-11-15T00:00:00Z","timestamp":1700006400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int J Speech Technol"],"published-print":{"date-parts":[[2023,12]]},"DOI":"10.1007\/s10772-023-10063-8","type":"journal-article","created":{"date-parts":[[2023,11,15]],"date-time":"2023-11-15T11:02:19Z","timestamp":1700046139000},"page":"947-961","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Optimized cross-corpus speech emotion recognition framework based on normalized 1D convolutional neural network with data augmentation and feature selection"],"prefix":"10.1007","volume":"26","author":[{"given":"Nishant","family":"Barsainyan","sequence":"first","affiliation":[]},{"given":"Dileep Kumar","family":"Singh","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2023,11,15]]},"reference":[{"issue":"10","key":"10063_CR1","doi-asserted-by":"publisher","first-page":"1533","DOI":"10.1109\/TASLP.2014.2339736","volume":"22","author":"O Abdel-Hamid","year":"2014","unstructured":"Abdel-Hamid, O., Mohamed, A. R., Jiang, H., Deng, L., Penn, G., & Yu, D. (2014). Convolutional neural networks for speech recognition. IEEE\/ACM Transactions on Audio, Speech, and Language Processing, 22(10), 1533\u20131545.","journal-title":"IEEE\/ACM Transactions on Audio, Speech, and Language Processing"},{"key":"10063_CR2","doi-asserted-by":"publisher","first-page":"108046","DOI":"10.1016\/j.apacoust.2021.108046","volume":"179","author":"J Ancilin","year":"2021","unstructured":"Ancilin, J., & Milton, A. (2021). Improved speech emotion recognition with Mel frequency magnitude coefficient. Applied Acoustics, 179, 108046.","journal-title":"Applied Acoustics"},{"issue":"18","key":"10063_CR3","doi-asserted-by":"publisher","first-page":"5212","DOI":"10.3390\/s20185212","volume":"20","author":"T Anvarjon","year":"2020","unstructured":"Anvarjon, T., & Kwon, S. (2020). Deep-net: A lightweight CNN-based speech emotion recognition system using deep frequency features. Sensors, 20(18), 5212.","journal-title":"Sensors"},{"key":"10063_CR4","doi-asserted-by":"crossref","unstructured":"Bertero, D., & Fung, P. (2017). A first look into a convolutional neural network for speech emotion detection. In 2017 IEEE international conference on acoustics, speech and signal processing (ICASSP) (pp.\u00a05115\u20135119). IEEE.","DOI":"10.1109\/ICASSP.2017.7953131"},{"key":"10063_CR5","doi-asserted-by":"publisher","first-page":"587","DOI":"10.1016\/j.procs.2016.08.239","volume":"96","author":"R Chakraborty","year":"2016","unstructured":"Chakraborty, R., Pandharipande, M., & Kopparapu, S. K. (2016). Knowledge-based framework for intelligent emotion recognition in spontaneous speech. Procedia Computer Science, 96, 587\u2013596.","journal-title":"Procedia Computer Science"},{"key":"10063_CR6","doi-asserted-by":"crossref","unstructured":"Chalapathi, M. M., Kumar, M. R., Sharma, N., & Shitharth, S. (2022). Ensemble learning by high-dimensional acoustic features for emotion recognition from speech audio signal. Security and Communication Networks, 2022.","DOI":"10.1155\/2022\/8777026"},{"key":"10063_CR7","doi-asserted-by":"crossref","unstructured":"Chatterjee, J., Mukesh, V., Hsu, H. H., Vyas, G., & Liu, Z. (2018). Speech emotion recognition using cross-correlation and acoustic features. In 2018 IEEE 16th international conference on dependable, autonomic and secure computing, 16th international conference on pervasive intelligence and computing, 4th international conference on big data intelligence and computing and cyber science and technology congress. (DASC\/PiCom\/DataCom\/CyberSciTech) 2018 (pp. 243\u2013249). IEEE.","DOI":"10.1109\/DASC\/PiCom\/DataCom\/CyberSciTec.2018.00050"},{"issue":"1","key":"10063_CR8","first-page":"012003","volume":"2236","author":"RR Choudhary","year":"2022","unstructured":"Choudhary, R. R., Meena, G., & Mohbey, K. K. (2022). Speech emotion based sentiment recognition using deep neural networks. Journal of Physics: Conference Series, 2236(1), 012003.","journal-title":"Journal of Physics: Conference Series"},{"issue":"1","key":"10063_CR9","doi-asserted-by":"publisher","first-page":"32","DOI":"10.1109\/79.911197","volume":"18","author":"R Cowie","year":"2001","unstructured":"Cowie, R., Douglas-Cowie, E., Tsapatsoulis, N., Votsis, G., Kollias, S., Fellenz, W., & Taylor, J. G. (2001). Emotion recognition in human-computer interaction. IEEE Signal Processing Magazine, 18(1), 32\u201380.","journal-title":"IEEE Signal Processing Magazine"},{"key":"10063_CR11","doi-asserted-by":"publisher","first-page":"60","DOI":"10.1016\/j.neunet.2017.02.013","volume":"92","author":"HM Fayek","year":"2017","unstructured":"Fayek, H. M., Lech, M., & Cavedon, L. (2017). Evaluating deep learning architectures for speech emotion recognition. Neural Networks, 92, 60\u201368.","journal-title":"Neural Networks"},{"key":"10063_CR12","unstructured":"Goel, S., & Beigi, H. (2020). Cross lingual cross corpus speech emotion recognition.\u00a0arXiv preprint arXiv:2003.07996."},{"key":"10063_CR13","doi-asserted-by":"publisher","first-page":"101894","DOI":"10.1016\/j.bspc.2020.101894","volume":"59","author":"D Issa","year":"2020","unstructured":"Issa, D., Demirci, M. F., & Yazici, A. (2020). Speech emotion recognition with deep convolutional neural networks. Biomedical Signal Processing and Control, 59, 101894.","journal-title":"Biomedical Signal Processing and Control"},{"issue":"3","key":"10063_CR14","doi-asserted-by":"publisher","first-page":"41","DOI":"10.1007\/s00138-022-01294-x","volume":"33","author":"R Jahangir","year":"2022","unstructured":"Jahangir, R., Teh, Y. W., Mujtaba, G., Alroobaea, R., Shaikh, Z. H., & Ali, I. (2022). Convolutional neural network-based cross-corpus speech emotion recognition with data augmentation and features fusion. Machine Vision and Applications, 33(3), 41.","journal-title":"Machine Vision and Applications"},{"key":"10063_CR15","doi-asserted-by":"crossref","unstructured":"Kim, J., & Saurous, R. A. (2018). Emotion recognition from human speech using temporal information and deep learning. In Interspeech (pp.\u00a0937\u2013940).","DOI":"10.21437\/Interspeech.2018-1132"},{"key":"10063_CR16","doi-asserted-by":"publisher","first-page":"303","DOI":"10.1007\/s10772-020-09792-x","volume":"24","author":"U Kumaran","year":"2021","unstructured":"Kumaran, U., Radha Rammohan, S., Nagarajan, S. M., & Prathik, A. (2021). Fusion of Mel and gammatone frequency cepstral coefficients for speech emotion recognition using deep C-RNN. International Journal of Speech Technology, 24, 303\u2013314.","journal-title":"International Journal of Speech Technology"},{"key":"10063_CR17","doi-asserted-by":"crossref","unstructured":"Latif, S., Qayyum, A., Usman, M., & Qadir, J. (2018). Cross lingual speech emotion recognition: Urdu vs. Western languages. In 2018 international conference on frontiers of information technology (FIT) (pp. 88\u201393). IEEE.","DOI":"10.1109\/FIT.2018.00023"},{"key":"10063_CR18","doi-asserted-by":"crossref","unstructured":"Latif, S., Rana, R., Younis, S., Qadir, J. & Epps, J. (2018). Transfer learning for improving speech emotion classification accuracy.\u00a0arXiv preprint arXiv:1801.06353.","DOI":"10.21437\/Interspeech.2018-1625"},{"key":"10063_CR19","doi-asserted-by":"crossref","unstructured":"McFee, B., Raffel, C., Liang, D., Ellis, D. P., McVicar, M., Battenberg, E., & Nieto, O. (2015). Librosa: Audio and music signal analysis in Python. In Proceedings of the 14th Python in science conference (Vol. 8, pp. 18\u201325).","DOI":"10.25080\/Majora-7b98e3ed-003"},{"key":"10063_CR20","doi-asserted-by":"crossref","unstructured":"Nantasri, P., Phaisangittisagul, E., Karnjana, J., Boonkla, S., Keerativittayanun, S., Rugchatjaroen, A., & Shinozaki, T. (2020). A light-weight artificial neural network for speech emotion recognition using average values of MFCCs and their derivatives. In 2020 17th international conference on electrical engineering\/electronics, computer, telecommunications and information technology (ECTI-CON). IEEE.","DOI":"10.1109\/ECTI-CON49241.2020.9158221"},{"key":"10063_CR21","doi-asserted-by":"crossref","unstructured":"Neumann, M., & Vu, N. T. (2017). Attentive convolutional neural network based speech emotion recognition: A study on the impact of input features, signal length, and acted speech.\u00a0arXiv preprint arXiv:1706.00612","DOI":"10.21437\/Interspeech.2017-917"},{"key":"10063_CR22","unstructured":"Parlak, C., Diri, B., & G\u00fcrgen, F. (2014). A cross-corpus experiment in speech emotion recognition. In SLAM@ INTERSPEECH (pp.\u00a058\u201361)."},{"key":"10063_CR23","doi-asserted-by":"publisher","first-page":"867","DOI":"10.1007\/s12652-021-02979-3","volume":"13","author":"N Patel","year":"2022","unstructured":"Patel, N., Patel, S., & Mankad, S. H. (2022). Impact of autoencoder based compact representation on emotion detection from audio. Journal of Ambient Intelligence and Humanized Computing, 13, 867\u2013885.","journal-title":"Journal of Ambient Intelligence and Humanized Computing"},{"key":"10063_CR24","doi-asserted-by":"publisher","first-page":"15563","DOI":"10.1007\/s11042-020-10329-2","volume":"80","author":"MD Pawar","year":"2021","unstructured":"Pawar, M. D., & Kokate, R. D. (2021). Convolution neural network based automatic speech emotion recognition using Mel-frequency Cepstrum coefficients. Multimedia Tools and Applications, 80, 15563\u201315587.","journal-title":"Multimedia Tools and Applications"},{"key":"10063_CR25","volume-title":"Digital processing of speech signals","author":"LR Rabiner","year":"1978","unstructured":"Rabiner, L. R. (1978). Digital processing of speech signals. Pearson Education India."},{"key":"10063_CR26","unstructured":"Roberts, L. S. (2012). A forensic phonetic study of the vocal responses of individuals in distress. Doctoral dissertation, University of York."},{"issue":"6","key":"10063_CR27","doi-asserted-by":"publisher","first-page":"1632","DOI":"10.1121\/1.1913513","volume":"53","author":"M Rothenberg","year":"1973","unstructured":"Rothenberg, M. (1973). A new inverse-filtering technique for deriving the glottal air flow waveform during voicing. The Journal of the Acoustical Society of America, 53(6), 1632\u20131645.","journal-title":"The Journal of the Acoustical Society of America"},{"key":"10063_CR28","doi-asserted-by":"publisher","first-page":"113","DOI":"10.1016\/j.specom.2018.07.002","volume":"104","author":"A Sasou","year":"2018","unstructured":"Sasou, A. (2018). Glottal inverse filtering by combining a constrained LP and an HMM-based generative model of glottal flow derivative. Speech Communication, 104, 113\u2013128.","journal-title":"Speech Communication"},{"key":"10063_CR29","doi-asserted-by":"crossref","unstructured":"Schuller, B., Zhang, Z., Weninger, F., & Rigoll, G. (2011). Using multiple databases for training in emotion recognition: To unite or to vote? In Twelfth annual conference of the international speech communication association (Interspeech).","DOI":"10.21437\/Interspeech.2011-468"},{"issue":"2","key":"10063_CR30","doi-asserted-by":"publisher","first-page":"119","DOI":"10.1109\/T-AFFC.2010.8","volume":"1","author":"B Schuller","year":"2010","unstructured":"Schuller, B., Vlasenko, B., Eyben, F., W\u00f6llmer, M., Stuhlsatz, A., Wendemuth, A., & Rigoll, G. (2010). Cross-corpus acoustic emotion recognition: Variances and strategies. IEEE Transactions on Affective Computing, 1(2), 119\u2013131.","journal-title":"IEEE Transactions on Affective Computing"},{"issue":"7","key":"10063_CR31","doi-asserted-by":"publisher","first-page":"1955","DOI":"10.1007\/s11760-022-02156-9","volume":"16","author":"A Shilandari","year":"2022","unstructured":"Shilandari, A., Marvi, H., Khosravi, H., & Wang, W. (2022). Speech emotion recognition using data augmentation method by cycle-generative adversarial networks. Signal, Image and Video Processing, 16(7), 1955\u20131962.","journal-title":"Signal, Image and Video Processing"},{"key":"10063_CR32","doi-asserted-by":"crossref","unstructured":"Stuhlsatz, A., Meyer, C., Eyben, F., Zielke, T., Meier, G. & Schuller, B. (2011). Deep neural networks for acoustic emotion recognition: Raising the benchmarks. In 2011 IEEE international conference on acoustics, speech and signal processing (ICASSP) (pp.\u00a05688\u20135691). IEEE.","DOI":"10.1109\/ICASSP.2011.5947651"},{"key":"10063_CR33","doi-asserted-by":"publisher","first-page":"564","DOI":"10.1109\/ACCESS.2021.3136251","volume":"10","author":"S Sultana","year":"2021","unstructured":"Sultana, S., Iqbal, M. Z., Selim, M. R., Rashid, M. M., & Rahman, M. S. (2021). Bangla speech emotion recognition and cross-lingual study using deep CNN and BLSTM networks. IEEE Access, 10, 564\u2013578.","journal-title":"IEEE Access"},{"key":"10063_CR10","unstructured":"TESS Dataset. (2022). Retrieved March 3, 2022, from https:\/\/tspace.library.utoronto.ca\/handle\/1807\/24487."},{"key":"10063_CR34","doi-asserted-by":"crossref","unstructured":"Trigeorgis, G., Ringeval, F., Brueckner, R., Marchi, E., Nicolaou, M. A., Schuller, B., & Zafeiriou, S. (2016). Adieu features? End-to-end speech emotion recognition using a deep convolutional recurrent network. In 2016 IEEE international conference on acoustics, speech and signal processing (ICASSP 2016) (pp. 5200\u20135204). IEEE.","DOI":"10.1109\/ICASSP.2016.7472669"},{"key":"10063_CR35","unstructured":"Vogt, T., & Andr\u00e9, E. (2006). Improving automatic emotion recognition from speech via gender differentiation. In Proceedings of language resources and evaluation conference."},{"key":"10063_CR36","doi-asserted-by":"crossref","unstructured":"Vryzas, N., Matsiola, M., Kotsakis, R., Dimoulas, C., & Kalliris, G. (2018). Subjective evaluation of a speech emotion recognition interaction framework. In Proceedings of the audio mostly 2018 on sound in immersion and emotion (pp. 1\u20137).","DOI":"10.1145\/3243274.3243294"},{"key":"10063_CR37","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1155\/2021\/2633021","volume":"2021","author":"L Wang","year":"2021","unstructured":"Wang, L., Gao, Y., Li, J., & Wang, X. (2021). A feature selection method by using chaotic cuckoo search optimization algorithm with elitist preservation and uniform mutation for data classification. Discrete Dynamics in Nature and Society, 2021, 1\u201319.","journal-title":"Discrete Dynamics in Nature and Society"},{"issue":"1","key":"10063_CR38","doi-asserted-by":"publisher","first-page":"012085","DOI":"10.1088\/1742-6596\/1453\/1\/012085","volume":"1453","author":"S Wei","year":"2020","unstructured":"Wei, S., Zou, S., & Liao, F. (2020). A comparison on data augmentation methods based on deep learning for audio classification. Journal of Physics Conference Series, 1453(1), 012085.","journal-title":"Journal of Physics Conference Series"},{"key":"10063_CR39","doi-asserted-by":"publisher","first-page":"74539","DOI":"10.1109\/ACCESS.2021.3067460","volume":"9","author":"M Xu","year":"2021","unstructured":"Xu, M., Zhang, F., & Zhang, W. (2021). Head fusion: Improving the accuracy and robustness of speech emotion recognition on the IEMOCAP and RAVDESS dataset. IEEE Access, 9, 74539\u201374549.","journal-title":"IEEE Access"},{"issue":"3","key":"10063_CR40","doi-asserted-by":"publisher","first-page":"70","DOI":"10.3390\/a13030070","volume":"13","author":"K Zvarevashe","year":"2020","unstructured":"Zvarevashe, K., & Olugbara, O. (2020). Ensemble learning of hybrid acoustic features for speech emotion recognition. Algorithms, 13(3), 70.","journal-title":"Algorithms"}],"container-title":["International Journal of Speech Technology"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10772-023-10063-8.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10772-023-10063-8\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10772-023-10063-8.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,1,11]],"date-time":"2024-01-11T10:13:56Z","timestamp":1704968036000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10772-023-10063-8"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,11,15]]},"references-count":40,"journal-issue":{"issue":"4","published-print":{"date-parts":[[2023,12]]}},"alternative-id":["10063"],"URL":"https:\/\/doi.org\/10.1007\/s10772-023-10063-8","relation":{},"ISSN":["1381-2416","1572-8110"],"issn-type":[{"value":"1381-2416","type":"print"},{"value":"1572-8110","type":"electronic"}],"subject":[],"published":{"date-parts":[[2023,11,15]]},"assertion":[{"value":"18 August 2023","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"12 October 2023","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"15 November 2023","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare no competing interests.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}},{"value":"Not applicable.","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethical approval"}},{"value":"Not applicable.","order":4,"name":"Ethics","group":{"name":"EthicsHeading","label":"Consent to participate"}},{"value":"All contributors agreed and given consent to Publish.","order":5,"name":"Ethics","group":{"name":"EthicsHeading","label":"Consent for publication"}}]}}