{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,9,8]],"date-time":"2025-09-08T06:27:38Z","timestamp":1757312858315},"reference-count":51,"publisher":"Springer Science and Business Media LLC","issue":"21-23","license":[{"start":{"date-parts":[[2021,7,22]],"date-time":"2021-07-22T00:00:00Z","timestamp":1626912000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2021,7,22]],"date-time":"2021-07-22T00:00:00Z","timestamp":1626912000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Multimed Tools Appl"],"published-print":{"date-parts":[[2021,9]]},"DOI":"10.1007\/s11042-021-11210-6","type":"journal-article","created":{"date-parts":[[2021,7,22]],"date-time":"2021-07-22T16:03:41Z","timestamp":1626969821000},"page":"32041-32069","update-policy":"http:\/\/dx.doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":3,"title":["Modified dense convolutional networks based emotion detection from speech using its paralinguistic features"],"prefix":"10.1007","volume":"80","author":[{"given":"Ritika","family":"Dhiman","sequence":"first","affiliation":[]},{"given":"Gurkanwal Singh","family":"Kang","sequence":"additional","affiliation":[]},{"given":"Varun","family":"Gupta","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2021,7,22]]},"reference":[{"key":"11210_CR1","doi-asserted-by":"crossref","unstructured":"Abdelwahab M, Busso C (2018) Study of dense network approaches for speech emotion recognition. In: 2018 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP). 
IEEE, New York, pp 5084\u20135088","DOI":"10.1109\/ICASSP.2018.8461866"},{"key":"11210_CR2","doi-asserted-by":"publisher","unstructured":"Abdul Qayyum AB, Arefeen A, Shahnaz C (2019) Convolutional Neural Network (CNN) Based Speech-Emotion Recognition. 2019 IEEE International Conference on Signal Processing, Information, Communication & Systems (SPICSCON), Dhaka, Bangladesh, pp 122\u2013125. https:\/\/doi.org\/10.1109\/SPICSCON48833.2019.9065172","DOI":"10.1109\/SPICSCON48833.2019.9065172"},{"key":"11210_CR3","doi-asserted-by":"crossref","unstructured":"Arora P, Chaspari T (2019) Exploring siamese neural network architectures for preserving speaker identity in speech emotion classification. In: Proceedings of the 4th International Workshop on Multimodal Analyses Enabling Artificial Agents in Human-Machine Interaction, pp 15\u201318. ACM, New York","DOI":"10.1145\/3279972.3279980"},{"key":"11210_CR4","doi-asserted-by":"crossref","unstructured":"Barsoum E, Zhang C, Ferrer CC, Zhang Z (2016) Training deep networks for facial expression recognition with crowd-sourced label distribution. In: Proceedings of the 18th ACM International Conference on Multimodal Interaction. ACM, New York, pp 279\u2013283","DOI":"10.1145\/2993148.2993165"},{"key":"11210_CR5","doi-asserted-by":"publisher","unstructured":"Birhala A, Ristea CN, Radoi A, Dutu LC (2020) Temporal aggregation of audio-visual modalities for emotion recognition. 2020 43rd International Conference on Telecommunications and Signal Processing (TSP), Milan, Italy, pp 305\u2013308. https:\/\/doi.org\/10.1109\/TSP49548.2020.9163474","DOI":"10.1109\/TSP49548.2020.9163474"},{"key":"11210_CR6","doi-asserted-by":"crossref","unstructured":"Blouin C, Mafolo V (2005) A study on the automatic detection and characterization of emotion in a voice service context. 
In: Ninth European Conference on Speech Communication and Technology","DOI":"10.21437\/Interspeech.2005-318"},{"key":"11210_CR7","doi-asserted-by":"crossref","unstructured":"Bothe C, Magg S, Weber C, Wermter S (2018) Conversational analysis using utterance-level attention-based bidirectional recurrent neural networks. arXiv preprint arXiv:1805.06242","DOI":"10.21437\/Interspeech.2018-2527"},{"key":"11210_CR8","doi-asserted-by":"crossref","unstructured":"Burkhardt F, Paeschke A, Rolfes M, Sendlmeier WF, Weiss B (2005) A database of German emotional speech. In: Ninth European Conference on Speech Communication and Technology","DOI":"10.21437\/Interspeech.2005-446"},{"key":"11210_CR9","doi-asserted-by":"crossref","unstructured":"Burmania A, Busso C (2017) A stepwise analysis of aggregated crowdsourced labels describing multimodal emotional behaviors. In: INTERSPEECH, pp 152\u2013156","DOI":"10.21437\/Interspeech.2017-1278"},{"issue":"4","key":"11210_CR10","doi-asserted-by":"publisher","first-page":"377","DOI":"10.1109\/TAFFC.2014.2336244","volume":"5","author":"H Cao","year":"2014","unstructured":"Cao H, Cooper DG, Keutmann MK, Gur RC, Nenkova A, Verma R (2014) \u2018CREMA-D\u2019: crowd-sourced emotional multimodal actors dataset. IEEE Trans Affect Comput 5(4):377\u2013390","journal-title":"IEEE Trans Affect Comput"},{"issue":"1","key":"11210_CR11","doi-asserted-by":"publisher","first-page":"88","DOI":"10.1121\/1.413664","volume":"98","author":"KE Cummings","year":"1995","unstructured":"Cummings KE, Clements MA (1995) Analysis of the glottal excitation of emotionally styled and stressed speech. J Acoust Soc Am 98(1):88\u201398","journal-title":"J Acoust Soc Am"},{"key":"11210_CR12","doi-asserted-by":"publisher","unstructured":"Dai D, Wu Z, Li R, Wu X, Jia J, Meng H (2019) Learning discriminative features from spectrograms using center loss for speech emotion recognition. 
ICASSP 2019\u20132019 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), Brighton, United Kingdom, pp 7405\u20137409. https:\/\/doi.org\/10.1109\/ICASSP.2019.8683765","DOI":"10.1109\/ICASSP.2019.8683765"},{"key":"11210_CR13","doi-asserted-by":"publisher","unstructured":"Doerfler M, Grill T (2017) Inside the spectrogram: Convolutional neural networks in audio processing. https:\/\/doi.org\/10.1109\/SAMPTA.2017.8024472","DOI":"10.1109\/SAMPTA.2017.8024472"},{"key":"11210_CR14","doi-asserted-by":"publisher","first-page":"60","DOI":"10.1016\/j.neunet.2017.02.013","volume":"92","author":"HM Fayek","year":"2017","unstructured":"Fayek HM, Lech M, Cavedon L (2017) Evaluating deep learning architectures for speech emotion recognition. Neural Netw 92:60\u201368","journal-title":"Neural Netw"},{"key":"11210_CR15","unstructured":"Fourier Analysis and Synthesis (2018) Hyperphysics.Phy-Astr.Gsu.Edu. http:\/\/hyperphysics.phy-astr.gsu.edu\/hbase\/Audio\/fourier.html#c1. Accessed 21 Nov 2018"},{"key":"11210_CR16","doi-asserted-by":"publisher","DOI":"10.1177\/2398212818812628","author":"E Fox","year":"2018","unstructured":"Fox E (2018) Perspectives from affective science on understanding the nature of emotion. Brain Neurosci Adv. https:\/\/doi.org\/10.1177\/2398212818812628","journal-title":"Brain Neurosci Adv"},{"key":"11210_CR17","doi-asserted-by":"publisher","unstructured":"Ghaleb E, Popa M, Asteriadis S (2019) Multimodal and temporal perception of audio-visual cues for emotion recognition. 2019 8th International Conference on Affective Computing and Intelligent Interaction (ACII), Cambridge, United Kingdom, pp 552\u201355. https:\/\/doi.org\/10.1109\/ACII.2019.8925444","DOI":"10.1109\/ACII.2019.8925444"},{"key":"11210_CR18","unstructured":"Gulcehre C, Moczulski M, Bengio Y (2014) Adasecant: robust adaptive secant method for stochastic gradient. 
arXiv preprint arXiv:1412.7419"},{"key":"11210_CR19","doi-asserted-by":"crossref","unstructured":"Gulcehre C, Sotelo J, Moczulski M, Bengio Y (2017) A robust adaptive stochastic gradient method for deep learning. arXiv preprint arXiv:1703.00788","DOI":"10.1109\/IJCNN.2017.7965845"},{"issue":"4","key":"11210_CR20","doi-asserted-by":"publisher","first-page":"1887","DOI":"10.3390\/en6041887","volume":"6","author":"F Guo-Feng","year":"2013","unstructured":"Guo-Feng F, Qing S, Wang H, Hong W-C, Li H-J (2013) Support vector regression model based on empirical mode decomposition and auto regression for electric load forecasting. Energies 6(4):1887\u20131901","journal-title":"Energies"},{"key":"11210_CR21","doi-asserted-by":"publisher","first-page":"958","DOI":"10.1016\/j.neucom.2015.08.051","volume":"173","author":"F Guo-Feng","year":"2016","unstructured":"Guo-Feng F, Peng L-L, Hong W-C, Sun F (2016) Electric load forecasting by the SVR model with differential empirical mode decomposition and auto regression. Neurocomputing 173:958\u2013970","journal-title":"Neurocomputing"},{"issue":"5","key":"11210_CR22","doi-asserted-by":"publisher","first-page":"737","DOI":"10.1002\/for.2655","volume":"39","author":"F Guo-Feng","year":"2020","unstructured":"Guo-Feng F, Guo Y-H, Zheng J-M, Hong W-C (2020) A generalized regression model based on hybrid empirical mode decomposition and support vector regression with back propagation neural network for mid-short term load forecasting. Journal of Forecasting 39(5):737\u2013756","journal-title":"Journal of Forecasting"},{"key":"11210_CR23","doi-asserted-by":"publisher","first-page":"102320","DOI":"10.1016\/j.scs.2020.102320","volume":"61","author":"F Guo-Feng","year":"2020","unstructured":"Guo-Feng F, Wei X, Li Y-T, Hong W-C (2020) Forecasting electricity consumption using a novel hybrid model. 
Sustain Cities Soc 61:102320","journal-title":"Sustain Cities Soc"},{"key":"11210_CR24","unstructured":"Hannun A, Case C, Casper J, Catanzaro B et al (2014) Deep Speech: Scaling Up End-to-End Speech Recognition. CoRR, arXiv:1412.5567"},{"key":"11210_CR25","doi-asserted-by":"crossref","unstructured":"Hong W-C, Fan G-F (2019) Hybrid empirical mode decomposition with support vector regression model for short term load forecasting. Energies 12(6):1093","DOI":"10.3390\/en12061093"},{"key":"11210_CR26","doi-asserted-by":"crossref","unstructured":"Huang C-W, Narayanan SS (2016) Attention Assisted discovery of sub-utterance structure in speech emotion recognition. In: Proceedings of Interspeech, pp 1387\u20131391","DOI":"10.21437\/Interspeech.2016-448"},{"key":"11210_CR27","doi-asserted-by":"publisher","unstructured":"Huang G, Liu Z, Van Der Maaten L, Weinberger KQ (2017) Densely connected convolutional networks. 2017 IEEE Conference on Computer Vision and Pattern Recognition (CVPR), Honolulu HI, pp 2261\u20132269. https:\/\/doi.org\/10.1109\/CVPR.2017.243","DOI":"10.1109\/CVPR.2017.243"},{"key":"11210_CR28","doi-asserted-by":"publisher","first-page":"357","DOI":"10.3389\/fpubh.2020.00357","volume":"8","author":"C Iwendi","year":"2020","unstructured":"Iwendi C, Bashir AK, Peshkar A, Sujatha R, Chatterjee JM, Pasupuleti S, Mishra R, Pillai S, Jo O (2020) COVID-19 Patient Health Prediction Using Boosted Random Forest Algorithm. Front Public Health 8:357. https:\/\/doi.org\/10.3389\/fpubh.2020.00357","journal-title":"Front Public Health"},{"key":"11210_CR29","volume-title":"Surrey Audio-Visual Expressed Emotion (SAVEE) Database","author":"P Jackson","year":"2014","unstructured":"Jackson P, Haq S (2014) Surrey Audio-Visual Expressed Emotion (SAVEE) Database. University of Surrey, Guildford"},{"key":"11210_CR30","unstructured":"Jozefowicz R, Vinyals O, Schuster M, Shazeer N, Wu Y (2016) Exploring the Limits of Language Modeling. 
arXiv:1602.02410 [cs]"},{"key":"11210_CR31","doi-asserted-by":"crossref","unstructured":"Lakomkin E, Zamani MA, Weber C, Magg S, Wermter S (2018) Emorl: continuous acoustic emotion classification using deep reinforcement learning. In: 2018 IEEE International Conference on Robotics and Automation (ICRA). IEEE, New York, pp 1\u20136","DOI":"10.1109\/ICRA.2018.8461058"},{"key":"11210_CR32","doi-asserted-by":"crossref","unstructured":"Lee J, Tashev I (2015) High-level feature representation using recurrent neural network for speech emotion recognition. In: INTERSPEECH, pp 1537\u20131540","DOI":"10.21437\/Interspeech.2015-336"},{"issue":"4","key":"11210_CR33","doi-asserted-by":"publisher","first-page":"2579","DOI":"10.1007\/s11071-019-05149-5","volume":"97","author":"M-W Li","year":"2019","unstructured":"Li M-W, Geng J, Hong W-C, Zhang L-D (2019) Periodogram estimation based on LSSVR-CCPSO compensation for forecasting ship motion. Nonlinear Dyn 97(4):2579\u20132594","journal-title":"Nonlinear Dyn"},{"key":"11210_CR34","unstructured":"Martens J (2010) Deep learning via hessian-free optimization. In Proceedings of the 27th International Conference on Machine Learning (ICML-10), pp 735\u2013742"},{"key":"11210_CR35","doi-asserted-by":"publisher","unstructured":"McFee B, Colin R, Liang D, Ellis D, Mcvicar M, Battenberg E, Nieto O (2015) librosa: Audio and music signal analysis in python, pp 18-24. https:\/\/doi.org\/10.25080\/Majora-7b98e3ed-003","DOI":"10.25080\/Majora-7b98e3ed-003"},{"key":"11210_CR36","doi-asserted-by":"crossref","unstructured":"Neiberg D, Elenius K, Karlsson I, Laskowski K (2006) Emotion recognition in spontaneous speech. In: Proceedings of Fonetik, pp 101\u2013104","DOI":"10.21437\/Interspeech.2006-277"},{"key":"11210_CR37","doi-asserted-by":"crossref","unstructured":"Oudeyer PY (2002) Novel useful features and algorithms for the recognition of emotions in human speech. 
In: Speech Prosody 2002, International Conference","DOI":"10.21437\/SpeechProsody.2002-122"},{"key":"11210_CR38","unstructured":"Radford A, Jozefowicz R, Sutskever I (2017) Learning to Generate Reviews and Discovering Sentiment. arXiv:1704.01444 [cs]"},{"issue":"01","key":"11210_CR39","doi-asserted-by":"publisher","first-page":"85","DOI":"10.4236\/jbise.2010.31013","volume":"3","author":"G Ravindran","year":"2010","unstructured":"Ravindran G, Shenbagadevi S, Selvam VS (2010) Cepstral and linear prediction techniques for improving intelligibility and audibility of impaired speech. J Biomed Sci Eng 3(01):85","journal-title":"J Biomed Sci Eng"},{"key":"11210_CR40","doi-asserted-by":"crossref","unstructured":"Sauter DA, Eisner F, Ekman P, Scott SK (2010) Cross-cultural recognition of basic emotions through nonverbal emotional vocalizations. Proc Natl Acad Sci 107(6):2408\u20132412","DOI":"10.1073\/pnas.0908239106"},{"issue":"1\u20132","key":"11210_CR41","doi-asserted-by":"publisher","first-page":"227","DOI":"10.1016\/S0167-6393(02)00084-5","volume":"40","author":"KR Scherer","year":"2003","unstructured":"Scherer KR (2003) Vocal communication of emotion: a review of research paradigms. Speech Commun 40(1\u20132):227\u2013256","journal-title":"Speech Commun"},{"key":"11210_CR42","doi-asserted-by":"publisher","unstructured":"Singh R, Puri H, Aggarwal N, Gupta V (2020) An efficient language-independent acoustic emotion classification system. Arab J Sci Eng 45:3111\u20133121. https:\/\/doi.org\/10.1007\/s13369-019-04293-9. Accessed 7 Oct 2020","DOI":"10.1007\/s13369-019-04293-9"},{"key":"11210_CR43","doi-asserted-by":"publisher","unstructured":"Smith LN (2017) Cyclical learning rates for training neural networks. IEEE Winter Conference on Applications of Computer Vision (WACV), Santa Rosa, CA, pp 464\u2013472. 
https:\/\/doi.org\/10.1109\/WACV.2017.58","DOI":"10.1109\/WACV.2017.58"},{"key":"11210_CR44","unstructured":"Smith LN (2018) A disciplined approach to neural network hyper-parameters: Part 1: learning rate, batch size, momentum, and weight decay. http:\/\/arxiv.org\/abs\/1803.09820"},{"key":"11210_CR45","doi-asserted-by":"publisher","unstructured":"Smith LN, Topin N (2019) Super-convergence: very fast training of neural networks using large learning rates. Proc SPIE 11006, Artificial Intelligence and Machine Learning for Multi-Domain Operations Applications, 1100612. https:\/\/doi.org\/10.1117\/12.2520589","DOI":"10.1117\/12.2520589"},{"issue":"9","key":"11210_CR46","doi-asserted-by":"publisher","first-page":"1162","DOI":"10.1016\/j.specom.2006.04.003","volume":"48","author":"D Ververidis","year":"2006","unstructured":"Ververidis D, Kotropoulos C (2006) Emotional speech recognition: resources, features, and methods. Speech Commun 48(9):1162\u20131181","journal-title":"Speech Commun"},{"key":"11210_CR47","first-page":"217","volume-title":"International Tutorial and Research Workshop on Perception and Interactive Technologies for Speech-Based Systems","author":"B Vlasenko","year":"2008","unstructured":"Vlasenko B, Schuller B, Wendemuth A, Rigoll G (2008) On the influence of phonetic content variation for acoustic emotion recognition. In: International Tutorial and Research Workshop on Perception and Interactive Technologies for Speech-Based Systems. Springer, Berlin, pp 217\u2013220"},{"key":"11210_CR48","doi-asserted-by":"crossref","unstructured":"Wang ZQ, Tashev I (2017) Learning utterance-level representations for speech emotion and age\/gender recognition using deep neural networks. In: 2017 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP). 
IEEE, New York, pp 5150\u20135154","DOI":"10.1109\/ICASSP.2017.7953138"},{"key":"11210_CR49","unstructured":"Wu Y, Schuster M, Chen Z, Le QV, Norouzi M, Macherey W, Krikun M et al (2016) Google\u2019s Neural Machine Translation System: Bridging the Gap between Human and Machine Translation. arXiv:1609.08144 [cs]"},{"key":"11210_CR50","doi-asserted-by":"publisher","unstructured":"Wu S, Zhong S, Liu Y (2017) Deep residual learning for image analysis. Multimed Tools Appl:1\u201317. https:\/\/doi.org\/10.1007\/s11042-017-4440-4","DOI":"10.1007\/s11042-017-4440-4"},{"key":"11210_CR51","doi-asserted-by":"publisher","first-page":"185","DOI":"10.1016\/j.neucom.2020.05.075","volume":"410","author":"Z Zhang","year":"2020","unstructured":"Zhang Z, Ding S, Sun Y (2020) A support vector regression model hybridized with chaotic krill herd algorithm and empirical mode decomposition for regression task. Neurocomputing 410:185\u2013201","journal-title":"Neurocomputing"}],"container-title":["Multimedia Tools and 
Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11042-021-11210-6.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11042-021-11210-6\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11042-021-11210-6.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,9,4]],"date-time":"2024-09-04T21:17:11Z","timestamp":1725484631000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11042-021-11210-6"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,7,22]]},"references-count":51,"journal-issue":{"issue":"21-23","published-print":{"date-parts":[[2021,9]]}},"alternative-id":["11210"],"URL":"https:\/\/doi.org\/10.1007\/s11042-021-11210-6","relation":{},"ISSN":["1380-7501","1573-7721"],"issn-type":[{"type":"print","value":"1380-7501"},{"type":"electronic","value":"1573-7721"}],"subject":[],"published":{"date-parts":[[2021,7,22]]},"assertion":[{"value":"3 November 2020","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"22 January 2021","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"28 June 2021","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"22 July 2021","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}