{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,28]],"date-time":"2025-12-28T10:31:43Z","timestamp":1766917903108,"version":"3.37.3"},"reference-count":65,"publisher":"Springer Science and Business Media LLC","issue":"4","license":[{"start":{"date-parts":[[2022,6,15]],"date-time":"2022-06-15T00:00:00Z","timestamp":1655251200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2022,6,15]],"date-time":"2022-06-15T00:00:00Z","timestamp":1655251200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Multimed Tools Appl"],"published-print":{"date-parts":[[2023,2]]},"DOI":"10.1007\/s11042-022-13358-1","type":"journal-article","created":{"date-parts":[[2022,6,15]],"date-time":"2022-06-15T03:33:08Z","timestamp":1655263988000},"page":"5521-5546","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":12,"title":["Tri-integrated convolutional neural network for audio image classification using Mel-frequency spectrograms"],"prefix":"10.1007","volume":"82","author":[{"given":"Aayush","family":"Khurana","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Sweta","family":"Mittal","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6690-8500","authenticated-orcid":false,"given":"Deepika","family":"Kumar","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Sonali","family":"Gupta","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ayushi","family":"Gupta","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2022,6,15]]},"reference":[{"key":"13358_CR1","doi-asserted-by":"crossref","unstructured":"Akyol K (2020) Stacking ensemble based deep neural networks modelling for effective epileptic seizure detection. Expert Syst Appl 148:113239","DOI":"10.1016\/j.eswa.2020.113239"},{"key":"13358_CR2","unstructured":"Arriaga O, Valdenegro-Toro M, Pl\u00f6ger P (2017) Realtime convolutional neural networks for emotion and gender classification. arXiv preprint arXiv:1710.07557"},{"key":"13358_CR3","doi-asserted-by":"crossref","unstructured":"Badshah AM, Ahmad J, Rahim N, Baik SW (2017) Speech emotion recognition from spectrograms with deep convolutional neural network. In: 2017 international conference on platform technology and service (PlatCon). IEEE. (pp. 1-5)","DOI":"10.1109\/PlatCon.2017.7883728"},{"key":"13358_CR4","doi-asserted-by":"crossref","unstructured":"Beard R, Das R, Ng RW, Gopalakrishnan PK, Eerens L, Swietojanski P, Miksik O (2018) Multi-modal sequence fusion via recursive attention for emotion recognition. In: Proceedings of the 22nd Conference on Computational Natural Language Learning (pp. 251\u2013259)","DOI":"10.18653\/v1\/K18-1025"},{"key":"13358_CR5","doi-asserted-by":"crossref","unstructured":"Bloch S, Lemeignan M, Aguilera-T N (1991) Specific respiratory patterns distinguish between basic human emotions. Int J Psychophysiol 11(2):141\u2013154","DOI":"10.1016\/0167-8760(91)90006-J"},{"key":"13358_CR6","doi-asserted-by":"crossref","unstructured":"Bourbakis N, Esposito A, Kavraki D (2010) Extracting and associating meta-features for understanding people\u2019s emotional behaviour: face and speech. Cogn Comput 3(3):436\u2013448","DOI":"10.1007\/s12559-010-9072-1"},{"issue":"3","key":"13358_CR7","doi-asserted-by":"publisher","first-page":"255","DOI":"10.1016\/S0167-6393(96)00063-5","volume":"20","author":"AR Bradlow","year":"1996","unstructured":"Bradlow AR, Torretta GM, Pisoni DB (1996) Intelligibility of normal speech I: global and fine-grained acoustic-phonetic talker characteristics. Speech Comm 20(3):255\u2013272","journal-title":"Speech Comm"},{"key":"13358_CR8","doi-asserted-by":"publisher","first-page":"722","DOI":"10.1109\/LSP.2022.3151551","volume":"29","author":"N Braunschweiler","year":"2022","unstructured":"Braunschweiler N, Doddipatla R, Keizer S, Stoyanchev S (2022) Factors in Emotion Recognition with Deep Learning Models Using Speech and Text on Multiple Corpora. IEEE Signal Processing Letters 29:722\u2013726","journal-title":"IEEE Signal Processing Letters"},{"key":"13358_CR9","doi-asserted-by":"crossref","unstructured":"Burkhardt F, Paeschke A, Rolfes M, Sendlmeier WF, Weiss B (2005) A database of German emotional speech In Ninth European Conference on Speech Communication and Technology","DOI":"10.21437\/Interspeech.2005-446"},{"key":"13358_CR10","doi-asserted-by":"publisher","first-page":"35871","DOI":"10.1007\/s11042-020-09842-1","volume":"80","author":"SW Byun","year":"2020","unstructured":"Byun SW, Lee SP (2020) Human emotion recognition based on the weighted integration method using image sequences and acoustic features. Multimed Tools Appl 80:35871\u201335885","journal-title":"Multimed Tools Appl"},{"issue":"1","key":"13358_CR11","doi-asserted-by":"publisher","first-page":"18","DOI":"10.1109\/T-AFFC.2010.1","volume":"1","author":"RA Calvo","year":"2010","unstructured":"Calvo RA, D'Mello S (2010) Affect detection: an interdisciplinary review of models, methods, and their applications. IEEE Trans Affect Comput 1(1):18\u201337","journal-title":"IEEE Trans Affect Comput"},{"key":"13358_CR12","doi-asserted-by":"crossref","unstructured":"Chatziagapi A, Paraskevopoulos G, Sgouropoulos D, Pantazopoulos G, Nikandrou M, Giannakopoulos T, Katsamanis A, Potamianos A, Narayanan S (2019) Data Augmentation Using GANs for Speech Emotion Recognition. In: INTERSPEECH (pp. 171\u2013175)","DOI":"10.21437\/Interspeech.2019-2561"},{"issue":"2","key":"13358_CR13","doi-asserted-by":"publisher","first-page":"194","DOI":"10.1007\/s12559-009-9016-9","volume":"1","author":"M Chetouani","year":"2009","unstructured":"Chetouani M, Mahdhaoui A, Ringeval F (2009) Time-scale feature extractions for emotional speech characterization. Cogn Comput 1(2):194\u2013201","journal-title":"Cogn Comput"},{"key":"13358_CR14","doi-asserted-by":"crossref","unstructured":"Cummins N, Amiriparian S, Hagerer G, Batliner A, Steidl S, Schuller BW (2017) An image-based deep spectrum feature representation for the recognition of emotional speech. In: Proceedings of the 25th ACM international conference on Multimedia. (pp. 478\u2013484)","DOI":"10.1145\/3123266.3123371"},{"issue":"43","key":"13358_CR15","doi-asserted-by":"publisher","first-page":"32917","DOI":"10.1007\/s11042-020-09693-w","volume":"79","author":"R Dangol","year":"2020","unstructured":"Dangol R, Alsadoon A, Prasad PW, Seher I, Alsadoon OH (2020) Speech emotion recognition using convolutional neural network and long-short term memory. Multimed Tools Appl 79(43):32917\u201332934","journal-title":"Multimed Tools Appl"},{"key":"13358_CR16","doi-asserted-by":"crossref","unstructured":"Deng J, Zhang Z, Marchi E, Schuller B (2013) Sparse autoencoder-based feature transfer learning for speech emotion recognition. In: Human association conference on affective computing and intelligent interaction. IEEE. (pp. 511-516)","DOI":"10.1109\/ACII.2013.90"},{"issue":"7","key":"13358_CR17","doi-asserted-by":"publisher","first-page":"1895","DOI":"10.1162\/089976698300017197","volume":"10","author":"TG Dietterich","year":"1998","unstructured":"Dietterich TG (1998) Approximate statistical tests for comparing supervised classification learning algorithms. Neural Comput 10(7):1895\u20131923","journal-title":"Neural Comput"},{"issue":"3","key":"13358_CR18","doi-asserted-by":"publisher","first-page":"268","DOI":"10.1007\/s12559-009-9017-8","volume":"1","author":"A Esposito","year":"2009","unstructured":"Esposito A (2009) The perceptual and cognitive role of visual and auditory channels in conveying emotional information. Cogn Comput 1(3):268\u2013278","journal-title":"Cogn Comput"},{"key":"13358_CR19","doi-asserted-by":"crossref","unstructured":"Fan Y, Lam JC, Li VO (2018) Video-based emotion recognition using deeply-supervised neural networks. In: Proceedings of the 20th ACM International Conference on Multimodal Interaction. (pp. 584\u2013588)","DOI":"10.1145\/3242969.3264978"},{"issue":"21","key":"13358_CR20","doi-asserted-by":"publisher","first-page":"6008","DOI":"10.3390\/s20216008","volume":"20","author":"M Farooq","year":"2020","unstructured":"Farooq M, Hussain F, Baloch NK, Raja FR, Yu H, Zikria YB (2020) Impact of feature selection algorithm on speech emotion recognition using deep convolutional neural network. Sensors. 20(21):6008","journal-title":"Sensors."},{"key":"13358_CR21","doi-asserted-by":"publisher","first-page":"60","DOI":"10.1016\/j.neunet.2017.02.013","volume":"92","author":"HM Fayek","year":"2017","unstructured":"Fayek HM, Lech M, Cavedon L (2017) Evaluating deep learning architectures for speech emotion recognition. Neural Netw 92:60\u201368","journal-title":"Neural Netw"},{"key":"13358_CR22","volume-title":"Speech analysis synthesis and perception","author":"JL Flanagan","year":"2013","unstructured":"Flanagan JL (2013) Speech analysis synthesis and perception. Springer Science & Business Media"},{"key":"13358_CR23","doi-asserted-by":"crossref","unstructured":"Gonzalez G, De La Rosa JL, Montaner M, Delfin S. (2007) Embedding emotional context in recommender systems. In: IEEE 23rd international conference on data engineering workshop. IEEE. pp. 845-852","DOI":"10.1109\/ICDEW.2007.4401075"},{"key":"13358_CR24","doi-asserted-by":"publisher","first-page":"611","DOI":"10.1007\/978-0-387-30715-2_27","volume-title":"Handbook of the sociology of emotions","author":"J Goodwin","year":"2006","unstructured":"Goodwin J, Jasper JM (2006) Emotions and social movements. In: Handbook of the sociology of emotions. Springer, Boston, pp 611\u2013635"},{"key":"13358_CR25","doi-asserted-by":"crossref","unstructured":"Huang KY, Wu CH, Hong QB, Su MH, Chen YH. (2019) Speech emotion recognition using deep neural network considering verbal and nonverbal speech sounds. In: IEEE International Conference on Acoustics, Speech and Signal Processing. (pp. 5866\u20135870)","DOI":"10.1109\/ICASSP.2019.8682283"},{"key":"13358_CR26","unstructured":"Hussain M, Haque MA (2018) Swishnet: A fast convolutional neural network for speech, music and noise classification and segmentation. arXiv preprint arXiv:1812.00149"},{"key":"13358_CR27","doi-asserted-by":"publisher","first-page":"101894","DOI":"10.1016\/j.bspc.2020.101894","volume":"59","author":"D Issa","year":"2020","unstructured":"Issa D, Demirci MF, Yazici A (2020) Speech emotion recognition with deep convolutional neural networks. Biomed Signal Process Control 59:101894","journal-title":"Biomed Signal Process Control"},{"key":"13358_CR28","doi-asserted-by":"publisher","first-page":"23745","DOI":"10.1007\/s11042-020-09874-7","volume":"80","author":"R Jahangir","year":"2021","unstructured":"Jahangir R, Teh YW, Hanif F, Mujtaba G (2021) Deep learning approaches for speech emotion recognition: state of the art and research challenges. Multimed Tools Appl 80:23745\u201323812","journal-title":"Multimed Tools Appl"},{"key":"13358_CR29","unstructured":"Jiang P, Fu H, Tao H (2019) Speech emotion recognition using deep convolutional neural network and simple recurrent unit. Eng Lett 27(4)"},{"key":"13358_CR30","volume-title":"Expressing emotion: myths, realities, and therapeutic strategies","author":"E Kennedy-Moore","year":"2001","unstructured":"Kennedy-Moore E, Watson JC (2001) Expressing emotion: myths, realities, and therapeutic strategies. Guilford Press"},{"key":"13358_CR31","doi-asserted-by":"publisher","first-page":"142521","DOI":"10.1109\/ACCESS.2020.3012292","volume":"8","author":"D Kumar","year":"2020","unstructured":"Kumar D, Jain N, Khurana A, Mittal S, Satapathy SC, Senkerik R, Hemanth JD (2020) Automatic detection of white blood Cancer from bone marrow microscopic images using convolutional neural networks. IEEE Access 8:142521\u2013142531","journal-title":"IEEE Access"},{"key":"13358_CR32","doi-asserted-by":"publisher","first-page":"303","DOI":"10.1007\/s10772-020-09792-x","volume":"24","author":"U Kumaran","year":"2021","unstructured":"Kumaran U, Rammohan SR, Nagarajan SM, Prathik A (2021) Fusion of mel and gammatone frequency cepstral coefficients for speech emotion recognition using deep C-RNN. Int J Speech Technol 24:303\u2013314","journal-title":"Int J Speech Technol"},{"key":"13358_CR33","doi-asserted-by":"crossref","unstructured":"Latif S, Rana R, Younis S, Qadir J, Epps J (2018) Transfer learning for improving speech emotion classification accuracy. arXiv preprint arXiv:1801.06353","DOI":"10.21437\/Interspeech.2018-1625"},{"issue":"27","key":"13358_CR34","doi-asserted-by":"publisher","first-page":"19629","DOI":"10.1007\/s11042-020-08841-6","volume":"79","author":"MC Lee","year":"2020","unstructured":"Lee MC, Chiang SY, Yeh SC, Wen TF (2020) Study on emotion recognition and companion Chatbot using deep neural network. Multimed Tools Appl 79(27):19629\u201319657","journal-title":"Multimed Tools Appl"},{"issue":"1","key":"13358_CR35","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1159\/000266337","volume":"47","author":"LH Leeper","year":"1995","unstructured":"Leeper LH, Culatta R (1995) Speech fluency: Effect of age, gender, and context. Folia Phoniatr Logop 47(1):1\u20134","journal-title":"Folia Phoniatr Logop"},{"issue":"2","key":"13358_CR36","doi-asserted-by":"publisher","first-page":"368","DOI":"10.1007\/s12559-017-9533-x","volume":"10","author":"J Li","year":"2017","unstructured":"Li J, Zhang Z, He H (2017) Hierarchical convolutional neural networks for EEG-based emotion recognition. Cogn Comput 10(2):368\u2013380","journal-title":"Cogn Comput"},{"key":"13358_CR37","doi-asserted-by":"crossref","unstructured":"Li S, Zheng W, Zong Y, Lu C, Tang C, Jiang X, Liu J, Xia W (2019) Bi-modality Fusion for Emotion Recognition in the Wild. In: 2019 International Conference on Multimodal Interaction. (pp. 589\u2013594)","DOI":"10.1145\/3340555.3355719"},{"key":"13358_CR38","doi-asserted-by":"crossref","unstructured":"Likitha MS, Gupta SR, Hasitha K, Raju AU (2017) Speech based human emotion recognition using MFCC. In: 2017 international conference on wireless communications, signal processing and networking (WiSPNET). IEEE. (pp. 2257-2260)","DOI":"10.1109\/WiSPNET.2017.8300161"},{"issue":"3","key":"13358_CR39","doi-asserted-by":"publisher","first-page":"1683","DOI":"10.1121\/1.414691","volume":"99","author":"B Lindblom","year":"1996","unstructured":"Lindblom B (1996) Role of articulation in speech perception: clues from production. J Acoust Soc Am 99(3):1683\u20131692","journal-title":"J Acoust Soc Am"},{"issue":"5","key":"13358_CR40","doi-asserted-by":"publisher","first-page":"e0196391","DOI":"10.1371\/journal.pone.0196391","volume":"13","author":"SR Livingstone","year":"2018","unstructured":"Livingstone SR, Russo FA (2018) The Ryerson Audio-Visual Database of Emotional Speech and Song (RAVDESS): A dynamic, multimodal set of facial and vocal expressions in North American English. PLoS One 13(5):e0196391","journal-title":"PLoS One"},{"issue":"22","key":"13358_CR41","doi-asserted-by":"publisher","first-page":"7665","DOI":"10.3390\/s21227665","volume":"21","author":"C Luna-Jim\u00e9nez","year":"2021","unstructured":"Luna-Jim\u00e9nez C, Griol D, Callejas Z, Kleinlein R, Montero JM, Fern\u00e1ndez-Mart\u00ednez F (2021) Multimodal emotion recognition on RAVDESS dataset using transfer learning. Sensors. 21(22):7665","journal-title":"Sensors."},{"issue":"1","key":"13358_CR42","doi-asserted-by":"publisher","first-page":"527","DOI":"10.3390\/app12010527","volume":"12","author":"F Ma","year":"2022","unstructured":"Ma F, Li Y, Ni S, Huang S, Zhang L (2022) Data augmentation for audio-visual emotion recognition with an efficient multimodal conditional GAN. Appl Sci 12(1):527","journal-title":"Appl Sci"},{"issue":"8","key":"13358_CR43","doi-asserted-by":"publisher","first-page":"2203","DOI":"10.1109\/TMM.2014.2360798","volume":"16","author":"Q Mao","year":"2014","unstructured":"Mao Q, Dong M, Huang Z, Zhan Y (2014) Learning salient features for speech emotion recognition using convolutional neural networks. IEEE Trans Multimedia 16(8):2203\u20132213","journal-title":"IEEE Trans Multimedia"},{"key":"13358_CR44","doi-asserted-by":"crossref","unstructured":"Mohsin M, Hemavathi D (2020) Emotion speech recognition through deep learning. InInternational conference on computational vision and bio inspired computing 2018 Nov 29 (pp. 1363-1369). Springer, Cham","DOI":"10.1007\/978-3-030-41862-5_140"},{"key":"13358_CR45","doi-asserted-by":"crossref","unstructured":"Nguyen D, Nguyen K, Sridharan S, Ghasemi A, Dean D, Fookes C (2017) Deep spatio-temporal features for multimodal emotion recognition. In: 2017 IEEE winter conference on applications of computer vision (WACV). IEEE. (pp. 1215-1223)","DOI":"10.1109\/WACV.2017.140"},{"key":"13358_CR46","doi-asserted-by":"publisher","first-page":"53","DOI":"10.1002\/int.22291","volume":"36","author":"EN Ocquaye","year":"2021","unstructured":"Ocquaye EN, Mao Q, Xue Y, Song H (2021) Cross lingual speech emotion recognition via triple attentive asymmetric convolutional neural network. Int J Intell Syst 36:53\u201371","journal-title":"Int J Intell Syst"},{"key":"13358_CR47","doi-asserted-by":"crossref","unstructured":"Ouyang X, Kawaai S, Goh EG, Shen S, Ding W, Ming H, Huang DY (2017) Audio-visual emotion recognition using deep transfer learning and multiple temporal models. In: Proceedings of the 19th ACM International Conference on Multimodal Interaction. (pp. 577\u2013582)","DOI":"10.1145\/3136755.3143012"},{"key":"13358_CR48","unstructured":"Peng S, Zhang L, Ban Y, Fang M, Winkler S (2018) A deep network for arousal-valence emotion prediction with acoustic-visual cues. arXiv preprint arXiv:1805.00638"},{"key":"13358_CR49","doi-asserted-by":"crossref","unstructured":"Popova AS, Rassadin AG, Ponomarenko AA (2018) Emotion recognition in sound. In: International conference on neuro informatics 2017 Oct 2. Springer, Cham. (pp. 117-124)","DOI":"10.1007\/978-3-319-66604-4_18"},{"key":"13358_CR50","doi-asserted-by":"publisher","first-page":"21","DOI":"10.1016\/j.imavis.2018.04.004","volume":"75","author":"P Rodr\u00edguez","year":"2018","unstructured":"Rodr\u00edguez P, Bautista MA, Gonzalez J, Escalera S (2018) Beyond one-hot encoding: lower dimensional target embedding. Image Vis Comput 75:21\u201331","journal-title":"Image Vis Comput"},{"issue":"3","key":"13358_CR51","doi-asserted-by":"publisher","first-page":"279","DOI":"10.1109\/LSP.2017.2657381","volume":"24","author":"J Salamon","year":"2017","unstructured":"Salamon J, Bello JP (2017) Deep convolutional neural networks and data augmentation for environmental sound classification. IEEE Signal Processing Letters 24(3):279\u2013283","journal-title":"IEEE Signal Processing Letters"},{"key":"13358_CR52","doi-asserted-by":"crossref","unstructured":"Satt A, Rozenberg S, Hoory R (2017) Efficient Emotion Recognition from Speech Using Deep Learning on Spectrograms. In INTERSPEECH (pp. 1089\u20131093)","DOI":"10.21437\/Interspeech.2017-200"},{"key":"13358_CR53","unstructured":"Schl\u00fcter J, Grill T (2015) Exploring Data Augmentation for Improved Singing Voice Detection with Neural Networks. In: ISMIR (pp. 121\u2013126)"},{"issue":"1","key":"13358_CR54","doi-asserted-by":"publisher","first-page":"4","DOI":"10.1016\/j.csl.2012.02.005","volume":"27","author":"B Schuller","year":"2013","unstructured":"Schuller B, Steidl S, Batliner A, Burkhardt F, Devillers L, M\u00fcLler C, Narayanan S (2013) Paralinguistics in speech and language\u2014State-of-the-art and the challenge. Comput Speech Lang 27(1):4\u201339","journal-title":"Comput Speech Lang"},{"key":"13358_CR55","doi-asserted-by":"publisher","first-page":"116080","DOI":"10.1016\/j.eswa.2021.116080","volume":"188","author":"I Shahin","year":"2022","unstructured":"Shahin I, Hindawi N, Nassif AB, Alhudhaif A, Polat K (2022) Novel dual-channel long short-term memory compressed capsule networks for emotion recognition. Expert Syst Appl 188:116080","journal-title":"Expert Syst Appl"},{"key":"13358_CR56","unstructured":"Simonyan K, Zisserman A (2014) Very deep convolutional networks for large-scale image recognition. arXiv preprint arXiv:1409.1556"},{"issue":"9","key":"13358_CR57","doi-asserted-by":"publisher","first-page":"2530","DOI":"10.1587\/transinf.2014EDL8038","volume":"97","author":"P Song","year":"2014","unstructured":"Song P, Jin Y, Zhao L, Xin M (2014) Speech emotion recognition using transfer learning. IEICE Trans Inf Syst 97(9):2530\u20132532","journal-title":"IEICE Trans Inf Syst"},{"key":"13358_CR58","doi-asserted-by":"crossref","unstructured":"Tits N, Haddad KE, Dutoit T. (2018) ASR-based features for emotion recognition: A transfer learning approach. arXiv preprint arXiv:1805.09197","DOI":"10.18653\/v1\/W18-3307"},{"key":"13358_CR59","doi-asserted-by":"publisher","first-page":"721","DOI":"10.1007\/s12652-020-02845-8","volume":"13","author":"S Umer","year":"2021","unstructured":"Umer S, Rout RK, Pero C, Nappi M (2021) Facial expression recognition with trade-offs between data augmentation and deep learning features. J Ambient Intell Humaniz Comput 13:721\u2013735","journal-title":"J Ambient Intell Humaniz Comput"},{"key":"13358_CR60","unstructured":"Venkataramanan K, Rajamohan HR (2019) Emotion Recognition from Speech. arXiv preprint arXiv:1912.10458"},{"issue":"9","key":"13358_CR61","doi-asserted-by":"publisher","first-page":"1162","DOI":"10.1016\/j.specom.2006.04.003","volume":"48","author":"D Ververidis","year":"2006","unstructured":"Ververidis D, Kotropoulos C (2006) Emotional speech recognition: resources, features, and methods. Speech Comm 48(9):1162\u20131181","journal-title":"Speech Comm"},{"issue":"1","key":"13358_CR62","doi-asserted-by":"publisher","first-page":"9","DOI":"10.1186\/s40537-016-0043-6","volume":"3","author":"K Weiss","year":"2016","unstructured":"Weiss K, Khoshgoftaar TM, Wang D (2016) A survey of transfer learning. Journal of Big data 3(1):9","journal-title":"Journal of Big data"},{"key":"13358_CR63","doi-asserted-by":"crossref","unstructured":"Zhang W, Du T, Wang J (2016) Deep learning over multi-field categorical data. In: European conference on information retrieval. Springer, Cham. pp 45\u201357","DOI":"10.1007\/978-3-319-30671-1_4"},{"issue":"6","key":"13358_CR64","doi-asserted-by":"publisher","first-page":"1576","DOI":"10.1109\/TMM.2017.2766843","volume":"20","author":"S Zhang","year":"2017","unstructured":"Zhang S, Zhang S, Huang T, Gao W (2017) Speech emotion recognition using deep convolutional neural network and discriminant temporal pyramid matching. IEEE Trans Multimedia 20(6):1576\u20131590","journal-title":"IEEE Trans Multimedia"},{"key":"13358_CR65","doi-asserted-by":"crossref","first-page":"28627","DOI":"10.1007\/s11042-021-10951-8","volume":"80","author":"H Zhang","year":"2021","unstructured":"Zhang H, Huang B, Tian G (2021) Facial expression recognition based on deep convolution long short-term memory networks of double-channel weighted mixture. Pattern recognition letters. 2020 mar 1;131:128-34. Liao, H., Wang, D., Fan, P. et al. deep learning enhanced attributes conditional random forest for robust facial expression recognition. Multimed Tools Appl 80:28627\u201328645","journal-title":"Multimed Tools Appl"}],"container-title":["Multimedia Tools and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11042-022-13358-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11042-022-13358-1\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11042-022-13358-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,1,25]],"date-time":"2023-01-25T08:23:27Z","timestamp":1674635007000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11042-022-13358-1"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,6,15]]},"references-count":65,"journal-issue":{"issue":"4","published-print":{"date-parts":[[2023,2]]}},"alternative-id":["13358"],"URL":"https:\/\/doi.org\/10.1007\/s11042-022-13358-1","relation":{},"ISSN":["1380-7501","1573-7721"],"issn-type":[{"type":"print","value":"1380-7501"},{"type":"electronic","value":"1573-7721"}],"subject":[],"published":{"date-parts":[[2022,6,15]]},"assertion":[{"value":"12 February 2021","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"1 March 2022","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"3 June 2022","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"15 June 2022","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"The authors declare that they do not have any conflict of interest. This research did not involve any human or animal participation. All authors have checked and agreed on the submission.","order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"All authors have participated in (a) conception and design, or analysis and interpretation of the data; (b) drafting the article or revising it critically for important intellectual content; and (c) approval of the final version. This manuscript has not been submitted to, nor is under review at, another journal or other publishing venue.All authors have no affiliation with any organization with a direct or indirect financial interest in the subject matter discussed in the manuscript.All authors declare that they have no conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}]}}