{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,2,21]],"date-time":"2025-02-21T07:40:16Z","timestamp":1740123616742,"version":"3.37.3"},"reference-count":37,"publisher":"Springer Science and Business Media LLC","issue":"10","license":[{"start":{"date-parts":[[2020,6,13]],"date-time":"2020-06-13T00:00:00Z","timestamp":1592006400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2020,6,13]],"date-time":"2020-06-13T00:00:00Z","timestamp":1592006400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"funder":[{"DOI":"10.13039\/501100003052","name":"Ministry of Trade, Industry and Energy","doi-asserted-by":"publisher","award":["10073144"],"award-info":[{"award-number":["10073144"]}],"id":[{"id":"10.13039\/501100003052","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["J Supercomput"],"published-print":{"date-parts":[[2020,10]]},"DOI":"10.1007\/s11227-020-03346-3","type":"journal-article","created":{"date-parts":[[2020,6,13]],"date-time":"2020-06-13T11:02:48Z","timestamp":1592046168000},"page":"8357-8371","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["Comparative studies on machine learning for paralinguistic signal compression and classification"],"prefix":"10.1007","volume":"76","author":[{"given":"Seokhyun","family":"Byun","sequence":"first","affiliation":[]},{"given":"Seunghyun","family":"Yoon","sequence":"additional","affiliation":[]},{"given":"Kyomin","family":"Jung","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2020,6,13]]},"reference":[{"key":"3346_CR1","doi-asserted-by":"crossref","unstructured":"Aldeneh Z, Provost EM (2017) Using regional saliency for speech emotion recognition. In: 2017 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP). IEEE, pp 2741\u20132745","DOI":"10.1109\/ICASSP.2017.7952655"},{"key":"3346_CR2","doi-asserted-by":"crossref","unstructured":"Amiriparian S, Gerczuk M, Ottl S, Cummins N, Freitag M, Pugachevskiy S, Baird A, Schuller BW (2017) Snore sound classification using image-based deep spectrum features. In: INTERSPEECH, pp 3512\u20133516","DOI":"10.21437\/Interspeech.2017-434"},{"key":"3346_CR3","unstructured":"Amodei D, Ananthanarayanan S, Anubhai R, Bai J, Battenberg E, Case C, Casper J, Catanzaro B, Cheng Q, Chen G, et al. (2016) Deep speech 2: end-to-end speech recognition in English and Mandarin. In: International Conference on Machine Learning, pp 173\u2013182"},{"issue":"5","key":"3346_CR4","doi-asserted-by":"publisher","first-page":"3741","DOI":"10.3906\/elk-1903-121","volume":"27","author":"SR Bandela","year":"2019","unstructured":"Bandela SR, Kishpre KT (2019) Speech emotion recognition using semi-NMF feature optimization. Turk J Electr Eng Comput Sci 27(5):3741\u20133757","journal-title":"Turk J Electr Eng Comput Sci"},{"key":"3346_CR5","doi-asserted-by":"crossref","unstructured":"Boser BE, Guyon IM, Vapnik VN (1992) A training algorithm for optimal margin classifiers. In: Proceedings of the Fifth Annual Workshop on Computational Learning Theory. ACM, pp 144\u2013152","DOI":"10.1145\/130385.130401"},{"issue":"4","key":"3346_CR6","doi-asserted-by":"publisher","first-page":"335","DOI":"10.1007\/s10579-008-9076-6","volume":"42","author":"C Busso","year":"2008","unstructured":"Busso C, Bulut M, Lee CC, Kazemzadeh A, Mower E, Kim S, Chang JN, Lee S, Narayanan SS (2008) Iemocap: Interactive emotional dyadic motion capture database. Lang Resour Eval 42(4):335","journal-title":"Lang Resour Eval"},{"key":"3346_CR7","doi-asserted-by":"crossref","unstructured":"Byun S, Yoon S, Jung K (2019) Neural networks for compressing and classifying speaker-independent paralinguistic signals. In: 2019 IEEE International Conference on Big Data and Smart Computing (BigComp). IEEE, pp 1\u20134","DOI":"10.1109\/BIGCOMP.2019.8679115"},{"key":"3346_CR8","doi-asserted-by":"crossref","unstructured":"Chen T, Guestrin C (2016) Xgboost: a scalable tree boosting system. In: Proceedings of the 22nd ACM Sigkdd International Conference on Knowledge Discovery and Data Mining. ACM, pp 785\u2013794","DOI":"10.1145\/2939672.2939785"},{"key":"3346_CR9","doi-asserted-by":"crossref","unstructured":"Chiou BC, Chen CP (2013) Feature space dimension reduction in speech emotion recognition using support vector machine. In: 2013 Asia\u2013Pacific Signal and Information Processing Association Annual Summit and Conference. IEEE, pp 1\u20136","DOI":"10.1109\/APSIPA.2013.6694251"},{"key":"3346_CR10","doi-asserted-by":"publisher","first-page":"247","DOI":"10.21437\/Interspeech.2018-2466","volume":"2018","author":"J Cho","year":"2018","unstructured":"Cho J, Pappagari R, Kulkarni P, Villalba J, Carmiel Y, Dehak N (2018) Deep neural networks for emotion recognition combining audio and transcripts. Proc Interspeech 2018:247\u2013251","journal-title":"Proc Interspeech"},{"key":"3346_CR11","doi-asserted-by":"crossref","unstructured":"Eyben F, Weninger F, Gross F, Schuller B (2013) Recent developments in opensmile, the Munich open-source multimedia feature extractor. In: Proceedings of the 21st ACM International Conference on Multimedia. ACM, pp 835\u2013838","DOI":"10.1145\/2502081.2502224"},{"key":"3346_CR12","doi-asserted-by":"crossref","unstructured":"Fewzee P, Karray F (2012) Dimensionality reduction for emotional speech recognition. In: 2012 International Conference on Privacy, Security, Risk and Trust and 2012 International Conference on Social Computing. IEEE, pp 532\u2013537","DOI":"10.1109\/SocialCom-PASSAT.2012.83"},{"key":"3346_CR13","doi-asserted-by":"crossref","unstructured":"Gamage KW, Sethu V, Ambikairajah E (2017) Salience based lexical features for emotion recognition. In: 2017 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP). IEEE, pp 5830\u20135834","DOI":"10.1109\/ICASSP.2017.7953274"},{"key":"3346_CR14","doi-asserted-by":"crossref","unstructured":"Han K, Yu D, Tashev I (2014) Speech emotion recognition using deep neural network and extreme learning machine. In: Fifteenth Annual Conference of the International Speech Communication Association","DOI":"10.21437\/Interspeech.2014-57"},{"key":"3346_CR15","doi-asserted-by":"crossref","unstructured":"Hantke S, Eyben F, Appel T, Schuller B (2015) ihearu-play: introducing a game for crowdsourced data collection for affective computing. In: 2015 International Conference on Affective Computing and Intelligent Interaction (ACII). IEEE, pp 891\u2013897","DOI":"10.1109\/ACII.2015.7344680"},{"key":"3346_CR16","doi-asserted-by":"publisher","first-page":"3137","DOI":"10.21437\/Interspeech.2017-409","volume":"2017","author":"S Hantke","year":"2017","unstructured":"Hantke S, Sagha H, Cummins N, Schuller B (2017) Emotional speech of mentally and physically disabled individuals: introducing the emotass database and first findings. Proc Interspeech 2017:3137\u20133141","journal-title":"Proc Interspeech"},{"key":"3346_CR17","unstructured":"Ioffe S, Szegedy C (2015) Batch normalization: accelerating deep network training by reducing internal covariate shift. In: International Conference on Machine Learning, pp 448\u2013456"},{"key":"3346_CR18","unstructured":"Klambauer G, Unterthiner T, Mayr A, Hochreiter S (2017) Self-normalizing neural networks. In: Advances in Neural Information Processing Systems, pp 971\u2013980"},{"key":"3346_CR19","doi-asserted-by":"crossref","unstructured":"Lee J, Tashev I (2015) High-level feature representation using recurrent neural network for speech emotion recognition. In: Sixteenth Annual Conference of the International Speech Communication Association","DOI":"10.21437\/Interspeech.2015-336"},{"key":"3346_CR20","doi-asserted-by":"crossref","unstructured":"Mirsamadi S, Barsoum E, Zhang C (2017) Automatic speech emotion recognition using recurrent neural networks with local attention. 2017 IEEE International Conference on Acoustics. Speech and Signal Processing (ICASSP). IEEE, pp 2227\u20132231","DOI":"10.1109\/ICASSP.2017.7952552"},{"key":"3346_CR21","doi-asserted-by":"publisher","first-page":"1263","DOI":"10.21437\/Interspeech.2017-917","volume":"2017","author":"M Neumann","year":"2017","unstructured":"Neumann M, Vu NT (2017) Attentive convolutional neural network based speech emotion recognition: a study on the impact of input features, signal length, and acted speech. Proc Interspeech 2017:1263\u20131267","journal-title":"Proc Interspeech"},{"issue":"2","key":"3346_CR22","first-page":"101","volume":"6","author":"Y Pan","year":"2012","unstructured":"Pan Y, Shen P, Shen L (2012) Speech emotion recognition using support vector machine. Int J Smart Home 6(2):101\u2013108","journal-title":"Int J Smart Home"},{"issue":"6","key":"3346_CR23","doi-asserted-by":"publisher","first-page":"2986","DOI":"10.1007\/s11227-018-2499-y","volume":"75","author":"S Panwar","year":"2019","unstructured":"Panwar S, Rad P, Choo KKR, Roopaei M (2019) Are you emotional or depressed? Learning about your emotional state from your music using machine learning. J Supercomput 75(6):2986\u20133009","journal-title":"J Supercomput"},{"key":"3346_CR24","doi-asserted-by":"crossref","unstructured":"Quan C, Wan D, Zhang B, Ren F (2013) Reduce the dimensions of emotional features by principal component analysis for speech emotion recognition. In: Proceedings of the 2013 IEEE\/SICE International Symposium on System Integration. IEEE, pp 222\u2013226","DOI":"10.1109\/SII.2013.6776653"},{"key":"3346_CR25","doi-asserted-by":"publisher","first-page":"1243","DOI":"10.21437\/Interspeech.2017-1421","volume":"2017","author":"S Sahu","year":"2017","unstructured":"Sahu S, Gupta R, Sivaraman G, AbdAlmageed W, Espy-Wilson C (2017) Adversarial auto-encoders for speech based emotion recognition. Proc Interspeech 2017:1243\u20131247","journal-title":"Proc Interspeech"},{"key":"3346_CR26","doi-asserted-by":"publisher","first-page":"3693","DOI":"10.21437\/Interspeech.2018-1883","volume":"2018","author":"S Sahu","year":"2018","unstructured":"Sahu S, Gupta R, Espy-Wilson C (2018) On enhancing speech emotion recognition using generative adversarial networks. Proc Interspeech 2018:3693\u20133697","journal-title":"Proc Interspeech"},{"key":"3346_CR27","doi-asserted-by":"crossref","unstructured":"Schuller B, Rigoll G, Lang M (2003) Hidden Markov model-based speech emotion recognition. In: 2003 IEEE International Conference on Acoustics, Speech, and Signal Processing, 2003. Proceedings (ICASSP\u201903), vol 2. IEEE, pp II-1","DOI":"10.1109\/ICME.2003.1220939"},{"key":"3346_CR28","doi-asserted-by":"crossref","unstructured":"Schuller B, Rigoll G, Lang M (2004) Speech emotion recognition combining acoustic features and linguistic information in a hybrid support vector machine-belief network architecture. In: IEEE International Conference on Acoustics, Speech, and Signal Processing, 2004. Proceedings (ICASSP\u201904), vol\u00a01. IEEE, pp I-577","DOI":"10.1109\/ICASSP.2004.1326051"},{"issue":"1","key":"3346_CR29","doi-asserted-by":"publisher","first-page":"4","DOI":"10.1016\/j.csl.2012.02.005","volume":"27","author":"B Schuller","year":"2013","unstructured":"Schuller B, Steidl S, Batliner A, Burkhardt F, Devillers L, M\u00fcLler C, Narayanan S (2013) Paralinguistics in speech and language-state-of-the-art and the challenge. Comput Speech Lang 27(1):4\u201339","journal-title":"Comput Speech Lang"},{"key":"3346_CR30","doi-asserted-by":"publisher","first-page":"2001","DOI":"10.21437\/Interspeech.2016-129","volume":"2016","author":"B Schuller","year":"2016","unstructured":"Schuller B, Steidl S, Batliner A, Hirschberg J, Burgoon JK, Baird A, Elkins A, Zhang Y, Coutinho E, Evanini K (2016) The interspeech 2016 computational paralinguistics challenge: deception, sincerity & native language. Interspeech 2016:2001\u20132005","journal-title":"Interspeech"},{"key":"3346_CR31","doi-asserted-by":"crossref","unstructured":"Schuller B, Steidl S, Batliner A, Bergelson E, Krajewski J, Janott C, Amatuni A, Casillas M, Seidl A, Soderstrom M, et al (2017) The interspeech 2017 computational paralinguistics challenge: addressee, cold & snoring. In: Computational Paralinguistics Challenge (ComParE), Interspeech 2017, pp 3442\u20133446","DOI":"10.21437\/Interspeech.2017-43"},{"key":"3346_CR32","doi-asserted-by":"publisher","first-page":"122","DOI":"10.21437\/Interspeech.2018-51","volume":"2018","author":"B Schuller","year":"2018","unstructured":"Schuller B, Steidl S, Batliner A, Marschik PB, Baumeister H, Dong F, Hantke S, Pokorny FB, Rathner EM, Bartl-Pokorny KD et al (2018) The interspeech 2018 computational paralinguistics challenge: atypical & self-assessed affect, crying & heart beats. Proc Interspeech 2018:122\u2013126","journal-title":"Proc Interspeech"},{"key":"3346_CR33","doi-asserted-by":"crossref","unstructured":"Schuller BW, Batliner A, Bergler C, Pokorny FB, Krajewski J, Cychosz M, Schmitt M, et\u00a0al (2019) The interspeech 2019 computational paralinguistics challenge: styrian dialects, continuous sleepiness, baby sounds & orca activity. In: Proceedings of Interspeech","DOI":"10.21437\/Interspeech.2019-1122"},{"key":"3346_CR34","doi-asserted-by":"publisher","first-page":"2398","DOI":"10.21437\/Interspeech.2016-1473","volume":"2016","author":"M Senoussaoui","year":"2016","unstructured":"Senoussaoui M, Cardinal P, Dehak N, Koerich AL (2016) Native language detection using the i-vector framework. Interspeech 2016:2398\u20132402","journal-title":"Interspeech"},{"key":"3346_CR35","doi-asserted-by":"crossref","unstructured":"Yoon S, Byun S, Jung K (2018) Multimodal speech emotion recognition using audio and text. In: 2018 IEEE Spoken Language Technology Workshop (SLT). IEEE, pp 112\u2013118","DOI":"10.1109\/SLT.2018.8639583"},{"key":"3346_CR36","doi-asserted-by":"crossref","unstructured":"Yoon S, Byun S, Dey S, Jung K (2019) Speech emotion recognition using multi-hop attention mechanism. In: ICASSP 2019\u20132019 IEEE International Conference on Acoustics. Speech and Signal Processing (ICASSP). IEEE, pp 2822\u20132826","DOI":"10.1109\/ICASSP.2019.8683483"},{"key":"3346_CR37","volume-title":"Automatic speech recognition","author":"D Yu","year":"2016","unstructured":"Yu D, Deng L (2016) Automatic speech recognition. Springer, Berlin"}],"container-title":["The Journal of Supercomputing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11227-020-03346-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11227-020-03346-3\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11227-020-03346-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,10,28]],"date-time":"2022-10-28T09:19:58Z","timestamp":1666948798000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11227-020-03346-3"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020,6,13]]},"references-count":37,"journal-issue":{"issue":"10","published-print":{"date-parts":[[2020,10]]}},"alternative-id":["3346"],"URL":"https:\/\/doi.org\/10.1007\/s11227-020-03346-3","relation":{},"ISSN":["0920-8542","1573-0484"],"issn-type":[{"type":"print","value":"0920-8542"},{"type":"electronic","value":"1573-0484"}],"subject":[],"published":{"date-parts":[[2020,6,13]]},"assertion":[{"value":"13 June 2020","order":1,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}