{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,14]],"date-time":"2026-02-14T10:23:20Z","timestamp":1771064600180,"version":"3.50.1"},"publisher-location":"Cham","reference-count":33,"publisher":"Springer International Publishing","isbn-type":[{"value":"9783031059353","type":"print"},{"value":"9783031059360","type":"electronic"}],"license":[{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2022]]},"DOI":"10.1007\/978-3-031-05936-0_31","type":"book-chapter","created":{"date-parts":[[2022,5,10]],"date-time":"2022-05-10T16:07:09Z","timestamp":1652198829000},"page":"392-404","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":15,"title":["Leveraged Mel Spectrograms Using Harmonic and\u00a0Percussive Components in\u00a0Speech Emotion Recognition"],"prefix":"10.1007","author":[{"given":"David Hason","family":"Rudd","sequence":"first","affiliation":[]},{"given":"Huan","family":"Huo","sequence":"additional","affiliation":[]},{"given":"Guandong","family":"Xu","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2022,5,11]]},"reference":[{"key":"31_CR1","first-page":"222","volume":"20","author":"D Alu","year":"2017","unstructured":"Alu, D., Zoltan, E., Stoica, I.C.: Voice based emotion recognition with convolutional neural networks for companion robots. Sci. Technol. 20, 222\u2013240 (2017)","journal-title":"Sci. Technol."},{"key":"31_CR2","doi-asserted-by":"crossref","unstructured":"Badshah, A.M., Ahmad, J., Rahim, N., Baik, S.W.: Speech emotion recognition from spectrograms with deep convolutional neural network. In: 2017 International Conference on Platform Technology and Service (PlatCon), pp. 1\u20135 (2017)","DOI":"10.1109\/PlatCon.2017.7883728"},{"key":"31_CR3","doi-asserted-by":"crossref","unstructured":"Burkhardt, F., Paeschke, A., Rolfes, M., Sendlmeier, W.F., Weiss, B., et al.: A database of German emotional speech. In: Interspeech, vol. 5, pp. 1517\u20131520 (2005)","DOI":"10.21437\/Interspeech.2005-446"},{"issue":"4","key":"31_CR4","doi-asserted-by":"publisher","first-page":"335","DOI":"10.1007\/s10579-008-9076-6","volume":"42","author":"C Busso","year":"2008","unstructured":"Busso, C.: IEMOCAP: interactive emotional dyadic motion capture database. Lang. Resour. Eval. 42(4), 335\u2013359 (2008). https:\/\/doi.org\/10.1007\/s10579-008-9076-6","journal-title":"Lang. Resour. Eval."},{"issue":"6","key":"31_CR5","doi-asserted-by":"publisher","first-page":"1142","DOI":"10.1109\/TASL.2009.2017438","volume":"17","author":"S Chu","year":"2009","unstructured":"Chu, S., Narayanan, S., Kuo, C.C.J.: Environmental sound recognition with time-frequency audio features. IEEE Trans. Audio Speech Lang. Process. 17(6), 1142\u20131158 (2009)","journal-title":"IEEE Trans. Audio Speech Lang. 
Process."},{"issue":"1","key":"31_CR6","doi-asserted-by":"publisher","first-page":"32","DOI":"10.1109\/79.911197","volume":"18","author":"R Cowie","year":"2001","unstructured":"Cowie, R.: Emotion recognition in human-computer interaction. IEEE Sig. Process. Mag. 18(1), 32\u201380 (2001)","journal-title":"IEEE Sig. Process. Mag."},{"key":"31_CR7","doi-asserted-by":"crossref","unstructured":"Cummins, N., Amiriparian, S., Hagerer, G., Batliner, A., Steidl, S., Schuller, B.W.: An image-based deep spectrum feature representation for the recognition of emotional speech. In: Proceedings of the 25th ACM International Conference on Multimedia, pp. 478\u2013484 (2017)","DOI":"10.1145\/3123266.3123371"},{"issue":"8","key":"31_CR8","doi-asserted-by":"publisher","first-page":"59","DOI":"10.1007\/s00521-016-2712-y","volume":"29","author":"S Demircan","year":"2018","unstructured":"Demircan, S., Kahramanli, H.: Application of fuzzy c-means clustering algorithm to spectral features for emotion classification from speech. Neural Comput. Appl. 29(8), 59\u201366 (2018)","journal-title":"Neural Comput. Appl."},{"key":"31_CR9","unstructured":"Fitzgerald, D.: Harmonic\/percussive separation using median filtering. In: Proceedings of the International Conference on Digital Audio Effects (DAFx), vol. 13, pp. 1\u20134 (2010)"},{"issue":"5","key":"31_CR10","doi-asserted-by":"publisher","first-page":"479","DOI":"10.3390\/e21050479","volume":"21","author":"N Hajarolasvadi","year":"2019","unstructured":"Hajarolasvadi, N., Demirel, H.: 3D CNN-based speech emotion recognition using k-means clustering and spectrograms. Entropy 21(5), 479\u2013495 (2019)","journal-title":"Entropy"},{"issue":"1","key":"31_CR11","doi-asserted-by":"publisher","first-page":"51","DOI":"10.1109\/PROC.1978.10837","volume":"66","author":"FJ Harris","year":"1978","unstructured":"Harris, F.J.: On the use of windows for harmonic analysis with the discrete Fourier transform. Proc. IEEE 66(1), 51\u201383 (1978)","journal-title":"Proc. IEEE"},{"key":"31_CR12","doi-asserted-by":"crossref","unstructured":"Harte, C., Sandler, M., Gasser, M.: Detecting harmonic change in musical audio. In: Proceedings of the 1st ACM Workshop on Audio and Music Computing Multimedia, pp. 21\u201326 (2006)","DOI":"10.1145\/1178723.1178727"},{"key":"31_CR13","doi-asserted-by":"crossref","unstructured":"Huang, Z., Dong, M., Mao, Q., Zhan, Y.: Speech emotion recognition using CNN. In: Proceedings of the 22nd ACM International Conference Media, pp. 801\u2013804 (2014)","DOI":"10.1145\/2647868.2654984"},{"key":"31_CR14","doi-asserted-by":"publisher","first-page":"101894","DOI":"10.1016\/j.bspc.2020.101894","volume":"59","author":"D Issa","year":"2020","unstructured":"Issa, D., Demirci, M.F., Yazici, A.: Speech emotion recognition with deep convolutional neural networks. Biomed. Sig. Process. Control 59, 101894\u2013101904 (2020)","journal-title":"Biomed. Sig. Process. Control"},{"key":"31_CR15","doi-asserted-by":"crossref","unstructured":"Jin, Q., Li, C., Chen, S., Wu, H.: Speech emotion recognition with acoustic and lexical features. In: 2015 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 
4749\u20134753 (2015)","DOI":"10.1109\/ICASSP.2015.7178872"},{"issue":"1","key":"31_CR16","doi-asserted-by":"publisher","first-page":"151","DOI":"10.1016\/j.csl.2012.01.008","volume":"27","author":"M Li","year":"2013","unstructured":"Li, M., Han, K.J., Narayanan, S.: Automatic speaker age and gender recognition using acoustic and prosodic level information fusion. Comput. Speech Lang. 27(1), 151\u2013167 (2013)","journal-title":"Comput. Speech Lang."},{"issue":"5","key":"31_CR17","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1371\/journal.pone.0196391","volume":"13","author":"SR Livingstone","year":"2018","unstructured":"Livingstone, S.R., Russo, F.A.: The Ryerson audio-visual database of emotional speech and song (RAVDESS): a dynamic, multimodal set of facial and vocal expressions in North American English. PLoS ONE 13(5), 1\u201335 (2018)","journal-title":"PLoS ONE"},{"key":"31_CR18","doi-asserted-by":"crossref","unstructured":"McFee, B., et al.: librosa: audio and music signal analysis in Python. In: Proceedings of the 14th Python in Science Conference, vol. 8, pp. 18\u201325 (2015)","DOI":"10.25080\/Majora-7b98e3ed-003"},{"key":"31_CR19","doi-asserted-by":"crossref","unstructured":"Meinedo, H., Trancoso, I.: Age and gender classification using fusion of acoustic and prosodic features. In: 11th Annual Conference of the International Speech Communication Association, pp. 1\u20134 (2010)","DOI":"10.21437\/Interspeech.2010-745"},{"key":"31_CR20","doi-asserted-by":"publisher","first-page":"125868","DOI":"10.1109\/ACCESS.2019.2938007","volume":"7","author":"H Meng","year":"2019","unstructured":"Meng, H., Yan, T., Yuan, F., Wei, H.: Speech emotion recognition from 3D Log-Mel spectrograms with deep learning network. IEEE Access 7, 125868\u2013125881 (2019)","journal-title":"IEEE Access"},{"issue":"9","key":"31_CR21","doi-asserted-by":"publisher","first-page":"34","DOI":"10.5120\/11872-7667","volume":"69","author":"A Milton","year":"2013","unstructured":"Milton, A., Sharmy Roy, S., Tamil Selvi, S.: SVM scheme for speech emotion recognition using MFCC feature. Int. J. Comput. Appl. 69(9), 34\u201339 (2013). https:\/\/doi.org\/10.5120\/11872-7667","journal-title":"Int. J. Comput. Appl."},{"key":"31_CR22","unstructured":"Motl\u0131cek, P.: Feature extraction in speech coding and recognition. Technical Report of Ph.D. research internship in ASP Group, pp. 1\u201350 (2002)"},{"key":"31_CR23","unstructured":"P\u00e9rez-Rosas, V., Mihalcea, R., Morency, L.P.: Utterance-level multimodal sentiment analysis. In: Proceedings of the 51st Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), pp. 973\u2013982 (2013)"},{"key":"31_CR24","doi-asserted-by":"crossref","unstructured":"Popova, A.S., Rassadin, A.G., Ponomarenko, A.A.: Emotion recognition in sound. In: International Conference on Neuroinformatics, pp. 117\u2013124 (2017)","DOI":"10.1007\/978-3-319-66604-4_18"},{"key":"31_CR25","unstructured":"Rozgi\u0107, V., Ananthakrishnan, S., Saleem, S., Kumar, R., Prasad, R.: Ensemble of SVM trees for multimodal emotion recognition. In: Proceedings of the 2012 Asia Pacific Signal and Information Processing Association Annual Summit and Conference, pp. 1\u20134 (2012)"},{"issue":"3","key":"31_CR26","doi-asserted-by":"publisher","first-page":"211","DOI":"10.1007\/s11263-015-0816-y","volume":"115","author":"O Russakovsky","year":"2015","unstructured":"Russakovsky, O.: ImageNet large scale visual recognition challenge. Int. J. Comput. Vis. 
115(3), 211\u2013252 (2015)","journal-title":"Int. J. Comput. Vis."},{"key":"31_CR27","doi-asserted-by":"crossref","unstructured":"Satt, A., Rozenberg, S., Hoory, R.: Efficient emotion recognition from speech using deep learning on spectrograms. In: Interspeech, pp. 1089\u20131093 (2017)","DOI":"10.21437\/Interspeech.2017-200"},{"issue":"3\u20134","key":"31_CR28","doi-asserted-by":"publisher","first-page":"455","DOI":"10.1016\/j.specom.2005.02.018","volume":"46","author":"E Shriberg","year":"2005","unstructured":"Shriberg, E., Ferrer, L., Kajarekar, S., Venkataraman, A., Stolcke, A.: Modeling prosodic feature sequences for speaker recognition. Speech Commun. 46(3\u20134), 455\u2013472 (2005)","journal-title":"Speech Commun."},{"issue":"1","key":"31_CR29","doi-asserted-by":"publisher","first-page":"69","DOI":"10.1109\/TAFFC.2015.2392101","volume":"6","author":"K Wang","year":"2015","unstructured":"Wang, K., An, N., Li, B.N., Zhang, Y., Li, L.: Speech emotion recognition using Fourier parameters. IEEE Trans. Affect. Comput. 6(1), 69\u201375 (2015)","journal-title":"IEEE Trans. Affect. Comput."},{"key":"31_CR30","doi-asserted-by":"crossref","unstructured":"Weninger, F., W\u00f6llmer, M., Schuller, B.: Emotion recognition in naturalistic speech and language-a survey. In: Emotion Recognition: A Pattern Analysis Approach, pp. 237\u2013267 (2015)","DOI":"10.1002\/9781118910566.ch10"},{"issue":"5","key":"31_CR31","doi-asserted-by":"publisher","first-page":"768","DOI":"10.1016\/j.specom.2010.08.013","volume":"53","author":"S Wu","year":"2011","unstructured":"Wu, S., Falk, T.H., Chan, W.Y.: Automatic speech emotion recognition using modulation spectral features. Speech Commun. 53(5), 768\u2013785 (2011)","journal-title":"Speech Commun."},{"key":"31_CR32","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"566","DOI":"10.1007\/978-3-540-30543-9_71","volume-title":"Advances in Multimedia Information Processing - PCM 2004","author":"M Xu","year":"2004","unstructured":"Xu, M., Duan, L.-Y., Cai, J., Chia, L.-T., Xu, C., Tian, Q.: HMM-based audio keyword generation. In: Aizawa, K., Nakamura, Y., Satoh, S. (eds.) PCM 2004. LNCS, vol. 3333, pp. 566\u2013574. Springer, Heidelberg (2004). https:\/\/doi.org\/10.1007\/978-3-540-30543-9_71"},{"key":"31_CR33","doi-asserted-by":"publisher","first-page":"312","DOI":"10.1016\/j.bspc.2018.08.035","volume":"47","author":"J Zhao","year":"2019","unstructured":"Zhao, J., Mao, X., Chen, L.: Speech emotion recognition using deep 1D & 2D CNN LSTM networks. Biomed. Sig. Process. Control 47, 312\u2013323 (2019)","journal-title":"Biomed. Sig. Process. 
Control"}],"container-title":["Lecture Notes in Computer Science","Advances in Knowledge Discovery and Data Mining"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-05936-0_31","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,6,6]],"date-time":"2022-06-06T16:07:29Z","timestamp":1654531649000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-05936-0_31"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022]]},"ISBN":["9783031059353","9783031059360"],"references-count":33,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-05936-0_31","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2022]]},"assertion":[{"value":"11 May 2022","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"PAKDD","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Pacific-Asia Conference on Knowledge Discovery and Data Mining","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Chengdu","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2022","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"16 May 2022","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"19 May 2022","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"26","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"pakdd2022","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/pakdd.net\/index.html","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"CMT","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"558","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"121","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference 
organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"22% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3.75","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"6.45","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}