{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,4,9]],"date-time":"2025-04-09T00:07:24Z","timestamp":1744157244287,"version":"3.40.3"},"publisher-location":"Cham","reference-count":25,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783030042110"},{"type":"electronic","value":"9783030042127"}],"license":[{"start":{"date-parts":[[2018,1,1]],"date-time":"2018-01-01T00:00:00Z","timestamp":1514764800000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2018]]},"DOI":"10.1007\/978-3-030-04212-7_6","type":"book-chapter","created":{"date-parts":[[2018,11,16]],"date-time":"2018-11-16T12:01:33Z","timestamp":1542369693000},"page":"62-71","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":5,"title":["Convolutional Neural Network with Spectrogram and Perceptual Features for Speech Emotion Recognition"],"prefix":"10.1007","author":[{"given":"Linjuan","family":"Zhang","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Longbiao","family":"Wang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jianwu","family":"Dang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Lili","family":"Guo","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Haotian","family":"Guan","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2018,11,17]]},"reference":[{"key":"6_CR1","series-title":"Advances in Intelligent Systems and Computing","doi-asserted-by":"publisher","first-page":"51","DOI":"10.1007\/978-3-319-08491-6_5","volume-title":"Human-Computer Systems Interaction: Backgrounds and Applications 3","author":"A Ko\u0142akowska","year":"2014","unstructured":"Ko\u0142akowska, A., Landowska, A., Szwoch, M., Szwoch, W., Wr\u00f3bel, M.R.: Emotion recognition and its applications. In: Hippe, Z.S., Kulikowski, J.L., Mroczek, T., Wtorek, J. (eds.) Human-Computer Systems Interaction: Backgrounds and Applications 3. AISC, vol. 300, pp. 51\u201362. Springer, Cham (2014). https:\/\/doi.org\/10.1007\/978-3-319-08491-6_5"},{"issue":"3","key":"6_CR2","doi-asserted-by":"publisher","first-page":"572","DOI":"10.1016\/j.patcog.2010.09.020","volume":"44","author":"M El Ayadi","year":"2011","unstructured":"El Ayadi, M., Kamel, M.S., Karray, F.: Survey on speech emotion recognition: features, classification schemes, and databases. Pattern Recognit. 44(3), 572\u2013587 (2011). https:\/\/doi.org\/10.1016\/j.patcog.2010.09.020","journal-title":"Pattern Recognit."},{"issue":"9-10","key":"6_CR3","doi-asserted-by":"publisher","first-page":"1062","DOI":"10.1016\/j.specom.2011.01.011","volume":"53","author":"Bj\u00f6rn Schuller","year":"2011","unstructured":"Schuller, B., Batliner, A., Steidl, S., Seppi, D.: Recognising realistic emotions and affect in speech: state of the art and lessons learnt from the first challenge. Speech Commun. 53(9\u201310), 1062\u20131087 (2011). https:\/\/doi.org\/10.1016\/j.specom.2011.01.011","journal-title":"Speech Communication"},{"key":"6_CR4","doi-asserted-by":"publisher","unstructured":"Ringeval, F., et al.: Av+ ec 2015: the first affect recognition challenge bridging across audio, video, and physiological data. In: 5th International Workshop on Audio\/Visual Emotion Challenge, pp. 3\u20138. ACM (2015). https:\/\/doi.org\/10.1145\/2808196.2811642","DOI":"10.1145\/2808196.2811642"},{"key":"6_CR5","doi-asserted-by":"publisher","unstructured":"Valstar, M., et al.: Avec 2016: depression, mood, and emotion recognition workshop and challenge. In: 6th International Workshop on Audio\/Visual Emotion Challenge, pp. 3\u201310. ACM (2016). https:\/\/doi.org\/10.1145\/2964284.2980532","DOI":"10.1145\/2964284.2980532"},{"key":"6_CR6","doi-asserted-by":"crossref","unstructured":"Schuller, B., Steidl, S., Batliner, A.: The Interspeech 2009 emotion challenge. In: Tenth Annual Conference of the International Speech Communication Association (2009)","DOI":"10.21437\/Interspeech.2009-103"},{"key":"6_CR7","doi-asserted-by":"crossref","unstructured":"Han, K., Yu, D., Tashev, I.: Speech emotion recognition using deep neural network and extreme learning machine. In: INTERSPEECH, pp. 223\u2013227 (2014). https:\/\/www.microsoft.com\/en-us\/research\/publication\/speech-emotion-recognition-using-deep-neural-network-and-extreme-learning-machine\/","DOI":"10.21437\/Interspeech.2014-57"},{"key":"6_CR8","doi-asserted-by":"publisher","unstructured":"Huang, C. W., Narayanan, S. S.: Attention assisted discovery of sub-utterance structure in speech emotion recognition. In: INTERSPEECH, pp. 1387\u20131391 (2016). https:\/\/doi.org\/10.21437\/interspeech.2016-448","DOI":"10.21437\/interspeech.2016-448"},{"key":"6_CR9","doi-asserted-by":"publisher","unstructured":"Mirsamadi, S., Barsoum, E., Zhang, C.: Automatic speech emotion recognition using recurrent neural networks with local attention. In: IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 2227\u20132231. IEEE (2017). https:\/\/doi.org\/10.1109\/icassp.2017.7952552","DOI":"10.1109\/icassp.2017.7952552"},{"key":"6_CR10","doi-asserted-by":"publisher","unstructured":"Variani, E., Lei, X., McDermott, E., Moreno, I. L., Gonzalez-Dominguez, J.: Deep neural networks for small footprint text-dependent speaker verification. In: IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 4052\u20134056. IEEE (2014). https:\/\/doi.org\/10.1109\/icassp.2014.6854363","DOI":"10.1109\/icassp.2014.6854363"},{"key":"6_CR11","unstructured":"Hannun, A., et al.: Deep Speech: Scaling up End-to-end Speech Recognition (2014). http:\/\/arxiv.org\/abs\/1412.5567"},{"key":"6_CR12","unstructured":"Amodei, D., et al.: Deep Speech 2: end-to-end speech recognition in English and Mandarin. In: International Conference on Machine Learning, pp. 173\u2013182 (2016). http:\/\/dl.acm.org\/citation.cfm?id=3045390.3045410"},{"issue":"4","key":"6_CR13","doi-asserted-by":"publisher","first-page":"603","DOI":"10.1016\/S0167-6393(03)00099-2","volume":"41","author":"TL Nwe","year":"2003","unstructured":"Nwe, T.L., Foo, S.W., De Silva, L.C.: Speech emotion recognition using hidden markov models. Speech Commun. 41(4), 603\u2013623 (2003). https:\/\/doi.org\/10.1016\/S0167-6393(03)00099-2","journal-title":"Speech Commun."},{"key":"6_CR14","doi-asserted-by":"crossref","unstructured":"Huang, Z., Dong, M., Mao, Q., Zhan, Y.: Speech emotion recognition using CNN. In: 22nd ACM international conference on Multimedia, pp. 801\u2013804. ACM (2014). http:\/\/doi.acm.org\/10.1145\/2647868.2654984","DOI":"10.1145\/2647868.2654984"},{"key":"6_CR15","doi-asserted-by":"publisher","unstructured":"Lim, W., Jang, D., Lee, T.: Speech emotion recognition using convolutional and recurrent neural networks. In: Signal and Information Processing Association Annual Summit and Conference (APSIPA), pp. 1\u20134. IEEE, Asia-Pacific (2016). https:\/\/doi.org\/10.1109\/apsipa.2016.7820699","DOI":"10.1109\/apsipa.2016.7820699"},{"key":"6_CR16","doi-asserted-by":"publisher","unstructured":"Satt, A., Rozenberg, S., Hoory, R.: Efficient emotion recognition from speech using deep learning on spectrograms. In: INTERSPEECH, pp. 1089\u20131093 (2017). https:\/\/doi.org\/10.21437\/interspeech.2017-200","DOI":"10.21437\/interspeech.2017-200"},{"key":"6_CR17","doi-asserted-by":"publisher","unstructured":"Guo, L., Wang, L., Dang, J., Zhang, L., Guan, H.: A feature fusion method based on extreme learning machine for speech emotion recognition. In: IEEE International Conference on Acoustics, Speech and Signal Processing, pp. 2666\u20132670 (2018). https:\/\/doi.org\/10.1109\/icassp.2018.8462219","DOI":"10.1109\/icassp.2018.8462219"},{"key":"6_CR18","doi-asserted-by":"publisher","unstructured":"Guo, L., Wang, L., Dang, J., Zhang, L., Guan, H., Li, X.: Speech emotion recognition by combining amplitude and phase information using convolutional neural network. In: INTERSPEECH, pp. 1611\u20131615 (2018). https:\/\/doi.org\/10.21437\/interspeech.2018-2156","DOI":"10.21437\/interspeech.2018-2156"},{"key":"6_CR19","doi-asserted-by":"crossref","unstructured":"Hu, H., Xu, M.X., Wu, W.: Fusion of global statistical and segmental spectral features for speech emotion recognition. In: INTERSPEECH, pp. 2269\u20132272 (2007)","DOI":"10.21437\/Interspeech.2007-616"},{"key":"6_CR20","doi-asserted-by":"publisher","unstructured":"Yu, D., et al..: Deep convolutional neural networks with layer-wise context expansion and attention. In: INTERSPEECH, pp. 17\u201321 (2016). https:\/\/doi.org\/10.21437\/interspeech.2016-251","DOI":"10.21437\/interspeech.2016-251"},{"key":"6_CR21","doi-asserted-by":"crossref","unstructured":"Lee, J., Tashev, I.: High-level feature representation using recurrent neural network for speech emotion recognition. In: Sixteenth Annual Conference of the International Speech Communication Association (2015). https:\/\/www.microsoft.com\/en-us\/research\/publication\/high-level-feature-representation-using-recurrent-neural-network-for-speech-emotion-recognition\/","DOI":"10.21437\/Interspeech.2015-336"},{"key":"6_CR22","doi-asserted-by":"crossref","unstructured":"Petrushin, V. A.: Emotion recognition in speech signal: experimental study, development, and application. In: Sixth International Conference on Spoken Language Processing, pp. 222\u2013225 (2000)","DOI":"10.21437\/ICSLP.2000-791"},{"key":"6_CR23","doi-asserted-by":"crossref","unstructured":"Burkhardt, F., Paeschke, A., Rolfes, M., Sendlmeier, W. F., Weiss, B.: A Database of German Emotional Speech. In: Ninth European Conference on Speech Communication and Technology, pp. 1517\u20131520 (2005)","DOI":"10.21437\/Interspeech.2005-446"},{"key":"6_CR24","unstructured":"Xie, B.: Research on Key Issues of Mandarin Speech Emotion Recognition [Ph.D. Thesis]. Hangzhou: Zhejiang University (2006)"},{"key":"6_CR25","doi-asserted-by":"publisher","unstructured":"Provost, E. M.: Identifying salient sub-utterance emotion dynamics using flexible units and estimates of affective flow. In: IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 3682\u20133686. IEEE (2013). https:\/\/doi.org\/10.1109\/icassp.2013.6638345","DOI":"10.1109\/icassp.2013.6638345"}],"container-title":["Lecture Notes in Computer Science","Neural Information Processing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-04212-7_6","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,3,12]],"date-time":"2024-03-12T17:04:56Z","timestamp":1710263096000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-030-04212-7_6"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018]]},"ISBN":["9783030042110","9783030042127"],"references-count":25,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-04212-7_6","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2018]]},"assertion":[{"value":"17 November 2018","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ICONIP","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Neural Information Processing","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Siem Reap","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Cambodia","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2018","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"13 December 2018","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"16 December 2018","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"25","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"iconip2018","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/conference.cs.cityu.edu.hk\/iconip\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Single-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"EasyChair","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"575","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"401","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"70% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"4","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"6","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}