{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,26]],"date-time":"2025-03-26T16:37:46Z","timestamp":1743007066917,"version":"3.40.3"},"publisher-location":"Cham","reference-count":37,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783031233869"},{"type":"electronic","value":"9783031233876"}],"license":[{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2022]]},"DOI":"10.1007\/978-3-031-23387-6_10","type":"book-chapter","created":{"date-parts":[[2023,1,28]],"date-time":"2023-01-28T10:32:17Z","timestamp":1674901937000},"page":"139-157","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["An Improved Capsule Network for Speech Emotion Recognition"],"prefix":"10.1007","author":[{"given":"Huiyun","family":"Zhang","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Heming","family":"Huang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2023,1,29]]},"reference":[{"key":"10_CR1","doi-asserted-by":"publisher","first-page":"1467","DOI":"10.1007\/s11235-011-9624-z","volume":"52","author":"S Ramakrishnan","year":"2013","unstructured":"Ramakrishnan, S., Emary, I.: Speech emotion recognition approaches in human computer interaction. Telecommun. Syst. 52, 1467\u20131478 (2013)","journal-title":"Telecommun. Syst."},{"doi-asserted-by":"crossref","unstructured":"John, K., Saurous, R.: Emotion recognition from human speech using temporal information and deep learning. In: Proceedings of Interspeech, Hyderabad, India, pp. 937\u2013940 (2018)","key":"10_CR2","DOI":"10.21437\/Interspeech.2018-1132"},{"doi-asserted-by":"crossref","unstructured":"Mao, S., Tao, D., Zhang, G., Ching, P.C., Lee, T.: Revisiting hidden Markov models for speech emotion recognition. In: Proceedings of IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), Brighton, United Kingdom, pp. 6715\u20136719 (2019)","key":"10_CR3","DOI":"10.1109\/ICASSP.2019.8683172"},{"key":"10_CR4","doi-asserted-by":"publisher","first-page":"26777","DOI":"10.1109\/ACCESS.2019.2901352","volume":"7","author":"I Shahin","year":"2019","unstructured":"Shahin, I., Nassif, A.B., Hamsa, S.: Emotion recognition using hybrid Gaussian mixture model and deep neural network. IEEE Access 7, 26777\u201326787 (2019)","journal-title":"IEEE Access"},{"doi-asserted-by":"crossref","unstructured":"Teng, Z., Ren, F., Kuroiwa, S.: Emotion recognition from text based on the rough set theory and the support vector machines. In: Proceedings of Natural Language Processing and Knowledge Engineering, Beijing, China, pp. 36\u201341 (2007)","key":"10_CR5","DOI":"10.1109\/NLPKE.2007.4368008"},{"unstructured":"Song, M., Chen, C., You, M.: Audio-visual based emotion recognition using tripled hidden Markov model. In: Proceedings of IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), Montreal, Canada, pp. 877\u2013880 (2004)","key":"10_CR6"},{"doi-asserted-by":"crossref","unstructured":"Vydana, H.K., Kumar, P.P., Krishna, K.S.R., Vuppala, A.K.: Improved emotion recognition using GMM-UBMs. In: Proceedings of IEEE International Conference on Signal Processing and Communication Engineering Systems, Guntur, India, pp. 53\u201357 (2015)","key":"10_CR7","DOI":"10.1109\/SPACES.2015.7058214"},{"doi-asserted-by":"crossref","unstructured":"Hu, H., Xu, M.-X., Wu, W.: GMM supervector based SVM with spectral features for speech emotion recognition. In: Proceedings of IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), Honolulu, USA, pp. 413\u2013416 (2007)","key":"10_CR8","DOI":"10.1109\/ICASSP.2007.366937"},{"doi-asserted-by":"crossref","unstructured":"Mao, X., Chen, L., Fu, L.: Multi-level speech emotion recognition based on HMM and ANN. In: Proceedings of WRI World Congress on Computer Science and Information Engineering, Los Angeles, USA, pp. 225\u2013229 (2009)","key":"10_CR9","DOI":"10.1109\/CSIE.2009.113"},{"doi-asserted-by":"crossref","unstructured":"Chen, X., Han, W., Ruan, H., et al.: Sequence-to-sequence modelling for categorical speech emotion recognition using recurrent neural network. In: Proceedings of First Asian Conference on Affective Computing and Intelligent Interaction, Beijing, China, pp. 1\u20134 (2018)","key":"10_CR10","DOI":"10.1109\/ACIIAsia.2018.8470325"},{"doi-asserted-by":"crossref","unstructured":"Bertero, D., Fung, P.: A first look into a convolutional neural network for speech emotion detection. In: Proceedings of IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), New Orleans, USA, pp. 5115\u20135119 (2017)","key":"10_CR11","DOI":"10.1109\/ICASSP.2017.7953131"},{"doi-asserted-by":"crossref","unstructured":"Li, R., Wu, Z., Jia, J., Zhao, S., Meng, H.: Dilated residual network with multi-head self-attention for speech emotion recognition. In: Proceedings of IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), Brighton, UK, 2019, pp. 6675\u20136679","key":"10_CR12","DOI":"10.1109\/ICASSP.2019.8682154"},{"unstructured":"Sabour, S., Frosst, N., Hinton, G.E.: Dynamic routing between capsules. In: Proceedings of Advances in Neural Information Processing Systems, 2017, pp. 3856\u20133866.","key":"10_CR13"},{"unstructured":"Duarte, K., Rawat, Y.S., Shah, M.: Videocapsulenet: a simplified network for action detection. In: Proceedings of Advances in Neural Information Processing Systems (NIPS), Long Beach, US, pp. 7610\u20137619 (2018)","key":"10_CR14"},{"key":"10_CR15","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1109\/ACCESS.2018.2873804","volume":"6","author":"BW Zhang","year":"2018","unstructured":"Zhang, B.W., Xu, X.F., Yang, M., Chen, X.J., Ye, Y.M.: Cross-domain sentiment classification by capsule network with semantic rules. IEEE Access 6, 1\u20131 (2018)","journal-title":"IEEE Access"},{"unstructured":"Min, Y., Zhao, M., Ye, J.B., Lei, Z., Zhang, S.: Investigating capsule networks with dynamic routing for text classification. In: Proceedings of the Conference on Empirical Methods in Natural Language Processing (EMNLP), Brussels, Belgium, pp. 3110\u20133119 (2018)","key":"10_CR16"},{"key":"10_CR17","doi-asserted-by":"publisher","first-page":"69","DOI":"10.1109\/TAFFC.2015.2392101","volume":"25","author":"K Wang","year":"2015","unstructured":"Wang, K., An, N., Li, B.N., Zhang, Y., Li, L.: Speech emotion recognition using Fourier parameters. IEEE Trans. Affect. Comput. 25, 69\u201375 (2015)","journal-title":"IEEE Trans. Affect. Comput."},{"doi-asserted-by":"crossref","unstructured":"Cirakman, O., Gunsel, B.: Online speaker emotion tracking with a dynamic state transition model. In: Proceedings of International Conference on Pattern Recognition (ICPR), Cancun, Mexico, pp. 307\u2013312 (2016)","key":"10_CR18","DOI":"10.1109\/ICPR.2016.7899651"},{"issue":"2","key":"10_CR19","doi-asserted-by":"publisher","first-page":"196","DOI":"10.1109\/TAFFC.2017.2702653","volume":"10","author":"Y Kim","year":"2019","unstructured":"Kim, Y., Provost, E.: ISLA: Temporal segmentation and labeling for audio-visual emotion recognition. IEEE Trans. Affect. Comput. 10(2), 196\u2013208 (2019)","journal-title":"IEEE Trans. Affect. Comput."},{"unstructured":"George, T., Fabien, R., Raymond, B.: Adieu features? end-to-end speech emotion recognition using a deep convolution recurrent network. In: Proceedings of IEEE International Conference on Acoustics, Speech and Signal Processing, Shanghai, China, pp. 5200\u20135204 (2016)","key":"10_CR20"},{"doi-asserted-by":"crossref","unstructured":"Wang, L., Dang, J., Zhang, L.: Speech emotion recognition by combining amplitude and phase information using convolutional neural network. In: Proceedings of Interspeech, Hyderabad, India, pp. 1611\u20131615 (2018)","key":"10_CR21","DOI":"10.21437\/Interspeech.2018-2156"},{"doi-asserted-by":"crossref","unstructured":"Wu, X.X., Liu, S.X., Cao, Y.W., Li, X., Yu, J.W., Dai, D.Y.: Speech emotion recognition using capsule network. In: Proceedings of IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), Brighton, UK, pp. 6695\u20136699 (2019)","key":"10_CR22","DOI":"10.1109\/ICASSP.2019.8683163"},{"issue":"12","key":"10_CR23","doi-asserted-by":"publisher","first-page":"1850","DOI":"10.1109\/LSP.2018.2873892","volume":"25","author":"CQ Xiang","year":"2018","unstructured":"Xiang, C.Q., Zhang, L., Tang, Y., Zou, W.B., Xu, C.: MS-CapsNet: a novel multi-scale capsule network. IEEE Signal Process. Lett. 25(12), 1850\u20131854 (2018)","journal-title":"IEEE Signal Process. Lett."},{"issue":"1","key":"10_CR24","doi-asserted-by":"publisher","first-page":"108","DOI":"10.1016\/j.specom.2011.07.005","volume":"54","author":"P Janovi","year":"2012","unstructured":"Janovi, P., Zou, X.: Speech enhancement based on Sparse Code Shrinkage employing multiple speech models. Speech Commun. 54(1), 108\u2013118 (2012)","journal-title":"Speech Commun."},{"issue":"4","key":"10_CR25","doi-asserted-by":"publisher","first-page":"1055","DOI":"10.1109\/TAFFC.2019.2916092","volume":"12","author":"J Gideon","year":"2021","unstructured":"Gideon, J., Mclnnis, M.G., Provost, E.M.: Improving cross-corpus speech emotion recognition with adversarial discriminative domain generalization (ADDoG). IEEE Trans. Affect. Comput. 12(4), 1055\u20131068 (2021)","journal-title":"IEEE Trans. Affect. Comput."},{"doi-asserted-by":"crossref","unstructured":"Pappagari, R., Villalba, J., \u017belasko, P.: CopyPaste: An augmentation method for speech emotion recognition. In: Proceedings of IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), Toronto, Canada, pp. 6324\u20136328 (2021)","key":"10_CR26","DOI":"10.1109\/ICASSP39728.2021.9415077"},{"doi-asserted-by":"crossref","unstructured":"Raju, V.N.G., Lakshmi, K.P., Jain, V.M., Kalidindi, A., Padma, V.: Study the influence of normalization\/transformation process on the accuracy of supervised classification. In: Proceedings of International Conference on Smart Systems and Inventive Technology (ICSSIT), Tirunelveli, India, pp. 729\u2013735 (2020)","key":"10_CR27","DOI":"10.1109\/ICSSIT48917.2020.9214160"},{"doi-asserted-by":"crossref","unstructured":"Ertam, F., Ayd\u0131n, G.: Data classification with deep learning using Tensorflow. In: Proceedings of International Conference on Computer Science and Engineering (UBMK), Antalya, Turkey, pp. 755\u2013758 (2017)","key":"10_CR28","DOI":"10.1109\/UBMK.2017.8093521"},{"doi-asserted-by":"crossref","unstructured":"Jiang, T., Cheng, J.: Target recognition based on CNN with LeakyReLU and PReLU activation functions. In: Proceedings of International Conference on Sensing, Diagnostics, Prognostics, and Control (SDPC), Beijing, China, pp. 718\u2013722 (2019)","key":"10_CR29","DOI":"10.1109\/SDPC.2019.00136"},{"doi-asserted-by":"crossref","unstructured":"Chen, K., Ding, H., Huo, Q.: Parallelizing Adam optimizer with blockwise model-update filtering. In: Proceedings of IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), Barcelona, Spain, pp. 3027\u20133031 (2020)","key":"10_CR30","DOI":"10.1109\/ICASSP40776.2020.9052983"},{"doi-asserted-by":"crossref","unstructured":"Wen, X.C., Liu, K.H., Zhang, W.M., Jiang, K.: The application of capsule neural network-based CNN for speech emotion recognition. In: Proceedings of International Conference on Pattern Recognition (ICPR), Milan, Italy, pp. 9356\u20139362 (2021)","key":"10_CR31","DOI":"10.1109\/ICPR48806.2021.9412360"},{"issue":"7","key":"10_CR32","first-page":"1252","volume":"28","author":"L Chen","year":"2020","unstructured":"Chen, L., Su, W., Wu, M., Pedrycz, W., Hirota, K.: A fuzzy deep neural network with sparse autoencoder for emotional intention understanding in human-robot interaction. IEEE Trans. Fuzzy Syst. 28(7), 1252\u20131264 (2020)","journal-title":"IEEE Trans. Fuzzy Syst."},{"doi-asserted-by":"crossref","unstructured":"Xie, Y., Liang, R., Liang, Z., Huang, C., Zou, C., Schuller, B.: Speech emotion classification using attention-based LSTM. In: IEEE\/ACM Transactions on Audio, Speech, and Language Processing, vol. 27, no. 11, pp. 1675\u20131685 (2019)","key":"10_CR33","DOI":"10.1109\/TASLP.2019.2925934"},{"issue":"10","key":"10_CR34","doi-asserted-by":"publisher","first-page":"1440","DOI":"10.1109\/LSP.2018.2860246","volume":"25","author":"M Chen","year":"2018","unstructured":"Chen, M., He, X., Yang, J., Zhang, H.: 3-D convolutional recurrent neural networks with attention model for speech emotion recognition. IEEE Signal Process. Lett. 25(10), 1440\u20131444 (2018)","journal-title":"IEEE Signal Process. Lett."},{"issue":"1","key":"10_CR35","doi-asserted-by":"publisher","first-page":"172","DOI":"10.1109\/TNNLS.2020.3027600","volume":"33","author":"L Yi","year":"2022","unstructured":"Yi, L., Mak, M.W.: Improving speech emotion recognition with adversarial data augmentation network. IEEE Trans. Neural Netw. Learn. Syst. 33(1), 172\u20131844 (2022)","journal-title":"IEEE Trans. Neural Netw. Learn. Syst."},{"doi-asserted-by":"crossref","unstructured":"Sugan, N., Sai Srinivas, N.S., Kar, N., Kumar, L.S., Nath, M.K., Kanhe, A.: Performance comparison of different cepstral features for speech emotion recognition. In: Proceedings of International CET Conference on Control, Communication, and Computing (IC4), Thiruvananthapuram, India, pp. 266\u2013271 (2018)","key":"10_CR36","DOI":"10.1109\/CETIC4.2018.8531065"},{"doi-asserted-by":"crossref","unstructured":"Panigrahi, S.N., Palo, H.K.: Emotional speech recognition using particle swarm optimization algorithm. In: Proceedings of International Conference in Advances in Power, Signal, and Information Technology (APSIT), Bhubaneswar, India, pp. 1\u20135 (2021)","key":"10_CR37","DOI":"10.1109\/APSIT52773.2021.9641247"}],"container-title":["Communications in Computer and Information Science","The Recent Advances in Transdisciplinary Data Science"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-23387-6_10","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,10,13]],"date-time":"2024-10-13T05:45:48Z","timestamp":1728798348000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-23387-6_10"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022]]},"ISBN":["9783031233869","9783031233876"],"references-count":37,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-23387-6_10","relation":{},"ISSN":["1865-0929","1865-0937"],"issn-type":[{"type":"print","value":"1865-0929"},{"type":"electronic","value":"1865-0937"}],"subject":[],"published":{"date-parts":[[2022]]},"assertion":[{"value":"29 January 2023","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"SDSC","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Southwest Data Science Conference","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Waco, TX","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"USA","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2022","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"25 March 2022","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"26 March 2022","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"1","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"sdsc2022","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/cs.baylor.edu\/sdsc2022\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Single-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Easy Chair","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"72","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"14","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"2","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"19% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"2.5","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}