{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,28]],"date-time":"2025-03-28T05:04:47Z","timestamp":1743138287244,"version":"3.40.3"},"publisher-location":"Singapore","reference-count":31,"publisher":"Springer Nature Singapore","isbn-type":[{"type":"print","value":"9789819981403"},{"type":"electronic","value":"9789819981410"}],"license":[{"start":{"date-parts":[[2023,11,26]],"date-time":"2023-11-26T00:00:00Z","timestamp":1700956800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,11,26]],"date-time":"2023-11-26T00:00:00Z","timestamp":1700956800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024]]},"DOI":"10.1007\/978-981-99-8141-0_27","type":"book-chapter","created":{"date-parts":[[2023,11,25]],"date-time":"2023-11-25T09:02:16Z","timestamp":1700902936000},"page":"357-373","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["AudioFormer: Channel Audio Encoder Based on\u00a0Multi-granularity Features"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0009-0000-5827-7094","authenticated-orcid":false,"given":"Jialin","family":"Wang","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2674-3721","authenticated-orcid":false,"given":"Yunfeng","family":"Xu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-6619-591X","authenticated-orcid":false,"given":"Borui","family":"Miao","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5141-9376","authenticated-orcid":false,"given":"Shaojie","family":"Zhao","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2023,11,26]]},"reference":[{"key":"27_CR1","unstructured":"Bahdanau, D., Cho, K., Bengio, Y.: Neural machine translation by jointly learning to align and translate. In: International Conference on Learning Representations (2014)"},{"issue":"4","key":"27_CR2","doi-asserted-by":"publisher","first-page":"335","DOI":"10.1007\/s10579-008-9076-6","volume":"42","author":"C Busso","year":"2008","unstructured":"Busso, C., et al.: Iemocap: interactive emotional dyadic motion capture database. Lang. Resour. Eval. 42(4), 335\u2013359 (2008)","journal-title":"Lang. Resour. Eval."},{"key":"27_CR3","doi-asserted-by":"crossref","unstructured":"Chen, W., Xing, X., Xu, X., Pang, J., Du, L.: Speechformer: a hierarchical efficient framework incorporating the characteristics of speech (2022)","DOI":"10.21437\/Interspeech.2022-74"},{"key":"27_CR4","unstructured":"Cowie, R., Douglas-Cowie, E., Tsapatsoulis, N., Votsis, G., Taylor, J.: Emotion recognition in hci. Signal Process. Mag. IEEE (2001)"},{"issue":"2","key":"27_CR5","doi-asserted-by":"publisher","first-page":"179","DOI":"10.1207\/s15516709cog1402_1","volume":"14","author":"JL Elman","year":"1990","unstructured":"Elman, J.L.: Finding structure in time. Cogn. Sci. 14(2), 179\u2013211 (1990)","journal-title":"Cogn. Sci."},{"key":"27_CR6","unstructured":"Krizhevsky, A., Sutskever, I., Hinton, G.: Imagenet classification with deep convolutional neural networks. Adv. Neural Inform. Process. Syst. 25(2) (2012)"},{"key":"27_CR7","doi-asserted-by":"crossref","unstructured":"Lea, C., Flynn, M.D., Vidal, R., Reiter, A., Hager, G.D.: Temporal convolutional networks for action segmentation and detection. IEEE Computer Society (2016)","DOI":"10.1109\/CVPR.2017.113"},{"issue":"7553","key":"27_CR8","doi-asserted-by":"publisher","first-page":"436","DOI":"10.1038\/nature14539","volume":"521","author":"Y Lecun","year":"2015","unstructured":"Lecun, Y., Bengio, Y., Hinton, G.: Deep learning. Nature 521(7553), 436 (2015)","journal-title":"Nature"},{"issue":"3","key":"27_CR9","doi-asserted-by":"publisher","DOI":"10.1016\/j.eswa.2021.114683","volume":"173","author":"D Li","year":"2021","unstructured":"Li, D., Liu, J., Yang, Z., Sun, L., Wang, Z.: Speech emotion recognition using recurrent neural networks with directional self-attention. Expert Syst. Appl. 173(3), 114683 (2021)","journal-title":"Expert Syst. Appl."},{"key":"27_CR10","doi-asserted-by":"publisher","first-page":"328","DOI":"10.1016\/j.ins.2020.09.047","volume":"548","author":"D Li","year":"2021","unstructured":"Li, D., Zhou, Y., Wang, Z., Gao, D.: Exploiting the potentialities of features for speech emotion recognition. Inf. Sci. 548, 328\u2013343 (2021)","journal-title":"Inf. Sci."},{"key":"27_CR11","doi-asserted-by":"crossref","unstructured":"Mcfee, B., Raffel, C., Liang, D., Ellis, D., Nieto, O.: librosa: audio and music signal analysis in python. In: Python in Science Conference (2015)","DOI":"10.25080\/Majora-7b98e3ed-003"},{"key":"27_CR12","doi-asserted-by":"crossref","unstructured":"Mirsamadi, S., Barsoum, E., Zhang, C.: Automatic speech emotion recognition using recurrent neural networks with local attention. In: 2017 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP) (2017)","DOI":"10.1109\/ICASSP.2017.7952552"},{"key":"27_CR13","unstructured":"Mnih, V., Heess, N., Graves, A., Kavukcuoglu, K.: Recurrent models of visual attention. Adv. Neural Inform. Process. Syst. 3 (2014)"},{"key":"27_CR14","unstructured":"Padi, S., Manocha, D., Sriram, R.D.: Multi-window data augmentation approach for speech emotion recognition (2020)"},{"key":"27_CR15","doi-asserted-by":"crossref","unstructured":"Pang, B.: Thumbs up? sentiment classification using machine learning techniques. In: Proceedings of EMNLP, Philadelphia. PA, USA, July 2002 (2002)","DOI":"10.3115\/1118693.1118704"},{"key":"27_CR16","doi-asserted-by":"crossref","unstructured":"Peng, Z., Lu, Y., Pan, S., Liu, Y.: Efficient speech emotion recognition using multi-scale cnn and attention (2021)","DOI":"10.1109\/ICASSP39728.2021.9414286"},{"key":"27_CR17","unstructured":"Powers, D.M.W.: Evaluation: from precision, recall and f-measure to roc, informedness, markedness and correlation (2020)"},{"key":"27_CR18","doi-asserted-by":"crossref","unstructured":"Qadri, S.A.A., Gunawan, T.S., Kartiwi, M., Mansor, H., Wani, T.M.: Speech emotion recognition using feature fusion of teo and mfcc on multilingual databases (2022)","DOI":"10.1007\/978-981-33-4597-3_61"},{"key":"27_CR19","unstructured":"Rozgi, V., Ananthakrishnan, S., Saleem, S., Kumar, R., Prasad, R.: Ensemble of svm trees for multimodal emotion recognition. In: Signal & Information Processing Association Summit & Conference (2012)"},{"key":"27_CR20","unstructured":"Sahu, G.: Multimodal speech emotion recognition and ambiguity resolution (2019)"},{"key":"27_CR21","doi-asserted-by":"crossref","unstructured":"Schmid, F., Koutini, K., Widmer, G.: Low-complexity audio embedding extractors. arXiv preprint arXiv:2303.01879 (2023)","DOI":"10.23919\/EUSIPCO58844.2023.10289815"},{"key":"27_CR22","doi-asserted-by":"crossref","unstructured":"Shirian, A., Guha, T.: Compact graph architecture for speech emotion recognition (2020)","DOI":"10.1109\/ICASSP39728.2021.9413876"},{"key":"27_CR23","doi-asserted-by":"crossref","unstructured":"Sze, V., Chen, Y.H., Yang, T.J., Emer, J.S.: Efficient processing of deep neural networks: a tutorial and survey. Proceedings of the IEEE 105(12) (2017)","DOI":"10.1109\/JPROC.2017.2761740"},{"key":"27_CR24","unstructured":"Tripathi, S., Kumar, A., Ramesh, A., Singh, C., Yenigalla, P.: Deep learning based emotion recognition system using speech features and transcriptions (2019)"},{"issue":"14","key":"27_CR25","first-page":"24","volume":"2","author":"C Vinola","year":"2015","unstructured":"Vinola, C., Vimaladevi, K.: A survey on human emotion recognition approaches, databases and applications. Elect. Lett. Comput. Vis. Image Anal. 2(14), 24\u201344 (2015)","journal-title":"Elect. Lett. Comput. Vis. Image Anal."},{"key":"27_CR26","doi-asserted-by":"crossref","unstructured":"Xu, Y., Xu, H., Zou, J.: Hgfm : a hierarchical grained and feature model for acoustic emotion recognition. In: ICASSP 2020 - 2020 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP) (2020)","DOI":"10.1109\/ICASSP40776.2020.9053039"},{"key":"27_CR27","unstructured":"Yazdani, A., Shekofteh, Y.: A persian asr-based ser: modification of sharif emotional speech database and investigation of persian text corpora. arXiv preprint arXiv:2211.09956 (2022)"},{"key":"27_CR28","doi-asserted-by":"crossref","unstructured":"Yoon, S., Byun, S., Jung, K.: Multimodal speech emotion recognition using audio and text. In: IEEE SLT 2018 (2018)","DOI":"10.1109\/SLT.2018.8639583"},{"key":"27_CR29","unstructured":"Yue\u00a0Xibin, Hu\u00a0Xiaolin, T.L.: The influence of the number of parameters in each layer of deep learning model on performance (in chinese). Comput. Sci. Appli. (2015)"},{"key":"27_CR30","doi-asserted-by":"crossref","unstructured":"Zhu, W., Li, X.: Speech emotion recognition with global-aware fusion on multi-scale feature representation (2022)","DOI":"10.1109\/ICASSP43922.2022.9747517"},{"key":"27_CR31","doi-asserted-by":"crossref","unstructured":"Zou, H., Si, Y., Chen, C., Rajan, D., Chng, E.S.: Speech emotion recognition with co-attention based multi-level acoustic information (2022)","DOI":"10.1109\/ICASSP43922.2022.9747095"}],"container-title":["Communications in Computer and Information Science","Neural Information Processing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-99-8141-0_27","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,3,13]],"date-time":"2024-03-13T15:41:21Z","timestamp":1710344481000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-99-8141-0_27"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,11,26]]},"ISBN":["9789819981403","9789819981410"],"references-count":31,"URL":"https:\/\/doi.org\/10.1007\/978-981-99-8141-0_27","relation":{},"ISSN":["1865-0929","1865-0937"],"issn-type":[{"type":"print","value":"1865-0929"},{"type":"electronic","value":"1865-0937"}],"subject":[],"published":{"date-parts":[[2023,11,26]]},"assertion":[{"value":"26 November 2023","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ICONIP","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Neural Information Processing","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Changsha","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2023","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"20 November 2023","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"23 November 2023","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"30","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"iconip2023","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/iconip2023.org\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Single-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"EasyChair","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"1274","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"650","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"51% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"4.14","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"2.46","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}