{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,25]],"date-time":"2025-03-25T14:36:49Z","timestamp":1742913409756,"version":"3.40.3"},"publisher-location":"Singapore","reference-count":23,"publisher":"Springer Nature Singapore","isbn-type":[{"type":"print","value":"9789819756742"},{"type":"electronic","value":"9789819756759"}],"license":[{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024]]},"DOI":"10.1007\/978-981-97-5675-9_40","type":"book-chapter","created":{"date-parts":[[2024,8,1]],"date-time":"2024-08-01T01:10:40Z","timestamp":1722474640000},"page":"473-484","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Semantic Enhancement Network Integrating Label Knowledge for Multi-modal Emotion 
Recognition"],"prefix":"10.1007","author":[{"given":"HongFeng","family":"Zheng","sequence":"first","affiliation":[]},{"given":"ShengFa","family":"Miao","sequence":"additional","affiliation":[]},{"given":"Qian","family":"Yu","sequence":"additional","affiliation":[]},{"given":"YongKang","family":"Mu","sequence":"additional","affiliation":[]},{"given":"Xin","family":"Jin","sequence":"additional","affiliation":[]},{"given":"KeShan","family":"Yan","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,8,1]]},"reference":[{"key":"40_CR1","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2023.126866","volume":"561","author":"B Pan","year":"2023","unstructured":"Pan, B., Hirota, K., Jia, Z., Dai, Y.: A review of multimodal emotion recognition from datasets, preprocessing, features, and fusion methods. Neurocomputing 561, 126866 (2023)","journal-title":"Neurocomputing"},{"key":"40_CR2","doi-asserted-by":"crossref","unstructured":"Cho, J., Pappagari, R., Kulkarni, P., Villalba, J., Carmiel, Y., Dehak, N.: Deep neural networks for emotion recognition combining audio and transcripts. In: Interspeech, pp. 247\u2013251 (2018)","DOI":"10.21437\/Interspeech.2018-2466"},{"key":"40_CR3","doi-asserted-by":"crossref","unstructured":"Kim, E., Shin, J.W.: DNN-based emotion recognition based on bottleneck acoustic features and lexical features. In: ICASSP 2019\u20132019 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 6720\u20136724. IEEE (2019)","DOI":"10.1109\/ICASSP.2019.8683077"},{"key":"40_CR4","doi-asserted-by":"crossref","unstructured":"Ma, J., Tang, H., Zheng, W.L., Lu, B.L.: Emotion recognition using multimodal residual LSTM network. In: Proceedings of the 27th ACM International Conference on Multimedia, pp. 
176\u2013183 (2019)","DOI":"10.1145\/3343031.3350871"},{"key":"40_CR5","doi-asserted-by":"crossref","unstructured":"Zhao, Z., Wang, Y., Wang, Y.: Multi-level Fusion of Wav2vec 2.0 and BERT for Multi-modal Emotion Recognition. In: Proceedings of the Interspeech 2022, pp. 4725\u20134729 (2022)","DOI":"10.21437\/Interspeech.2022-10230"},{"key":"40_CR6","doi-asserted-by":"crossref","unstructured":"Yoon, S., Byun, S., Dey, S., Jung, K.: Speech emotion recognition using multi-hop attention mechanism. In: ICASSP 2019\u20132019 IEEE International conference on acoustics, speech and signal processing (ICASSP), pp. 2822\u20132826. IEEE (2019)","DOI":"10.1109\/ICASSP.2019.8683483"},{"key":"40_CR7","doi-asserted-by":"crossref","unstructured":"Xu, H., Zhang, H., Han, K., Wang, Y., Peng, Y., Li, X.: Learning alignment for multimodal emotion recognition from speech. In: Proceedings of the Interspeech 2019, pp. 3569\u20133573 (2019)","DOI":"10.21437\/Interspeech.2019-3247"},{"key":"40_CR8","doi-asserted-by":"crossref","unstructured":"Sebastian, J., Pierucci, P.: Fusion techniques for utterance-level emotion recognition combining speech and transcripts. In: Proceedings of the Interspeech 2019, pp. 51\u201355 (2019)","DOI":"10.21437\/Interspeech.2019-3201"},{"issue":"2","key":"40_CR9","doi-asserted-by":"publisher","first-page":"423","DOI":"10.1109\/TPAMI.2018.2798607","volume":"41","author":"T Baltru\u0161aitis","year":"2018","unstructured":"Baltru\u0161aitis, T., Ahuja, C., Morency, L.P.: Multimodal machine learning: a survey and taxonomy. IEEE Trans. Pattern Anal. Mach. Intell. 41(2), 423\u2013443 (2018)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"40_CR10","doi-asserted-by":"crossref","unstructured":"Stanley, E., et al.: Emotion label encoding using word embeddings for speech emotion recognition. In: Proceedings of the INTERSPEECH 2023, pp. 
2418\u20132422 (2023)","DOI":"10.21437\/Interspeech.2023-1591"},{"key":"40_CR11","doi-asserted-by":"crossref","unstructured":"Zhang, K., et al.: Description-Enhanced label embedding contrastive learning for text classification. IEEE Trans. Neural Netw. Learn. Syst. 1\u201314 (2024)","DOI":"10.1109\/TNNLS.2023.3282020"},{"key":"40_CR12","doi-asserted-by":"publisher","unstructured":"Wang, P., et al.: Leveraging label information for multimodal emotion recognition. In: Proc. INTERSPEECH 2023, pp. 4219\u20134223 (2023). https:\/\/doi.org\/10.21437\/Interspeech.2023-1732","DOI":"10.21437\/Interspeech.2023-1732"},{"key":"40_CR13","doi-asserted-by":"crossref","unstructured":"Busso, C., et al.: IEMOCAP: Interactive Emotional dyadic Motion Capture database. Lang. Resour. Eval. 42(4), 335\u2013359 (2008)","DOI":"10.1007\/s10579-008-9076-6"},{"issue":"9","key":"40_CR14","doi-asserted-by":"publisher","first-page":"5318","DOI":"10.1109\/TCSVT.2023.3247822","volume":"33","author":"M Hou","year":"2023","unstructured":"Hou, M., Zhang, Z., Liu, C., Lu, G.: Semantic alignment network for multi-modal emotion recognition. IEEE Trans. Circuits Syst. Video Technol. 33(9), 5318\u20135329 (2023)","journal-title":"IEEE Trans. Circuits Syst. Video Technol."},{"issue":"10","key":"40_CR15","doi-asserted-by":"publisher","first-page":"1440","DOI":"10.1109\/LSP.2018.2860246","volume":"25","author":"M Chen","year":"2018","unstructured":"Chen, M., He, X., Yang, J., Zhang, H.: 3-d convolutional recurrent neural networks with attention model for speech emotion recognition. IEEE Signal Process. Lett. 25(10), 1440\u20131444 (2018)","journal-title":"IEEE Signal Process. Lett."},{"key":"40_CR16","doi-asserted-by":"publisher","first-page":"3592","DOI":"10.1109\/TASLP.2021.3129331","volume":"29","author":"B Chen","year":"2021","unstructured":"Chen, B., Cao, Q., Hou, M., Zhang, Z., Lu, G., Zhang, D.: Multimodal emotion recognition with temporal and semantic consistency. IEEE\/ACM Trans. Audio, Speech Lang. 
Process. 29, 3592\u20133603 (2021)","journal-title":"IEEE\/ACM Trans. Audio, Speech Lang. Process."},{"key":"40_CR17","doi-asserted-by":"crossref","unstructured":"Wang, S., Ma, Y., Ding, Y.: Exploring complementary features in multi-modal speech emotion recognition. In: ICASSP 2023\u20132023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 1\u20135. IEEE (2023)","DOI":"10.1109\/ICASSP49357.2023.10096709"},{"key":"40_CR18","doi-asserted-by":"crossref","unstructured":"Li, H., Ding, W., Wu, Z., Liu, Z.: Learning fine-grained cross modality excitement for speech emotion recognition. In: Proceedings of the Interspeech 2021, pp. 3375\u20133379 (2021)","DOI":"10.21437\/Interspeech.2021-158"},{"key":"40_CR19","doi-asserted-by":"crossref","unstructured":"Chen, W., Xing, X., Xu, X., Yang, J., Pang, J.: Key-sparse transformer for multimodal speech emotion recognition. In: ICASSP 2022\u20132022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 6897\u20136901. IEEE (2022)","DOI":"10.1109\/ICASSP43922.2022.9746598"},{"key":"40_CR20","doi-asserted-by":"publisher","first-page":"3771","DOI":"10.1109\/TASLP.2023.3316458","volume":"31","author":"Z Zhao","year":"2023","unstructured":"Zhao, Z., Wang, Y., Shen, G., Xu, Y., Zhang, J.: TDFNET: Transformer-Based Deep-Scale Fusion Network for multimodal emotion recognition. IEEE\/ACM Trans. Audio, Speech Lang. Process. 31, 3771\u20133782 (2023)","journal-title":"IEEE\/ACM Trans. Audio, Speech Lang. Process."},{"key":"40_CR21","doi-asserted-by":"crossref","unstructured":"Wang, Y., Shen, G., Xu, Y., Li, J., Zhao, Z.: Learning mutual correlation in multimodal transformer for speech emotion recognition. In: Interspeech, pp. 4518\u20134522 (2021)","DOI":"10.21437\/Interspeech.2021-2004"},{"key":"40_CR22","doi-asserted-by":"crossref","unstructured":"Wu, W., Zhang, C., Woodland, P.C.: Emotion recognition by fusing time synchronous and time asynchronous representations. 
In: ICASSP 2021\u20132021 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 6269\u20136273. IEEE (2021)","DOI":"10.1109\/ICASSP39728.2021.9414880"},{"key":"40_CR23","doi-asserted-by":"crossref","unstructured":"Zhao, Z., Gao, T., Wang, H., Schuller, B.W.: SWRR: feature map classifier based on sliding window attention and high-response feature reuse for multimodal emotion recognition. In: Proceedings of the INTERSPEECH 2023, pp. 2433\u20132437 (2023)","DOI":"10.21437\/Interspeech.2023-413"}],"container-title":["Lecture Notes in Computer Science","Advanced Intelligent Computing Technology and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-97-5675-9_40","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,8,1]],"date-time":"2024-08-01T01:22:50Z","timestamp":1722475370000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-97-5675-9_40"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024]]},"ISBN":["9789819756742","9789819756759"],"references-count":23,"URL":"https:\/\/doi.org\/10.1007\/978-981-97-5675-9_40","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2024]]},"assertion":[{"value":"1 August 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ICIC","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Intelligent Computing","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference 
Information"}},{"value":"Tianjin","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"5 August 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"8 August 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"20","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"icic2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/www.ic-icc.cn\/2024\/index.htm","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}