{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T04:10:15Z","timestamp":1750219815153,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":20,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,5,26]],"date-time":"2023-05-26T00:00:00Z","timestamp":1685059200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"Jilin Science and Technology Department","award":["20210201051GX"],"award-info":[{"award-number":["20210201051GX"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,5,26]]},"DOI":"10.1145\/3603781.3603919","type":"proceedings-article","created":{"date-parts":[[2023,7,27]],"date-time":"2023-07-27T18:02:29Z","timestamp":1690480949000},"page":"776-781","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Speech emotion recognition algorithm based on bimodality and attention mechanism"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-2545-3420","authenticated-orcid":false,"given":"Huangshui","family":"Hu","sequence":"first","affiliation":[{"name":"Changchun University of Technology, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0000-2478-0062","authenticated-orcid":false,"given":"Hongyu","family":"Sun","sequence":"additional","affiliation":[{"name":"Changchun University of Technology, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-6881-7602","authenticated-orcid":false,"given":"Peisong","family":"Xie","sequence":"additional","affiliation":[{"name":"Changchun University of Technology, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-7021-1924","authenticated-orcid":false,"given":"Nanhao","family":"Shen","sequence":"additional","affiliation":[{"name":"Changchun University of Technology, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-4493-2593","authenticated-orcid":false,"given":"Mei","family":"Han","sequence":"additional","affiliation":[{"name":"Changchun University of Technology, China"}]}],"member":"320","published-online":{"date-parts":[[2023,7,27]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11235-011-9624-z"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.2110.03435"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.3390\/s21144913"},{"key":"e_1_3_2_1_4_1","volume-title":"librosa: Audio and music signal analysis in python[C]\/\/Proceedings of the 14th python in science conference","author":"McFee B","year":"2015","unstructured":"McFee B , Raffel C , Liang D , librosa: Audio and music signal analysis in python[C]\/\/Proceedings of the 14th python in science conference . 2015 , 8: 18-25 McFee B, Raffel C, Liang D, librosa: Audio and music signal analysis in python[C]\/\/Proceedings of the 14th python in science conference. 2015, 8: 18-25"},{"key":"e_1_3_2_1_5_1","volume-title":"Specaugment: A simple data augmentation method for automatic speech recognition[J]. arXiv preprint arXiv:1904.08779","author":"Park D S","year":"2019","unstructured":"Park D S , Chan W , Zhang Y , Specaugment: A simple data augmentation method for automatic speech recognition[J]. arXiv preprint arXiv:1904.08779 , 2019 . Park D S, Chan W, Zhang Y, Specaugment: A simple data augmentation method for automatic speech recognition[J]. arXiv preprint arXiv:1904.08779, 2019."},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.23915\/distill.00021"},{"key":"e_1_3_2_1_7_1","volume-title":"ECA-Net: efficient channel attention for deep convolutional neural networks[C]\\\\2020 IEEE\/CVF Conference on Computer Vision and Pattern Recognition ( CVPR) .Seattle","author":"Qilong WU","year":"2020","unstructured":"WANG Qilong , WU Banggu , ZHU Pengfei , et al. ECA-Net: efficient channel attention for deep convolutional neural networks[C]\\\\2020 IEEE\/CVF Conference on Computer Vision and Pattern Recognition ( CVPR) .Seattle , USA : IEEE, 2020 : 11531-11539.Li D, Liu J, Yang Z, Speech emotion recognition using recurrent neural networks with directional self-attention[J]. Expert Systems with Applications , 2021, 173: 114683. WANG Qilong, WU Banggu, ZHU Pengfei, et al. ECA-Net: efficient channel attention for deep convolutional neural networks[C]\\\\2020 IEEE\/CVF Conference on Computer Vision and Pattern Recognition ( CVPR) .Seattle, USA: IEEE, 2020: 11531-11539.Li D, Liu J, Yang Z, Speech emotion recognition using recurrent neural networks with directional self-attention[J]. Expert Systems with Applications, 2021, 173: 114683."},{"key":"e_1_3_2_1_8_1","volume-title":"Lee C C","author":"Busso C","year":"2008","unstructured":"Busso C , Bulut M , Lee C C , IEMOCAP : Interactive emotional dyadic motion capture database[J]. Language resources and evaluation, 2008 , 42(4): 335-359. Busso C, Bulut M, Lee C C, IEMOCAP: Interactive emotional dyadic motion capture database[J]. Language resources and evaluation, 2008, 42(4): 335-359."},{"key":"e_1_3_2_1_9_1","volume-title":"Meld: A multimodal multi-party dataset for emotion recognition in conversations[J]. arXiv preprint arXiv:1810.02508","author":"Poria S","year":"2018","unstructured":"Poria S , Hazarika D , Majumder N , Meld: A multimodal multi-party dataset for emotion recognition in conversations[J]. arXiv preprint arXiv:1810.02508 , 2018 . Poria S, Hazarika D, Majumder N, Meld: A multimodal multi-party dataset for emotion recognition in conversations[J]. arXiv preprint arXiv:1810.02508, 2018."},{"key":"e_1_3_2_1_10_1","volume-title":"Efficient estimation of word representations in vector space[J]. arXiv preprint arXiv:1301.3781","author":"Mikolov T","year":"2013","unstructured":"Mikolov T , Chen K , Corrado G , Efficient estimation of word representations in vector space[J]. arXiv preprint arXiv:1301.3781 , 2013 . Mikolov T, Chen K, Corrado G, Efficient estimation of word representations in vector space[J]. arXiv preprint arXiv:1301.3781, 2013."},{"key":"e_1_3_2_1_11_1","volume-title":"Distributed representations of words and phrases and their compositionality[J]. Advances in neural information processing systems","author":"Mikolov T","year":"2013","unstructured":"Mikolov T , Sutskever I , Chen K , Distributed representations of words and phrases and their compositionality[J]. Advances in neural information processing systems , 2013 , 26. Mikolov T, Sutskever I, Chen K, Distributed representations of words and phrases and their compositionality[J]. Advances in neural information processing systems, 2013, 26."},{"key":"e_1_3_2_1_12_1","volume-title":"A structured self-attentive sentence embedding[J]. arXiv preprint arXiv:1703.03130","author":"Lin Z","year":"2017","unstructured":"Lin Z , Feng M , Santos C N , A structured self-attentive sentence embedding[J]. arXiv preprint arXiv:1703.03130 , 2017 . Lin Z, Feng M, Santos C N, A structured self-attentive sentence embedding[J]. arXiv preprint arXiv:1703.03130, 2017."},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.eswa.2021.114683"},{"key":"e_1_3_2_1_14_1","first-page":"192","article-title":"A Bi-modal Emotion Recognition Model for Speech-Text Based on Bi-LSTM-CNN[J]","volume":"2022","unstructured":"WANG L, WANG W Y, CHEN X . A Bi-modal Emotion Recognition Model for Speech-Text Based on Bi-LSTM-CNN[J] . Computer Engineering and Applications 2022 ,58(04): 192 - 197 . WANG L, WANG W Y, CHEN X. A Bi-modal Emotion Recognition Model for Speech-Text Based on Bi-LSTM-CNN[J]. Computer Engineering and Applications 2022,58(04):192-197.","journal-title":"Computer Engineering and Applications"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"crossref","unstructured":"Chudasama V Kar P Gudmalwar A M2FNet: Multi-modal Fusion Network for Emotion Recognition in Conversation[C]\/\/Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. 2022: 4652-4661.  Chudasama V Kar P Gudmalwar A M2FNet: Multi-modal Fusion Network for Emotion Recognition in Conversation[C]\/\/Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. 2022: 4652-4661.","DOI":"10.1109\/CVPRW56347.2022.00511"},{"key":"e_1_3_2_1_16_1","volume-title":"Multimodal speech emotion recognition and ambiguity resolution[J]. arXiv preprint arXiv:1904.06022","author":"Sahu G.","year":"2019","unstructured":"Sahu G. Multimodal speech emotion recognition and ambiguity resolution[J]. arXiv preprint arXiv:1904.06022 , 2019 . Sahu G. Multimodal speech emotion recognition and ambiguity resolution[J]. arXiv preprint arXiv:1904.06022, 2019."},{"volume-title":"Context-dependent sentiment analysis in user-generated videos[C]\/\/Proceedings of the 55th annual meeting of the association for computational linguistics (volume 1: Long papers). 2017: 873-883","author":"Poria S","key":"e_1_3_2_1_17_1","unstructured":"Poria S , Cambria E , Hazarika D , Context-dependent sentiment analysis in user-generated videos[C]\/\/Proceedings of the 55th annual meeting of the association for computational linguistics (volume 1: Long papers). 2017: 873-883 . Poria S, Cambria E, Hazarika D, Context-dependent sentiment analysis in user-generated videos[C]\/\/Proceedings of the 55th annual meeting of the association for computational linguistics (volume 1: Long papers). 2017: 873-883."},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"crossref","unstructured":"Zhang D Wu L Sun C Modeling both Context-and Speaker-Sensitive Dependence for Emotion Detection in Multi-speaker Conversations[C]\/\/IJCAI. 2019: 5415-5421.  Zhang D Wu L Sun C Modeling both Context-and Speaker-Sensitive Dependence for Emotion Detection in Multi-speaker Conversations[C]\/\/IJCAI. 2019: 5415-5421.","DOI":"10.24963\/ijcai.2019\/752"},{"key":"e_1_3_2_1_19_1","volume-title":"Hazarika D","author":"Majumder N","year":"2019","unstructured":"Majumder N , Poria S , Hazarika D , Dialoguernn : An attentive rnn for emotion detection in conversations[C]\/\/Proceedings of the AAAI conference on artificial intelligence. 2019 , 33(01): 6818-6825. Majumder N, Poria S, Hazarika D, Dialoguernn: An attentive rnn for emotion detection in conversations[C]\/\/Proceedings of the AAAI conference on artificial intelligence. 2019, 33(01): 6818-6825."},{"key":"e_1_3_2_1_20_1","volume-title":"Dialoguegcn: A graph convolutional neural network for emotion recognition in conversation[J]. arXiv preprint arXiv:1908.11540","author":"Ghosal D","year":"2019","unstructured":"Ghosal D , Majumder N , Poria S , Dialoguegcn: A graph convolutional neural network for emotion recognition in conversation[J]. arXiv preprint arXiv:1908.11540 , 2019 . Ghosal D, Majumder N, Poria S, Dialoguegcn: A graph convolutional neural network for emotion recognition in conversation[J]. arXiv preprint arXiv:1908.11540, 2019."}],"event":{"name":"CNIOT'23: 2023 4th International Conference on Computing, Networks and Internet of Things","acronym":"CNIOT'23","location":"Xiamen China"},"container-title":["Proceedings of the 2023 4th International Conference on Computing, Networks and Internet of Things"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3603781.3603919","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3603781.3603919","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T16:46:06Z","timestamp":1750178766000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3603781.3603919"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,5,26]]},"references-count":20,"alternative-id":["10.1145\/3603781.3603919","10.1145\/3603781"],"URL":"https:\/\/doi.org\/10.1145\/3603781.3603919","relation":{},"subject":[],"published":{"date-parts":[[2023,5,26]]},"assertion":[{"value":"2023-07-27","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}