{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,10,10]],"date-time":"2024-10-10T04:39:17Z","timestamp":1728535157733},"reference-count":25,"publisher":"Institute of Electronics, Information and Communications Engineers (IEICE)","issue":"6","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEICE Trans. Fundamentals"],"published-print":{"date-parts":[[2023,6,1]]},"DOI":"10.1587\/transfun.2022eap1091","type":"journal-article","created":{"date-parts":[[2022,12,7]],"date-time":"2022-12-07T22:09:47Z","timestamp":1670450987000},"page":"876-885","source":"Crossref","is-referenced-by-count":0,"title":["A Multitask Learning Approach Based on Cascaded Attention Network and Self-Adaption Loss for Speech Emotion Recognition"],"prefix":"10.1587","volume":"E106.A","author":[{"given":"Yang","family":"LIU","sequence":"first","affiliation":[{"name":"School of Information Science and Technology, Qingdao University of Science and Technology"}]},{"given":"Yuqi","family":"XIA","sequence":"additional","affiliation":[{"name":"School of Information Science and Technology, Qingdao University of Science and Technology"}]},{"given":"Haoqin","family":"SUN","sequence":"additional","affiliation":[{"name":"School of Information Science and Technology, Qingdao University of Science and Technology"}]},{"given":"Xiaolei","family":"MENG","sequence":"additional","affiliation":[{"name":"School of Information Science and Technology, Qingdao University of Science and Technology"}]},{"given":"Jianxiong","family":"BAI","sequence":"additional","affiliation":[{"name":"School of Information Science and Technology, Qingdao University of Science and Technology"}]},{"given":"Wenbo","family":"GUAN","sequence":"additional","affiliation":[{"name":"School of Information Science and Technology, Qingdao University of Science and Technology"}]},{"given":"Zhen","family":"ZHAO","sequence":"additional","affiliation":[{"name":"School of Information Science and Technology, Qingdao University of Science and Technology"}]},{"given":"Yongwei","family":"LI","sequence":"additional","affiliation":[{"name":"National Laboratory of Pattern Recognition, Institute of Automation, Chinese Academy of Sciences"}]}],"member":"532","reference":[{"key":"1","doi-asserted-by":"publisher","unstructured":"[1] S. Zhang, S. Zhang, T. Huang, and W. Gao, \u201cSpeech emotion recognition using deep convolutional neural network and discriminant temporal pyramid matching,\u201d IEEE Trans. Multimedia, vol.20, no.6, pp.1576-1590, 2017. 10.1109\/tmm.2017.2766843","DOI":"10.1109\/TMM.2017.2766843"},{"key":"2","doi-asserted-by":"publisher","unstructured":"[2] C.M. Lee and S.S. Narayanan, \u201cToward detecting emotions in spoken dialogs,\u201d IEEE Trans. Speech Audio Process., vol.13, no.2, pp.293-303, 2005. 10.1109\/tsa.2004.838534","DOI":"10.1109\/TSA.2004.838534"},{"key":"3","doi-asserted-by":"crossref","unstructured":"[3] B. Schuller, G. Rigoll, and M. Lang, \u201cSpeech emotion recognition combining acoustic features and linguistic information in a hybrid support vector machine-belief network architecture,\u201d 2004 IEEE international conference on acoustics, speech, and signal processing, vol.1, pp.I-577, IEEE, 2004. 10.1109\/icassp.2004.1326051","DOI":"10.1109\/ICASSP.2004.1326051"},{"key":"4","doi-asserted-by":"publisher","unstructured":"[4] R. Chatterjee, S. Mazumdar, R.S. Sherratt, R. Halder, T. Maitra, and D. Giri, \u201cReal-time speech emotion analysis for smart home assistants,\u201d IEEE Trans. Consum. Electron., vol.67, no.1, pp.68-76, 2021. 10.1109\/tce.2021.3056421","DOI":"10.1109\/TCE.2021.3056421"},{"key":"5","doi-asserted-by":"crossref","unstructured":"[5] K. Mangalam and T. Guha, \u201cLearning spontaneity to improve emotion recognition in speech,\u201d arXiv preprint arXiv:1712.04753, 2017. 10.48550\/arXiv.1712.04753","DOI":"10.21437\/Interspeech.2018-1872"},{"key":"6","doi-asserted-by":"crossref","unstructured":"[6] M. Slaney and G. McRoberts, \u201cBaby ears: A recognition system for affective vocalizations,\u201d Proc. 1998 IEEE International Conference on Acoustics, Speech and Signal Processing, ICASSP&apos;98 (Cat. no. 98CH36181), vol.2, pp.985-988, IEEE, 1998. 10.1109\/icassp.1998.675432","DOI":"10.1109\/ICASSP.1998.675432"},{"key":"7","doi-asserted-by":"crossref","unstructured":"[7] B. Schuller, G. Rigoll, and M. Lang, \u201cHidden Markov model-based speech emotion recognition,\u201d 2003 IEEE International Conference on Acoustics, Speech, and Signal Processing, 2003. Proceedings. (ICASSP&apos;03), 2003. 10.1109\/icassp.2003.1202279","DOI":"10.1109\/ICME.2003.1220939"},{"key":"8","doi-asserted-by":"publisher","unstructured":"[8] Z. Liu, M. Wu, W. Cao, J. Mao, J. Xu, and G. Tan, \u201cSpeech emotion recognition based on feature selection and extreme learning machine decision tree,\u201d Neurocomputing, vol.273, pp.271-280, 2018. 10.1016\/j.neucom.2017.07.050","DOI":"10.1016\/j.neucom.2017.07.050"},{"key":"9","doi-asserted-by":"publisher","unstructured":"[9] Q. Mao, M. Dong, Z. Huang, and Y. Zhan, \u201cLearning salient features for speech emotion recognition using convolutional neural networks,\u201d IEEE Trans. Multimedia, vol.16, no.8, pp.2203-2213, 2014. 10.1109\/tmm.2014.2360798","DOI":"10.1109\/TMM.2014.2360798"},{"key":"10","doi-asserted-by":"crossref","unstructured":"[10] J. Lee and I. Tashev, \u201cHigh-level feature representation using recurrent neural network for speech emotion recognition,\u201d Interspeech 2015, pp.1537-1540, 2015. 10.21437\/interspeech.2015-336","DOI":"10.21437\/Interspeech.2015-336"},{"key":"11","doi-asserted-by":"crossref","unstructured":"[11] G. Trigeorgis, F. Ringeval, R. Brueckner, E. Marchi, M.A. Nicolaou, B. Schuller, and S. Zafeiriou, \u201cAdieu features? End-to-end speech emotion recognition using a deep convolutional recurrent network,\u201d 2016 IEEE international conference on acoustics, speech and signal processing (ICASSP), pp.5200-5204, IEEE, 2016. 10.1109\/icassp.2016.7472669","DOI":"10.1109\/ICASSP.2016.7472669"},{"key":"12","doi-asserted-by":"publisher","unstructured":"[12] M. Chen, X. He, J. Yang, and H. Zhang, \u201c3-D convolutional recurrent neural networks with attention model for speech emotion recognition,\u201d IEEE Signal Process. Lett., vol.25, no.10, pp.1440-1444, 2018. 10.1109\/lsp.2018.2860246","DOI":"10.1109\/LSP.2018.2860246"},{"key":"13","doi-asserted-by":"crossref","unstructured":"[13] P. Li, Y. Song, I.V. McLoughlin, W. Guo, and L.-R. Dai, \u201cAn attention pooling based representation learning method for speech emotion recognition,\u201d Interspeech 2018, pp.3087-3091, 2018. 10.21437\/interspeech.2018-1242","DOI":"10.21437\/Interspeech.2018-1242"},{"key":"14","doi-asserted-by":"crossref","unstructured":"[14] C. Huang and S.S. Narayanan, \u201cDeep convolutional recurrent neural network with attention mechanism for robust speech emotion recognition,\u201d 2017 IEEE international conference on multimedia and expo (ICME), pp.583-588, IEEE, 2017. 10.1109\/icme.2017.8019296","DOI":"10.1109\/ICME.2017.8019296"},{"key":"15","doi-asserted-by":"crossref","unstructured":"[15] S. Mirsamadi, E. Barsoum, and C. Zhang, \u201cAutomatic speech emotion recognition using recurrent neural networks with local attention,\u201d 2017 IEEE International conference on acoustics, speech and signal processing (ICASSP), pp.2227-2231, IEEE, 2017. 10.1109\/icassp.2017.7952552","DOI":"10.1109\/ICASSP.2017.7952552"},{"key":"16","doi-asserted-by":"crossref","unstructured":"[16] J. Hu, L. Shen, and G. Sun, \u201cSqueeze-and-excitation networks,\u201d Proc. IEEE Conference on Computer Vision and Pattern Recognition (CVPR), June 2018. 10.1109\/cvpr.2018.00745","DOI":"10.1109\/CVPR.2018.00745"},{"key":"17","unstructured":"[17] J. Te, C. Zhigang, and W. Yongjing, \u201cMulti-task learning 3D CNN-BLSTM with attention mechanism for speech emotion recognition,\u201d Journal of East China University of Science and Technology, vol.48, no.4, pp.534-542, 2022. 10.14135\/j.cnki.1006-3080.20210326001"},{"key":"18","doi-asserted-by":"publisher","unstructured":"[18] T.J. Trull and C.A. Durrett, \u201cCategorical and dimensional models of personality disorder,\u201d Annual Review of Clinical Psychology, vol.1, no.1, pp.355-380, 2005. 10.1146\/annurev.clinpsy.1.102803.144009","DOI":"10.1146\/annurev.clinpsy.1.102803.144009"},{"key":"19","unstructured":"[19] D. Ververidis and C. Kotropoulos, \u201cAutomatic speech classification to five emotional states based on gender information,\u201d 2004 12th European Signal Processing Conference. IEEE, pp.341-344, 2004."},{"key":"20","unstructured":"[20] T. Vogt and E. Andr\u00e9, \u201cImproving automatic emotion recognition from speech via gender differentiation,\u201d Proc. Lang. Resour. Eval. Conf., pp.1123-1126, 2006."},{"key":"21","doi-asserted-by":"crossref","unstructured":"[21] Y. Li, T. Zhao, and T. Kawahara, \u201cImproved end-to-end speech emotion recognition using self attention mechanism and multitask learning,\u201d Interspeech 2019, pp.2803-2807, 2019. 10.21437\/interspeech.2019-2594","DOI":"10.21437\/Interspeech.2019-2594"},{"key":"22","doi-asserted-by":"publisher","unstructured":"[22] C. Busso, M. Bulut, C. Lee, A. Kazemzadeh, E. Mower, S. Kim, J.N. Chang, S. Lee, and S.S. Narayanan, \u201cIEMOCAP: Interactive emotional dyadic motion capture database,\u201d Language Resources and Evaluation, vol.42, no.4, pp.335-359, 2008. 10.1007\/s10579-008-9076-6","DOI":"10.1007\/s10579-008-9076-6"},{"key":"23","doi-asserted-by":"crossref","unstructured":"[23] K. Han, D. Yu, and I. Tashev, \u201cSpeech emotion recognition using deep neural network and extreme learning machine,\u201d Interspeech 2014, pp.223-227, 2014. 10.21437\/interspeech.2014-57","DOI":"10.21437\/Interspeech.2014-57"},{"key":"24","doi-asserted-by":"crossref","unstructured":"[24] X. Wu, S. Liu, Y. Cao, X. Li, J. Yu, D. Dai, X. Ma, S. Hu, Z. Wu, X. Liu, and H. Meng, \u201cSpeech emotion recognition using capsule networks,\u201d ICASSP 2019-2019 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), IEEE, 2019. 10.1109\/icassp.2019.8683163","DOI":"10.1109\/ICASSP.2019.8683163"},{"key":"25","doi-asserted-by":"publisher","unstructured":"[25] R. Xia and Y. Liu, \u201cA multi-task learning framework for emotion recognition using 2D continuous space,\u201d IEEE Trans. Affective Comput., vol.8, no.1, pp.3-14, 2015. 10.1109\/taffc.2015.2512598","DOI":"10.1109\/TAFFC.2015.2512598"}],"container-title":["IEICE Transactions on Fundamentals of Electronics, Communications and Computer Sciences"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/www.jstage.jst.go.jp\/article\/transfun\/E106.A\/6\/E106.A_2022EAP1091\/_pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,10,10]],"date-time":"2024-10-10T01:49:52Z","timestamp":1728524992000},"score":1,"resource":{"primary":{"URL":"https:\/\/www.jstage.jst.go.jp\/article\/transfun\/E106.A\/6\/E106.A_2022EAP1091\/_article"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,6,1]]},"references-count":25,"journal-issue":{"issue":"6","published-print":{"date-parts":[[2023]]}},"URL":"https:\/\/doi.org\/10.1587\/transfun.2022eap1091","relation":{},"ISSN":["0916-8508","1745-1337"],"issn-type":[{"type":"print","value":"0916-8508"},{"type":"electronic","value":"1745-1337"}],"subject":[],"published":{"date-parts":[[2023,6,1]]},"article-number":"2022EAP1091"}}