{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,7]],"date-time":"2026-05-07T15:42:06Z","timestamp":1778168526586,"version":"3.51.4"},"reference-count":29,"publisher":"Springer Science and Business Media LLC","issue":"2-3","license":[{"start":{"date-parts":[[2020,6,20]],"date-time":"2020-06-20T00:00:00Z","timestamp":1592611200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2020,6,20]],"date-time":"2020-06-20T00:00:00Z","timestamp":1592611200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of P. R. China","doi-asserted-by":"crossref","award":["No.61572260"],"award-info":[{"award-number":["No.61572260"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"crossref"}]},{"DOI":"10.13039\/501100012154","name":"Graduate Research and Innovation Projects of Jiangsu Province","doi-asserted-by":"publisher","award":["No.46035CX17789"],"award-info":[{"award-number":["No.46035CX17789"]}],"id":[{"id":"10.13039\/501100012154","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["J Sign Process Syst"],"published-print":{"date-parts":[[2021,3]]},"DOI":"10.1007\/s11265-020-01538-x","type":"journal-article","created":{"date-parts":[[2020,6,20]],"date-time":"2020-06-20T09:02:33Z","timestamp":1592643753000},"page":"299-308","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":15,"title":["Coarse-to-Fine Speech Emotion Recognition Based on Multi-Task Learning"],"prefix":"10.1007","volume":"93","author":[{"given":"Zhao","family":"Huijuan","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ye","family":"Ning","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Wang","family":"Ruchuan","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2020,6,20]]},"reference":[{"issue":"4","key":"1538_CR1","doi-asserted-by":"publisher","first-page":"335","DOI":"10.1007\/s10579-008-9076-6","volume":"42","author":"C Busso","year":"2008","unstructured":"Busso, C., Bulut, M., Lee, C., Kazemzadeh, A., Mower, E., Kim, S., Chang, J. N., Lee, S., & Narayanan, S. (2008). IEMOCAP: Interactive emotional dyadic motion capture database. Language Resources and Evaluation, 42(4), 335\u2013359. https:\/\/doi.org\/10.1007\/s10579-008-9076-6.","journal-title":"Language Resources and Evaluation"},{"issue":"10","key":"1538_CR2","doi-asserted-by":"publisher","first-page":"1440","DOI":"10.1109\/LSP.2018.2860246","volume":"25","author":"M Chen","year":"2018","unstructured":"Chen, M., He, X., Yang, J., & Zhang, H. (2018). 3-d convolutional recurrent neural networks with attention model for speech emotion recognition. IEEE Signal Processing Letters, 25(10), 1440\u20131444. https:\/\/doi.org\/10.1109\/LSP.2018.2860246.","journal-title":"IEEE Signal Processing Letters"},{"key":"1538_CR3","doi-asserted-by":"publisher","unstructured":"Dai, D., Wu, Z., Li, R., Wu, X., Jia, J., & Meng, H. (2019). Learning discriminative features from spectrograms using center loss for speech emotion recognition. In IEEE International Conference on Acoustics, Speech and Signal Processing, ICASSP 2019, Brighton, United Kingdom, May 12-17, 2019 (pp. 7405\u20137409), DOI https:\/\/doi.org\/10.1109\/ICASSP.2019.8683765, (to appear in print).","DOI":"10.1109\/ICASSP.2019.8683765"},{"issue":"5","key":"1538_CR4","doi-asserted-by":"publisher","first-page":"434","DOI":"10.1134\/S1995425519050044","volume":"12","author":"NM Derzhavina","year":"2019","unstructured":"Derzhavina, N. M. (2019). Experience of a synthetic approach to an ecological classification of vascular epiphytes. Contemporary Problems of Ecology, 12(5), 434\u2013443.","journal-title":"Contemporary Problems of Ecology"},{"key":"1538_CR5","doi-asserted-by":"publisher","first-page":"27","DOI":"10.1016\/j.patrec.2018.05.020","volume":"112","author":"X He","year":"2018","unstructured":"He, X., Song, Y., & Zhang, Y. (2018). A coarse-to-fine scene text detection method based on skeleton-cut detector and binary-tree-search based rectification. Pattern Recognition Letters, 112, 27\u201333. https:\/\/doi.org\/10.1016\/j.patrec.2018.05.020.","journal-title":"Pattern Recognition Letters"},{"key":"1538_CR6","doi-asserted-by":"crossref","unstructured":"Huang, Z., & Epps, J. (2018). Prediction of emotion change from speech, 2018.","DOI":"10.3389\/fict.2018.00011"},{"key":"1538_CR7","doi-asserted-by":"publisher","first-page":"225","DOI":"10.1109\/TIP.2019.2926748","volume":"29","author":"L Jing","year":"2020","unstructured":"Jing, L., Chen, Y., & Tian, Y. (2020). Coarse-to-fine semantic segmentation from image-level labels. IEEE Transactions Image Processing, 29, 225\u2013236. https:\/\/doi.org\/10.1109\/TIP.2019.2926748.","journal-title":"IEEE Transactions Image Processing"},{"key":"1538_CR8","doi-asserted-by":"crossref","unstructured":"Khalil, R. A., Jones, E., Babar, M. I., Jan, T., Zafar, M. H., & Alhussain, T. (2019). Speech emotion recognition using deep learning techniques: A review. IEEE Access 7 117327\u2013117345.","DOI":"10.1109\/ACCESS.2019.2936124"},{"key":"1538_CR9","doi-asserted-by":"crossref","unstructured":"Li, J., Qiu, M., Niu, J., Gao, W., Zong, Z., & Qin, X. (2010). Feedback dynamic algorithms for preemptable job scheduling in cloud systems. 1, 561\u2013564.","DOI":"10.1109\/WI-IAT.2010.30"},{"key":"1538_CR10","doi-asserted-by":"publisher","unstructured":"Ma, F., Chitta, R., Zhou, J., You, Q., Sun, T., & Gao, J. (2017). Dipole: Diagnosis prediction in healthcare via attention-based bidirectional recurrent neural networks. In Proceedings of the 23rd ACM SIGKDD international conference on knowledge discovery and data mining, Halifax, NS, Canada, August 13 - 17, 2017 (pp. 1903\u20131911), DOI https:\/\/doi.org\/10.1145\/3097983.3098088, (to appear in print).","DOI":"10.1145\/3097983.3098088"},{"key":"1538_CR11","doi-asserted-by":"publisher","unstructured":"Ma, X., Wu, Z., Jia, J., Xu, M., Meng, H., & Cai, L. (2018). Emotion recognition from variable-length speech segments using deep learning on spectrograms. In Interspeech 2018, 19th annual conference of the international speech communication association, Hyderabad, India, 2-6 September 2018 (pp. 3683\u20133687), DOI https:\/\/doi.org\/10.21437\/Interspeech.2018-2228, (to appear in print).","DOI":"10.21437\/Interspeech.2018-2228"},{"key":"1538_CR12","doi-asserted-by":"publisher","unstructured":"Ma, Y., Liu, X., Bai, S., Wang, L., He, D., & Liu, A. (2019). Coarse-to-fine image inpainting via region-wise convolutions and non-local correlation. In Proceedings of the Twenty-Eighth International Joint Conference on Artificial Intelligence, IJCAI 2019, Macao, China, August 10-16, 2019 (pp. 3123\u20133129), DOI https:\/\/doi.org\/10.24963\/ijcai.2019\/433, (to appear in print).","DOI":"10.24963\/ijcai.2019\/433"},{"key":"1538_CR13","doi-asserted-by":"crossref","unstructured":"Marinoiu, E., Zanfir, M., Olaru, V., & Sminchisescu, C. (2018). 3d human sensing, action and emotion recognition in robot assisted therapy of children with autism. In 2018 IEEE Conference on Computer Vision and Pattern Recognition, CVPR 2018, Salt Lake City, UT, USA, June 18-22, 2018. http:\/\/openaccess.thecvf.com\/content_cvpr_2018\/html\/Marinoiu_3D_Human_Sensing_CVPR_2018_paper.html (pp. 2158\u20132167), DOI 10.1109\/CVPR.2018.00230, (to appear in print).","DOI":"10.1109\/CVPR.2018.00230"},{"key":"1538_CR14","doi-asserted-by":"publisher","first-page":"105","DOI":"10.1016\/j.bspc.2015.05.002","volume":"21","author":"I Mazic","year":"2015","unstructured":"Mazic, I., Bonkovic, M., & Dzaja, B. (2015). Two-level coarse-to-fine classification algorithm for asthma wheezing recognition in children\u2019s respiratory sounds. Biomedical Signal Processing and Control, 21, 105\u2013118. https:\/\/doi.org\/10.1016\/j.bspc.2015.05.002.","journal-title":"Biomedical Signal Processing and Control"},{"key":"1538_CR15","doi-asserted-by":"crossref","unstructured":"Meng, H., Yan, T., Yuan, F., & Wei, H. (2019). Speech emotion recognition from 3d log-mel spectrograms with deep learning network. IEEE Access 7 125868\u2013125881.","DOI":"10.1109\/ACCESS.2019.2938007"},{"issue":"2","key":"1538_CR16","first-page":"67","volume":"5","author":"GE Myers","year":"1969","unstructured":"Myers, G.E. (1969). William James\u2019s Theory of Emotion. Transactions of the Charles S Peirce Society, 5(2), 67\u201389.","journal-title":"Transactions of the Charles S Peirce Society"},{"key":"1538_CR17","doi-asserted-by":"crossref","unstructured":"Qiu, H., Noura, H., Qiu, M., Ming, Z., & Memmi, G. (2019). A user-centric data protection method for cloud storage based on invertible dwt. IEEE Transactions on Cloud Computing 1\u20131.","DOI":"10.1109\/TCC.2019.2911679"},{"key":"1538_CR18","doi-asserted-by":"publisher","first-page":"212","DOI":"10.1016\/j.inffus.2019.02.002","volume":"50","author":"H Qiu","year":"2019","unstructured":"Qiu, H., Qiu, M., Zhihui, L. U., & Memmi, G. (2019). An efficient key distribution system for data fusion in v2x heterogeneous networks. Information Fusion, 50, 212\u2013220.","journal-title":"Information Fusion"},{"issue":"4","key":"1538_CR19","doi-asserted-by":"publisher","first-page":"443","DOI":"10.1016\/j.jpdc.2007.06.014","volume":"68","author":"M Qiu","year":"2008","unstructured":"Qiu, M., Sha, E. H., Liu, M., Lin, M., Hua, S., & Yang, L. T. (2008). Energy minimization with loop fusion and multi-functional-unit scheduling for multidimensional DSP. Journal of Parallel and Distributed Computing, 68(4), 443\u2013455. https:\/\/doi.org\/10.1016\/j.jpdc.2007.06.014.","journal-title":"Journal of Parallel and Distributed Computing"},{"issue":"4","key":"1538_CR20","doi-asserted-by":"publisher","first-page":"443","DOI":"10.1016\/j.jpdc.2007.06.014","volume":"68","author":"M Qiu","year":"2008","unstructured":"Qiu, M., Sha, E. H. M., Liu, M., Lin, M., Hua, S., & Yang, L. T. (2008). Energy minimization with loop fusion and multi-functional-unit scheduling for multidimensional dsp. Journal of Parallel and Distributed Computing, 68(4), 443\u2013455.","journal-title":"Journal of Parallel and Distributed Computing"},{"key":"1538_CR21","unstructured":"Rabiee, A., Kim, T., & Lee, S. (2019). Adjusting pleasure-arousal-dominance for continuous emotional text-to-speech synthesizer. arXiv:1906.05507."},{"issue":"5","key":"1538_CR22","doi-asserted-by":"publisher","first-page":"90","DOI":"10.1145\/3129340","volume":"61","author":"BW Schuller","year":"2018","unstructured":"Schuller, B. W. (2018). Speech emotion recognition: two decades in a nutshell, benchmarks, and ongoing trends. Communications of the ACM, 61(5), 90\u201399. https:\/\/doi.org\/10.1145\/3129340.","journal-title":"Communications of the ACM"},{"issue":"4","key":"1538_CR23","doi-asserted-by":"publisher","first-page":"525","DOI":"10.1016\/j.jpdc.2011.12.004","volume":"72","author":"X Tang","year":"2012","unstructured":"Tang, X., Li, K., Qiu, M., & Sha, E. H. M. (2012). A hierarchical reliability-driven scheduling algorithm in grid systems. Journal of Parallel and Distributed Computing, 72(4), 525\u2013535.","journal-title":"Journal of Parallel and Distributed Computing"},{"key":"1538_CR24","doi-asserted-by":"crossref","unstructured":"Wang, X., Peng, M., Pan, L., Hu, M., Jin, C., & Ren, F. (2018). Two-level attention with two-stage multi-task learning for facial emotion recognition. arXiv:1811.12139.","DOI":"10.1007\/978-3-030-05710-7_19"},{"key":"1538_CR25","doi-asserted-by":"publisher","unstructured":"Wei, X., Zhang, C., Liu, L., Shen, C., & Wu, J. (2018). Coarse-to-fine: A rnn-based hierarchical attention model for vehicle re-identification. In Computer Vision - ACCV 2018 - 14th Asian Conference on Computer Vision, Perth, Australia, December 2-6, 2018, Revised Selected Papers, Part II (pp. 575\u2013591), DOI https:\/\/doi.org\/10.1007\/978-3-030-20890-5_37, (to appear in print).","DOI":"10.1007\/978-3-030-20890-5_37"},{"issue":"1","key":"1538_CR26","doi-asserted-by":"publisher","first-page":"3","DOI":"10.1109\/TAFFC.2015.2512598","volume":"8","author":"R Xia","year":"2017","unstructured":"Xia, R., & Liu, Y. (2017). A multi-task learning framework for emotion recognition using 2d continuous space. IEEE Transactions Affective Computing, 8(1), 3\u201314. https:\/\/doi.org\/10.1109\/TAFFC.2015.2512598.","journal-title":"IEEE Transactions Affective Computing"},{"key":"1538_CR27","doi-asserted-by":"publisher","unstructured":"Xu, J., Xu, R., Lu, Q., & Wang, X. (2012). Coarse-to-fine sentence-level emotion classification based on the intra-sentence features and sentential context. In 21St ACM international Conference on Information and Knowledge Management, CIKM\u201912, Maui, HI, USA, October 29 - November 02, 2012, pp. 2455\u20132458, DOI https:\/\/doi.org\/10.1145\/2396761.2398665, (to appear in print).","DOI":"10.1145\/2396761.2398665"},{"issue":"6","key":"1538_CR28","doi-asserted-by":"publisher","first-page":"1576","DOI":"10.1109\/TMM.2017.2766843","volume":"20","author":"S Zhang","year":"2018","unstructured":"Zhang, S., Zhang, S., Huang, T., & Gao, W. (2018). Speech emotion recognition using deep convolutional neural network and discriminant temporal pyramid matching. IEEE Tranactions Multimedia, 20(6), 1576\u20131590. https:\/\/doi.org\/10.1109\/TMM.2017.2766843.","journal-title":"IEEE Tranactions Multimedia"},{"key":"1538_CR29","doi-asserted-by":"publisher","unstructured":"Zhao, H., Xiao, Y., Han, J., & Zhang, Z. (2019). Compact convolutional recurrent neural networks via binarization for speech emotion recognition. In IEEE International Conference on Acoustics, Speech and Signal Processing, ICASSP 2019, Brighton, United Kingdom, May 12-17, 2019 (pp. 6690\u20136694), DOI https:\/\/doi.org\/10.1109\/ICASSP.2019.8683389, (to appear in print).","DOI":"10.1109\/ICASSP.2019.8683389"}],"container-title":["Journal of Signal Processing Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11265-020-01538-x.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11265-020-01538-x\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11265-020-01538-x.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2021,6,19]],"date-time":"2021-06-19T23:27:21Z","timestamp":1624145241000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11265-020-01538-x"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020,6,20]]},"references-count":29,"journal-issue":{"issue":"2-3","published-print":{"date-parts":[[2021,3]]}},"alternative-id":["1538"],"URL":"https:\/\/doi.org\/10.1007\/s11265-020-01538-x","relation":{},"ISSN":["1939-8018","1939-8115"],"issn-type":[{"value":"1939-8018","type":"print"},{"value":"1939-8115","type":"electronic"}],"subject":[],"published":{"date-parts":[[2020,6,20]]},"assertion":[{"value":"19 December 2019","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"23 March 2020","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"14 April 2020","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"20 June 2020","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}