{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,25]],"date-time":"2025-03-25T22:20:01Z","timestamp":1742941201083,"version":"3.40.3"},"publisher-location":"Cham","reference-count":25,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783319943602"},{"type":"electronic","value":"9783319943619"}],"license":[{"start":{"date-parts":[[2018,1,1]],"date-time":"2018-01-01T00:00:00Z","timestamp":1514764800000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2018]]},"DOI":"10.1007\/978-3-319-94361-9_2","type":"book-chapter","created":{"date-parts":[[2018,6,20]],"date-time":"2018-06-20T05:44:37Z","timestamp":1529473477000},"page":"16-28","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Multi-modal Multi-scale Speech Expression Evaluation in Computer-Assisted Language Learning"],"prefix":"10.1007","author":[{"given":"Jingbei","family":"Li","sequence":"first","affiliation":[]},{"given":"Zhiyong","family":"Wu","sequence":"additional","affiliation":[]},{"given":"Runnan","family":"Li","sequence":"additional","affiliation":[]},{"given":"Mingxing","family":"Xu","sequence":"additional","affiliation":[]},{"given":"Kehua","family":"Lei","sequence":"additional","affiliation":[]},{"given":"Lianhong","family":"Cai","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2018,6,21]]},"reference":[{"key":"2_CR1","unstructured":"Witt, S.M.: Automatic error detection in pronunciation training: where we are and where we need to go. In: International Symposium on Automatic Detection of Errors in Pronunciation Training, Stockholm, Sweden (2012)"},{"issue":"3","key":"2_CR2","first-page":"231","volume":"62","author":"LA McCoy","year":"1996","unstructured":"McCoy, L.A.: The power of your vocal image. J. (Can. Dent. Assoc.) 62(3), 231\u2013234 (1996)","journal-title":"J. (Can. Dent. Assoc.)"},{"key":"2_CR3","volume-title":"Expression in Speech: Analysis and Synthesis","author":"M Tatham","year":"2004","unstructured":"Tatham, M., Morton, K.: Expression in Speech: Analysis and Synthesis. Oxford University Press, New York (2004)"},{"key":"2_CR4","doi-asserted-by":"crossref","unstructured":"Schmidt, E.M., Kim, Y.E.: Learning emotion-based acoustic features with deep belief networks. In: 2011 IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (WASPAA), pp. 65\u201368. IEEE (2011)","DOI":"10.1109\/ASPAA.2011.6082328"},{"key":"2_CR5","series-title":"Communications in Computer and Information Science","doi-asserted-by":"publisher","first-page":"645","DOI":"10.1007\/978-981-10-3005-5_53","volume-title":"Pattern Recognition","author":"S Zhang","year":"2016","unstructured":"Zhang, S., Zhao, X., Chuang, Y., Guo, W., Chen, Y.: Feature learning via deep belief network for Chinese speech emotion recognition. In: Tan, T., Li, X., Chen, X., Zhou, J., Yang, J., Cheng, H. (eds.) CCPR 2016. CCIS, vol. 663, pp. 645\u2013651. Springer, Singapore (2016). https:\/\/doi.org\/10.1007\/978-981-10-3005-5_53"},{"issue":"8","key":"2_CR6","doi-asserted-by":"publisher","first-page":"2203","DOI":"10.1109\/TMM.2014.2360798","volume":"16","author":"Q Mao","year":"2014","unstructured":"Mao, Q., Dong, M., Huang, Z., Zhan, Y.: Learning salient features for speech emotion recognition using convolutional neural networks. IEEE Trans. Multimedia 16(8), 2203\u20132213 (2014)","journal-title":"IEEE Trans. Multimedia"},{"key":"2_CR7","doi-asserted-by":"crossref","unstructured":"Audhkhasi, K., Rosenberg, A., Sethy, A., Ramabhadran, B., Kingsbury, B.: End-to-end ASR-free keyword search from speech. arXiv preprint arXiv:1701.04313 (2017)","DOI":"10.1109\/ICASSP.2017.7953076"},{"key":"2_CR8","doi-asserted-by":"publisher","first-page":"2451","DOI":"10.1162\/089976600300015015","volume":"12","author":"FA Gers","year":"1999","unstructured":"Gers, F.A., Schmidhuber, J., Cummins, F.: Learning to forget: continual prediction with LSTM. Neural Comput. 12, 2451\u20132471 (1999)","journal-title":"Neural Comput."},{"issue":"11","key":"2_CR9","doi-asserted-by":"publisher","first-page":"2673","DOI":"10.1109\/78.650093","volume":"45","author":"M Schuster","year":"1997","unstructured":"Schuster, M., Paliwal, K.K.: Bidirectional recurrent neural networks. IEEE Trans. Sig. Process. 45(11), 2673\u20132681 (1997)","journal-title":"IEEE Trans. Sig. Process."},{"issue":"5","key":"2_CR10","doi-asserted-by":"publisher","first-page":"602","DOI":"10.1016\/j.neunet.2005.06.042","volume":"18","author":"A Graves","year":"2005","unstructured":"Graves, A., Schmidhuber, J.: Framewise phoneme classification with bidirectional lstm and other neural network architectures. Neural Netw. 18(5), 602\u2013610 (2005)","journal-title":"Neural Netw."},{"issue":"6","key":"2_CR11","doi-asserted-by":"publisher","first-page":"1333","DOI":"10.1109\/72.963769","volume":"12","author":"FA Gers","year":"2001","unstructured":"Gers, F.A., Schmidhuber, E.: LSTM recurrent networks learn simple context-free and context-sensitive languages. IEEE Trans. Neural Netw. 12(6), 1333\u20131340 (2001)","journal-title":"IEEE Trans. Neural Netw."},{"key":"2_CR12","doi-asserted-by":"crossref","unstructured":"Graves, A., Jaitly, N., Mohamed, A.r.: Hybrid speech recognition with deep bidirectional LSTM. In: 2013 IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU), pp. 273\u2013278. IEEE (2013)","DOI":"10.1109\/ASRU.2013.6707742"},{"key":"2_CR13","doi-asserted-by":"crossref","unstructured":"Ma, Y., Li, X., Xu, M., Jia, J., Cai, L.: Multi-scale context based attention for dynamic music emotion prediction. In: Proceedings of the 2017 ACM on Multimedia Conference, pp. 1443\u20131450. ACM (2017)","DOI":"10.1145\/3123266.3123408"},{"key":"2_CR14","doi-asserted-by":"crossref","unstructured":"Huang, C.W., Narayanan, S.S.: Attention assisted discovery of sub-utterance structure in speech emotion recognition. In: INTERSPEECH, pp. 1387\u20131391 (2016)","DOI":"10.21437\/Interspeech.2016-448"},{"key":"2_CR15","doi-asserted-by":"crossref","unstructured":"Hori, C., Hori, T., Lee, T.Y., Zhang, Z., Harsham, B., Hershey, J.R., Marks, T.K., Sumi, K.: Attention-based multimodal fusion for video description. In: 2017 IEEE International Conference on Computer Vision (ICCV), pp. 4203\u20134212. IEEE (2017)","DOI":"10.1109\/ICCV.2017.450"},{"key":"2_CR16","doi-asserted-by":"crossref","unstructured":"Tian, L., Moore, J.D., Lai, C.: Recognizing emotions in dialogues with acoustic and lexical features. In: 2015 International Conference on Affective Computing and Intelligent Interaction (ACII), pp. 737\u2013742. IEEE (2015)","DOI":"10.1109\/ACII.2015.7344651"},{"issue":"11","key":"2_CR17","doi-asserted-by":"publisher","first-page":"1936","DOI":"10.1109\/TMM.2015.2477058","volume":"17","author":"L Zhao","year":"2015","unstructured":"Zhao, L., Hu, Q., Wang, W.: Heterogeneous feature selection with multi-modal deep neural networks and sparse group lasso. IEEE Trans. Multimedia 17(11), 1936\u20131948 (2015)","journal-title":"IEEE Trans. Multimedia"},{"issue":"3","key":"2_CR18","doi-asserted-by":"publisher","first-page":"293","DOI":"10.1080\/10570318409374163","volume":"48","author":"William G. Christ","year":"1984","unstructured":"Christ, W.G., Biggers, T.: An exploratory investigation into the relationship between television program preference and emotion-eliciting qualities\u2014a new theoretical perspective. Western J. Commun. (Includes Commun. Rep.) 48(3), 293\u2013307 (1984)","journal-title":"Western Journal of Speech Communication"},{"key":"2_CR19","doi-asserted-by":"crossref","unstructured":"Huang, J.T., Li, J., Gong, Y.: An analysis of convolutional neural networks for speech recognition. In: 2015 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 4989\u20134993. IEEE (2015)","DOI":"10.1109\/ICASSP.2015.7178920"},{"issue":"5786","key":"2_CR20","doi-asserted-by":"publisher","first-page":"504","DOI":"10.1126\/science.1127647","volume":"313","author":"GE Hinton","year":"2006","unstructured":"Hinton, G.E., Salakhutdinov, R.R.: Reducing the dimensionality of data with neural networks. Science 313(5786), 504\u2013507 (2006)","journal-title":"Science"},{"key":"2_CR21","unstructured":"Shortell, T.: An Introduction to Data Analysis & Presentation. World Wide Web (2001). http:\/\/academic.brooklyn.cuny.edu\/soc\/courses\/712\/chap18.html"},{"key":"2_CR22","doi-asserted-by":"publisher","first-page":"292","DOI":"10.3389\/fpsyg.2013.00292","volume":"4","author":"F Weninger","year":"2013","unstructured":"Weninger, F., Eyben, F., Schuller, B.W., Mortillaro, M., Scherer, K.R.: On the acoustics of emotion in audio: what speech, music, and sound have in common. Front. Psychol. 4, 292 (2013)","journal-title":"Front. Psychol."},{"key":"2_CR23","doi-asserted-by":"crossref","unstructured":"Eyben, F., Weninger, F., Gross, F., Schuller, B.: Recent developments in opensmile, the Munich open-source multimedia feature extractor. In: Proceedings of the 21st ACM International Conference on Multimedia, pp. 835\u2013838. ACM (2013)","DOI":"10.1145\/2502081.2502224"},{"key":"2_CR24","unstructured":"Chollet, F.: Keras (2015). http:\/\/keras.io . Accessed 2017"},{"key":"2_CR25","doi-asserted-by":"crossref","unstructured":"Bergstra, J., Breuleux, O., Bastien, F., Lamblin, P., Pascanu, R., Desjardins, G., Turian, J., Warde-Farley, D., Bengio, Y.: Theano: a CPU and GPU math compiler in python. In: Proceedings of 9th Python in Science Conference, pp. 1\u20137 (2010)","DOI":"10.25080\/Majora-92bf1922-003"}],"container-title":["Lecture Notes in Computer Science","Artificial Intelligence and Mobile Services \u2013 AIMS 2018"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-319-94361-9_2","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2020,11,4]],"date-time":"2020-11-04T00:17:22Z","timestamp":1604449042000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-319-94361-9_2"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018]]},"ISBN":["9783319943602","9783319943619"],"references-count":25,"URL":"https:\/\/doi.org\/10.1007\/978-3-319-94361-9_2","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2018]]}}}