{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,25]],"date-time":"2025-06-25T04:12:37Z","timestamp":1750824757069,"version":"3.41.0"},"publisher-location":"Cham","reference-count":23,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783319664286"},{"type":"electronic","value":"9783319664293"}],"license":[{"start":{"date-parts":[[2017,1,1]],"date-time":"2017-01-01T00:00:00Z","timestamp":1483228800000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2017]]},"DOI":"10.1007\/978-3-319-66429-3_63","type":"book-chapter","created":{"date-parts":[[2017,8,12]],"date-time":"2017-08-12T02:02:55Z","timestamp":1502503375000},"page":"632-641","source":"Crossref","is-referenced-by-count":2,"title":["Recognizing Emotionally Coloured Dialogue Speech Using Speaker-Adapted DNN-CNN Bottleneck Features"],"prefix":"10.1007","author":[{"given":"Kohei","family":"Mukaihara","sequence":"first","affiliation":[]},{"given":"Sakriani","family":"Sakti","sequence":"additional","affiliation":[]},{"given":"Satoshi","family":"Nakamura","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2017,8,13]]},"reference":[{"issue":"6","key":"63_CR1","doi-asserted-by":"crossref","first-page":"359","DOI":"10.1250\/ast.33.359","volume":"33","author":"Y Arimoto","year":"2012","unstructured":"Arimoto, Y., Kawatsu, H., Ohno, S., Iida, H.: Naturalistic emotional speech collection paradigm with online game and its psychological and acoustical assessment. Acoust. Sci. Technol. 33(6), 359\u2013369 (2012)","journal-title":"Acoust. Sci. Technol."},{"issue":"4","key":"63_CR2","doi-asserted-by":"crossref","first-page":"437","DOI":"10.1016\/j.neunet.2005.03.008","volume":"18","author":"T Athanaselis","year":"2005","unstructured":"Athanaselis, T., Bakamidis, S., Dologlou, I., Cowie, R., Douglas-Cowie, E., Cox, C.: ASR for emotional speech: clarifying the issues and enhancing performance. Neural Netw. 18(4), 437\u2013444 (2005)","journal-title":"Neural Netw."},{"key":"63_CR3","unstructured":"Athanaselis, T., Bakamidis, S., Dologlou, I.: Recognizing verbal content of emotionally coloured speech. In: Proceedings of EUSIPCO, Florence, Italy (2006)"},{"issue":"2","key":"63_CR4","doi-asserted-by":"crossref","first-page":"75","DOI":"10.1006\/csla.1998.0043","volume":"12","author":"M Gales","year":"1998","unstructured":"Gales, M.: Maximum likelihood linear transformations for HMM-based speech recognition. Comput. Speech Lang. 12(2), 75\u201398 (1998)","journal-title":"Comput. Speech Lang."},{"issue":"3","key":"63_CR5","doi-asserted-by":"crossref","first-page":"272","DOI":"10.1109\/89.759034","volume":"7","author":"M Gales","year":"1999","unstructured":"Gales, M.: Semi-tied covariance matrices for hidden Markov models. IEEE Trans. Speech Audio Process. 7(3), 272\u2013281 (1999)","journal-title":"IEEE Trans. Speech Audio Process."},{"key":"63_CR6","doi-asserted-by":"crossref","unstructured":"Gopinath, R.: Maximum likelihood modeling with Gaussian distributions for classification. In: Proceedings of ICASSP, pp. 661\u2013664 (1998)","DOI":"10.1109\/ICASSP.1998.675351"},{"key":"63_CR7","unstructured":"Maekawa, K., Koiso, H., Furui, S., Isahara, H.: Spontaneous speech corpus of Japanese. In: Proceedings of LREC, Athens, Greece, pp. 947\u2013952 (2000)"},{"issue":"1","key":"63_CR8","doi-asserted-by":"crossref","first-page":"5","DOI":"10.1109\/T-AFFC.2011.20","volume":"3","author":"G McKeown","year":"2012","unstructured":"McKeown, G., Valstar, M., Cowie, R., Pantic, M., Schroder, M.: The SEMAINE database: annotated multimodal records of emotionally coloured conversations between a person and a limited agent. IEEE Trans. Affect. Comput. 3(1), 5\u201317 (2012)","journal-title":"IEEE Trans. Affect. Comput."},{"key":"63_CR9","unstructured":"Miao, Y.: Kaldi+PDNN: building DNN-based ASR systems with Kaldi and PDNN. arXiv:1401.6984 (2014)"},{"issue":"2","key":"63_CR10","doi-asserted-by":"crossref","first-page":"1097","DOI":"10.1121\/1.405558","volume":"93","author":"I Murray","year":"1993","unstructured":"Murray, I., Arnott, L.: Toward the simulation of emotion in synthetic speech: a review of the listerature on human vocal emotion. J. Acoust. Soc. Am. 93(2), 1097\u20131108 (1993)","journal-title":"J. Acoust. Soc. Am."},{"key":"63_CR11","doi-asserted-by":"crossref","unstructured":"Paul, D., Baker, J.: The design for the Wall Street Journal-based CSR corpus. In: Proceedings of DARPA Speech and Language Workshop, San Mateo, USA (1992)","DOI":"10.3115\/1075527.1075614"},{"key":"63_CR12","doi-asserted-by":"crossref","DOI":"10.7551\/mitpress\/1140.001.0001","volume-title":"Affective Computing","author":"R Picard","year":"1997","unstructured":"Picard, R.: Affective Computing. MIT Press, Cambridge (1997)"},{"key":"63_CR13","doi-asserted-by":"crossref","unstructured":"Plutchik, R.: A general psychoevolutionary theory of emotion. In: Theories of emotion. Academic Press (1980)","DOI":"10.1016\/B978-0-12-558701-3.50007-7"},{"key":"63_CR14","unstructured":"Polzin, S., Waibel, A.: Pronunciation variations in emotional speech. In: Proceedings of ESCA, pp. 103\u2013108 (1998)"},{"key":"63_CR15","unstructured":"Povey, D., Ghoshal, A., Boulianne, G., Burget, L., Glembek, O., Goel, N., Hannemann, M., Moticek, P., Qian, Y., Schwarz, P., Silovsky, J., Stemmer, G., Vesely, K.: The Kaldi speech recognition toolkit. In: Proceedings of ASRU, Hawaii, USA (2011)"},{"key":"63_CR16","doi-asserted-by":"crossref","unstructured":"Schuller, B., Stadermann, J., Rigoll, G.: Affect-robust speech recognition by dynamic emotional adaptation. In: Proceedings of Speech Prosody (2006)","DOI":"10.21437\/SpeechProsody.2006-221"},{"key":"63_CR17","doi-asserted-by":"crossref","unstructured":"Schuller, B., Steidl, S., Batliner, A.: The INTERSPEECH 2009 emotion challenge. In: Proceedings of INTERSPEECH, Brighton, United Kingdom, pp. 312\u2013315 (2009)","DOI":"10.21437\/Interspeech.2009-103"},{"key":"63_CR18","doi-asserted-by":"crossref","unstructured":"Schuller, B., Steidl, S., Burkhardt, F., Devillers, L., Muller, C., Narayanan, S.: The INTERSPEECH 2010 paralinguistic challenge. In: Proceedings of INTERSPEECH, Makuhari, Japan, pp. 2794\u20132797 (2010)","DOI":"10.21437\/Interspeech.2010-739"},{"key":"63_CR19","doi-asserted-by":"crossref","unstructured":"Schuller, B., Valstar, M., Eyben, F., McKeown, G., Cowie, R., Pantic, M.: AVEC 2011 - the first international audio\/visual emotion challenge. In: Proceedings of International Conference on Affective Computing and Intelligent Interaction (ACII), Memphis, Tennessee, pp. 415\u2013424 (2011)","DOI":"10.1007\/978-3-642-24571-8_53"},{"key":"63_CR20","doi-asserted-by":"crossref","unstructured":"Stolcke, A.: SRILM - an extensible language modeling toolkit. In: Proceedings of of ICSLP, Denver, USA, pp. 901\u2013904 (2002)","DOI":"10.21437\/ICSLP.2002-303"},{"key":"63_CR21","first-page":"3371","volume":"37","author":"P Vincent","year":"2010","unstructured":"Vincent, P., Larochelle, H., Lajoie, I., Bengio, Y., Manzagol, P.A.: Stacked denoising autoencoders: learning useful representations in a deep network with a local denoising criterion. J. Mach. Learn. Res. 37, 3371\u20133408 (2010)","journal-title":"J. Mach. Learn. Res."},{"issue":"3","key":"63_CR22","doi-asserted-by":"crossref","first-page":"328","DOI":"10.1109\/29.21701","volume":"37","author":"A Waibel","year":"1989","unstructured":"Waibel, A., Hanazawa, T., Hinton, G., Shikano, K., Lang, K.J.: Phoneme recognition using time-delay neural networks. IEEE Trans. Acoust. Speech Signal Process. 37(3), 328\u2013339 (1989)","journal-title":"IEEE Trans. Acoust. Speech Signal Process."},{"key":"63_CR23","doi-asserted-by":"crossref","first-page":"1238","DOI":"10.1121\/1.1913238","volume":"52","author":"C Williams","year":"1972","unstructured":"Williams, C., Stevens, K.: Emotion and speech: some acoustical correlates. J. Acoust. Soc. Amer 52, 1238\u20131250 (1972)","journal-title":"J. Acoust. Soc. Amer"}],"container-title":["Lecture Notes in Computer Science","Speech and Computer"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-319-66429-3_63","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,24]],"date-time":"2025-06-24T22:23:47Z","timestamp":1750803827000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-319-66429-3_63"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2017]]},"ISBN":["9783319664286","9783319664293"],"references-count":23,"URL":"https:\/\/doi.org\/10.1007\/978-3-319-66429-3_63","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2017]]}}}