{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,31]],"date-time":"2026-03-31T02:06:23Z","timestamp":1774922783224,"version":"3.50.1"},"publisher-location":"Singapore","reference-count":36,"publisher":"Springer Singapore","isbn-type":[{"value":"9789811025846","type":"print"},{"value":"9789811025853","type":"electronic"}],"license":[{"start":{"date-parts":[[2016,12,25]],"date-time":"2016-12-25T00:00:00Z","timestamp":1482624000000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2017]]},"DOI":"10.1007\/978-981-10-2585-3_15","type":"book-chapter","created":{"date-parts":[[2016,12,24]],"date-time":"2016-12-24T22:34:25Z","timestamp":1482618865000},"page":"195-203","source":"Crossref","is-referenced-by-count":17,"title":["Fisher Kernels on Phase-Based Features for Speech Emotion Recognition"],"prefix":"10.1007","author":[{"given":"Jun","family":"Deng","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xinzhou","family":"Xu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zixing","family":"Zhang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Sascha","family":"Fr\u00fchholz","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Didier","family":"Grandjean","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Bj\u00f6rn","family":"Schuller","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2016,12,25]]},"reference":[{"key":"15_CR1","doi-asserted-by":"crossref","unstructured":"Andre, E., Rehm, M., Minker, W., B\u00fchler, D.: Endowing spoken language dialogue systems with emotional intelligence. In: Affective Dialogue Systems, pp. 178\u2013187. Springer (2004)","DOI":"10.1007\/978-3-540-24842-2_17"},{"key":"15_CR2","doi-asserted-by":"crossref","unstructured":"Acosta, J.C.: Using emotion to gain rapport in a spoken dialog system. In: Proceedings of NAACL HLT, pp. 49\u201354. Boulder, CO (2009)","DOI":"10.3115\/1620932.1620941"},{"issue":"1","key":"15_CR3","doi-asserted-by":"crossref","first-page":"49","DOI":"10.1007\/s10772-010-9068-y","volume":"13","author":"J Pittermann","year":"2010","unstructured":"Pittermann, J., Pittermann, A., Minker, W.: Emotion recognition and adaptation in spoken dialogue systems. Int. J. Speech Technol. 13(1), 49\u201360 (2010)","journal-title":"Int. J. Speech Technol."},{"key":"15_CR4","doi-asserted-by":"crossref","first-page":"6","DOI":"10.1186\/1687-6180-2011-6","volume":"2011","author":"Z Callejas","year":"2011","unstructured":"Callejas, Z., Griol, D., L\u00f3pez-C\u00f3zar, R.: Predicting user mental states in spoken dialogue systems. EURASIP J. Adv. Sign. Process. 2011, 6 (2011)","journal-title":"EURASIP J. Adv. Sign. Process."},{"issue":"1","key":"15_CR5","doi-asserted-by":"crossref","first-page":"69","DOI":"10.1109\/T-AFFC.2011.27","volume":"3","author":"A Vinciarelli","year":"2012","unstructured":"Vinciarelli, A., Pantic, M., Heylen, D., Pelachaud, C., Poggi, I., D\u2019Errico, F., Schr\u00f6der, M.: Bridging the gap between social animal and unsocial machine: a survey of social signal processing. IEEE Trans. Affect. Comput. 3(1), 69\u201387 (2012)","journal-title":"IEEE Trans. Affect. Comput."},{"issue":"3","key":"15_CR6","doi-asserted-by":"crossref","first-page":"299","DOI":"10.1109\/T-AFFC.2013.15","volume":"4","author":"D Benyon","year":"2013","unstructured":"Benyon, D., Gamback, B., Hansen, P., Mival, O., Webb, N.: How was your day? Evaluating a conversational companion. IEEE Trans. Affect. Comput. 4(3), 299\u2013311 (2013)","journal-title":"IEEE Trans. Affect. Comput."},{"key":"15_CR7","doi-asserted-by":"crossref","unstructured":"Dumouchel, P., Dehak, N., Attabi, Y., Dehak, R., Boufaden, N.: Cepstral and long-term features for emotion recognition. In: Proceedings of INTERSPEECH, pp. 344\u2013347. Brighton, UK (2009)","DOI":"10.21437\/Interspeech.2009-111"},{"key":"15_CR8","doi-asserted-by":"crossref","unstructured":"Schuller, B.: Intelligent Audio Analysis. Signals and Communication Technology, Springer (2013), 350 p","DOI":"10.1007\/978-3-642-36806-6"},{"key":"15_CR9","doi-asserted-by":"crossref","unstructured":"Anagnostopoulos, C.N., Iliou, T., Giannoukos, I.: Features and classifiers for emotion recognition from speech: a survey from 2000 to 2011. Artif. Intell. Rev. pp. 1\u201323 (2012)","DOI":"10.1007\/s10462-012-9368-5"},{"key":"15_CR10","doi-asserted-by":"crossref","unstructured":"Attabi, Y., Alam, M.J., Dumouchel, P., Kenny, P., O\u2019Shaughnessy, D.: Multiple windowed spectral features for emotion recognition. In: Proceedings of ICASSP, pp. 7527\u20137531. IEEE, Vancouver, BC (2013)","DOI":"10.1109\/ICASSP.2013.6639126"},{"key":"15_CR11","doi-asserted-by":"crossref","unstructured":"Mowlaee, P., Saeidi, R., Stylanou, Y.: INTERSPEECH 2014 special session: phase importance in speech processing applications. In: Proceedings of INTERSPEECH. Singapore (2014), 5 p","DOI":"10.21437\/Interspeech.2014-385"},{"issue":"4","key":"15_CR12","doi-asserted-by":"crossref","first-page":"832","DOI":"10.1109\/TASSP.1985.1164651","volume":"33","author":"B Yegnanarayana","year":"1985","unstructured":"Yegnanarayana, B., Sreekanth, J., Rangarajan, A.: Waveform estimation using group delay processing. IEEE Trans. Acoust. Speech Sign. Process. 33(4), 832\u2013836 (1985)","journal-title":"IEEE Trans. Acoust. Speech Sign. Process."},{"key":"15_CR13","doi-asserted-by":"crossref","unstructured":"Murthy, H., Gadde, V., et\u00a0al.: The modified group delay function and its application to phoneme recognition. In: Proceedings of ICASSP, vol.\u00a01, pp. I\u201368. Hong Kong, China (2003)","DOI":"10.1109\/ICASSP.2003.1198718"},{"issue":"1","key":"15_CR14","doi-asserted-by":"crossref","first-page":"190","DOI":"10.1109\/TASL.2006.876858","volume":"15","author":"R Hegde","year":"2007","unstructured":"Hegde, R., Murthy, H., Gadde, V.: Significance of the modified group delay feature in speech recognition. IEEE Trans. Audio Speech Lang. Process. 15(1), 190\u2013202 (2007)","journal-title":"IEEE Trans. Audio Speech Lang. Process."},{"key":"15_CR15","doi-asserted-by":"crossref","unstructured":"Mowlaee, P., Saiedi, R., Martin, R.: Phase estimation for signal reconstruction in single-channel speech separation. In: Proceedings of ICSLP, pp. 1\u20134. Hong Kong, China (2012)","DOI":"10.21437\/Interspeech.2012-436"},{"key":"15_CR16","doi-asserted-by":"crossref","unstructured":"Hern\u00e1ez, I., Saratxaga, I., Sanchez, J., Navas, E., Luengo, I.: Use of the harmonic phase in speaker recognition. In: Proceedings of INTERSPEECH, pp. 2757\u20132760. Florence, Italy (2011)","DOI":"10.21437\/Interspeech.2011-156"},{"key":"15_CR17","doi-asserted-by":"crossref","unstructured":"Tahon, M., Degottex, G., Devillers, L.: Usual voice quality features and glottal features for emotional valence detection. In: Proceedings of ICSP, pp. 693\u2013696. Beijing, China (2012)","DOI":"10.21437\/SpeechProsody.2012-173"},{"key":"15_CR18","unstructured":"Jaakkola, T., Haussler, D.: Exploiting generative models in discriminative classifiers. In: Proceedings of NIPS, pp. 487\u2013493. Denver, CO (1999)"},{"issue":"7","key":"15_CR19","doi-asserted-by":"crossref","first-page":"2026","DOI":"10.1109\/TASL.2011.2109379","volume":"19","author":"I McCowan","year":"2011","unstructured":"McCowan, I., Dean, D., McLaren, M., Vogt, R., Sridharan, S.: The delta-phase spectrum with application to voice activity detection and speaker recognition. IEEE Trans. Audio Speech Lang. Process. 19(7), 2026\u20132038 (2011)","journal-title":"IEEE Trans. Audio Speech Lang. Process."},{"key":"15_CR20","unstructured":"Diment, A., Rajan, P., Heittola, T., Virtanen, T.: Modified group delay feature for musical instrument recognition. In: Proceedings of CMMR, pp. 431\u2013438. Marseille, France (2013)"},{"key":"15_CR21","doi-asserted-by":"crossref","unstructured":"Wu, Z., Siong, C.E., Li, H.: Detecting converted speech and natural speech for anti-spoofing attack in speaker recognition. In: Proceedings of INTERSPEECH. Portland, OR (2012), 4 p","DOI":"10.21437\/Interspeech.2012-465"},{"key":"15_CR22","doi-asserted-by":"crossref","unstructured":"Xiao, X., Tian, X., Du, S., Xu, H., Chng, E.S., Li, H.: Spoofing speech detection using high dimensional magnitude and phase features: The NTU approach for ASVspoof 2015 challenge. In: Proceedings of INTERSPEECH, pp. 2052\u20132056. Dresden, Germany (2015)","DOI":"10.21437\/Interspeech.2015-465"},{"key":"15_CR23","unstructured":"Zhu, D., Paliwal, K.K.: Product of power spectrum and group delay function for speech recognition. In: Proceedings of ICASSP, pp. 125\u2013128. Montreal, Canada (2004)"},{"key":"15_CR24","doi-asserted-by":"crossref","unstructured":"Moreno, P.J., Rifkin, R.: Using the Fisher kernel method for web audio classification. In: Proceedings of ICASSP, pp. 2417\u20132420. Istanbul, Turkey (2000)","DOI":"10.1109\/ICASSP.2000.859329"},{"key":"15_CR25","doi-asserted-by":"crossref","unstructured":"Fine, S., Navr\u00e1til, J., Gopinath, R.A.: A hybrid GMM\/SVM approach to speaker identification. In: Proceedings of ICASSP, pp. 417\u2013420. Utah, USA (2001)","DOI":"10.1109\/ICASSP.2001.940856"},{"key":"15_CR26","doi-asserted-by":"crossref","unstructured":"Perronnin, F., Dance, C.: Fisher kernels on visual vocabularies for image categorization. In: Proceedings of CVPR, pp. 1\u20138. Minneapolis, MN (2007)","DOI":"10.1109\/CVPR.2007.383266"},{"key":"15_CR27","doi-asserted-by":"crossref","unstructured":"Perronnin, F., S\u00e1nchez, J., Mensink, T.: Improving the Fisher kernel for large-scale image classification. In: Proceedings of ECCV, pp. 143\u2013156. Crete, Greece (2010)","DOI":"10.1007\/978-3-642-15561-1_11"},{"key":"15_CR28","doi-asserted-by":"crossref","unstructured":"Schuller, B., Steidl, S., Batliner, A., Vinciarelli, A., Scherer, K., Ringeval, F., Chetouani, M., Weninger, F., Eyben, F., Marchi, E., Mortillaro, M., Salamin, H., Polychroniou, A., Valente, F., Kim, S.: The INTERSPEECH 2013 computational paralinguistics challenge: social signals, conflict, emotion, autism. In: Proceedings of INTERSPEECH, pp. 148\u2013152. Lyon, France (2013)","DOI":"10.21437\/Interspeech.2013-56"},{"key":"15_CR29","first-page":"1871","volume":"9","author":"RE Fan","year":"2008","unstructured":"Fan, R.E., Chang, K.W., Hsieh, C.J., Wang, X.R., Lin, C.J.: LIBLINEAR: a library for large linear classification. J. Mach. Learn. Res. 9, 1871\u20131874 (2008)","journal-title":"J. Mach. Learn. Res."},{"key":"15_CR30","doi-asserted-by":"crossref","unstructured":"Eyben, F., W\u00f6llmer, M., Schuller, B.: openSMILE\u2014the Munich versatile and fast open-source audio feature extractor. In: Proceedings of MM, pp. 1459\u20131462. Florence, Italy (2010)","DOI":"10.1145\/1873951.1874246"},{"key":"15_CR31","doi-asserted-by":"crossref","unstructured":"Eyben, F., Weninger, F., Gro\u00df, F., Schuller, B.: Recent developments in openSMILE, the Munich open-source multimedia feature extractor. In: Proceedings of MM, pp. 835\u2013838. Barcelona, Spain (2013)","DOI":"10.1145\/2502081.2502224"},{"key":"15_CR32","doi-asserted-by":"crossref","unstructured":"Schuller, B., Steidl, S., Batliner, A.: The INTERSPEECH 2009 emotion challenge. In: Proceedings of INTERSPEECH, pp. 312\u2013315. Brighton, UK (2009)","DOI":"10.21437\/Interspeech.2009-103"},{"key":"15_CR33","doi-asserted-by":"crossref","unstructured":"Schuller, B., Steidl, S., Batliner, A., Burkhardt, F., Devillers, L., M\u00fcller, C., Narayanan, S.: The INTERSPEECH 2010 paralinguistic challenge. In: Proceedings of INTERSPEECH, pp. 2794\u20132797. Makuhari, Japan (2010)","DOI":"10.21437\/Interspeech.2010-739"},{"key":"15_CR34","doi-asserted-by":"crossref","unstructured":"Schuller, B., Batliner, A., Steidl, S., Schiel, F., Krajewski, J.: The INTERSPEECH 2011 speaker state challenge. In: Proceedings of INTERSPEECH, pp. 3201\u20133204. Florence, Italy (2011)","DOI":"10.21437\/Interspeech.2011-801"},{"key":"15_CR35","doi-asserted-by":"crossref","unstructured":"Schuller, B., Steidl, S., Batliner, A., N\u00f6th, E., Vinciarelli, A., Burkhardt, F., van Son, R., Weninger, F., Eyben, F., Bocklet, T., Mohammadi, G., Weiss, B.: The INTERSPEECH 2012 speaker trait challenge. In: Proceedings of INTERSPEECH, Portland, OR (2012)","DOI":"10.21437\/Interspeech.2012-86"},{"key":"15_CR36","doi-asserted-by":"crossref","unstructured":"Eyben, F., Scherer, K., Schuller, B., Sundberg, J., Andr\u00e9, E., Busso, C., Devillers, L., Epps, J., Laukka, P., Narayanan, S., Truong, K.: The Geneva minimalistic acoustic parameter set (GeMAPS) for voice research and affective computing. IEEE Trans. Affect. Comput. 7(2), 190\u2013202 (2016)","DOI":"10.1109\/TAFFC.2015.2457417"}],"container-title":["Lecture Notes in Electrical Engineering","Dialogues with Social Robots"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-10-2585-3_15","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,6,21]],"date-time":"2024-06-21T12:02:12Z","timestamp":1718971332000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-981-10-2585-3_15"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2016,12,25]]},"ISBN":["9789811025846","9789811025853"],"references-count":36,"URL":"https:\/\/doi.org\/10.1007\/978-981-10-2585-3_15","relation":{},"ISSN":["1876-1100","1876-1119"],"issn-type":[{"value":"1876-1100","type":"print"},{"value":"1876-1119","type":"electronic"}],"subject":[],"published":{"date-parts":[[2016,12,25]]}}}