{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,2,21]],"date-time":"2025-02-21T14:58:23Z","timestamp":1740149903469,"version":"3.37.3"},"reference-count":37,"publisher":"Springer Science and Business Media LLC","issue":"2","license":[{"start":{"date-parts":[[2017,1,11]],"date-time":"2017-01-11T00:00:00Z","timestamp":1484092800000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2017,1,11]],"date-time":"2017-01-11T00:00:00Z","timestamp":1484092800000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"funder":[{"DOI":"10.13039\/501100001691","name":"Japan Society for the Promotion of Science","doi-asserted-by":"publisher","award":["263989"],"award-info":[{"award-number":["263989"]}],"id":[{"id":"10.13039\/501100001691","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["J Multimodal User Interfaces"],"published-print":{"date-parts":[[2017,6]]},"DOI":"10.1007\/s12193-017-0238-y","type":"journal-article","created":{"date-parts":[[2017,1,11]],"date-time":"2017-01-11T11:53:34Z","timestamp":1484135614000},"page":"185-196","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Cluster-based approach to discriminate the user\u2019s state whether a user is embarrassed or thinking to an answer to a prompt"],"prefix":"10.1007","volume":"11","author":[{"given":"Yuya","family":"Chiba","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Takashi","family":"Nose","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Akinori","family":"Ito","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2017,1,11]]},"reference":[{"key":"238_CR1","doi-asserted-by":"crossref","unstructured":"Adelhardt J, Shi R, Frank C, Zei\u00dfler V, Batliner A, N\u00f6th E, Niemann H (2003) Multimodal user state recognition in a modern dialogue system. In: Proceedings of the 26th german conference on artificial intelligence, pp 591\u2013605","DOI":"10.1007\/978-3-540-39451-8_43"},{"key":"238_CR2","unstructured":"Arthur D, Vassilvitskii S (2007) k-means++: the advantages of careful seeding. In: Proceedings of the 18th annual ACM-SIAM symposium on discrete algorithms, pp 1027\u20131035"},{"issue":"3","key":"238_CR3","doi-asserted-by":"publisher","first-page":"383","DOI":"10.1006\/jmla.1995.1017","volume":"34","author":"SE Brennan","year":"1995","unstructured":"Brennan SE, Williams M (1995) The feeling of another\u2019s knowing: prosody and filled pauses as cues to listeners about the metacognitive states of speakers. J Mem Lang 34(3):383\u2013398","journal-title":"J Mem Lang"},{"key":"238_CR4","first-page":"1","volume":"6","author":"Z Callejas","year":"2011","unstructured":"Callejas Z, Griol D, L\u00f3pez-C\u00f3zar R (2011) Predicting user mental states in spoken dialogue systems. EURASIP J Adv Signal Process 6:1\u201321","journal-title":"EURASIP J Adv Signal Process"},{"key":"238_CR5","doi-asserted-by":"publisher","DOI":"10.1155\/2012\/865362","author":"Y Chiba","year":"2012","unstructured":"Chiba Y, Ito A (2012) Estimating a user\u2019s internal state before the first input utterance. Adv Hum Comput Inter. doi: 10.1155\/2012\/865362","journal-title":"Adv Hum Comput Inter"},{"key":"238_CR6","unstructured":"Chiba Y, Ito M, Ito A (2012) Effect of linguistic contents on human estimation of internal state of dialog system users. In: Proceedings of the interdisciplinary workshop on feedback behaviors in dialog, pp 11\u201314"},{"key":"238_CR7","doi-asserted-by":"crossref","unstructured":"Chiba Y, Ito M, Ito A (2013) Estimation of user\u2019s state during a dialog turn with sequential multi-modal features. In: HCI international 2013-posters\u2019 extended abstracts, pp 572\u2013576","DOI":"10.1007\/978-3-642-39476-8_115"},{"key":"238_CR8","unstructured":"Chiba Y, Ito M, Ito A (2014a) Modeling user\u2019s state during dialog turn using HMM for multi-modal spoken dialog system. In: Proceedigs of the 7th international conference on advances in computer\u2013human interactions, pp 343\u2013346"},{"key":"238_CR9","doi-asserted-by":"crossref","unstructured":"Chiba Y, Nose T, Ito A, Ito M (2014b) User modeling by using bag-of-behaviors for building a dialog system sensitive to the interlocutor\u2019s internal state. In: Proceedings of the 15th annual meeting of the special interest group on discourse and dialogue, pp 74\u201378","DOI":"10.3115\/v1\/W14-4310"},{"key":"238_CR10","doi-asserted-by":"publisher","first-page":"126","DOI":"10.1016\/j.brainres.2008.04.023","volume":"1242","author":"O Collignon","year":"2008","unstructured":"Collignon O, Girard S, Gosselin F, Roy S, Saint-Amour D, Lassonde M, Lepore F (2008) Audio-visual integration of emotion expression. Brain Res 1242:126\u2013135","journal-title":"Brain Res"},{"key":"238_CR11","unstructured":"Csurka G, Dance C, Fan L, Willamowski J, Bray C (2004) Visual categorization with bags of keypoints. In: Proceedings of the workshop on statistical learning in computer vision, pp 1\u201322"},{"issue":"5","key":"238_CR12","doi-asserted-by":"publisher","first-page":"514","DOI":"10.1016\/j.jbi.2006.01.001","volume":"39","author":"F de Rosis","year":"2006","unstructured":"de Rosis F, Novielli N, Carofiglio V, Cavalluzzi A, de Carolis B (2006) User modeling and adaptation in health promotion dialogs with an animated character. J Biomed Inform 39(5):514\u2013531","journal-title":"J Biomed Inform"},{"key":"238_CR13","doi-asserted-by":"crossref","unstructured":"Ding C, He X (2004) K-means clustering via principal component analysis. In: Proceediings of the 21st international conference on machine learning, pp 225\u2013232","DOI":"10.1145\/1015330.1015408"},{"issue":"9\u201310","key":"238_CR14","doi-asserted-by":"publisher","first-page":"1115","DOI":"10.1016\/j.specom.2011.02.006","volume":"53","author":"K Forbes-Riley","year":"2011","unstructured":"Forbes-Riley K, Litman D (2011a) Benefits and challenges of real-time uncertainty detection and adaptation in a spoken dialogue computer tutor. Speech Commun 53(9\u201310):1115\u20131136","journal-title":"Speech Commun"},{"issue":"1","key":"238_CR15","doi-asserted-by":"publisher","first-page":"105","DOI":"10.1016\/j.csl.2009.12.002","volume":"25","author":"K Forbes-Riley","year":"2011","unstructured":"Forbes-Riley K, Litman D (2011b) Designing and evaluating a wizarded uncertainty-adaptive spoken dialogue tutoring system. Comput Speech Lang 25(1):105\u2013126","journal-title":"Comput Speech Lang"},{"issue":"4","key":"238_CR16","doi-asserted-by":"publisher","first-page":"749","DOI":"10.1007\/s10489-013-0503-z","volume":"40","author":"D Griol","year":"2014","unstructured":"Griol D, Molina JM, Callejas Z (2014) Modeling the user state for context-aware spoken interaction in ambient assisted living. Appl Intell 40(4):749\u2013771","journal-title":"Appl Intell"},{"key":"238_CR17","doi-asserted-by":"crossref","unstructured":"Hudson S, Fogarty J, Atkeson C, Avrahami D, Forlizzi J, Kiesler S, Lee J, Yang J (2003) Predicting human interruptibility with sensors: a Wizard of Oz feasibility study. In: Proceedings of the SIGCHI conference on human factors in computing systems, pp 257\u2013264","DOI":"10.1145\/642611.642657"},{"key":"238_CR18","doi-asserted-by":"crossref","unstructured":"Jiang YG, Ngo CW, Yang J (2007) Towards optimal bag-of-features for object categorization and semantic video retrieval. In: Proceedings of the 6th ACM international conference on image and video retrieval, pp 494\u2013501","DOI":"10.1145\/1282280.1282352"},{"key":"238_CR19","doi-asserted-by":"crossref","unstructured":"Jokinen K, Kanto K (2004) User expertise modelling and adaptivity in a speech-based e-mail system. In: Proceedings of the 42nd annual meeting on association for computational linguistics, pp 88\u201395","DOI":"10.3115\/1218955.1218967"},{"key":"238_CR20","doi-asserted-by":"crossref","unstructured":"Kobayashi A, Kayama K, Mizukami E, Misu T, Kashioka H, Kawai H, Nakamura S (2010) Evaluation of facial direction estimation from cameras for multi-modal spoken dialog system. In: Proceedings of the international workshop on spoken dialogue systems technology, pp 73\u201384","DOI":"10.1007\/978-3-642-16202-2_7"},{"key":"238_CR21","doi-asserted-by":"crossref","unstructured":"Koda T, Maes P (1996) Agents with faces: the effect of personification. In: Proceedings of the IEEE international workshop on robot and human communication, pp 189\u2013194","DOI":"10.1109\/ROMAN.1996.568812"},{"issue":"1","key":"238_CR22","doi-asserted-by":"publisher","first-page":"142","DOI":"10.1109\/TMM.2011.2171334","volume":"14","author":"JC Lin","year":"2012","unstructured":"Lin JC, Wu CH, Wei WL (2012) Error weighted semi-coupled hidden Markov model for audio-visual emotion recognition. IEEE Trans Multimed 14(1):142\u2013156","journal-title":"IEEE Trans Multimed"},{"issue":"2","key":"238_CR23","doi-asserted-by":"publisher","first-page":"184","DOI":"10.1109\/T-AFFC.2011.40","volume":"3","author":"A Metallinou","year":"2012","unstructured":"Metallinou A, Wollmer M, Katsamanis A, Eyben F, Schuller B, Narayanan S (2012) Context-sensitive learning for enhanced audiovisual emotion classification. IEEE Trans Affect Comput 3(2):184\u2013198","journal-title":"IEEE Trans Affect Comput"},{"key":"238_CR24","doi-asserted-by":"crossref","unstructured":"Michalowski MP, Sabanovic S, Simmons R (2006) A spatial model of engagement for a social robot. In: Proceedings of the 9th IEEE international workshop on advanced motion control, pp 762\u2013767","DOI":"10.1109\/AMC.2006.1631755"},{"key":"238_CR25","doi-asserted-by":"crossref","unstructured":"Natarajan P, Wu S, Vitaladevuni S, Zhuang X, Tsakalidis S, Park U, Prasad R, Natarajan P (2012) Multimodal feature fusion for robust event detection in web videos. In: Proceedings of computer vision and pattern recognition, pp 1298\u20131305","DOI":"10.1109\/CVPR.2012.6247814"},{"key":"238_CR26","doi-asserted-by":"publisher","first-page":"3","DOI":"10.1109\/89.221363","volume":"1","author":"KK Paliwal","year":"1993","unstructured":"Paliwal KK, Atal BS (1993) Efficient vector quantization of LPC parameters at 24 bits\/frame. IEEE Trans Speech Audio Process 1:3\u201314","journal-title":"IEEE Trans Speech Audio Process"},{"issue":"3\u20134","key":"238_CR27","doi-asserted-by":"publisher","first-page":"329","DOI":"10.1016\/j.specom.2003.10.003","volume":"42","author":"AN Pargellis","year":"2004","unstructured":"Pargellis AN, Kuo HKJ, Lee CH (2004) An automatic dialogue generation platform for personalized dialogue applications. Speech Commun 42(3\u20134):329\u2013351","journal-title":"Speech Commun"},{"issue":"2","key":"238_CR28","doi-asserted-by":"publisher","first-page":"192","DOI":"10.1007\/s11031-011-9206-0","volume":"35","author":"S Paulmann","year":"2011","unstructured":"Paulmann S, Pell MD (2011) Is there an advantage for recognizing multi-modal emotional stimuli? Motiv Emot 35(2):192\u2013201","journal-title":"Motiv Emot"},{"issue":"2","key":"238_CR29","first-page":"171","volume":"16","author":"H Pon-Barry","year":"2006","unstructured":"Pon-Barry H, Schultz K, Bratt EO, Clark B, Peters S (2006) Responding to student uncertainty in spoken tutorial dialogue systems. Int J Artif Intell Educ 16(2):171\u2013194","journal-title":"Int J Artif Intell Educ"},{"issue":"2","key":"238_CR30","doi-asserted-by":"publisher","first-page":"200","DOI":"10.1007\/s11263-010-0380-4","volume":"91","author":"JM Saragih","year":"2011","unstructured":"Saragih JM, Lucey S, Cohn JF (2011) Deformable model fitting by regularized landmark mean-shift. Int J Comput Vis 91(2):200\u2013215","journal-title":"Int J Comput Vis"},{"key":"238_CR31","unstructured":"Satake S, Kanda T, Glas DF, Imai M, Ishiguro H, Hagita N (2009) How to approach humans? Strategies for social robots to initiate interaction. In: Proceedings of the 4th ACM\/IEEE international conference on human\u2013robot interaction, pp 109\u2013116"},{"key":"238_CR32","first-page":"1531","volume":"7","author":"S Sonnenburg","year":"2006","unstructured":"Sonnenburg S, R\u00e4tsch G, Sch\u00e4fer C, Sch\u00f6lkopf B (2006) Large scale multiple kernel learning. J Mach Learn Res 7:1531\u20131565","journal-title":"J Mach Learn Res"},{"issue":"1","key":"238_CR33","doi-asserted-by":"publisher","first-page":"81","DOI":"10.1016\/j.jml.2005.02.003","volume":"53","author":"M Swerts","year":"2005","unstructured":"Swerts M, Krahmer E (2005) Audiovisual prosody and feeling of knowing. J Mem Lang 53(1):81\u201394","journal-title":"J Mem Lang"},{"key":"238_CR34","doi-asserted-by":"crossref","unstructured":"Walker JH, Sproull L, Subramani R (1994) Using a human face in an interface. In: Proceedings of the SIGCHI conference on human factors in computing systems, pp 85\u201391","DOI":"10.1145\/191666.191708"},{"issue":"3","key":"238_CR35","doi-asserted-by":"publisher","first-page":"597","DOI":"10.1109\/TMM.2012.2189550","volume":"14","author":"Y Wang","year":"2012","unstructured":"Wang Y, Guan L, Venetsanopoulos AN (2012) Kernel cross-modal factor analysis for information fusion with application to bimodal emotion recognition. IEEE Trans Multimed 14(3):597\u2013607","journal-title":"IEEE Trans Multimed"},{"issue":"2","key":"238_CR36","doi-asserted-by":"publisher","first-page":"153","DOI":"10.1016\/j.imavis.2012.03.001","volume":"31","author":"M W\u00f6llmer","year":"2013","unstructured":"W\u00f6llmer M, Kaiser M, Eyben F, Schuller B, Rigoll G (2013) LSTM-modeling of continuous emotions in an audiovisual affect recognition framework. Image Vis Comput 31(2):153\u2013163","journal-title":"Image Vis Comput"},{"issue":"1","key":"238_CR37","doi-asserted-by":"publisher","first-page":"39","DOI":"10.1109\/TPAMI.2008.52","volume":"31","author":"Z Zeng","year":"2009","unstructured":"Zeng Z, Pantic M, Roisman GI, Huang TS (2009) A survey of affect recognition methods: Audio, visual, and spontaneous expressions. IEEE Trans Pattern Anal Mach Intell 31(1):39\u201358","journal-title":"IEEE Trans Pattern Anal Mach Intell"}],"container-title":["Journal on Multimodal User Interfaces"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s12193-017-0238-y\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s12193-017-0238-y.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s12193-017-0238-y.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,8,21]],"date-time":"2023-08-21T17:57:46Z","timestamp":1692640666000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s12193-017-0238-y"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2017,1,11]]},"references-count":37,"journal-issue":{"issue":"2","published-print":{"date-parts":[[2017,6]]}},"alternative-id":["238"],"URL":"https:\/\/doi.org\/10.1007\/s12193-017-0238-y","relation":{},"ISSN":["1783-7677","1783-8738"],"issn-type":[{"type":"print","value":"1783-7677"},{"type":"electronic","value":"1783-8738"}],"subject":[],"published":{"date-parts":[[2017,1,11]]},"assertion":[{"value":"25 November 2015","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"31 December 2016","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"11 January 2017","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}