{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T13:48:39Z","timestamp":1767275319826},"publisher-location":"Cham","reference-count":27,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783319921075"},{"type":"electronic","value":"9783319921082"}],"license":[{"start":{"date-parts":[[2018,8,2]],"date-time":"2018-08-02T00:00:00Z","timestamp":1533168000000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2019]]},"DOI":"10.1007\/978-3-319-92108-2_25","type":"book-chapter","created":{"date-parts":[[2018,8,1]],"date-time":"2018-08-01T10:54:51Z","timestamp":1533120891000},"page":"247-258","update-policy":"http:\/\/dx.doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":11,"title":["Yeah, Right, Uh-Huh: A Deep Learning Backchannel Predictor"],"prefix":"10.1007","author":[{"given":"Robin","family":"Ruede","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Markus","family":"M\u00fcller","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Sebastian","family":"St\u00fcker","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Alex","family":"Waibel","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2018,8,2]]},"reference":[{"doi-asserted-by":"publisher","unstructured":"Dieleman S, Schlter J, Raffel C, Olson E, S\u00f8nderby SK et al (2015) Lasagne: first release. https:\/\/doi.org\/10.5281\/zenodo.27878","key":"25_CR1","DOI":"10.5281\/zenodo.27878"},{"key":"25_CR2","first-page":"249","volume":"9","author":"X Glorot","year":"2010","unstructured":"Glorot X, Bengio Y (2010) Understanding the difficulty of training deep feedforward neural networks. Aistats 9:249\u2013256","journal-title":"Aistats"},{"unstructured":"Godfrey J, Holliman E (1993) Switchboard-1 release 2. https:\/\/catalog.ldc.upenn.edu\/ldc97s62","key":"25_CR3"},{"unstructured":"Harkins D et\u00a0al (2003) ISIP switchboard word alignments. https:\/\/www.isip.piconepress.com\/projects\/switchboard\/","key":"25_CR4"},{"doi-asserted-by":"crossref","unstructured":"Huang L, Morency LP, Gratch J (2010) Learning backchannel prediction model from parasocial consensus sampling: a subjective evaluation. In: International conference on intelligent virtual agents. Springer, pp 159\u2013172","key":"25_CR5","DOI":"10.1007\/978-3-642-15892-6_17"},{"unstructured":"Jurafsky D, Van Ess-Dykema C et\u00a0al (1997) Switchboard discourse language modeling project","key":"25_CR6"},{"unstructured":"Kawahara T, Uesato M, Yoshino K, Takanashi K (2015) Toward adaptive generation of backchannels for attentive listening agents. In: International workshop serien on spoken dialogue systems technology, pp 1\u201310","key":"25_CR7"},{"doi-asserted-by":"crossref","unstructured":"Kawahara T, Yamaguchi T, Inoue K, Takanashi K, Ward N (2016) Prediction and generation of backchannel form for attentive listening systems. In: Proceedings of the INTERSPEECH, vol 2016","key":"25_CR8","DOI":"10.21437\/Interspeech.2016-118"},{"unstructured":"Kingma D, Ba J (2014) Adam: a method for stochastic optimization. arXiv preprint arXiv:1412.6980","key":"25_CR9"},{"unstructured":"de\u00a0Kok I, Heylen D (2012) A survey on evaluation metrics for backchannel prediction models. In: Proceedings of the interdisciplinary workshop on feedback behaviors in dialog","key":"25_CR10"},{"unstructured":"Kok ID, Heylen D (2012) A survey on evaluation metrics for backchannel prediction models. In: Feedback behaviors in dialog","key":"25_CR11"},{"key":"25_CR12","first-page":"29","volume":"2008","author":"K Laskowski","year":"2008","unstructured":"Laskowski K, Heldner M, Edlund J (2008) The fundamental frequency variation spectrum. Proc Fon 2008:29\u201332","journal-title":"Proc Fon"},{"doi-asserted-by":"publisher","unstructured":"Levin L, Lavie A, Woszczyna M, Gates D, Gavald\u00e1 M, Koll D, Waibel A (2000) The janus-iii translation system: speech-to-speech translation in multiple domains. Mach Trans 15(1):3\u201325. https:\/\/doi.org\/10.1023\/A:1011186420821","key":"25_CR13","DOI":"10.1023\/A:1011186420821"},{"unstructured":"Mikolov T, Chen K, Corrado G, Dean J (2013) Efficient estimation of word representations in vector space. arXiv preprint arXiv:1301.3781","key":"25_CR14"},{"doi-asserted-by":"crossref","unstructured":"M\u00fcller M, Leuschner D, Briem L, Schmidt M, Kilgour K, St\u00fcker S, Waibel A (2015) Using neural networks for data-driven backchannel prediction: a survey on input features and training techniques. In: International conference on human-computer interaction. Springer, pp 329\u2013340","key":"25_CR15","DOI":"10.1007\/978-3-319-20916-6_31"},{"unstructured":"Mockus J (1974) On bayesian methods for seeking the extremum. In: Proceedings of the IFIP technical conference. Springer, London, pp 400\u2013404. http:\/\/dl.acm.org\/citation.cfm?id=646296.687872","key":"25_CR16"},{"doi-asserted-by":"publisher","unstructured":"Morency LP, de Kok I, Gratch J (2010) A probabilistic multimodal approach for predicting listener backchannels. Auton Agent Multi-Agent Syst 20(1):70\u201384. https:\/\/doi.org\/10.1007\/s10458-009-9092-y","key":"25_CR17","DOI":"10.1007\/s10458-009-9092-y"},{"key":"25_CR18","doi-asserted-by":"publisher","first-page":"2513","DOI":"10.21437\/Interspeech.2016-154","volume":"2016","author":"J Niehues","year":"2016","unstructured":"Niehues J, Nguyen TS, Cho E, Ha TL, Kilgour K, M\u00fcller M, Sperber M, St\u00fcker S, Waibel A (2016) Dynamic transcription for low-latency speech translation. Interspeech 2016:2513\u20132517","journal-title":"Interspeech"},{"doi-asserted-by":"crossref","unstructured":"Ries K (1999) HMM and neural network based speech act detection. In: Proceedings of the IEEE international conference on acoustics, speech, and signal processing, 1999, vol 1. IEEE Computer Society, pp 497\u2013500","key":"25_CR19","DOI":"10.1109\/ICASSP.1999.758171"},{"issue":"2","key":"25_CR20","doi-asserted-by":"publisher","first-page":"165","DOI":"10.1109\/T-AFFC.2011.34","volume":"3","author":"M Schroder","year":"2012","unstructured":"Schroder M, Bevacqua E, Cowie R, Eyben F, Gunes H, Heylen D, Ter Maat M, McKeown G, Pammi S, Pantic M et al (2012) Building autonomous sensitive artificial listeners. IEEE Trans Affect Comput 3(2):165\u2013183","journal-title":"IEEE Trans Affect Comput"},{"issue":"1","key":"25_CR21","first-page":"1929","volume":"15","author":"N Srivastava","year":"2014","unstructured":"Srivastava N, Hinton GE, Krizhevsky A, Sutskever I, Salakhutdinov R (2014) Dropout: a simple way to prevent neural networks from overfitting. J Mach Learn Res 15(1):1929\u20131958","journal-title":"J Mach Learn Res"},{"issue":"3","key":"25_CR22","doi-asserted-by":"publisher","first-page":"339","DOI":"10.1162\/089120100561737","volume":"26","author":"A Stolcke","year":"2000","unstructured":"Stolcke A, Ries K, Coccaro N, Shriberg E, Bates R, Jurafsky D, Taylor P, Martin R, Van Ess-Dykema C, Meteer M (2000) Dialogue act modeling for automatic tagging and recognition of conversational speech. Comput Linguist 26(3):339\u2013373","journal-title":"Comput Linguist"},{"unstructured":"Stolcke A, et\u00a0al (1998) Dialog act modeling for conversational speech. In: AAAI spring symposium on applying machine learning to discourse processing, pp 98\u2013105","key":"25_CR23"},{"unstructured":"Theano Development Team: Theano: a python framework for fast computation of mathematical expressions (2016). arXiv e-prints http:\/\/arxiv.org\/abs\/1605.02688","key":"25_CR24"},{"doi-asserted-by":"crossref","unstructured":"Truong KP, Poppe RW, Heylen DKJ (2010) A rule-based backchannel prediction model using pitch and pause information. In: Proceedings of the interspeech 2010, Makuhari, Chiba, Japan. International Speech Communication Association (ISCA), pp 3058\u20133061","key":"25_CR25","DOI":"10.21437\/Interspeech.2010-59"},{"issue":"3","key":"25_CR26","doi-asserted-by":"publisher","first-page":"328","DOI":"10.1109\/29.21701","volume":"37","author":"A Waibel","year":"1989","unstructured":"Waibel A, Hanazawa T, Hinton G, Shikano K, Lang KJ (1989) Phoneme recognition using time-delay neural networks. IEEE Trans Acoust Speech Signal Process 37(3):328\u2013339","journal-title":"IEEE Trans Acoust Speech Signal Process"},{"issue":"8","key":"25_CR27","doi-asserted-by":"publisher","first-page":"1177","DOI":"10.1016\/S0378-2166(99)00109-5","volume":"32","author":"N Ward","year":"2000","unstructured":"Ward N, Tsukahara W (2000) Prosodic features which cue back-channel responses in English and Japanese. J Pragmat 32(8):1177\u20131207","journal-title":"J Pragmat"}],"container-title":["Lecture Notes in Electrical Engineering","Advanced Social Interaction with Agents"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-319-92108-2_25","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,8,28]],"date-time":"2022-08-28T15:28:36Z","timestamp":1661700516000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-319-92108-2_25"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018,8,2]]},"ISBN":["9783319921075","9783319921082"],"references-count":27,"URL":"https:\/\/doi.org\/10.1007\/978-3-319-92108-2_25","relation":{},"ISSN":["1876-1100","1876-1119"],"issn-type":[{"type":"print","value":"1876-1100"},{"type":"electronic","value":"1876-1119"}],"subject":[],"published":{"date-parts":[[2018,8,2]]}}}