{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,17]],"date-time":"2025-12-17T08:55:41Z","timestamp":1765961741004,"version":"3.44.0"},"publisher-location":"New York, NY, USA","reference-count":27,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,9,19]],"date-time":"2023-09-19T00:00:00Z","timestamp":1695081600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,9,19]]},"DOI":"10.1145\/3570945.3607298","type":"proceedings-article","created":{"date-parts":[[2023,12,22]],"date-time":"2023-12-22T06:07:02Z","timestamp":1703225222000},"page":"1-4","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":3,"title":["Prediction of Various Backchannel Utterances Based on Multimodal Information"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0006-4604-5396","authenticated-orcid":false,"given":"Toshiki","family":"Onishi","sequence":"first","affiliation":[{"name":"Nihon University, Tokyo, Japan"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-5653-3776","authenticated-orcid":false,"given":"Naoki","family":"Azuma","sequence":"additional","affiliation":[{"name":"Nihon University, Tokyo, Japan"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-7683-2879","authenticated-orcid":false,"given":"Shunichi","family":"Kinoshita","sequence":"additional","affiliation":[{"name":"Nihon University, Tokyo, Japan"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-3849-1656","authenticated-orcid":false,"given":"Ryo","family":"Ishii","sequence":"additional","affiliation":[{"name":"NTT Corporation Kanagawa, Japan"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2133-016X","authenticated-orcid":false,"given":"Atsushi","family":"Fukayama","sequence":"additional","affiliation":[{"name":"NTT Corporation Kanagawa, Japan"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8267-871X","authenticated-orcid":false,"given":"Takao","family":"Nakamura","sequence":"additional","affiliation":[{"name":"NTT Corporation Kanagawa, Japan"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4010-9487","authenticated-orcid":false,"given":"Akihiro","family":"Miyata","sequence":"additional","affiliation":[{"name":"Nihon University, Tokyo, Japan"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2023,12,22]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Jamie Ryan Kiros, and Geoffrey E Hinton","author":"Ba Jimmy Lei","year":"2016","unstructured":"Jimmy Lei Ba, Jamie Ryan Kiros, and Geoffrey E Hinton. 2016. Layer normalization. arXiv preprint arXiv:1607.06450 (2016)."},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1109\/FG.2018.00019"},{"key":"e_1_3_2_1_3_1","volume-title":"BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding. In Annual Conference of the North American Chapter of the Association for Computational Linguistics (NAACL-HLT '19)","author":"Devlin Jacob","year":"2019","unstructured":"Jacob Devlin, Ming-Wei Chang, Kenton Lee, and Kristina Toutanova. 2019. BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding. In Annual Conference of the North American Chapter of the Association for Computational Linguistics (NAACL-HLT '19). 4171--4186."},{"key":"e_1_3_2_1_4_1","volume-title":"Friesen","author":"Ekman Paul","year":"1977","unstructured":"Paul Ekman and Wallace V. Friesen. 1977. Manual for the Facial Action Coding System. Palo Alto: Consulting Psychologists Press (1977)."},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1145\/2502081.2502224"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2005-400"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2018-1442"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"crossref","unstructured":"Shawn Hershey Sourish Chaudhuri Daniel P.W. Ellis Jort F. Gemmeke Aren Jansen R. Channing Moore Manoj Plakal Devin Platt Rif A. Saurous Bryan Seybold Malcolm Slaney Ron J. Weiss and Kevin Wilson. 2017. CNN architectures for large-scale audio classification. In 2017 ieee international conference on acoustics speech and signal processing (ICASSP '17). 131--135.","DOI":"10.1109\/ICASSP.2017.7952132"},{"key":"e_1_3_2_1_10_1","volume-title":"Long short-term memory. Neural computation 9, 8","author":"Hochreiter Sepp","year":"1997","unstructured":"Sepp Hochreiter and J\u00fcrgen Schmidhuber. 1997. Long short-term memory. Neural computation 9, 8 (1997), 1735--1780."},{"key":"e_1_3_2_1_11_1","volume-title":"Proc. 9th International Conference on Autonomous Agents and Multiagent Systems (AAMAS '10)","author":"Huang Lixing","year":"2010","unstructured":"Lixing Huang, Louis-Philippe Morency, and Jonathan Gratch. 2010. Parasocial Consensus Sampling: Combining Multiple Perspectives to Learn Virtual Human Behavior. In Proc. 9th International Conference on Autonomous Agents and Multiagent Systems (AAMAS '10). 1265--1272."},{"key":"e_1_3_2_1_12_1","volume-title":"Proc. 11th International Conference on Language Resources and Evaluation (LREC '18)","author":"Ishii Ryo","year":"2018","unstructured":"Ryo Ishii, Ryuichiro Higashinaka, and Junji Tomita. 2018. Predicting Nods by using Dialogue Acts in Dialogue. In Proc. 11th International Conference on Language Resources and Evaluation (LREC '18). 2940--2944."},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1145\/3472306.3478360"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-981-13-9443-0_6"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1177\/002383099804100404"},{"key":"e_1_3_2_1_16_1","volume-title":"Proc. 2004 conference on empirical methods in natural language processing. 230--237","author":"Kudo Taku","year":"2004","unstructured":"Taku Kudo, Kaoru Yamamoto, and Yuji Matsumoto. 2004. Applying conditional random fields to Japanese morphological analysis. In Proc. 2004 conference on empirical methods in natural language processing. 230--237."},{"key":"e_1_3_2_1_17_1","volume-title":"Proc. 30th International Conferenceon Machine Learning (ICML '13)","author":"Maas Andrew L","year":"2013","unstructured":"Andrew L Maas, Awni Y Hannun, Andrew Y Ng, et al. 2013. Rectifier non-linearities improve neural network acoustic models. In Proc. 30th International Conferenceon Machine Learning (ICML '13)."},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1515\/ling.1986.24.6.1079"},{"key":"e_1_3_2_1_19_1","volume-title":"Predicting Listener Backchannels: A Probabilistic Multimodal Approach. In International Workshop on Intelligent Virtual Agents (IVA '08)","author":"Morency Louis-Philippe","year":"2008","unstructured":"Louis-Philippe Morency, Iwan De Kok, and Jonathan Gratch. 2008. Predicting Listener Backchannels: A Probabilistic Multimodal Approach. In International Workshop on Intelligent Virtual Agents (IVA '08). 176--190."},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1145\/3514197.3549619"},{"key":"e_1_3_2_1_21_1","volume-title":"Using Neural Networks for Data-Driven Backchannel Prediction: A Survey on Input Features and Training Techniques. In International Conference on Human-Computer Interaction (HCI '15)","author":"Mueller Markus","year":"2015","unstructured":"Markus Mueller, David Leuschner, Lars Briem, Maria Schmidt, Kevin Kilgour, Sebastian Stueker, and Alex Waibel. 2015. Using Neural Networks for Data-Driven Backchannel Prediction: A Survey on Input Features and Training Techniques. In International Conference on Human-Computer Interaction (HCI '15). 329--340."},{"key":"e_1_3_2_1_22_1","volume-title":"The Use of Back-channels by Advanced Learners of Japanese: Its Qualitative and Quantitative Aspects. Japanese language education around the globe 9","author":"Mukai Chiharu","year":"1999","unstructured":"Chiharu Mukai. 1999. The Use of Back-channels by Advanced Learners of Japanese: Its Qualitative and Quantitative Aspects. Japanese language education around the globe 9 (1999), 197--219."},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-92108-2_25"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"crossref","unstructured":"Bj\u00f6rn Schuller Stefan Steidl and Anton Batliner. 2009. The interspeech 2009 emotion challenge. (2009).","DOI":"10.21437\/Interspeech.2009-103"},{"key":"e_1_3_2_1_25_1","volume-title":"11th Annual Conference of the International Speech Communication Association (INTERSPEECH '10)","author":"Truong Khiet P.","year":"2010","unstructured":"Khiet P. Truong, Ronald Poppe, and Dirk Heylen. 2010. Arule-based backchannel prediction model using pitch and pause information. In 11th Annual Conference of the International Speech Communication Association (INTERSPEECH '10). 3058--3061."},{"volume":"3","volume-title":"Proc. 4th International Conference on Spoken Language Processing (ICSLP '96)","key":"e_1_3_2_1_26_1","unstructured":"NigelWard.1996. Using Prosodic Clues to Decide When to Produce Back-channel Utterances. In Proc. 4th International Conference on Spoken Language Processing (ICSLP '96), Vol. 3. 1728--1731."},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1016\/S0378-2166(99)00109-5"}],"event":{"name":"IVA '23: ACM International Conference on Intelligent Virtual Agents","sponsor":["SIGAI ACM Special Interest Group on Artificial Intelligence"],"location":"W\u00fcrzburg Germany","acronym":"IVA '23"},"container-title":["Proceedings of the 23rd ACM International Conference on Intelligent Virtual Agents"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3570945.3607298","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3570945.3607298","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T14:26:32Z","timestamp":1755872792000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3570945.3607298"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,9,19]]},"references-count":27,"alternative-id":["10.1145\/3570945.3607298","10.1145\/3570945"],"URL":"https:\/\/doi.org\/10.1145\/3570945.3607298","relation":{},"subject":[],"published":{"date-parts":[[2023,9,19]]},"assertion":[{"value":"2023-12-22","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}