{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,5,29]],"date-time":"2024-05-29T11:55:43Z","timestamp":1716983743431},"reference-count":27,"publisher":"Institute of Electronics, Information and Communications Engineers (IEICE)","issue":"11","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEICE Trans. Inf. &amp; Syst."],"published-print":{"date-parts":[[2015]]},"DOI":"10.1587\/transinf.2015edp7014","type":"journal-article","created":{"date-parts":[[2015,10,31]],"date-time":"2015-10-31T22:13:15Z","timestamp":1446329595000},"page":"1923-1931","source":"Crossref","is-referenced-by-count":2,"title":["Posteriori Restoration of Turn-Taking and ASR Results for Incorrectly Segmented Utterances"],"prefix":"10.1587","volume":"E98.D","author":[{"given":"Kazunori","family":"KOMATANI","sequence":"first","affiliation":[{"name":"The Institute of Scientific and Industrial Research, Osaka University"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Naoki","family":"HOTTA","sequence":"additional","affiliation":[{"name":"Graduate School of Engineering, Nagoya University"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Satoshi","family":"SATO","sequence":"additional","affiliation":[{"name":"Graduate School of Engineering, Nagoya University"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Mikio","family":"NAKANO","sequence":"additional","affiliation":[{"name":"Honda Research Institute Japan, Co., Ltd."}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"532","reference":[{"key":"1","unstructured":"[1] K. Komatani, N. Hotta, and S. Sato, \u201cRestoring incorrectly segmented keywords and turn-taking caused by short pauses,\u201d Proc. International Workshop on Spoken Dialogue Systems (IWSDS), pp.27-38, 2014."},{"key":"2","doi-asserted-by":"crossref","unstructured":"[2] N. Hotta, K. Komatani, S. Sato, and M. Nakano, \u201cDetecting incorrectly-segmented utterances for posteriori restoration of turn-taking and asr results,\u201d Proc. Annual Conference of the International Speech Communication Association (INTERSPEECH), pp.313-317, 2014.","DOI":"10.21437\/Interspeech.2014-75"},{"key":"3","unstructured":"[3] B. Shneiderman, Designing the User Interface, 3rd Edition, Addison-Wesley, 1997."},{"key":"4","doi-asserted-by":"crossref","unstructured":"[4] A. Lee, K. Oura, and K. Tokuda, \u201cMMDAgent \u2014 A fully open-source toolkit for voice interaction systems,\u201d Proc. IEEE International Conference on Acoustics, Speech &amp; Signal Processing (ICASSP), pp.8382-8385, 2013.","DOI":"10.1109\/ICASSP.2013.6639300"},{"key":"5","doi-asserted-by":"crossref","unstructured":"[5] A. Benyassine, E. Shlomot, H.-Y. Su, D. Massaloux, C. Lamblin, and J.-P. Petit, \u201cITU-T Recommendation G.729 Annex B: A silence compression scheme for use with G.729 optimized for v.70 digital simultaneous voice and data applications,\u201d IEEE Commun. Mag., vol.35, no.9, pp.64-73, 1997.","DOI":"10.1109\/35.620527"},{"key":"6","unstructured":"[6] E.E. Jan, B. Maison, L. Mangu, and G. Zweig, \u201cAutomatic construction of unique signatures and confusable sets for natural language directory assistance application,\u201d Proc. European Conf. Speech Commun. &amp; Tech. (EUROSPEECH), pp.1249-1252, 2003."},{"key":"7","doi-asserted-by":"crossref","unstructured":"[7] M. Katsumaru, K. Komatani, T. Ogata, and H.G. Okuno, \u201cAdjusting occurrence probabilities of automatically-generated abbreviated words in spoken dialogue systems,\u201d Next-Generation Applied Intelligence, Lecture Notes in Computer Science, vol.5579, pp.481-490, Springer, Berlin, Heidelberg, 2009.","DOI":"10.1007\/978-3-642-02568-6_49"},{"key":"8","doi-asserted-by":"crossref","unstructured":"[8] A. Lee, K. Nakamura, R. Nisimura, H. Saruwatari, and K. Shikano, \u201cNoise robust real world spoken dialogue system using GMM based rejection of unintended inputs,\u201d Proc. Int&apos;l Conf. Spoken Language Processing (ICSLP), pp.173-176, 2004.","DOI":"10.21437\/Interspeech.2004-111"},{"key":"9","unstructured":"[9] M. Nakano, S. Sato, K. Komatani, K. Matsuyama, K. Funakoshi, and H.G. Okuno, \u201cA two-stage domain selection framework for extensible multi-domain spoken dialogue systems,\u201d Proc. Annual Meeting of the Special Interest Group in Discourse and Dialogue (SIGDIAL), pp.18-29, June 2011."},{"key":"10","doi-asserted-by":"crossref","unstructured":"[10] R. Sato, R. Higashinaka, M. Tamoto, M. Nakano, and K. Aikawa, \u201cLearning decision trees to determine turn-taking by spoken dialogue systems,\u201d Proc. Int&apos;l Conf. Spoken Language Processing (ICSLP), pp.861-864, 2002.","DOI":"10.21437\/ICSLP.2002-293"},{"key":"11","doi-asserted-by":"crossref","unstructured":"[11] T. Ohsuga, M. Nishida, Y. Horiuchi, and A. Ichikawa, \u201cInvestigation of the relationship between turn-taking and prosodic features in spontaneous dialogue,\u201d Proc. European Conf. Speech Commun. &amp; Tech. (EUROSPEECH), 2005.","DOI":"10.21437\/Interspeech.2005-32"},{"key":"12","doi-asserted-by":"crossref","unstructured":"[12] N. Kitaoka, M. Takeuchi, R. Nishimura, and S. Nakagawa, \u201cResponse timing detection using prosodic and linguistic information for human-friendly spoken dialog systems,\u201d Transactions of the Japanese Society for Artificial Intellignece, vol.20, no.3, pp.220-228, 2005.","DOI":"10.1527\/tjsai.20.220"},{"key":"13","unstructured":"[13] J. Edlund, M. Heldner, and J. Gustafson, \u201cUtterance segmentation and turn-taking in spoken dialogue systems,\u201d Computer Studies in Language and Speech, pp.576-587, 2005."},{"key":"14","doi-asserted-by":"crossref","unstructured":"[14] A. Raux and M. Eskenazi, \u201cOptimizing endpointing thresholds using dialogue features in a spoken dialogue system,\u201d Proc. SIGdial Workshop on Discourse and Dialogue, pp.1-10, 2008.","DOI":"10.3115\/1622064.1622066"},{"key":"15","doi-asserted-by":"crossref","unstructured":"[15] A. Raux and M. Eskenazi, \u201cA finite-state turn-taking model for spoken dialog systems,\u201d Proc. Human Language Technologies: The 2009 Annual Conference of the North American Chapter of the Association for Computational Linguistics (HLT NAACL), pp.629-637, 2009.","DOI":"10.3115\/1620754.1620846"},{"key":"16","unstructured":"[16] G. Skantze and A. Hjalmarsson, \u201cTowards incremental speech generation in dialogue systems,\u201d Proc. Annual Meeting of the Special Interest Group in Discourse and Dialogue (SIGDIAL), pp.1-8, 2010."},{"key":"17","unstructured":"[17] T. Baumann and D. Schlangen, \u201cPredicting the micro-timing of user input for an incremental spoken dialogue system that completes a user&apos;s ongoing turn,\u201d Proc. Annual Meeting of the Special Interest Group in Discourse and Dialogue (SIGDIAL), pp.120-129, 2011."},{"key":"18","unstructured":"[18] E. Selfridge, I. Arizmendi, P.A. Heeman, and J.D. Williams, \u201cStability and accuracy in incremental speech recognition,\u201d Proc. Annual Meeting of the Special Interest Group in Discourse and Dialogue (SIGDIAL), pp.110-119, 2011."},{"key":"19","doi-asserted-by":"crossref","unstructured":"[19] D. Traum, D. DeVault, J. Lee, Z. Wang, and S. Marsella, \u201cIncremental dialogue understanding and feedback for multiparty, multimodal conversation,\u201d Intelligent Virtual Agents, Lecture Notes in Computer Science, vol.7502, pp.275-288, Springer, Berlin, Heidelberg, 2012.","DOI":"10.1007\/978-3-642-33197-8_29"},{"key":"20","doi-asserted-by":"crossref","unstructured":"[20] H. Sakai, T. Cincarek, H. Kawanami, H. Saruwatari, K. Shikano, and A. Lee, \u201cVoice activity detection applied to hands-free spoken dialogue robot based on decoding using acoustic and language model,\u201d Proc. 1st International Conference on Robot Communication and Coordination (ROBOCOMM), 2007.","DOI":"10.4108\/ICST.ROBOCOMM2007.2088"},{"key":"21","unstructured":"[21] A. Lee and T. Kawahara, \u201cRecent development of open-source speech recognition engine Julius,\u201d Proc. APSIPA ASC: Asia-Pacific Signal and Information Processing Association, Annual Summit and Conference, pp.131-137, 2009."},{"key":"22","doi-asserted-by":"crossref","unstructured":"[22] M. Nakano, N. Miyazaki, J.-I. Hirasawa, K. Dohsaka, and T. Kawabata, \u201cUnderstanding unsegmented user utterances in real-time spoken dialogue systems,\u201d Proc. 37th Annual Meeting of the Association for Computational Linguistics (ACL), pp.200-207, 1999.","DOI":"10.3115\/1034678.1034715"},{"key":"23","unstructured":"[23] L. Bell, J. Boye, and J. Gustafson, \u201cReal-time handling of fragmented utterances,\u201d Proc. NAACL Workshop on Adaption in Dialogue Systems, pp.2-8, 2001."},{"key":"24","doi-asserted-by":"crossref","unstructured":"[24] M.G. Core and L.K. Schubert, \u201cA syntactic framework for speech repairs and other disruptions,\u201d Proc. 37th Annual Meeting of the Association for Computational Linguistics (ACL), pp.413-420, 1999.","DOI":"10.3115\/1034678.1034742"},{"key":"25","unstructured":"[25] P.A. Heeman and J.F. Allen, \u201cSpeech repairs, intonational phrases and discourse markers: Modeling speakers&apos; utterances in spoken dialogue,\u201d Computational Linguistics, vol.25, pp.527-571, 1999."},{"key":"26","doi-asserted-by":"crossref","unstructured":"[26] Y. Liu, E. Shriberg, A. Stolcke, D. Hillard, M. Ostendorf, and M. Harper, \u201cEnriching speech recognition with automatic detection of sentence boundaries and disfluencies,\u201d IEEE Trans. Audio Speech Language Process., vol.14, no.5, pp.1526-1540, Sept. 2006.","DOI":"10.1109\/TASL.2006.878255"},{"key":"27","unstructured":"[27] K. Georgila, N. Wang, and J. Gratch, \u201cCross-domain speech disfluency detection,\u201d Proc. Annual Meeting of the Special Interest Group in Discourse and Dialogue (SIGDIAL), pp.237-240, 2010."}],"container-title":["IEICE Transactions on Information and Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/www.jstage.jst.go.jp\/article\/transinf\/E98.D\/11\/E98.D_2015EDP7014\/_pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,8,15]],"date-time":"2023-08-15T18:11:01Z","timestamp":1692123061000},"score":1,"resource":{"primary":{"URL":"https:\/\/www.jstage.jst.go.jp\/article\/transinf\/E98.D\/11\/E98.D_2015EDP7014\/_article"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2015]]},"references-count":27,"journal-issue":{"issue":"11","published-print":{"date-parts":[[2015]]}},"URL":"https:\/\/doi.org\/10.1587\/transinf.2015edp7014","relation":{},"ISSN":["0916-8532","1745-1361"],"issn-type":[{"value":"0916-8532","type":"print"},{"value":"1745-1361","type":"electronic"}],"subject":[],"published":{"date-parts":[[2015]]}}}