{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,5]],"date-time":"2025-11-05T11:21:41Z","timestamp":1762341701506,"version":"3.40.3"},"publisher-location":"Cham","reference-count":21,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783030731120"},{"type":"electronic","value":"9783030731137"}],"license":[{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021]]},"DOI":"10.1007\/978-3-030-73113-7_12","type":"book-chapter","created":{"date-parts":[[2021,7,22]],"date-time":"2021-07-22T15:03:19Z","timestamp":1626966199000},"page":"133-144","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["BERT-Based Dialogue Evaluation Methods with RUBER Framework"],"prefix":"10.1007","author":[{"given":"Khin Thet","family":"Htar","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yanan","family":"Wang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jianming","family":"Wu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Gen","family":"Hattori","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Aye","family":"Thida","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2021,7,23]]},"reference":[{"key":"12_CR1","doi-asserted-by":"crossref","unstructured":"Walker, Marilyn A., et al.: PARADISE: a framework for evaluating spoken dialogue agents. In: 35th Annual Meeting of the Association for Computational Linguistics and 8th Conference of the European Chapter of the Association for Computational Linguistics, Association for Computational Linguistics (1997)","DOI":"10.3115\/976909.979652"},{"key":"12_CR2","doi-asserted-by":"crossref","unstructured":"M\u00f6ller, S., et al.: MeMo: towards automatic usability evaluation of spoken dialogue services by user error simulations. In: Ninth International Conference on Spoken Language Processing (2006)","DOI":"10.21437\/Interspeech.2006-494"},{"key":"12_CR3","doi-asserted-by":"crossref","unstructured":"Papineni, K., Roukos, S., Ward, T., Zhu, W.-J.: BLEU: a method for automatic evaluation of machine translation. In: Proceedings of the 40th annual meeting on association for computational linguistics. Association for Computational Linguistics, pp. 311\u2013318 (2002)","DOI":"10.3115\/1073083.1073135"},{"key":"12_CR4","unstructured":"Banerjee, S., Lavie, A.: METEOR : an automatic metric for MT evaluation with improved correlation with human judgements. In: Proceedings of the Workshop on Intrinsic and Extrinsic Evaluation Measures for Machine Translation and\/or Summarisation (2005)"},{"key":"12_CR5","unstructured":"Lin, C.-Y.: ROUGE : a package for automatic evaluation of summaries. In: Text Summarisation Branches out : Proceedings of the ACL-04 Workshop (2004)"},{"key":"12_CR6","doi-asserted-by":"crossref","unstructured":"Liu, C.-W., Lowe, R., Serban, I., Noseworthy, M., Charlin, L., Pineau, J.: How NOT to evaluate your dialogue system : an empirical study of unsupervised evaluation metrics for dialogue response generation. In: Proceedings of the 2016 Conference on Empirical Methods in Natural Language Processing (2016)","DOI":"10.18653\/v1\/D16-1230"},{"key":"12_CR7","doi-asserted-by":"crossref","unstructured":"Lowe, R., Noseworthy, M., Serban, I.V., Angerlard-Gontier, N., Bengio, Y., Pineau, J.: Towards an automatic turing test : learning to evaluate dialogue responses. In: Proceedings of the 55th Annual Meeting of the Association for Computational Linguistics, Association for Computational Linguistics, pp. 1116\u20131126 (2017)","DOI":"10.18653\/v1\/P17-1103"},{"key":"12_CR8","doi-asserted-by":"crossref","unstructured":"Tao, C., Mou, L., Zhao, D., Yan, R.: RUBER : an unsupervised method for automatic evaluation of open-domain dialog systems. In: Mcllraith and Weinberger (2018)","DOI":"10.1609\/aaai.v32i1.11321"},{"key":"12_CR9","unstructured":"Devlin, J., Chang, M.-W., Lee, K., Toutanova, K.: BERT : pre-training of deep bidirectional transformers for language understanding. In: Proceedings of NAACL-HLT, pp. 4171\u20134186 (2018)"},{"key":"12_CR10","unstructured":"Li, Y., et al.: Dailydialog: A manually labelled multi-turn dialogue dataset. In: Proceedings of the Eighth International Joint Conference on Natural Language Processing (Volume 1: Long Papers) (2017)"},{"key":"12_CR11","doi-asserted-by":"crossref","unstructured":"Williams, A., Nangia, N., Bowman, S.: A broad-coverage challenge corpus for sentence understanding through inference. In: Proceedings of the 2018 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long Papers), pp. 1112\u20131122 (2018)","DOI":"10.18653\/v1\/N18-1101"},{"key":"12_CR12","doi-asserted-by":"crossref","unstructured":"Imamura, K., Sumita, E.: Recycling a pre-trained bert encoder for neural machine translation. In: Proceedings of the 3rd Workshop on Neural Generation and Translation (WNGT 2019), pp. 23\u201331 (2019)","DOI":"10.18653\/v1\/D19-5603"},{"key":"12_CR13","doi-asserted-by":"crossref","unstructured":"Reimers, N., Gurevich, I.: Sentence-bert : Sentence embeddings using siamese bert-networks. In: Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing, Association for Computational Linguistics (2019)","DOI":"10.18653\/v1\/D19-1410"},{"key":"12_CR14","doi-asserted-by":"crossref","unstructured":"Yan, R., Song, Y., Wu, H.: Learning to respond with deep neural networks for retrieval-based human-computer conversation system. In: Proceedings of the 39th International CM SIGIR conference on Research and Development in Information Retrieval. pp. 55\u201364 (2016)","DOI":"10.1145\/2911451.2911542"},{"key":"12_CR15","doi-asserted-by":"crossref","unstructured":"Luong, T., Pham, H., Manning, C.D.: Effective approaches to attention-based neural machine translation. In: Proceedings of the 2015 Conference on Empirical Methods in Natural Language Processing, Association for Computational Linguistics (2015)","DOI":"10.18653\/v1\/D15-1166"},{"key":"12_CR16","unstructured":"Danescu-Niculescu-Mizil, C., Lee, L.: Chameleons in imagined conversations : a new approach to understanding coordination of linguistic style in dialogs. In: Proceedings of the 2nd workshop on cognitive modelling and computational linguistics, Association for Computational Linguistics (2011)"},{"key":"12_CR17","doi-asserted-by":"crossref","unstructured":"Lowe, R., Pow, N., Serban, I., Pineau, J.: The Ubuntu dialogue corpus: a large dataset for research in unstructured multi-turn dialogue systems. In: Proceedings of the 16th Annual Meeting of the Special Interest Group on Discourse and Dialogue, pp. 285\u2013294, Prague, Czech Republic. Association for Computational Linguistics (2015)","DOI":"10.18653\/v1\/W15-4640"},{"key":"12_CR18","unstructured":"Zhao, T., Pavel, A., Eskenazi, M., Gupta, P., Mehri, S., Bigham, J.P.: Investigating evaluation of open-domain dialogue systems with human generated multiple references. In: Proceedings of the SIGDial 2019 Conference, pp. 379\u2013391 (2019)"},{"key":"12_CR19","unstructured":"Ritter, A., Cherry, C., Dolan, W.B.: Data-driven response generation in social media. In: Proceedings of the 2011 Conference on Empirical Methods in Natural Language Processing, pp. 583\u2013593, Edinburgh, Scotland, UK. Association for Computational Linguistics (2011)"},{"key":"12_CR20","doi-asserted-by":"publisher","first-page":"378","DOI":"10.1037\/h0031619","volume":"76","author":"JL Fleiss","year":"1971","unstructured":"Fleiss, J.L.: Measuring nominal scale agreement among many raters. Psychol. Bull. 76, 378\u2013382 (1971)","journal-title":"Psychol. Bull."},{"issue":"1","key":"12_CR21","doi-asserted-by":"publisher","first-page":"159","DOI":"10.2307\/2529310","volume":"33","author":"J Landis","year":"1977","unstructured":"Landis, J., Koch, G.: The measurement of observer agreement for categorical data. Biometrics 33(1), 159\u201374 (1977)","journal-title":"Biometrics"}],"container-title":["Advances in Intelligent Systems and Computing","Advances in Artificial Intelligence"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-73113-7_12","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,1,5]],"date-time":"2023-01-05T06:02:23Z","timestamp":1672898543000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-030-73113-7_12"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021]]},"ISBN":["9783030731120","9783030731137"],"references-count":21,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-73113-7_12","relation":{},"ISSN":["2194-5357","2194-5365"],"issn-type":[{"type":"print","value":"2194-5357"},{"type":"electronic","value":"2194-5365"}],"subject":[],"published":{"date-parts":[[2021]]},"assertion":[{"value":"23 July 2021","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"JSAI","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Annual Conference of the Japanese Society for Artificial Intelligence","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Kumamoto-ken","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Japan","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2020","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"9 June 2020","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"12 June 2020","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"34","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"jsai2020","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/www.ai-gakkai.or.jp\/jsai2020\/en","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}