{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T04:09:13Z","timestamp":1750219753160,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":28,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,10,21]],"date-time":"2023-10-21T00:00:00Z","timestamp":1697846400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,10,21]]},"DOI":"10.1145\/3583780.3615493","type":"proceedings-article","created":{"date-parts":[[2023,10,21]],"date-time":"2023-10-21T07:45:42Z","timestamp":1697874342000},"page":"4581-4587","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":1,"title":["Predicting Interaction Quality of Conversational Assistants With Spoken Language Understanding Model Confidences"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0009-9613-8736","authenticated-orcid":false,"given":"Yue","family":"Gao","sequence":"first","affiliation":[{"name":"University of Wisconsin-Madison, Madison, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-8141-557X","authenticated-orcid":false,"given":"Enrico","family":"Piovano","sequence":"additional","affiliation":[{"name":"Amazon Alexa AI, Berlin, Germany"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-9292-3941","authenticated-orcid":false,"given":"Tamer","family":"Soliman","sequence":"additional","affiliation":[{"name":"Amazon Alexa AI, Sunnyvale, 
USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-9009-5836","authenticated-orcid":false,"given":"Monir","family":"Moniruzzaman","sequence":"additional","affiliation":[{"name":"Amazon Alexa AI, Seattle, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-9124-7541","authenticated-orcid":false,"given":"Anoop","family":"Kumar","sequence":"additional","affiliation":[{"name":"Amazon Alexa AI, Seattle, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-9138-6069","authenticated-orcid":false,"given":"Melanie","family":"Bradford","sequence":"additional","affiliation":[{"name":"Amazon Alexa AI, Berlin, Germany"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-4855-2548","authenticated-orcid":false,"given":"Subhrangshu","family":"Nandi","sequence":"additional","affiliation":[{"name":"Amazon Alexa AI, Seattle, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2023,10,21]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"SIGDIAL 2019 Workshop on Implications of Deep Learning for Dialog Modeling.","author":"Bodigutla Praveen Kumar","year":"2019","unstructured":"Praveen Kumar Bodigutla , Spyros Matsoukas , Longshaokan Marshall Wang , Kate Ridgeway , Joshua Levy , Swanand Joshi , and Alborz Geramifard . 2019 . Domain-Independent turn-level Dialogue Quality Evaluation via User Satisfaction Estimation . In SIGDIAL 2019 Workshop on Implications of Deep Learning for Dialog Modeling. Praveen Kumar Bodigutla, Spyros Matsoukas, Longshaokan Marshall Wang, Kate Ridgeway, Joshua Levy, Swanand Joshi, and Alborz Geramifard. 2019. Domain-Independent turn-level Dialogue Quality Evaluation via User Satisfaction Estimation. 
In SIGDIAL 2019 Workshop on Implications of Deep Learning for Dialog Modeling."},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.findings-emnlp.347"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.findings-acl.44"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-540-87987-9_8"},{"key":"e_1_3_2_1_5_1","volume-title":"Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies","volume":"1","author":"Devlin Jacob","year":"2019","unstructured":"Jacob Devlin , Ming-Wei Chang , Kenton Lee , and Kristina Toutanova . 2019 . BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding . In Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies , Volume 1 (Long and Short Papers). Association for Computational Linguistics, Minneapolis, Minnesota, 4171--4186. https:\/\/doi.org\/10. 18653\/v1\/N19--1423 10.18653\/v1 Jacob Devlin, Ming-Wei Chang, Kenton Lee, and Kristina Toutanova. 2019. BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding. In Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long and Short Papers). Association for Computational Linguistics, Minneapolis, Minnesota, 4171--4186. https:\/\/doi.org\/10.18653\/v1\/N19--1423"},{"key":"e_1_3_2_1_6_1","volume-title":"On statistical bias in active learning: How and when to fix it. arXiv preprint arXiv:2101.11665","author":"Farquhar Sebastian","year":"2021","unstructured":"Sebastian Farquhar , Yarin Gal , and Tom Rainforth . 2021. On statistical bias in active learning: How and when to fix it. arXiv preprint arXiv:2101.11665 ( 2021 ). Sebastian Farquhar, Yarin Gal, and Tom Rainforth. 2021. 
On statistical bias in active learning: How and when to fix it. arXiv preprint arXiv:2101.11665 (2021)."},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2017\/239"},{"key":"e_1_3_2_1_8_1","volume-title":"KDD 2021 Workshop on Data-Efficient Machine Learning.","author":"Gupta Saurabh","year":"2021","unstructured":"Saurabh Gupta , Xing Fan , Derek Liu , Benjamin Yao , Yuan Ling , Kun Zhou , Tuan-Hung Pham , and Edward Guo . 2021 . RoBERTaIQ: An efficient framework for automatic interaction quality estimation of dialogue systems . In KDD 2021 Workshop on Data-Efficient Machine Learning. Saurabh Gupta, Xing Fan, Derek Liu, Benjamin Yao, Yuan Ling, Kun Zhou, Tuan-Hung Pham, and Edward Guo. 2021. RoBERTaIQ: An efficient framework for automatic interaction quality estimation of dialogue systems. In KDD 2021 Workshop on Data-Efficient Machine Learning."},{"key":"e_1_3_2_1_9_1","volume-title":"FPI: Failure Point Isolation in Large-scale Conversational Assistants. In Proceedings of the 2022 Conference of the North American","author":"Khaziev Rinat","year":"2022","unstructured":"Rinat Khaziev , Usman Shahid , Tobias R\u00f6ding , Rakesh Chada , Emir Kapanci , and Pradeep Natarajan . 2022 . FPI: Failure Point Isolation in Large-scale Conversational Assistants. In Proceedings of the 2022 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies: Industry Track. Association for Computational Linguistics , Hybrid : Seattle, Washington Online, 141--148. https:\/\/doi.org\/10.18653\/v1\/2022.naacl-industry.17 10.18653\/v1 Rinat Khaziev, Usman Shahid, Tobias R\u00f6ding, Rakesh Chada, Emir Kapanci, and Pradeep Natarajan. 2022. FPI: Failure Point Isolation in Large-scale Conversational Assistants. In Proceedings of the 2022 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies: Industry Track. 
Association for Computational Linguistics, Hybrid: Seattle, Washington Online, 141--148. https:\/\/doi.org\/10.18653\/v1\/2022.naacl-industry.17"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/N18-3003"},{"key":"e_1_3_2_1_11_1","volume-title":"International Conference on Machine Learning. PMLR, 5753--5763","author":"Kossen Jannik","year":"2021","unstructured":"Jannik Kossen , Sebastian Farquhar , Yarin Gal , and Tom Rainforth . 2021 . Active testing: Sample-efficient model evaluation . In International Conference on Machine Learning. PMLR, 5753--5763 . Jannik Kossen, Sebastian Farquhar, Yarin Gal, and Tom Rainforth. 2021. Active testing: Sample-efficient model evaluation. In International Conference on Machine Learning. PMLR, 5753--5763."},{"key":"e_1_3_2_1_12_1","volume-title":"ROUGE: A Package for Automatic Evaluation of Summaries. In Text Summarization Branches Out","author":"Lin Chin-Yew","year":"2004","unstructured":"Chin-Yew Lin . 2004 . ROUGE: A Package for Automatic Evaluation of Summaries. In Text Summarization Branches Out . Association for Computational Linguistics , Barcelona, Spain , 74--81. Chin-Yew Lin. 2004. ROUGE: A Package for Automatic Evaluation of Summaries. In Text Summarization Branches Out. Association for Computational Linguistics, Barcelona, Spain, 74--81."},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P17-1103"},{"key":"e_1_3_2_1_14_1","volume-title":"International Conference on Machine Learning. PMLR, 3759--3768","author":"Nguyen Phuc","year":"2018","unstructured":"Phuc Nguyen , Deva Ramanan , and Charless Fowlkes . 2018 . Active testing: An efficient and robust framework for estimating accuracy . In International Conference on Machine Learning. PMLR, 3759--3768 . Phuc Nguyen, Deva Ramanan, and Charless Fowlkes. 2018. Active testing: An efficient and robust framework for estimating accuracy. In International Conference on Machine Learning. 
PMLR, 3759--3768."},{"key":"e_1_3_2_1_15_1","volume-title":"Proceedings of the 40th Annual Meeting of the Association for Computational Linguistics. Association for Computational Linguistics","author":"Papineni Kishore","year":"2002","unstructured":"Kishore Papineni , Salim Roukos , Todd Ward , and Wei-Jing Zhu . 2002 . Bleu: a Method for Automatic Evaluation of Machine Translation . In Proceedings of the 40th Annual Meeting of the Association for Computational Linguistics. Association for Computational Linguistics , Philadelphia, Pennsylvania, USA, 311--318. https:\/\/doi.org\/10.3115\/1073083.1073135 10.3115\/1073083.1073135 Kishore Papineni, Salim Roukos, Todd Ward, and Wei-Jing Zhu. 2002. Bleu: a Method for Automatic Evaluation of Machine Translation. In Proceedings of the 40th Annual Meeting of the Association for Computational Linguistics. Association for Computational Linguistics, Philadelphia, Pennsylvania, USA, 311--318. https:\/\/doi.org\/10.3115\/1073083.1073135"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICPR56361.2022.9956048"},{"key":"e_1_3_2_1_17_1","volume-title":"Feedback-based self-learning in large-scale conversational ai agents. AI magazine","author":"Ponnusamy Pragaash","year":"2022","unstructured":"Pragaash Ponnusamy , Alireza Ghias , Yi Yi , Benjamin Yao , Chenlei Guo , and Ruhi Sarikaya . 2022. Feedback-based self-learning in large-scale conversational ai agents. AI magazine , Vol. 42 , 4 ( 2022 ), 43--56. Pragaash Ponnusamy, Alireza Ghias, Yi Yi, Benjamin Yao, Chenlei Guo, and Ruhi Sarikaya. 2022. Feedback-based self-learning in large-scale conversational ai agents. AI magazine, Vol. 
42, 4 (2022), 43--56."},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.acl-main.214"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.specom.2015.06.003"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.specom.2015.06.003"},{"key":"e_1_3_2_1_21_1","volume-title":"Proceedings of the Eighth International Conference on Language Resources and Evaluation (LREC'12)","author":"Schmitt Alexander","year":"2012","unstructured":"Alexander Schmitt , Stefan Ultes , and Wolfgang Minker . 2012 . A Parameterized and Annotated Spoken Dialog Corpus of the Carnegie Mellon University Let's Go Bus Information System . In Proceedings of the Eighth International Conference on Language Resources and Evaluation (LREC'12) . European Language Resources Association (ELRA), Istanbul, Turkey, 3369--3373. Alexander Schmitt, Stefan Ultes, and Wolfgang Minker. 2012. A Parameterized and Annotated Spoken Dialog Corpus of the Carnegie Mellon University Let's Go Bus Information System. In Proceedings of the Eighth International Conference on Language Resources and Evaluation (LREC'12). European Language Resources Association (ELRA), Istanbul, Turkey, 3369--3373."},{"key":"e_1_3_2_1_22_1","volume-title":"Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing: Industry Track. Association for Computational Linguistics, Abu Dhabi, UAE, 371--378","author":"Schroedl Stefan","year":"2022","unstructured":"Stefan Schroedl , Manoj Kumar , Kiana Hajebi , Morteza Ziyadi , Sriram Venkatapathy , Anil Ramakrishna , Rahul Gupta , and Pradeep Natarajan . 2022 . Improving Large-Scale Conversational Assistants using Model Interpretation based Training Sample Selection . In Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing: Industry Track. Association for Computational Linguistics, Abu Dhabi, UAE, 371--378 . 
https:\/\/aclanthology.org\/2022.emnlp-industry.37 Stefan Schroedl, Manoj Kumar, Kiana Hajebi, Morteza Ziyadi, Sriram Venkatapathy, Anil Ramakrishna, Rahul Gupta, and Pradeep Natarajan. 2022. Improving Large-Scale Conversational Assistants using Model Interpretation based Training Sample Selection. In Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing: Industry Track. Association for Computational Linguistics, Abu Dhabi, UAE, 371--378. https:\/\/aclanthology.org\/2022.emnlp-industry.37"},{"key":"e_1_3_2_1_23_1","volume-title":"Detecting, root-causing, and fixing NLU model errors. arXiv preprint arXiv:2110.06384","author":"Sethi Pooja","year":"2021","unstructured":"Pooja Sethi , Denis Savenkov , Forough Arabshahi , Jack Goetz , Micaela Tolliver , Nicolas Scheffer , Ilknur Kabul , Yue Liu , and Ahmed Aly . 2021. AutoNLU : Detecting, root-causing, and fixing NLU model errors. arXiv preprint arXiv:2110.06384 ( 2021 ). Pooja Sethi, Denis Savenkov, Forough Arabshahi, Jack Goetz, Micaela Tolliver, Nicolas Scheffer, Ilknur Kabul, Yue Liu, and Ahmed Aly. 2021. AutoNLU: Detecting, root-causing, and fixing NLU model errors. arXiv preprint arXiv:2110.06384 (2021)."},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.acl-main.220"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1109\/SLT.2018.8639519"},{"key":"e_1_3_2_1_26_1","volume-title":"Advances in Neural Information Processing Systems","author":"Vaswani Ashish","year":"2017","unstructured":"Ashish Vaswani , Noam Shazeer , Niki Parmar , Jakob Uszkoreit , Llion Jones , Aidan N Gomez , \u0141ukasz Kaiser, and Illia Polosukhin . 2017 . Attention is All you Need. In Advances in Neural Information Processing Systems, I. Guyon, U. Von Luxburg, S. Bengio, H. Wallach, R. Fergus, S. Vishwanathan, and R. Garnett (Eds.), Vol. 30 . Curran Associates, Inc . 
Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N Gomez, \u0141ukasz Kaiser, and Illia Polosukhin. 2017. Attention is All you Need. In Advances in Neural Information Processing Systems, I. Guyon, U. Von Luxburg, S. Bengio, H. Wallach, R. Fergus, S. Vishwanathan, and R. Garnett (Eds.), Vol. 30. Curran Associates, Inc."},{"key":"e_1_3_2_1_27_1","volume-title":"Proceedings of the 2021 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies: Industry Papers","author":"Wang Tong","year":"2021","unstructured":"Tong Wang , Jiangning Chen , Mohsen Malmir , Shuyan Dong , Xin He , Han Wang , Chengwei Su , Yue Liu , and Yang Liu . 2021. Optimizing NLU Reranking Using Entity Resolution Signals in Multi-domain Dialog Systems . In Proceedings of the 2021 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies: Industry Papers. Association for Computational Linguistics , Online , 19--25. https:\/\/doi.org\/10.18653\/v1\/2021.naacl-industry.3 10.18653\/v1 Tong Wang, Jiangning Chen, Mohsen Malmir, Shuyan Dong, Xin He, Han Wang, Chengwei Su, Yue Liu, and Yang Liu. 2021. Optimizing NLU Reranking Using Entity Resolution Signals in Multi-domain Dialog Systems. In Proceedings of the 2021 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies: Industry Papers. Association for Computational Linguistics, Online, 19--25. 
https:\/\/doi.org\/10.18653\/v1\/2021.naacl-industry.3"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.dash-1.2"}],"event":{"name":"CIKM '23: The 32nd ACM International Conference on Information and Knowledge Management","sponsor":["SIGWEB ACM Special Interest Group on Hypertext, Hypermedia, and Web","SIGIR ACM Special Interest Group on Information Retrieval"],"location":"Birmingham United Kingdom","acronym":"CIKM '23"},"container-title":["Proceedings of the 32nd ACM International Conference on Information and Knowledge Management"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3583780.3615493","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3583780.3615493","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T16:36:55Z","timestamp":1750178215000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3583780.3615493"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,10,21]]},"references-count":28,"alternative-id":["10.1145\/3583780.3615493","10.1145\/3583780"],"URL":"https:\/\/doi.org\/10.1145\/3583780.3615493","relation":{},"subject":[],"published":{"date-parts":[[2023,10,21]]},"assertion":[{"value":"2023-10-21","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}