{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,10]],"date-time":"2026-06-10T02:14:34Z","timestamp":1781057674770,"version":"3.54.1"},"publisher-location":"New York, NY, USA","reference-count":36,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,7,10]],"date-time":"2024-07-10T00:00:00Z","timestamp":1720569600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nd\/4.0\/"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,7,10]]},"DOI":"10.1145\/3664646.3664778","type":"proceedings-article","created":{"date-parts":[[2024,7,10]],"date-time":"2024-07-10T19:39:56Z","timestamp":1720640396000},"page":"161-169","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":8,"title":["RUBICON: Rubric-Based Evaluation of Domain-Specific Human AI Conversations"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0001-8178-469X","authenticated-orcid":false,"given":"Param","family":"Biyani","sequence":"first","affiliation":[{"name":"Microsoft, Bengaluru, India"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9042-1946","authenticated-orcid":false,"given":"Yasharth","family":"Bajpai","sequence":"additional","affiliation":[{"name":"Microsoft, Bengaluru, India"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5559-5932","authenticated-orcid":false,"given":"Arjun","family":"Radhakrishna","sequence":"additional","affiliation":[{"name":"Microsoft, Redmond, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8061-9000","authenticated-orcid":false,"given":"Gustavo","family":"Soares","sequence":"additional","affiliation":[{"name":"Microsoft, Redmond, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9226-9634","authenticated-orcid":false,"given":"Sumit","family":"Gulwani","sequence":"additional","affiliation":[{"name":"Microsoft, Redmond, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2024,7,10]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"Param Biyani Yasharth Bajpai Arjun Radhakrishna Gustavo Soares and Sumit Gulwani. 2024. Supplementary Material for RUBICON. https:\/\/aka.ms\/rubicon-supplementary"},{"key":"e_1_3_2_1_2_1","unstructured":"Praveen Kumar Bodigutla Lazaros Polymenakos and Spyros Matsoukas. 2019. Multi-domain Conversation Quality Evaluation via User Satisfaction Estimation. arxiv:1911.08567."},{"key":"e_1_3_2_1_3_1","unstructured":"Bhavya Chopra Yasharth Bajpai Param Biyani Gustavo Soares Arjun Radhakrishna Chris Parnin and Sumit Gulwani. 2024. Exploring Interaction Patterns for Debugging: Enhancing Conversational Capabilities of AI-assistants. https:\/\/api.semanticscholar.org\/CorpusID:267617149"},{"key":"e_1_3_2_1_4_1","unstructured":"Ond\u0159ej Du\u0161ek Jekaterina Novikova and Verena Rieser. 2017. Referenceless Quality Estimation for Natural Language Generation. arxiv:1708.01759."},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","unstructured":"Wieland Eckert Esther Levin and R. Pieraccini. 1998. User modeling for spoken dialogue system evaluation. 80 - 87 pages. isbn:0-7803-3698-4 https:\/\/doi.org\/10.1109\/ASRU.1997.658991 10.1109\/ASRU.1997.658991","DOI":"10.1109\/ASRU.1997.658991"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"crossref","unstructured":"Ryan Fellows H. Ihshaish Steve Battle Ciaran Haines Peter Mayhew and J. Ignacio Deza. 2021. Task-oriented Dialogue Systems: performance vs. quality-optima a review. https:\/\/api.semanticscholar.org\/CorpusID:245353758","DOI":"10.5121\/csit.2022.121306"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1080\/15228053.2023.2233814"},{"key":"e_1_3_2_1_8_1","article-title":"Survey of the state of the art in natural language generation: core tasks, applications and evaluation","volume":"61","author":"Gatt Albert","year":"2018","unstructured":"Albert Gatt and Emiel Krahmer. 2018. Survey of the state of the art in natural language generation: core tasks, applications and evaluation. J. Artif. Int. Res., 61, 1 (2018), jan, 65\u2013170. issn:1076-9757","journal-title":"J. Artif. Int. Res."},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1007\/s10515-023-00409-6"},{"key":"e_1_3_2_1_10_1","unstructured":"GitHub. 2023. GitHub Copilot. https:\/\/github.com\/features\/copilot"},{"key":"e_1_3_2_1_11_1","volume-title":"Studies in the Way of Words","author":"Grice Herbert Paul","unstructured":"Herbert Paul Grice. 1989. Studies in the Way of Words. Harvard University Press, Cambridge."},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","unstructured":"Dinesh Kalla Nathan Smith Dr. Sivaraju Kuraku and Fnu Samaah. 2023. Study and Analysis of Chat GPT and its Impact on Different Fields of Study. https:\/\/doi.org\/10.5281\/zenodo.10250455 10.5281\/zenodo.10250455","DOI":"10.5281\/zenodo.10250455"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"crossref","unstructured":"Ilya Kulikov Alexander H. Miller Kyunghyun Cho and Jason Weston. 2018. Importance of a Search Strategy in Neural Dialogue Modelling. https:\/\/api.semanticscholar.org\/CorpusID:53297919","DOI":"10.18653\/v1\/W19-8609"},{"key":"e_1_3_2_1_14_1","unstructured":"Margaret Li Jason Weston and Stephen Roller. 2019. ACUTE-EVAL: Improved Dialogue Evaluation with Optimized Questions and Multi-turn Comparisons. https:\/\/api.semanticscholar.org\/CorpusID:202538657"},{"key":"e_1_3_2_1_15_1","volume-title":"ROUGE: A Package for Automatic Evaluation of Summaries. In Text Summarization Branches Out","author":"Lin Chin-Yew","year":"2004","unstructured":"Chin-Yew Lin. 2004. ROUGE: A Package for Automatic Evaluation of Summaries. In Text Summarization Branches Out. Association for Computational Linguistics, Barcelona, Spain. 74\u201381. https:\/\/aclanthology.org\/W04-1013"},{"key":"e_1_3_2_1_16_1","volume-title":"Xia Song, Georg Buscher, Saurabh Tiwary, Brent Hecht, and Jaime Teevan.","author":"Lin Ying-Chun","year":"2024","unstructured":"Ying-Chun Lin, Jennifer Neville, Jack W. Stokes, Longqi Yang, Tara Safavi, Mengting Wan, Scott Counts, Siddharth Suri, Reid Andersen, Xiaofeng Xu, Deepak Gupta, Sujay Kumar Jauhar, Xia Song, Georg Buscher, Saurabh Tiwary, Brent Hecht, and Jaime Teevan. 2024. Interpretable User Satisfaction Estimation for Conversational Systems with Large Language Models. arxiv:2403.12388."},{"key":"e_1_3_2_1_17_1","unstructured":"Yinhan Liu Myle Ott Naman Goyal Jingfei Du Mandar Joshi Danqi Chen Omer Levy Mike Lewis Luke Zettlemoyer and Veselin Stoyanov. 2019. RoBERTa: A Robustly Optimized BERT Pretraining Approach. arxiv:1907.11692."},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"crossref","unstructured":"Clara Meister and Ryan Cotterell. 2021. Language Model Evaluation Beyond Perplexity. arxiv:2106.00085.","DOI":"10.18653\/v1\/2021.acl-long.414"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.3115\/1073083.1073135"},{"key":"e_1_3_2_1_22_1","volume-title":"Henley","author":"Parnin Chris","year":"2023","unstructured":"Chris Parnin, Gustavo Soares, Rahul Pandita, Sumit Gulwani, Jessica Rich, and Austin Z. Henley. 2023. Building Your Own Product Copilot: Challenges, Opportunities, and Needs. arxiv:2312.14231."},{"key":"e_1_3_2_1_23_1","volume-title":"Designing Voice User Interfaces: Principles of Conversational Experiences","author":"Pearl Cathy","year":"1955","unstructured":"Cathy Pearl. 2016. Designing Voice User Interfaces: Principles of Conversational Experiences (1st ed.). O\u2019Reilly Media, Inc., Sebastopol, CA. isbn:1491955414","edition":"1"},{"key":"e_1_3_2_1_24_1","volume-title":"Conversational Agents and Natural Language Interaction: Techniques and Effective Practices. Information Science Reference - Imprint of: IGI Publishing","author":"Perez-Marin Diana","unstructured":"Diana Perez-Marin and Ismael Pascual-Nieto. 2011. Conversational Agents and Natural Language Interaction: Techniques and Effective Practices. Information Science Reference - Imprint of: IGI Publishing, Hershey, PA. isbn:1609606175"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1"},{"key":"e_1_3_2_1_26_1","volume-title":"Language models are unsupervised multitask learners. OpenAI blog, 1, 8","author":"Radford Alec","year":"2019","unstructured":"Alec Radford, Jeffrey Wu, Rewon Child, David Luan, Dario Amodei, and Ilya Sutskever. 2019. Language models are unsupervised multitask learners. OpenAI blog, 1, 8 (2019), 9."},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1162\/coli_r_00470"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.47852\/bonviewaia3202939"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1145\/3632410.3632456"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1109\/TDSC.2007.70228"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","unstructured":"P. F. Strawson. 2011. Review of Paul Grice Studies in the Way of Words. isbn:9780199587292 https:\/\/doi.org\/10.1093\/acprof:oso\/9780199587292.003.0015 arxiv:https:\/\/academic.oup.com\/book\/0\/chapter\/141927393\/chapter-ag-pdf\/45432827\/book_2039_section_141927393.ag.pdf. 10.1093\/acprof:oso\/9780199587292.003.0015","DOI":"10.1093\/acprof:oso"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2017-1032"},{"key":"e_1_3_2_1_36_1","unstructured":"Yequan Wang Jiawen Deng Aixin Sun and Xuying Meng. 2023. Perplexity from PLM Is Unreliable for Evaluating Text Quality. arxiv:2210.05892."},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1"}],"event":{"name":"AIware '24: 1st ACM International Conference on AI-Powered Software","location":"Porto de Galinhas Brazil","acronym":"AIware '24","sponsor":["SIGSOFT ACM Special Interest Group on Software Engineering"]},"container-title":["Proceedings of the 1st ACM International Conference on AI-Powered Software"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3664646.3664778","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3664646.3664778","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T00:03:45Z","timestamp":1750291425000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3664646.3664778"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,7,10]]},"references-count":36,"alternative-id":["10.1145\/3664646.3664778","10.1145\/3664646"],"URL":"https:\/\/doi.org\/10.1145\/3664646.3664778","relation":{},"subject":[],"published":{"date-parts":[[2024,7,10]]},"assertion":[{"value":"2024-07-10","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}