{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,11]],"date-time":"2025-11-11T22:36:56Z","timestamp":1762900616201,"version":"3.44.0"},"publisher-location":"New York, NY, USA","reference-count":48,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,7,10]],"date-time":"2024-07-10T00:00:00Z","timestamp":1720569600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-sa\/4.0\/"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,7,10]]},"DOI":"10.1145\/3626772.3657712","type":"proceedings-article","created":{"date-parts":[[2024,7,11]],"date-time":"2024-07-11T12:40:05Z","timestamp":1720701605000},"page":"1952-1962","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":3,"title":["Rethinking the Evaluation of Dialogue Systems: Effects of User Feedback on Crowdworkers and LLMs"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-5301-4244","authenticated-orcid":false,"given":"Clemencia","family":"Siro","sequence":"first","affiliation":[{"name":"University of Amsterdam, Amsterdam, Netherlands"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9447-4172","authenticated-orcid":false,"given":"Mohammad","family":"Aliannejadi","sequence":"additional","affiliation":[{"name":"University of Amsterdam, Amstedam, Netherlands"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1086-0202","authenticated-orcid":false,"given":"Maarten","family":"de Rijke","sequence":"additional","affiliation":[{"name":"University of Amsterdam, Amsterdam, Netherlands"}]}],"member":"320","published-online":{"date-parts":[[2024,7,11]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1145\/3331184.3331265"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1145\/1480506.1480508"},{"key":"e_1_3_2_1_3_1","unstructured":"Amazon Mechanical Turk. 2023. https:\/\/www.mturk.com."},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1145\/3406522.3446023"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1145\/3397271.3401032"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1007\/978--3-030--45442--5_26"},{"key":"e_1_3_2_1_7_1","volume-title":"Multi-domain Conversation Quality Evaluation via User Satisfaction Estimation. CoRR","author":"Bodigutla Praveen Kumar","year":"2019","unstructured":"Praveen Kumar Bodigutla, Lazaros Polymenakos, and Spyros Matsoukas. 2019. Multi-domain Conversation Quality Evaluation via User Satisfaction Estimation. CoRR , Vol. abs\/1911.08567 (2019). showeprint[arXiv]1911.08567 http:\/\/arxiv.org\/abs\/1911.08567"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.18653\/V1\/2020.FINDINGS-EMNLP.347"},{"key":"e_1_3_2_1_9_1","volume-title":"Advances in Neural Information Processing Systems 33: Annual Conference on Neural Information Processing Systems 2020","author":"Brown Tom B.","year":"2020","unstructured":"Tom B. Brown, Benjamin Mann, Nick Ryder, Melanie Subbiah, Jared Kaplan, Prafulla Dhariwal, Arvind Neelakantan, Pranav Shyam, Girish Sastry, Amanda Askell, Sandhini Agarwal, Ariel Herbert-Voss, Gretchen Krueger, Tom Henighan, Rewon Child, Aditya Ramesh, Daniel M. Ziegler, Jeffrey Wu, Clemens Winter, Christopher Hesse, Mark Chen, Eric Sigler, Mateusz Litwin, Scott Gray, Benjamin Chess, Jack Clark, Christopher Berner, Sam McCandlish, Alec Radford, Ilya Sutskever, and Dario Amodei. 2020. Language Models are Few-Shot Learners. In Advances in Neural Information Processing Systems 33: Annual Conference on Neural Information Processing Systems 2020, NeurIPS 2020, December 6--12, 2020, virtual, Hugo Larochelle, Marc'Aurelio Ranzato, Raia Hadsell, Maria-Florina Balcan, and Hsuan-Tien Lin (Eds.). https:\/\/proceedings.neurips.cc\/paper\/2020\/hash\/1457c0d6bfcb4967418bfb8ac142f64a-Abstract.html"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.2307.03109"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1145\/3564284"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1145\/3357384.3358047"},{"key":"e_1_3_2_1_13_1","unstructured":"Cyril W. Cleverdon Jack Mills and Michael Keen. 1966. Factors determining the performance of indexing systems. In ASLIB Cranfield research project."},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1007\/s10462-020-09866-x"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.18653\/V1\/2023.ACL-LONG.626"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1145\/3159652.3159654"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1145\/3078714.3078715"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.2303.15056"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1145\/3539618.3591884"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1145\/3336191.3371857"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1145\/3289600.3291035"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1145\/3357384.3357988"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.2303.16854"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1145\/3290605.3300637"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1145\/3077136.3080840"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1007\/s10791-012--9205-0"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1145\/2911451.2914754"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1145\/2556195.2556220"},{"key":"e_1_3_2_1_29_1","volume-title":"Towards Deep Conversational Recommendations. In Advances in Neural Information Processing Systems 31: Annual Conference on Neural Information Processing Systems 2018","author":"Li Raymond","year":"2018","unstructured":"Raymond Li, Samira Ebrahimi Kahou, Hannes Schulz, Vincent Michalski, Laurent Charlin, and Chris Pal. 2018. Towards Deep Conversational Recommendations. In Advances in Neural Information Processing Systems 31: Annual Conference on Neural Information Processing Systems 2018, NeurIPS 2018, December 3--8, 2018, Montr\u00e9 al, Canada, Samy Bengio, Hanna M. Wallach, Hugo Larochelle, Kristen Grauman, Nicol\u00f2 Cesa-Bianchi, and Roman Garnett (Eds.). 9748--9758. https:\/\/proceedings.neurips.cc\/paper\/2018\/hash\/800de15c79c8d840f4e78d3af937d4d4-Abstract.html"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1145\/3178876.3186065"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1145\/2766462.2767721"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1145\/3002172"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1007\/978--3--319--76941--7_17"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1609\/hcomp.v4i1.13287"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.18653\/V1\/2020.ACL-MAIN.64"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1145\/3209978.3210052"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1145\/3397271.3401112"},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1145\/3313831.3376318"},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1145\/3477495.3531798"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1145\/3624989"},{"key":"e_1_3_2_1_41_1","volume-title":"Context Does Matter: Implications for Crowdsourced Evaluation Labels in Task-Oriented Dialogue Systems. arXiv preprint arXiv:2404.09980","author":"Siro Clemencia","year":"2024","unstructured":"Clemencia Siro, Mohammad Aliannejadi, and Maarten de Rijke. 2024. Context Does Matter: Implications for Crowdsourced Evaluation Labels in Task-Oriented Dialogue Systems. arXiv preprint arXiv:2404.09980 (2024)."},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1145\/3404835.3463241"},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1016\/J.IPM.2019.02.001"},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.18653\/V1\/2021.FINDINGS-EMNLP.354"},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1145\/3394486.3403202"},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.11253"},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.1561\/1500000066"},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"publisher","DOI":"10.1145\/2600428.2609579"}],"event":{"name":"SIGIR 2024: The 47th International ACM SIGIR Conference on Research and Development in Information Retrieval","sponsor":["SIGIR ACM Special Interest Group on Information Retrieval"],"location":"Washington DC USA","acronym":"SIGIR 2024"},"container-title":["Proceedings of the 47th International ACM SIGIR Conference on Research and Development in Information Retrieval"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3626772.3657712","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3626772.3657712","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T05:40:37Z","timestamp":1755841237000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3626772.3657712"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,7,10]]},"references-count":48,"alternative-id":["10.1145\/3626772.3657712","10.1145\/3626772"],"URL":"https:\/\/doi.org\/10.1145\/3626772.3657712","relation":{},"subject":[],"published":{"date-parts":[[2024,7,10]]},"assertion":[{"value":"2024-07-11","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}