{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,1,18]],"date-time":"2025-01-18T05:07:13Z","timestamp":1737176833002,"version":"3.33.0"},"reference-count":36,"publisher":"IEEE","license":[{"start":{"date-parts":[[2024,12,2]],"date-time":"2024-12-02T00:00:00Z","timestamp":1733097600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,12,2]],"date-time":"2024-12-02T00:00:00Z","timestamp":1733097600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/100000185","name":"Defense Advanced Research Projects Agency","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100000185","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100004318","name":"Microsoft","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100004318","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100004318","name":"Microsoft","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100004318","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024,12,2]]},"DOI":"10.1109\/slt61566.2024.10832298","type":"proceedings-article","created":{"date-parts":[[2025,1,16]],"date-time":"2025-01-16T18:31:27Z","timestamp":1737052287000},"page":"913-920","source":"Crossref","is-referenced-by-count":0,"title":["Large Language Models as User-Agents For Evaluating Task-Oriented-Dialogue Systems"],"prefix":"10.1109","author":[{"given":"Taaha","family":"Kazi","sequence":"first","affiliation":[{"name":"University of Illinois at Urbana Champaign"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ruiliang","family":"Lyu","sequence":"additional","affiliation":[{"name":"University of Illinois at Urbana Champaign"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Sizhe","family":"Zhou","sequence":"additional","affiliation":[{"name":"University of Illinois at Urbana Champaign"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Dilek","family":"Hakkani-T\u00fcr","sequence":"additional","affiliation":[{"name":"University of Illinois at Urbana Champaign"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Gokhan","family":"Tur","sequence":"additional","affiliation":[{"name":"University of Illinois at Urbana Champaign"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/E17-1042"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/n18-1187"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1007\/s11431-020-1692-3"},{"article-title":"Improving language understanding by generative pre-training","year":"2018","author":"Radford","key":"ref4"},{"issue":"8","key":"ref5","article-title":"Language models are unsupervised multitask learners","volume":"1","author":"Radford","year":"2019","journal-title":"OpenAI blog"},{"key":"ref6","article-title":"Language models are few-shot learners","volume-title":"Proceedings of NeurIPS","volume":"33","author":"Brown"},{"key":"ref7","article-title":"Llama: Open and efficient foundation language models","author":"Touvron","year":"2023","journal-title":"arXiv preprint arXiv:2302.13971"},{"article-title":"Llama 2: Open foundation and fine-tuned chat models","year":"2023","author":"Touvron","key":"ref8"},{"key":"ref9","article-title":"Travelplanner: A benchmark for real-world planning with language agents","author":"Xie","year":"2024","journal-title":"arXiv preprint arXiv:2402.01622"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.findings-emnlp.90"},{"key":"ref11","article-title":"User simulation with large language models for evaluating task-oriented dialogue","author":"Davidson","year":"2023","journal-title":"arXiv preprint arXiv:2309.13233"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.3115\/1622064.1622097"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1007\/s10462-007-9059-9"},{"key":"ref14","article-title":"Bert: Pre-training of deep bidirectional transformers for language understanding","author":"Devlin","year":"2019","journal-title":"NAACL"},{"key":"ref15","article-title":"Language models are unsupervised multitask learners","author":"Radford","year":"2019","journal-title":"OpenAI"},{"issue":"1","key":"ref16","doi-asserted-by":"crossref","DOI":"10.1145\/3596510","article-title":"Metaphorical user simulators for evaluating task-oriented dialogue systems","volume":"42","author":"Sun","year":"2023","journal-title":"ACM Trans. Inf. Syst."},{"issue":"1","key":"ref17","article-title":"Exploring the limits of transfer learning with a unified text-to-text transformer","volume":"21","author":"Raffel","year":"2020","journal-title":"J. Mach. Learn. Res."},{"article-title":"MultiWOZ-a large-scale multi-domain Wizard-of-Oz dataset for task-oriented dialogue modelling","volume-title":"Proceedings of EMNLP","author":"Pawe\u0142","key":"ref18"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.sigdial-1.28"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.4324\/9781003022022-6"},{"key":"ref21","article-title":"Teaching models new apis: Domain-agnostic simulators for task oriented dialogue","volume":"abs\/2110.06905","author":"Chen","year":"2021","journal-title":"CoRR"},{"article-title":"Large language models are zero-shot reasoners","volume-title":"Proceedings of NeurIPS","author":"Kojima","key":"ref22"},{"article-title":"Measuring massive multitask language understanding","volume-title":"Proceedings of ICLR","author":"Hendrycks","key":"ref23"},{"key":"ref24","article-title":"Holistic evaluation of language models","author":"Liang","year":"2023","journal-title":"arXiv preprint arXiv:2211.09110"},{"article-title":"Do as i can, not as i say: Grounding language in robotic affordances","volume-title":"Proceedings of CoRL","author":"Ahn","key":"ref25"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00886"},{"article-title":"Judging llm-as-a-judge with mt-bench and chatbot arena","volume-title":"Proceedings of NeurIPS Datasets and Benchmarks Track","author":"Zheng","key":"ref27"},{"key":"ref28","article-title":"Agentbench: Evaluating llms as agents","author":"Liu","year":"2023","journal-title":"arXiv preprint arXiv:2308.03688"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.findings-acl.514"},{"article-title":"Are llms all you need for task-oriented dialogue?","volume-title":"Proceedings of SIGdial","author":"Hude\u010dcek","key":"ref30"},{"article-title":"Chain-of-thought prompting elicits reasoning in large language models","volume-title":"Proceedings of NeurIPS","author":"Wei","key":"ref31"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.sigdial-1.25"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.sigdial-1.58"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1017\/S0269888912000343"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.emnlp-main.248"},{"article-title":"MultiWOZ 2.1: A consolidated multi-domain dialogue dataset with state corrections and state tracking baselines","volume-title":"Proceedings of LREC","author":"Eric","key":"ref36"}],"event":{"name":"2024 IEEE Spoken Language Technology Workshop (SLT)","start":{"date-parts":[[2024,12,2]]},"location":"Macao","end":{"date-parts":[[2024,12,5]]}},"container-title":["2024 IEEE Spoken Language Technology Workshop (SLT)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/10830790\/10830793\/10832298.pdf?arnumber=10832298","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,1,17]],"date-time":"2025-01-17T08:14:59Z","timestamp":1737101699000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10832298\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,12,2]]},"references-count":36,"URL":"https:\/\/doi.org\/10.1109\/slt61566.2024.10832298","relation":{},"subject":[],"published":{"date-parts":[[2024,12,2]]}}}