{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,7]],"date-time":"2026-02-07T20:56:28Z","timestamp":1770497788782,"version":"3.49.0"},"publisher-location":"Cham","reference-count":46,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031475450","type":"print"},{"value":"9783031475467","type":"electronic"}],"license":[{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023]]},"DOI":"10.1007\/978-3-031-47546-7_11","type":"book-chapter","created":{"date-parts":[[2023,11,2]],"date-time":"2023-11-02T00:03:15Z","timestamp":1698883395000},"page":"151-171","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":10,"title":["Unraveling ChatGPT: A Critical Analysis of\u00a0AI-Generated Goal-Oriented Dialogues and\u00a0Annotations"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-7713-7679","authenticated-orcid":false,"given":"Tiziano","family":"Labruna","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0009-0001-3748-1448","authenticated-orcid":false,"given":"Sofia","family":"Brenna","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9998-1942","authenticated-orcid":false,"given":"Andrea","family":"Zaninello","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0740-5778","authenticated-orcid":false,"given":"Bernardo","family":"Magnini","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2023,11,2]]},"reference":[{"issue":"4","key":"11_CR1","doi-asserted-by":"publisher","first-page":"351","DOI":"10.1177\/002383099103400404","volume":"34","author":"AH Anderson","year":"1991","unstructured":"Anderson, A.H., et al.: The HCRC map task corpus. Lang. Speech 34(4), 351\u2013366 (1991). https:\/\/doi.org\/10.1177\/002383099103400404","journal-title":"Lang. Speech"},{"key":"11_CR2","unstructured":"Balaraman, V., Sheikhalishahi, S., Magnini, B.: Recent neural methods on dialogue state tracking for task-oriented dialogue systems: a survey. In: Proceedings of the 22nd Annual Meeting of the Special Interest Group on Discourse and Dialogue, pp. 239\u2013251. Association for Computational Linguistics, Singapore and Online (2021). https:\/\/aclanthology.org\/2021.sigdial-1.25"},{"key":"11_CR3","doi-asserted-by":"publisher","unstructured":"Brown, T.B., et al.: Language models are few-shot learners (2020). https:\/\/doi.org\/10.48550\/ARXIV.2005.14165, https:\/\/arxiv.org\/abs\/2005.14165","DOI":"10.48550\/ARXIV.2005.14165"},{"key":"11_CR4","doi-asserted-by":"publisher","unstructured":"Budzianowski, P., et al.: MultiWOZ - a large-scale multi-domain Wizard-of-Oz dataset for task-oriented dialogue modelling. In: Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing, pp. 5016\u20135026. Association for Computational Linguistics, Brussels, Belgium (2018). https:\/\/doi.org\/10.18653\/v1\/D18-1547, https:\/\/aclanthology.org\/D18-1547","DOI":"10.18653\/v1\/D18-1547"},{"key":"11_CR5","doi-asserted-by":"publisher","unstructured":"Cawsey, A.: Explanatory dialogues. Interact. Comput. 1(1), 69\u201392 (1989). https:\/\/doi.org\/10.1016\/0953-5438(89)90008-8, https:\/\/www.sciencedirect.com\/science\/article\/pii\/0953543889900088","DOI":"10.1016\/0953-5438(89)90008-8"},{"key":"11_CR6","unstructured":"Christiano, P.F., Leike, J., Brown, T., Martic, M., Legg, S., Amodei, D.: Deep reinforcement learning from human preferences. In: Advances in Neural Information Processing Systems, vol. 30 (2017)"},{"issue":"10","key":"11_CR7","doi-asserted-by":"publisher","first-page":"15","DOI":"10.1002\/pfi.21727","volume":"56","author":"SY Chyung","year":"2017","unstructured":"Chyung, S.Y., Roberts, K., Swanson, I., Hankinson, A.: Evidence-based survey design: the use of a midpoint on the Likert scale. Perform. Improv. 56(10), 15\u201323 (2017)","journal-title":"Perform. Improv."},{"key":"11_CR8","unstructured":"Cohen, A.D., et al.: LaMDA: language models for dialog applications. arXiv (2022)"},{"key":"11_CR9","unstructured":"Devlin, J., Chang, M.W., Lee, K., Toutanova, K.: BERT: pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805 (2018)"},{"key":"11_CR10","doi-asserted-by":"crossref","unstructured":"Dey, S., Kummara, R., Desarkar, M.S.: Towards fair evaluation of dialogue state tracking by flexible incorporation of turn-level performances. arXiv preprint arXiv:2204.03375 (2022)","DOI":"10.18653\/v1\/2022.acl-short.35"},{"key":"11_CR11","doi-asserted-by":"publisher","unstructured":"Ding, B., Qin, C., Liu, L., Bing, L., Joty, S., Li, B.: Is GPT-3 a good data annotator? (2022). https:\/\/doi.org\/10.48550\/ARXIV.2212.10450, https:\/\/arxiv.org\/abs\/2212.10450","DOI":"10.48550\/ARXIV.2212.10450"},{"key":"11_CR12","unstructured":"Gamer, M., Lemon, J., Fellows, I., Singh, S.: Various coefficients of interrater reliability and agreement (2019). https:\/\/CRAN.R-project.org\/package=irr. R package version 0.84.1"},{"issue":"1","key":"11_CR13","first-page":"66","volume":"2","author":"R Garland","year":"1991","unstructured":"Garland, R.: The mid-point on a rating scale: is it desirable. Mark. Bull. 2(1), 66\u201370 (1991)","journal-title":"Mark. Bull."},{"key":"11_CR14","doi-asserted-by":"crossref","unstructured":"Gilardi, F., Alizadeh, M., Kubli, M.: ChatGPT outperforms crowd-workers for text-annotation tasks. arXiv preprint arXiv:2303.15056 (2023)","DOI":"10.1073\/pnas.2305016120"},{"key":"11_CR15","doi-asserted-by":"publisher","unstructured":"Gilardi, F., Alizadeh, M., Kubli, M.: ChatGPT outperforms crowd workers for text-annotation tasks. Proc. Natl. Acad. Sci. 120(30), e2305016120 (2023). https:\/\/doi.org\/10.1073\/pnas.2305016120, https:\/\/www.pnas.org\/doi\/abs\/10.1073\/pnas.2305016120","DOI":"10.1073\/pnas.2305016120"},{"key":"11_CR16","doi-asserted-by":"crossref","unstructured":"Guo, J., Shuang, K., Zhang, K., Liu, Y., Li, J., Wang, Z.: Learning to imagine: distillation-based interactive context exploitation for dialogue state tracking. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol. 37, pp. 12845\u201312853 (2023)","DOI":"10.1609\/aaai.v37i11.26510"},{"key":"11_CR17","doi-asserted-by":"crossref","unstructured":"Howard, J., Ruder, S.: Universal language model fine-tuning for text classification. arXiv preprint arXiv:1801.06146 (2018)","DOI":"10.18653\/v1\/P18-1031"},{"key":"11_CR18","doi-asserted-by":"crossref","unstructured":"Huang, F., Kwak, H., An, J.: Is ChatGPT better than human annotators? Potential and limitations of ChatGPT in explaining implicit hate speech. ArXiv abs\/2302.07736 (2023)","DOI":"10.1145\/3543873.3587368"},{"key":"11_CR19","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3571730","volume":"55","author":"Z Ji","year":"2022","unstructured":"Ji, Z., et al.: Survey of hallucination in natural language generation. ACM Comput. Surv. 55, 1\u201338 (2022)","journal-title":"ACM Comput. Surv."},{"key":"11_CR20","doi-asserted-by":"publisher","unstructured":"Kelley, J.F.: An empirical methodology for writing user-friendly natural language computer applications. In: Proceedings of the SIGCHI Conference on Human Factors in Computing Systems, CHI 1983, pp. 193\u2013196. Association for Computing Machinery, New York (1983). https:\/\/doi.org\/10.48550\/ARXIV.2005.14165, https:\/\/arxiv.org\/abs\/2005.14165","DOI":"10.48550\/ARXIV.2005.14165"},{"key":"11_CR21","unstructured":"Krippendorff, K.: Computing Krippendorff\u2019s alpha-reliability (2011)"},{"issue":"2","key":"11_CR22","doi-asserted-by":"publisher","first-page":"249","DOI":"10.2501\/S147078530920120X","volume":"52","author":"P Lietz","year":"2010","unstructured":"Lietz, P.: Research into questionnaire design: a summary of the literature. Int. J. Mark. Res. 52(2), 249\u2013272 (2010)","journal-title":"Int. J. Mark. Res."},{"key":"11_CR23","unstructured":"Liu, P., Yuan, W., Fu, J., Jiang, Z., Hayashi, H., Neubig, G.: Pre-train, prompt, and predict: a systematic survey of prompting methods in natural language processing. CoRR abs\/2107.13586 (2021). https:\/\/arxiv.org\/abs\/2107.13586"},{"key":"11_CR24","first-page":"1","volume":"55","author":"P Liu","year":"2021","unstructured":"Liu, P., Yuan, W., Fu, J., Jiang, Z., Hayashi, H., Neubig, G.: Pre-train, prompt, and predict: a systematic survey of prompting methods in natural language processing. ACM Comput. Surv. (CSUR) 55, 1\u201335 (2021)","journal-title":"ACM Comput. Surv. (CSUR)"},{"key":"11_CR25","doi-asserted-by":"publisher","unstructured":"Louvan, S., Magnini, B.: Recent neural methods on slot filling and intent classification for task-oriented dialogue systems: a survey. In: Proceedings of the 28th International Conference on Computational Linguistics, pp. 480\u2013496. International Committee on Computational Linguistics, Barcelona (Online) (2020). https:\/\/doi.org\/10.18653\/v1\/2020.coling-main.42, https:\/\/aclanthology.org\/2020.coling-main.42","DOI":"10.18653\/v1\/2020.coling-main.42"},{"issue":"3","key":"11_CR26","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1007\/978-3-031-02176-3","volume":"13","author":"M McTear","year":"2020","unstructured":"McTear, M.: Conversational AI: dialogue systems, conversational agents, and chatbots. Synthesis Lect. Hum. Lang. Technol. 13(3), 1\u2013251 (2020)","journal-title":"Synthesis Lect. Hum. Lang. Technol."},{"key":"11_CR27","doi-asserted-by":"publisher","DOI":"10.7551\/mitpress\/5247.001.0001","volume-title":"Participating in Explanatory Dialogues: Interpreting and Responding to Questions in Context","author":"JD Moore","year":"1994","unstructured":"Moore, J.D.: Participating in Explanatory Dialogues: Interpreting and Responding to Questions in Context. MIT Press, Cambridge (1994)"},{"key":"11_CR28","unstructured":"OpenAI: Introducing ChatGPT. OpenAI Blog (2022). https:\/\/openai.com\/blog\/chatgpt"},{"key":"11_CR29","doi-asserted-by":"publisher","unstructured":"Ouyang, L., et al.: Training language models to follow instructions with human feedback (2022). https:\/\/doi.org\/10.48550\/ARXIV.2203.02155, https:\/\/arxiv.org\/abs\/2203.02155","DOI":"10.48550\/ARXIV.2203.02155"},{"key":"11_CR30","unstructured":"Ouyang, L., et al.: Training language models to follow instructions with human feedback. arXiv preprint arXiv:2203.02155 (2022)"},{"key":"11_CR31","unstructured":"Pangakis, N., Wolken, S., Fasching, N.: Automated annotation with generative AI requires validation. arXiv preprint arXiv:2306.00176 (2023)"},{"key":"11_CR32","unstructured":"R Core Team: R: A language and environment for statistical computing. R Foundation for Statistical Computing, Vienna, Austria (2021). https:\/\/www.R-project.org\/"},{"issue":"8","key":"11_CR33","first-page":"9","volume":"1","author":"A Radford","year":"2019","unstructured":"Radford, A., et al.: Language models are unsupervised multitask learners. OpenAI Blog 1(8), 9 (2019)","journal-title":"OpenAI Blog"},{"key":"11_CR34","unstructured":"Raffel, C., et al.: Exploring the limits of transfer learning with a unified text-to-text transformer. J. Mach. Learn. Res. 21(140), 1\u201367 (2020). http:\/\/jmlr.org\/papers\/v21\/20-074.html"},{"key":"11_CR35","doi-asserted-by":"crossref","unstructured":"Reiss, M.V.: Testing the reliability of ChatGPT for text annotation and classification: a cautionary remark. arXiv preprint arXiv:2304.11085 (2023)","DOI":"10.31219\/osf.io\/rvy5p"},{"issue":"3","key":"11_CR36","doi-asserted-by":"publisher","first-page":"717","DOI":"10.1109\/TCDS.2020.3044366","volume":"13","author":"KJ Rohlfing","year":"2021","unstructured":"Rohlfing, K.J., et al.: Explanation as a social practice: toward a conceptual framework for the social design of ai systems. IEEE Trans. Cogn. Dev. Syst. 13(3), 717\u2013728 (2021). https:\/\/doi.org\/10.1109\/TCDS.2020.3044366","journal-title":"IEEE Trans. Cogn. Dev. Syst."},{"key":"11_CR37","unstructured":"Sucameli, I., De Quattro, M., Eshghi, A., Suglia, A., Simi, M.: Dialogue act and slot recognition in Italian complex dialogues. In: Proceedings of the Workshop on Resources and Technologies for Indigenous, Endangered and Lesser-resourced Languages in Eurasia within the 13th Language Resources and Evaluation Conference, pp. 51\u201360 (2022)"},{"key":"11_CR38","doi-asserted-by":"publisher","first-page":"67","DOI":"10.4000\/ijcol.842","volume":"7","author":"I Sucameli","year":"2021","unstructured":"Sucameli, I., Lenci, A., Magnini, B., Speranza, M., Simi, M.: Toward data-driven collaborative dialogue systems: the JILDA dataset. Ital. J. Comput. Linguist. 7, 67\u201390 (2021)","journal-title":"Ital. J. Comput. Linguist."},{"key":"11_CR39","unstructured":"T\u00f6rnberg, P.: ChatGPT-4 outperforms experts and crowd workers in annotating political twitter messages with zero-shot learning. arXiv preprint arXiv:2304.06588 (2023)"},{"key":"11_CR40","unstructured":"Touvron, H., et al.: Llama 2: open foundation and fine-tuned chat models (2023)"},{"key":"11_CR41","unstructured":"Wachsmuth, H., Alshomary, M.: \u201cMama always had a way of explaining things so I could understand\u201d: a dialogue corpus for learning to construct explanations. In: Proceedings of the 29th International Conference on Computational Linguistics, pp. 344\u2013354. International Committee on Computational Linguistics, Gyeongju (2022). https:\/\/aclanthology.org\/2022.coling-1.27"},{"key":"11_CR42","doi-asserted-by":"crossref","unstructured":"Xie, H., et al.: Correctable-DST: mitigating historical context mismatch between training and inference for improved dialogue state tracking. In: Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing, pp. 876\u2013889 (2022)","DOI":"10.18653\/v1\/2022.emnlp-main.56"},{"key":"11_CR43","doi-asserted-by":"crossref","unstructured":"Ye, F., Feng, Y., Yilmaz, E.: ASSIST: towards label noise-robust dialogue state tracking. arXiv preprint arXiv:2202.13024 (2022)","DOI":"10.18653\/v1\/2022.findings-acl.214"},{"key":"11_CR44","doi-asserted-by":"crossref","unstructured":"Ye, F., Manotumruksa, J., Yilmaz, E.: MultiWOZ 2.4: a multi-domain task-oriented dialogue dataset with essential annotation corrections to improve state tracking evaluation. arXiv preprint arXiv:2104.00773 (2021)","DOI":"10.18653\/v1\/2022.sigdial-1.34"},{"key":"11_CR45","unstructured":"Zhang, T., Kishore, V., Wu, F., Weinberger, K.Q., Artzi, Y.: BERTScore: evaluating text generation with BERT. arXiv preprint arXiv:1904.09675 (2019)"},{"key":"11_CR46","unstructured":"Zhu, Y., Zhang, P., Haq, E.U., Hui, P., Tyson, G.: Can ChatGPT reproduce human-generated labels? A study of social computing tasks. arXiv preprint arXiv:2304.10145 (2023)"}],"container-title":["Lecture Notes in Computer Science","AIxIA 2023 \u2013 Advances in Artificial Intelligence"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-47546-7_11","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,12,25]],"date-time":"2023-12-25T03:25:35Z","timestamp":1703474735000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-47546-7_11"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023]]},"ISBN":["9783031475450","9783031475467"],"references-count":46,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-47546-7_11","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2023]]},"assertion":[{"value":"2 November 2023","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"AIxIA","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference of the Italian Association for Artificial Intelligence","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Rome","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2023","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"6 November 2023","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"9 November 2023","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"22","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"aiia2023","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/www.aixia2023.cnr.it\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Single-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Easychair.org","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"53","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"33","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"62% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"20 external reviewers.","order":10,"name":"additional_info_on_review_process","label":"Additional Info on Review Process","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}