{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,28]],"date-time":"2026-02-28T04:52:52Z","timestamp":1772254372087,"version":"3.50.1"},"publisher-location":"Cham","reference-count":31,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031702419","type":"print"},{"value":"9783031702426","type":"electronic"}],"license":[{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024]]},"DOI":"10.1007\/978-3-031-70242-6_37","type":"book-chapter","created":{"date-parts":[[2024,9,19]],"date-time":"2024-09-19T10:03:06Z","timestamp":1726740186000},"page":"392-407","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["CoURAGE: A Framework to\u00a0Evaluate RAG Systems"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-0005-6242","authenticated-orcid":false,"given":"Divyanshi","family":"Galla","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Shaz","family":"Hoda","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Meiwei","family":"Zhang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Wenzhe","family":"Quan","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Tommy Dong","family":"Yang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Joseph","family":"Voyles","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2024,9,20]]},"reference":[{"issue":"3","key":"37_CR1","doi-asserted-by":"publisher","first-page":"91","DOI":"10.1016\/j.tjem.2018.08.001","volume":"18","author":"H Akoglu","year":"2018","unstructured":"Akoglu, H.: User\u2019s guide to correlation coefficients. Turkish J. Emerg. Med. 18(3), 91\u201393 (2018)","journal-title":"Turkish J. Emerg. Med."},{"key":"37_CR2","unstructured":"Ashoori, M.: Decoding the true cost of generative AI for your enterprise (2023). https:\/\/www.linkedin.com\/pulse\/decoding-true-cost-generative-ai-your-enterprise-maryam-ashoori-phd\/"},{"key":"37_CR3","doi-asserted-by":"crossref","unstructured":"Chen, L., Zaharia, M., Zou, J.: How is ChatGPT\u2019s behavior changing over time? arXiv preprint arXiv:2307.09009 (2023)","DOI":"10.1162\/99608f92.5317da47"},{"issue":"1","key":"37_CR4","doi-asserted-by":"publisher","first-page":"80","DOI":"10.1177\/0741088301018001004","volume":"18","author":"NA Chenoweth","year":"2001","unstructured":"Chenoweth, N.A., Hayes, J.R.: Fluency in writing: generating text in L1 and L2. Writ. Commun. 18(1), 80\u201398 (2001)","journal-title":"Writ. Commun."},{"key":"37_CR5","unstructured":"Es, S., James, J., Espinosa-Anke, L., Schockaert, S.: RAGAS: automated evaluation of retrieval augmented generation. arXiv preprint arXiv:2309.15217 (2023)"},{"key":"37_CR6","unstructured":"Florian, L.B., Alexandre, V., Benjamin, N., Yann, C., Alexandre, A.: Exploring precision and recall to assess the quality and diversity of LLMs. arXiv preprint arXiv:2402.10693 (2024)"},{"key":"37_CR7","unstructured":"Gao, Y., et al.: Retrieval-augmented generation for large language models: a survey. arXiv preprint arXiv:2312.10997 (2023)"},{"key":"37_CR8","unstructured":"Gupta, G., Rastegarpanah, B., Iyer, A., Rubin, J., Kenthapadi, K.: Measuring distributional shifts in text: the advantage of language model-based embeddings. arXiv preprint arXiv:2312.02337 (2023)"},{"issue":"12","key":"37_CR9","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3571730","volume":"55","author":"Z Ji","year":"2023","unstructured":"Ji, Z., et al.: Survey of hallucination in natural language generation. ACM Comput. Surv. 55(12), 1\u201338 (2023)","journal-title":"ACM Comput. Surv."},{"key":"37_CR10","unstructured":"Katranidis, V., Barany, G.: FaaF: facts as a function for the evaluation of RAG systems. arXiv preprint arXiv:2403.03888 (2024)"},{"key":"37_CR11","doi-asserted-by":"crossref","unstructured":"Koike, R., Kaneko, M., Okazaki, N.: OUTFOX: LLM-generated essay detection through in-context learning with adversarially generated examples. arXiv preprint arXiv:2307.11729 (2023)","DOI":"10.1609\/aaai.v38i19.30120"},{"key":"37_CR12","unstructured":"Kuchnik, M., Smith, V., Amvrosiadis, G.: Validating large language models with ReLM. Proc. Mach. Learn. Syst. 5, 457\u2013476 (2023)"},{"key":"37_CR13","doi-asserted-by":"crossref","unstructured":"Kulikov, I., Miller, A.H., Cho, K., Weston, J.: Importance of search and evaluation strategies in neural dialogue modeling. arXiv preprint arXiv:1811.00907 (2018)","DOI":"10.18653\/v1\/W19-8609"},{"key":"37_CR14","unstructured":"Li, Y.: An open source data contamination report for llama series models. arXiv preprint arXiv:2310.17589 (2023)"},{"key":"37_CR15","unstructured":"Liang, P., et\u00a0al.: Holistic evaluation of language models. arXiv preprint arXiv:2211.09110 (2022)"},{"key":"37_CR16","unstructured":"Lin, C.Y.: ROUGE: a package for automatic evaluation of summaries. In: Text Summarization Branches Out, pp. 74\u201381 (2004)"},{"key":"37_CR17","unstructured":"OpenAI, R.: GPT-4 technical report. arXiv 2303 (2023)"},{"key":"37_CR18","doi-asserted-by":"crossref","unstructured":"Papineni, K., Roukos, S., Ward, T., Zhu, W.J.: BLEU: a method for automatic evaluation of machine translation. In: Proceedings of the 40th Annual Meeting of the Association for Computational Linguistics, pp. 311\u2013318 (2002)","DOI":"10.3115\/1073083.1073135"},{"key":"37_CR19","doi-asserted-by":"crossref","unstructured":"Risch, J., M\u00f6ller, T., Gutsch, J., Pietsch, M.: Semantic answer similarity for evaluating question answering models. arXiv preprint arXiv:2108.06130 (2021)","DOI":"10.18653\/v1\/2021.mrqa-1.15"},{"key":"37_CR20","doi-asserted-by":"crossref","unstructured":"Saad-Falcon, J., Khattab, O., Potts, C., Zaharia, M.: ARES: an automated evaluation framework for retrieval-augmented generation systems. arXiv preprint arXiv:2311.09476 (2023)","DOI":"10.18653\/v1\/2024.naacl-long.20"},{"key":"37_CR21","unstructured":"Sharma, S., El\u00a0Asri, L., Schulz, H., Zumer, J.: Relevance of unsupervised metrics in task-oriented dialogue for evaluating natural language generation. CoRR abs\/1706.09799 (2017). http:\/\/arxiv.org\/abs\/1706.09799"},{"key":"37_CR22","unstructured":"Shekhar, S., Dubey, T., Mukherjee, K., Saxena, A., Tyagi, A., Kotla, N.: Towards optimizing the costs of LLM usage. arXiv preprint arXiv:2402.01742 (2024)"},{"key":"37_CR23","doi-asserted-by":"crossref","unstructured":"Shuster, K., Poff, S., Chen, M., Kiela, D., Weston, J.: Retrieval augmentation reduces hallucination in conversation. arXiv preprint arXiv:2104.07567 (2021)","DOI":"10.18653\/v1\/2021.findings-emnlp.320"},{"key":"37_CR24","doi-asserted-by":"crossref","unstructured":"Takeshita, S., Ponzetto, S.P., Eckert, K.: ROUGE-K: do your summaries have keywords? arXiv preprint arXiv:2403.05186 (2024)","DOI":"10.18653\/v1\/2024.starsem-1.6"},{"key":"37_CR25","doi-asserted-by":"crossref","unstructured":"Thakur, N., Reimers, N., Daxenberger, J., Gurevych, I.: Augmented SBERT: data augmentation method for improving bi-encoders for pairwise sentence scoring tasks. arXiv preprint arXiv:2010.08240 (2020)","DOI":"10.18653\/v1\/2021.naacl-main.28"},{"key":"37_CR26","unstructured":"Touvron, H., et\u00a0al.: Llama 2: open foundation and fine-tuned chat models. arXiv preprint arXiv:2307.09288 (2023)"},{"key":"37_CR27","doi-asserted-by":"crossref","unstructured":"Trischler, A., et al.: NewsQA: a machine comprehension dataset. arXiv preprint arXiv:1611.09830 (2016)","DOI":"10.18653\/v1\/W17-2623"},{"key":"37_CR28","unstructured":"Zhang, T., Kishore, V., Wu, F., Weinberger, K.Q., Artzi, Y.: BERTScore: evaluating text generation with BERT. arXiv preprint arXiv:1904.09675 (2019)"},{"key":"37_CR29","doi-asserted-by":"crossref","unstructured":"Zhao, W., Peyrard, M., Liu, F., Gao, Y., Meyer, C.M., Eger, S.: MoverScore: text generation evaluating with contextualized embeddings and earth mover distance. arXiv preprint arXiv:1909.02622 (2019)","DOI":"10.18653\/v1\/D19-1053"},{"key":"37_CR30","unstructured":"Zheng, D., Liu, D., Lapata, M., Pan, J.Z.: TrustScore: reference-free evaluation of LLM response trustworthiness. arXiv preprint arXiv:2402.12545 (2024)"},{"key":"37_CR31","unstructured":"Zhou, K., et al.: Don\u2019t make your LLM an evaluation benchmark cheater. arXiv preprint arXiv:2311.01964 (2023)"}],"container-title":["Lecture Notes in Computer Science","Natural Language Processing and Information Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-70242-6_37","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,2,24]],"date-time":"2025-02-24T12:21:31Z","timestamp":1740399691000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-70242-6_37"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024]]},"ISBN":["9783031702419","9783031702426"],"references-count":31,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-70242-6_37","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024]]},"assertion":[{"value":"20 September 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"NLDB","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Applications of Natural Language to Information Systems","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Turin","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"25 June 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"27 June 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"nldb2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/nldb2024.di.unito.it\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}