{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,10]],"date-time":"2026-03-10T15:14:55Z","timestamp":1773155695945,"version":"3.50.1"},"reference-count":26,"publisher":"IEEE","license":[{"start":{"date-parts":[[2024,11,26]],"date-time":"2024-11-26T00:00:00Z","timestamp":1732579200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,11,26]],"date-time":"2024-11-26T00:00:00Z","timestamp":1732579200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024,11,26]]},"DOI":"10.1109\/fllm63129.2024.10852500","type":"proceedings-article","created":{"date-parts":[[2025,1,28]],"date-time":"2025-01-28T18:35:23Z","timestamp":1738089323000},"page":"462-469","source":"Crossref","is-referenced-by-count":2,"title":["Using LLMs for Evaluating QA Systems: Exploration and Assessment"],"prefix":"10.1109","author":[{"given":"Hadel","family":"Alhawasi","sequence":"first","affiliation":[{"name":"The George Washington University,Department of Computer Science,Washington,DC,USA"}]},{"given":"Abdou","family":"Youssef","sequence":"additional","affiliation":[{"name":"The George Washington University,Department of Computer Science,Washington,DC,USA"}]}],"member":"263","reference":[{"key":"ref1","article-title":"A comprehensive survey of ai-generated content (aigc): A history of generative ai from gan to chatgpt","author":"Cao","year":"2023"},{"key":"ref2","article-title":"Evaluating large language models trained on code","author":"Chen","year":"2021"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1145\/3524842.3528470"},{"key":"ref4","article-title":"Sparks of artificial general intelligence: Early experiments with gpt-4","author":"Bubeck","year":"2023"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1098\/rsta.2023.0254"},{"key":"ref6","article-title":"Causal reasoning and large language models: Opening a new frontier for causality","author":"K\u0131c\u0131man","year":"2023"},{"key":"ref7","article-title":"Towards understanding how machines can learn causal overhypotheses","author":"Kosoy","year":"2022"},{"key":"ref8","article-title":"Evaluating the logical reasoning ability of chatgpt and gpt-4","author":"Liu","year":"2023"},{"key":"ref9","article-title":"Towards logiglue: A brief survey and a benchmark for analyzing logical reasoning capabilities of language models","author":"Luo","year":"2023"},{"key":"ref10","first-page":"74","article-title":"Rouge: A package for automatic evaluation of summaries","author":"Lin","year":"2004","journal-title":"Text summarization branches out"},{"key":"ref11","first-page":"311","article-title":"Bleu: a method for automatic evaluation of machine translation","volume-title":"Proceedings of the 40th annual meeting of the Association for Computational Linguistics","author":"Papineni"},{"key":"ref12","first-page":"85","article-title":"Meteor 1.3: Automatic metric for reliable optimization and evaluation of machine translation systems","volume-title":"Proceedings of the sixth workshop on statistical machine translation","author":"Denkowski"},{"key":"ref13","first-page":"957","article-title":"From word embeddings to document distances","volume-title":"International Conference on machine learning","author":"Kusner"},{"key":"ref14","article-title":"Bertscore: Evaluating text generation with bert","author":"Zhang","year":"2019"},{"key":"ref15","article-title":"Mover-score: Text generation evaluating with contextualized embeddings and earth mover distance","author":"Zhao","year":"2019"},{"key":"ref16","article-title":"Chatgpt as a factual inconsistency evaluator for abstractive text summarization","author":"Luo","year":"2023"},{"key":"ref17","article-title":"Large language models are state-of-the-art evaluators of translation quality","author":"Kocmi","year":"2023"},{"key":"ref18","doi-asserted-by":"crossref","DOI":"10.18653\/v1\/2023.emnlp-main.153","article-title":"G-eval: Nlg evaluation using gpt-4 with better human alignment","author":"Liu","year":"2023"},{"key":"ref19","article-title":"Bidimensional leaderboards: Generate and evaluate language hand in hand","author":"Kasai","year":"2021"},{"key":"ref20","article-title":"Bert: Pre-training of deep bidirectional transformers for language understanding","author":"Devlin","year":"2018"},{"key":"ref21","article-title":"Gptscore: Evaluate as you desire","author":"Fu","year":"2023"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.emnlp-main.131"},{"key":"ref23","article-title":"Freshllms: Refreshing large language models with search engine augmentation","author":"Vu","year":"2023"},{"key":"ref24","article-title":"Llama 2: Open foundation and fine-tuned chat models","author":"Touvron","year":"2023"},{"key":"ref25","article-title":"Gpt-4 technical report","author":"Achiam","year":"2023"},{"key":"ref26","article-title":"Gemini: a family of highly capable multimodal models","author":"Team","year":"2023"}],"event":{"name":"2024 2nd International Conference on Foundation and Large Language Models (FLLM)","location":"Dubai, United Arab Emirates","start":{"date-parts":[[2024,11,26]]},"end":{"date-parts":[[2024,11,29]]}},"container-title":["2024 2nd International Conference on Foundation and Large Language Models (FLLM)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/10852419\/10852420\/10852500.pdf?arnumber=10852500","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,1,29]],"date-time":"2025-01-29T18:45:31Z","timestamp":1738176331000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10852500\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,11,26]]},"references-count":26,"URL":"https:\/\/doi.org\/10.1109\/fllm63129.2024.10852500","relation":{},"subject":[],"published":{"date-parts":[[2024,11,26]]}}}