{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,11]],"date-time":"2026-03-11T02:20:00Z","timestamp":1773195600423,"version":"3.50.1"},"reference-count":26,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,11,12]],"date-time":"2025-11-12T00:00:00Z","timestamp":1762905600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,11,12]],"date-time":"2025-11-12T00:00:00Z","timestamp":1762905600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,11,12]]},"DOI":"10.1109\/cogmi67134.2025.00012","type":"proceedings-article","created":{"date-parts":[[2026,3,9]],"date-time":"2026-03-09T19:55:35Z","timestamp":1773086135000},"page":"12-23","source":"Crossref","is-referenced-by-count":0,"title":["An Iterative Multi-Agent Analysis for Automated Evaluation in NLG Tasks"],"prefix":"10.1109","author":[{"given":"Hadel","family":"Alhawasi","sequence":"first","affiliation":[{"name":"The George Washington University,Department of Computer Science,Washington, DC,USA"}]},{"given":"Ruocheng","family":"Shan","sequence":"additional","affiliation":[{"name":"The George Washington University,Department of Computer Science,Washington, DC,USA"}]},{"given":"Abdou","family":"Youssef","sequence":"additional","affiliation":[{"name":"The George Washington University,Department of Computer Science,Washington, DC,USA"}]}],"member":"263","reference":[{"key":"ref1","first-page":"313","article-title":"Comparing automatic and human evaluation of nlg systems","volume-title":"11th Conference of the european chapter of the association for computational linguistics","author":"Belz"},{"key":"ref2","first-page":"74","article-title":"Rouge: A package for automatic evaluation of summaries","author":"Lin","year":"2004","journal-title":"Text summarization branches out"},{"key":"ref3","article-title":"Bertscore: Evaluating text generation with bert","author":"Zhang","year":"2019"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.naacl-long.365"},{"key":"ref5","doi-asserted-by":"crossref","DOI":"10.18653\/v1\/2023.emnlp-main.153","article-title":"G-eval: Nlg evaluation using gpt-4 with better human alignment","author":"Liu","year":"2023"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/ICSME46990.2020.00067"},{"key":"ref7","article-title":"Merge, ensemble, and cooperate! a survey on collaborative strategies in the era of large language models","author":"Lu","year":"2024"},{"key":"ref8","article-title":"Agentbench: Evaluating llms as agents","author":"Liu","year":"2023"},{"key":"ref9","article-title":"Improving factuality and reasoning in language models through multiagent debate","volume-title":"Forty-first International Conference on Machine Learning","author":"Du"},{"key":"ref10","article-title":"Does writing with language models reduce content diversity?","volume-title":"The Twelfth International Conference on Learning Representations","author":"Padmakumar"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.acl-long.870"},{"key":"ref12","article-title":"Large language models are state-of-the-art evaluators of translation quality","author":"Kocmi","year":"2023"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-66997-2_1"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/FLLM63129.2024.10852500"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.52202\/079017-1754"},{"key":"ref16","article-title":"Chateval: Towards better llm-based evaluators through multi-agent debate","author":"Chan","year":"2023"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1162\/tacl_a_00276"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1162\/tacl_a_00373"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.3390\/app11146421"},{"key":"ref20","article-title":"Chatlaw: A multi-agent collaborative legal assistant with knowledge graph enhanced mixture-of-experts large language model","author":"Cui","year":"2023"},{"key":"ref21","article-title":"Hello gpt-4","year":"2024"},{"key":"ref22","article-title":"Claude 3.5 sonnet","year":"2024","journal-title":"Anthropic Blog"},{"key":"ref23","article-title":"Gpt-4 technical report","author":"Achiam","year":"2023"},{"key":"ref24","article-title":"Gemini: a family of highly capable multimodal models","author":"Anil","year":"2023"},{"key":"ref25","article-title":"Gemma 2: Improving open language models at a practical size","author":"Riviere","year":"2024"},{"key":"ref26","article-title":"Large language models cannot self-correct reasoning yet","author":"Huang","year":"2023"}],"event":{"name":"2025 IEEE 7th International Conference on Cognitive Machine Intelligence (CogMI)","location":"Pittsburgh, PA, USA","start":{"date-parts":[[2025,11,12]]},"end":{"date-parts":[[2025,11,14]]}},"container-title":["2025 IEEE 7th International Conference on Cognitive Machine Intelligence (CogMI)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11417011\/11416503\/11417045.pdf?arnumber=11417045","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,3,10]],"date-time":"2026-03-10T05:33:41Z","timestamp":1773120821000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11417045\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,11,12]]},"references-count":26,"URL":"https:\/\/doi.org\/10.1109\/cogmi67134.2025.00012","relation":{},"subject":[],"published":{"date-parts":[[2025,11,12]]}}}