{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,9]],"date-time":"2026-06-09T12:04:03Z","timestamp":1781006643319,"version":"3.54.1"},"reference-count":58,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T00:00:00Z","timestamp":1780272000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T00:00:00Z","timestamp":1780272000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/legal\/tdmrep-license"},{"start":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T00:00:00Z","timestamp":1780272000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-017"},{"start":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T00:00:00Z","timestamp":1780272000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"},{"start":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T00:00:00Z","timestamp":1780272000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-012"},{"start":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T00:00:00Z","timestamp":1780272000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T00:00:00Z","timestamp":1780272000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-004"}],"funder":[{"DOI":"10.13039\/501100010418","name":"IITP","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100010418","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100003725","name":"NRF","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100003725","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100010418","name":"IITP","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100010418","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Knowledge-Based Systems"],"published-print":{"date-parts":[[2026,6]]},"DOI":"10.1016\/j.knosys.2026.116152","type":"journal-article","created":{"date-parts":[[2026,5,8]],"date-time":"2026-05-08T23:30:02Z","timestamp":1778283002000},"page":"116152","update-policy":"https:\/\/doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":1,"special_numbering":"C","title":["SERA: Self-referential assessment framework for bidirectional generative commonsense reasoning"],"prefix":"10.1016","volume":"345","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-4761-9818","authenticated-orcid":false,"given":"Jaehyung","family":"Seo","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0841-4262","authenticated-orcid":false,"given":"Hyeonseok","family":"Moon","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3971-082X","authenticated-orcid":false,"given":"Yoonna","family":"Jang","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9269-1157","authenticated-orcid":false,"given":"Heuiseok","family":"Lim","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"78","reference":[{"issue":"5","key":"10.1016\/j.knosys.2026.116152_b1","doi-asserted-by":"crossref","first-page":"868","DOI":"10.1007\/s10439-023-03172-7","article-title":"Role of chat gpt in public health","volume":"51","author":"Biswas","year":"2023","journal-title":"Ann. Biomed. Eng."},{"issue":"3","key":"10.1016\/j.knosys.2026.116152_b2","article-title":"Study and analysis of chat GPT and its impact on different fields of study","volume":"8","author":"Kalla","year":"2023","journal-title":"Int. J. Innov. Sci. Res. Technol."},{"issue":"6","key":"10.1016\/j.knosys.2026.116152_b3","doi-asserted-by":"crossref","first-page":"3197","DOI":"10.1007\/s11845-023-03377-8","article-title":"GPT-4: a new era of artificial intelligence in medicine","volume":"192","author":"Waisberg","year":"2023","journal-title":"Ir. J. Med. Sci. (1971-)"},{"key":"10.1016\/j.knosys.2026.116152_b4","series-title":"Capabilities of gpt-4 on medical challenge problems","author":"Nori","year":"2023"},{"issue":"2270","key":"10.1016\/j.knosys.2026.116152_b5","doi-asserted-by":"crossref","DOI":"10.1098\/rsta.2023.0254","article-title":"Gpt-4 passes the bar exam","volume":"382","author":"Katz","year":"2024","journal-title":"Phil. Trans. R. Soc. A"},{"key":"10.1016\/j.knosys.2026.116152_b6","series-title":"GPT-4 technical report","author":"OpenAI","year":"2023"},{"key":"10.1016\/j.knosys.2026.116152_b7","series-title":"Will GPT-4 run doom?","author":"de Wynter","year":"2024"},{"key":"10.1016\/j.knosys.2026.116152_b8","series-title":"Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation","first-page":"3098","article-title":"ChatGPT is a knowledgeable but inexperienced solver: An investigation of commonsense problem in large language models","author":"Bian","year":"2024"},{"key":"10.1016\/j.knosys.2026.116152_b9","series-title":"Evaluating the logical reasoning ability of chatgpt and gpt-4","author":"Liu","year":"2023"},{"issue":"2","key":"10.1016\/j.knosys.2026.116152_b10","doi-asserted-by":"crossref","first-page":"234","DOI":"10.2307\/2344179","article-title":"The planning of observational studies of human populations","volume":"128","author":"Cochran","year":"1965","journal-title":"J. R. Stat. Soc. Ser. A (General)"},{"key":"10.1016\/j.knosys.2026.116152_b11","series-title":"Overt Bias in Observational Studies","author":"Rosenbaum","year":"2002"},{"key":"10.1016\/j.knosys.2026.116152_b12","series-title":"2003 AAAI Spring Symposium","first-page":"28","article-title":"TimeML: Robust specification of event and temporal expressions in text","volume":"vol. 3","author":"Pustejovsky","year":"2003"},{"key":"10.1016\/j.knosys.2026.116152_b13","series-title":"Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing","first-page":"4569","article-title":"GLUCOSE: GeneraLized and COntextualized story explanations","author":"Mostafazadeh","year":"2020"},{"key":"10.1016\/j.knosys.2026.116152_b14","series-title":"An overview of temporal commonsense reasoning and acquisition","author":"Wenzel","year":"2023"},{"key":"10.1016\/j.knosys.2026.116152_b15","series-title":"Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics","first-page":"5185","article-title":"Climbing towards NLU: On meaning, form, and understanding in the age of data","author":"Bender","year":"2020"},{"key":"10.1016\/j.knosys.2026.116152_b16","series-title":"2011 AAAI Spring Symposium Series","article-title":"Choice of plausible alternatives: An evaluation of commonsense causal reasoning","author":"Roemmele","year":"2011"},{"key":"10.1016\/j.knosys.2026.116152_b17","series-title":"Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing","first-page":"93","article-title":"SWAG: A large-scale adversarial dataset for grounded commonsense inference","author":"Zellers","year":"2018"},{"key":"10.1016\/j.knosys.2026.116152_b18","series-title":"Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics","first-page":"4791","article-title":"HellaSwag: Can a machine really finish your sentence?","author":"Zellers","year":"2019"},{"key":"10.1016\/j.knosys.2026.116152_b19","series-title":"Proceedings of NAACL-HLT","first-page":"4149","article-title":"COMMONSENSEQA: A question answering challenge targeting commonsense knowledge","author":"Talmor","year":"2019"},{"key":"10.1016\/j.knosys.2026.116152_b20","first-page":"7432","article-title":"Piqa: Reasoning about physical commonsense in natural language","volume":"vol. 34","author":"Bisk","year":"2020"},{"key":"10.1016\/j.knosys.2026.116152_b21","series-title":"Findings of the Association for Computational Linguistics: EMNLP 2020","first-page":"1823","article-title":"CommonGen: A constrained text generation challenge for generative commonsense reasoning","author":"Lin","year":"2020"},{"issue":"9","key":"10.1016\/j.knosys.2026.116152_b22","doi-asserted-by":"crossref","first-page":"99","DOI":"10.1145\/3474381","article-title":"Winogrande: An adversarial winograd schema challenge at scale","volume":"64","author":"Sakaguchi","year":"2021","journal-title":"Commun. ACM"},{"key":"10.1016\/j.knosys.2026.116152_b23","series-title":"Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing","first-page":"9785","article-title":"CRoW: Benchmarking commonsense reasoning in real-world tasks","author":"Ismayilzada","year":"2023"},{"issue":"4","key":"10.1016\/j.knosys.2026.116152_b24","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1145\/3615355","article-title":"Benchmarks for automated commonsense reasoning: A survey","volume":"56","author":"Davis","year":"2023","journal-title":"ACM Comput. Surv."},{"key":"10.1016\/j.knosys.2026.116152_b25","article-title":"Judging LLM-as-a-judge with MT-Bench and chatbot arena","volume":"36","author":"Zheng","year":"2024","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.knosys.2026.116152_b26","series-title":"Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)","first-page":"1443","article-title":"Spurious correlations in reference-free evaluation of text generation","author":"Durmus","year":"2022"},{"key":"10.1016\/j.knosys.2026.116152_b27","series-title":"Length-controlled AlpacaEval: A simple way to debias automatic evaluators","author":"Dubois","year":"2024"},{"key":"10.1016\/j.knosys.2026.116152_b28","doi-asserted-by":"crossref","unstructured":"S. Kim, J. Suk, S. Longpre, B.Y. Lin, J. Shin, S. Welleck, G. Neubig, M. Lee, K. Lee, M. Seo, Prometheus 2: An Open Source Language Model Specialized in Evaluating Other Language Models, in: Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing, 2024, pp. 4334\u20134353.","DOI":"10.18653\/v1\/2024.emnlp-main.248"},{"key":"10.1016\/j.knosys.2026.116152_b29","unstructured":"X. Wang, J. Wei, D. Schuurmans, Q.V. Le, E.H. Chi, S. Narang, A. Chowdhery, D. Zhou, Self-Consistency Improves Chain of Thought Reasoning in Language Models, in: The Eleventh International Conference on Learning Representations."},{"key":"10.1016\/j.knosys.2026.116152_b30","doi-asserted-by":"crossref","unstructured":"W. Chen, W. Wang, Z. Chu, K. Ren, Z. Zheng, Z. Lu, Self-Para-Consistency: Improving Reasoning Tasks at Low Cost for Large Language Models, in: Findings of the Association for Computational Linguistics ACL 2024, 2024, pp. 14162\u201314167.","DOI":"10.18653\/v1\/2024.findings-acl.842"},{"key":"10.1016\/j.knosys.2026.116152_b31","first-page":"24824","article-title":"Chain-of-thought prompting elicits reasoning in large language models","volume":"35","author":"Wei","year":"2022","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.knosys.2026.116152_b32","series-title":"General purpose verification for chain of thought prompting","author":"Vacareanu","year":"2024"},{"issue":"3","key":"10.1016\/j.knosys.2026.116152_b33","doi-asserted-by":"crossref","first-page":"371","DOI":"10.1037\/0033-295X.101.3.371","article-title":"Constructing inferences during narrative text comprehension","volume":"101","author":"Graesser","year":"1994","journal-title":"Psychol Rev"},{"issue":"5","key":"10.1016\/j.knosys.2026.116152_b34","doi-asserted-by":"crossref","first-page":"292","DOI":"10.1111\/j.1467-9280.1995.tb00513.x","article-title":"The construction of situation models in narrative comprehension: An event-indexing model","volume":"6","author":"Zwaan","year":"1995","journal-title":"Psychol. Sci."},{"issue":"1\u20132","key":"10.1016\/j.knosys.2026.116152_b35","doi-asserted-by":"crossref","first-page":"69","DOI":"10.1016\/0004-3702(93)90015-4","article-title":"Interpretation as abduction","volume":"63","author":"Hobbs","year":"1993","journal-title":"Artificial Intelligence"},{"key":"10.1016\/j.knosys.2026.116152_b36","series-title":"International Conference on Learning Representations","article-title":"Abductive commonsense reasoning","author":"Bhagavatula","year":"2019"},{"key":"10.1016\/j.knosys.2026.116152_b37","doi-asserted-by":"crossref","unstructured":"N. Mostafazadeh, N. Chambers, X. He, D. Parikh, D. Batra, L. Vanderwende, P. Kohli, J. Allen, A corpus and cloze evaluation for deeper understanding of commonsense stories, in: Proceedings of the 2016 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, 2016, pp. 839\u2013849.","DOI":"10.18653\/v1\/N16-1098"},{"key":"10.1016\/j.knosys.2026.116152_b38","series-title":"Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)","first-page":"5253","article-title":"COLA: Contextualized commonsense causal reasoning from the causal inference perspective","author":"Wang","year":"2023"},{"key":"10.1016\/j.knosys.2026.116152_b39","series-title":"Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics: Tutorial Abstracts","first-page":"27","article-title":"Commonsense reasoning for natural language processing","author":"Sap","year":"2020"},{"key":"10.1016\/j.knosys.2026.116152_b40","first-page":"139","article-title":"The curious case of commonsense intelligence","volume":"151","author":"Choi","year":"2022","journal-title":"J. Am. Acad. Arts Sci."},{"key":"10.1016\/j.knosys.2026.116152_b41","series-title":"Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing","first-page":"2391","article-title":"Cosmos QA: Machine reading comprehension with contextual commonsense reasoning","author":"Huang","year":"2019"},{"key":"10.1016\/j.knosys.2026.116152_b42","series-title":"Thirteenth International Conference on the Principles of Knowledge Representation and Reasoning","article-title":"The winograd schema challenge","author":"Levesque","year":"2012"},{"key":"10.1016\/j.knosys.2026.116152_b43","series-title":"Proceedings of the 40th Annual Meeting of the Association for Computational Linguistics","first-page":"311","article-title":"Bleu: a method for automatic evaluation of machine translation","author":"Papineni","year":"2002"},{"key":"10.1016\/j.knosys.2026.116152_b44","series-title":"Text Summarization Branches Out","first-page":"74","article-title":"ROUGE: A package for automatic evaluation of summaries","author":"Lin","year":"2004"},{"key":"10.1016\/j.knosys.2026.116152_b45","series-title":"Proceedings of the Third Conference on Machine Translation: Research Papers","first-page":"186","article-title":"A call for clarity in reporting BLEU scores","author":"Post","year":"2018"},{"key":"10.1016\/j.knosys.2026.116152_b46","series-title":"International Conference on Learning Representations","article-title":"BERTScore: Evaluating text generation with BERT","author":"Zhang","year":"2019"},{"key":"10.1016\/j.knosys.2026.116152_b47","doi-asserted-by":"crossref","unstructured":"T. Tang, H. Lu, Y. Jiang, H. Huang, D. Zhang, W.X. Zhao, T. Kocmi, F. Wei, Not all metrics are guilty: Improving NLG evaluation by diversifying references, in: Proceedings of the 2024 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 1: Long Papers), 2024, pp. 6596\u20136610.","DOI":"10.18653\/v1\/2024.naacl-long.367"},{"key":"10.1016\/j.knosys.2026.116152_b48","doi-asserted-by":"crossref","unstructured":"S. Sheng, Y. Xu, L. Fu, J. Ding, L. Zhou, X. Wang, C. Zhou, Is Reference Necessary in the Evaluation of NLG Systems? When and Where?, in: Proceedings of the 2024 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 1: Long Papers), 2024, pp. 8580\u20138596.","DOI":"10.18653\/v1\/2024.naacl-long.474"},{"key":"10.1016\/j.knosys.2026.116152_b49","doi-asserted-by":"crossref","unstructured":"B. Thompson, M. Post, Automatic Machine Translation Evaluation in Many Languages via Zero-Shot Paraphrasing, in: Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing, EMNLP, 2020, pp. 90\u2013121.","DOI":"10.18653\/v1\/2020.emnlp-main.8"},{"key":"10.1016\/j.knosys.2026.116152_b50","first-page":"27263","article-title":"Bartscore: Evaluating generated text as text generation","volume":"34","author":"Yuan","year":"2021","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.knosys.2026.116152_b51","doi-asserted-by":"crossref","unstructured":"D. Deutsch, R. Dror, D. Roth, On the Limitations of Reference-Free Evaluations of Generated Text, in: Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing, 2022, pp. 10960\u201310977.","DOI":"10.18653\/v1\/2022.emnlp-main.753"},{"key":"10.1016\/j.knosys.2026.116152_b52","doi-asserted-by":"crossref","unstructured":"M. Levy, A. Jacoby, Y. Goldberg, Same Task, More Tokens: the Impact of Input Length on the Reasoning Performance of Large Language Models, in: Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), 2024, pp. 15339\u201315353.","DOI":"10.18653\/v1\/2024.acl-long.818"},{"key":"10.1016\/j.knosys.2026.116152_b53","series-title":"Llama 2: Open foundation and fine-tuned chat models","author":"Touvron","year":"2023"},{"key":"10.1016\/j.knosys.2026.116152_b54","series-title":"Llama 3 model card","author":"AI@Meta","year":"2024"},{"key":"10.1016\/j.knosys.2026.116152_b55","series-title":"Mistral 7B","author":"Jiang","year":"2023"},{"key":"10.1016\/j.knosys.2026.116152_b56","series-title":"Gemma: Open models based on gemini research and technology","author":"Gemma Team","year":"2024"},{"key":"10.1016\/j.knosys.2026.116152_b57","series-title":"Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing","first-page":"2511","article-title":"G-eval: NLG evaluation using gpt-4 with better human alignment","author":"Liu","year":"2023"},{"key":"10.1016\/j.knosys.2026.116152_b58","first-page":"27730","article-title":"Training language models to follow instructions with human feedback","volume":"35","author":"Ouyang","year":"2022","journal-title":"Adv. Neural Inf. Process. Syst."}],"container-title":["Knowledge-Based Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0950705126008786?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0950705126008786?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2026,6,9]],"date-time":"2026-06-09T11:33:25Z","timestamp":1781004805000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S0950705126008786"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,6]]},"references-count":58,"alternative-id":["S0950705126008786"],"URL":"https:\/\/doi.org\/10.1016\/j.knosys.2026.116152","relation":{},"ISSN":["0950-7051"],"issn-type":[{"value":"0950-7051","type":"print"}],"subject":[],"published":{"date-parts":[[2026,6]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"SERA: Self-referential assessment framework for bidirectional generative commonsense reasoning","name":"articletitle","label":"Article Title"},{"value":"Knowledge-Based Systems","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.knosys.2026.116152","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2026 Elsevier B.V. All rights are reserved, including those for text and data mining, AI training, and similar technologies.","name":"copyright","label":"Copyright"}],"article-number":"116152"}}