{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,21]],"date-time":"2026-04-21T05:46:17Z","timestamp":1776750377828,"version":"3.51.2"},"reference-count":59,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"4","license":[{"start":{"date-parts":[[2026,4,1]],"date-time":"2026-04-01T00:00:00Z","timestamp":1775001600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2026,4,1]],"date-time":"2026-04-01T00:00:00Z","timestamp":1775001600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2026,4,1]],"date-time":"2026-04-01T00:00:00Z","timestamp":1775001600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. 
Software Eng."],"published-print":{"date-parts":[[2026,4]]},"DOI":"10.1109\/tse.2026.3664287","type":"journal-article","created":{"date-parts":[[2026,2,13]],"date-time":"2026-02-13T20:49:41Z","timestamp":1771015781000},"page":"1426-1445","source":"Crossref","is-referenced-by-count":0,"title":["Toward Automated Validation of Language Model Synthesized Test Cases Using Semantic Entropy"],"prefix":"10.1109","volume":"52","author":[{"ORCID":"https:\/\/orcid.org\/0009-0004-0897-4800","authenticated-orcid":false,"given":"Hamed","family":"Taherkhani","sequence":"first","affiliation":[{"name":"Lassonde School of Engineering, York University, Toronto, ON, Canada"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8829-3773","authenticated-orcid":false,"given":"Jiho","family":"Shin","sequence":"additional","affiliation":[{"name":"Lassonde School of Engineering, York University, Toronto, ON, Canada"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-7683-2614","authenticated-orcid":false,"given":"Muhammad Ammar","family":"Tahir","sequence":"additional","affiliation":[{"name":"University of Washington, Seattle, WA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7931-6782","authenticated-orcid":false,"given":"Md Rakib Hossain","family":"Misu","sequence":"additional","affiliation":[{"name":"University of California, Irvine, CA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-8262-0105","authenticated-orcid":false,"given":"Vineet Sunil","family":"Gattani","sequence":"additional","affiliation":[{"name":"Arizona State University, Tempe, AZ, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0204-9812","authenticated-orcid":false,"given":"Hadi","family":"Hemmati","sequence":"additional","affiliation":[{"name":"Lassonde School of Engineering, 
York University, Toronto, ON, Canada"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/ICSE-FoSE59343.2023.00008"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/TSE.2024.3368208"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.52202\/075280-0377"},{"key":"ref4","article-title":"Language agent tree search unifies reasoning acting and planning in language models","author":"Zhou","year":"2023"},{"key":"ref5","article-title":"AgentCoder: Multi-agent-based code generation with iterative testing and optimisation","author":"Huang","year":"2023"},{"key":"ref6","article-title":"Epic: Cost-effective search-based prompt engineering of LLMs for code generation","author":"Taherkhani","year":"2024"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.findings-acl.49"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1145\/3571730"},{"key":"ref9","article-title":"Language models (mostly) know what they know","author":"Kadavath","year":"2022"},{"key":"ref10","doi-asserted-by":"crossref","DOI":"10.36227\/techrxiv.171822396.61518693\/v1","article-title":"Detecting LLM hallucinations using Monte Carlo simulations on token probabilities","author":"Ledger","year":"2024"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-86623-4_13"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/tse.2024.3519464"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.findings-acl.558"},{"key":"ref14","article-title":"A stitch in time saves nine: Detecting and mitigating hallucinations of LLMs by validating low-confidence generation","author":"Varshney","year":"2023"},{"key":"ref15","article-title":"LLMs know more than they show: On the intrinsic representation of LLM 
hallucinations","author":"Orgad","year":"2024"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1038\/s41586-024-07421-0"},{"key":"ref17","article-title":"Semantic entropy probes: Robust and cheap hallucination detection in LLMs","author":"Kossen","year":"2024"},{"key":"ref18","article-title":"BigCodeBench: Benchmarking code generation with diverse function calls and complex instructions","author":"Zhuo","year":"2024"},{"key":"ref19","article-title":"Evaluating large language models trained on code","author":"Chen","year":"2021"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/TSE.2009.71"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/TSE.2022.3227418"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/ASE.2013.6693084"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/ICSE.2007.37"},{"key":"ref24","article-title":"Unit test case generation with transformers and focal context","author":"Tufano","year":"2020"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1016\/j.infsof.2024.107565"},{"key":"ref26","article-title":"Automated test case generation using code models and domain adaptation","author":"Hashtroudi","year":"2023"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/ASE56229.2023.00193"},{"key":"ref28","article-title":"Reinforcement learning from automatic feedback for high-quality unit test generation","author":"Steenhoek","year":"2023"},{"key":"ref29","article-title":"ChatUniTest: A ChatGPT-based automated unit test generation tool","author":"Xie","year":"2023"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1016\/j.infsof.2024.107468"},{"key":"ref31","article-title":"How well does LLM generate security tests?","author":"Zhang","year":"2023"},{"key":"ref32","article-title":"Deep-Bench: Deep learning benchmark dataset for code 
generation","author":"Daghighfarsoodeh","year":"2025"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1145\/3639478.3643119"},{"key":"ref34","article-title":"Can large language models write good property-based tests?","author":"Vikram","year":"2023"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/ICSE48619.2023.00085"},{"key":"ref36","article-title":"No more manual tests? Evaluating and improving ChatGPT for unit test generation","author":"Yuan","year":"2023"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1145\/3624032.3624035"},{"key":"ref38","article-title":"Prompting code interpreter to write better unit tests on quixbugs functions","author":"Li","year":"2023"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1109\/TSE.2024.3382365"},{"key":"ref40","doi-asserted-by":"crossref","first-page":"54","DOI":"10.1145\/3643795.3648396","article-title":"Unit test generation using generative ai: A comparative performance analysis of autogeneration tools","volume-title":"Proc. 1st Int. Workshop Large Lang. 
Models Code","author":"Bhatia","year":"2024"},{"key":"ref41","article-title":"LLM-powered test case generation for detecting tricky bugs","author":"Liu","year":"2024"},{"key":"ref42","article-title":"Large-scale, independent and comprehensive study of the power of LLMs for test case generation","author":"Ou\u00e9draogo","year":"2024"},{"key":"ref43","article-title":"Large language models as test case generators: Performance evaluation and enhancement","author":"Li","year":"2024"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1109\/SCW63240.2024.00238"},{"key":"ref45","article-title":"CodeT: Code generation with generated tests","author":"Chen","year":"2022"},{"key":"ref46","article-title":"Detecting hallucinated content in conditional neural sequence generation","author":"Zhou","year":"2020"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.naacl-main.92"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1093\/oed\/1117990385"},{"key":"ref49","article-title":"Code hallucination","author":"Rahman","year":"2024"},{"key":"ref50","article-title":"Exploring and evaluating hallucinations in LLM-powered code generation","author":"Liu","year":"2024"},{"key":"ref51","article-title":"CodeHalu: Code hallucinations in LLMs driven by execution-based verification","author":"Tian","year":"2024"},{"key":"ref52","volume-title":"Compilers Principles, Techniques & Tools.","author":"Alfred","year":"2007"},{"key":"ref53","article-title":"Program synthesis with large language models","author":"Austin","year":"2021"},{"key":"ref54","doi-asserted-by":"crossref","DOI":"10.18653\/v1\/2025.findings-naacl.197","article-title":"TestEval: Benchmarking large language models for test case generation","author":"Wang","year":"2025"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.1109\/TSE.2024.3433463"},{"key":"ref56","article-title":"Language agent tree search unifies reasoning acting and planning in language 
models","author":"Zhou","year":"2024"},{"key":"ref57","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-27455-9_10"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.1109\/ICSME46990.2020.00047"},{"key":"ref59","doi-asserted-by":"publisher","DOI":"10.1007\/s10664-021-10024-2"}],"container-title":["IEEE Transactions on Software Engineering"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/32\/11488173\/11395655.pdf?arnumber=11395655","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,4,21]],"date-time":"2026-04-21T05:17:38Z","timestamp":1776748658000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11395655\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,4]]},"references-count":59,"journal-issue":{"issue":"4"},"URL":"https:\/\/doi.org\/10.1109\/tse.2026.3664287","relation":{},"ISSN":["0098-5589","1939-3520","2326-3881"],"issn-type":[{"value":"0098-5589","type":"print"},{"value":"1939-3520","type":"electronic"},{"value":"2326-3881","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026,4]]}}}