{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,29]],"date-time":"2026-01-29T22:16:24Z","timestamp":1769724984670,"version":"3.49.0"},"reference-count":50,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,3,31]],"date-time":"2025-03-31T00:00:00Z","timestamp":1743379200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,3,31]],"date-time":"2025-03-31T00:00:00Z","timestamp":1743379200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,3,31]]},"DOI":"10.1109\/icst62969.2025.10989025","type":"proceedings-article","created":{"date-parts":[[2025,5,20]],"date-time":"2025-05-20T17:05:21Z","timestamp":1747760721000},"page":"429-440","source":"Crossref","is-referenced-by-count":1,"title":["LLMs in the Heart of Differential Testing: A Case Study on a Medical Rule Engine"],"prefix":"10.1109","author":[{"given":"Erblin","family":"Isaku","sequence":"first","affiliation":[{"name":"Simula Research Laboratory and University of Oslo,Oslo,Norway"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Christoph","family":"Laaber","sequence":"additional","affiliation":[{"name":"Simula Research Laboratory,Oslo,Norway"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Hassan","family":"Sartaj","sequence":"additional","affiliation":[{"name":"Simula Research Laboratory,Oslo,Norway"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Shaukat","family":"Ali","sequence":"additional","affiliation":[{"name":"Simula Research Laboratory,Oslo,Norway"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Thomas","family":"Schwitalla","sequence":"additional","affiliation":[{"name":"Cancer Registry of Norway,Oslo,Norway"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jan F.","family":"Nyg\u00e5rd","sequence":"additional","affiliation":[{"name":"Cancer Registry of Norway and UiT The Arctic University of Norway,Oslo,Norway"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/saner56733.2023.00033"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1145\/3293455"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1145\/1985793.1985795"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/tse.2014.2372785"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1214\/aos\/1013699998"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1145\/3540250.3549162"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1145\/3143561"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.acl-long.34"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1038\/s43856-023-00370-1"},{"key":"ref10","article-title":"Effective test generation using pre-trained large language models and mutation testing","volume":"abs\/2308.16557","author":"Dakhel","year":"2023","journal-title":"CoRR"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1145\/3597926.3598067"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1080\/00401706.1964.10490181"},{"key":"ref13","article-title":"Mistral 7b","volume":"abs\/2310.06825","author":"J.","year":"2023","journal-title":"CoRR"},{"key":"ref14","article-title":"Mixtral of experts","volume":"abs\/2401.04088","author":"J.","year":"2024","journal-title":"CoRR"},{"key":"ref15","article-title":"A study of generative large language model for medical research and healthcare","volume":"6","author":"P.","year":"2023","journal-title":"npj Digitital Medicine"},{"key":"ref16","article-title":"Llama 2: Open foundation and fine-tuned chat models","volume":"abs\/2307.09288","author":"T.","year":"2023","journal-title":"CoRR"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/ase56229.2023.00109"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1145\/3395363.3397374"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1145\/3624032.3624035"},{"key":"ref20","article-title":"Random testing","author":"Hamlet","year":"2021","journal-title":"Essentials of Software Testing"},{"key":"ref21","article-title":"Robust confidence intervals for effect sizes: A comparative study of cohen\u2019s d and cliff\u2019s delta under non-normality and heterogeneous variances","volume-title":"Annual Meeting of the American Educational Research Association","author":"Hess"},{"key":"ref22","article-title":"A survey on hallucination in large language models: Principles, taxonomy, challenges, and open questions","volume":"abs\/2311.05232","author":"Huang","year":"2023","journal-title":"CoRR"},{"key":"ref23","article-title":"METAL: metamorphic testing framework for analyzing large-language model qualities","volume":"abs\/2312.06056","author":"Hyun","year":"2023","journal-title":"CoRR"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/ICSME58846.2023.00065"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/tse.2010.62"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1080\/01621459.1952.10483441"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1145\/3611643.3613882"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/icse-companion58688.2023.00102"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1145\/3545945.3569770"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1145\/3611643.3613897"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/tse.2017.2774829"},{"issue":"1","key":"ref32","first-page":"100","article-title":"Differential testing for software","volume":"10","author":"McKeeman","year":"1998","journal-title":"Digital Technical Journal"},{"key":"ref33","volume-title":"GPT 3.5","year":"2024"},{"key":"ref34","article-title":"GPT-4 technical report","volume":"abs\/2303.08774","year":"2023","journal-title":"CoRR"},{"issue":"11","key":"ref35","doi-asserted-by":"crossref","first-page":"1083","DOI":"10.1109\/TSE.2017.2764464","article-title":"Metamorphic testing of restful web apis","volume":"44","author":"Segura","year":"2018","journal-title":"IEEE Trans. Software Eng."},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1093\/comjnl\/bxm043"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1145\/3241743"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1038\/s41591-023-02448-8"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1145\/3524481.3527220"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.2307\/1165329"},{"key":"ref41","article-title":"Can large language models write good property-based tests?","volume":"abs\/2307.04346","author":"Vikram","year":"2023","journal-title":"CoRR"},{"key":"ref42","article-title":"Software testing with large language model: Survey, landscape, and vision","volume":"abs\/2307.07221","author":"Wang","year":"2023","journal-title":"CoRR"},{"key":"ref43","first-page":"191","article-title":"MBF4CR: A model-based framework for supporting an automated cancer registry system","volume-title":"Modelling Foundations and Applications - 12th European Conference, ECMFA@STAF 2016, Vienna, Austria, July 6\u20137, 2016, Proceedings","volume":"9764","author":"Wang","year":"2016"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1109\/icsme.2017.22"},{"key":"ref45","article-title":"The shaky foundations of large language models and foundation models for electronic health records","volume":"6","author":"Wornow","year":"2023","journal-title":"npj Digitital Medicine"},{"key":"ref46","article-title":"Chatunitest: a chatgpt-based automated unit test generation tool","volume":"abs\/2305.04764","author":"Xie","year":"2023","journal-title":"CoRR"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1145\/3520312.3534862"},{"key":"ref48","article-title":"Large language models in medical term classification and unexpected misalignment between response and reasoning","volume":"abs\/2312.14184","author":"Zhang","year":"2023","journal-title":"CoRR"},{"key":"ref49","article-title":"Cumulative reasoning with large language models","volume":"abs\/2308.04371","author":"Zhang","year":"2023","journal-title":"CoRR"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1145\/267580.267590"}],"event":{"name":"2025 IEEE Conference on Software Testing, Verification and Validation (ICST)","location":"Napoli, Italy","start":{"date-parts":[[2025,3,31]]},"end":{"date-parts":[[2025,4,4]]}},"container-title":["2025 IEEE Conference on Software Testing, Verification and Validation (ICST)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/10988917\/10988918\/10989025.pdf?arnumber=10989025","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,5,21]],"date-time":"2025-05-21T05:33:13Z","timestamp":1747805593000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10989025\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,3,31]]},"references-count":50,"URL":"https:\/\/doi.org\/10.1109\/icst62969.2025.10989025","relation":{},"subject":[],"published":{"date-parts":[[2025,3,31]]}}}