{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,10]],"date-time":"2025-10-10T00:38:01Z","timestamp":1760056681495,"version":"build-2065373602"},"publisher-location":"Cham","reference-count":21,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783032013989","type":"print"},{"value":"9783032013996","type":"electronic"}],"license":[{"start":{"date-parts":[[2025,10,10]],"date-time":"2025-10-10T00:00:00Z","timestamp":1760054400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,10,10]],"date-time":"2025-10-10T00:00:00Z","timestamp":1760054400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2026]]},"DOI":"10.1007\/978-3-032-01399-6_5","type":"book-chapter","created":{"date-parts":[[2025,10,9]],"date-time":"2025-10-09T12:50:55Z","timestamp":1760014255000},"page":"67-84","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Evaluating Prompt Engineering Techniques for Accuracy and Confidence Elicitation in Medical LLMs"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-4820-5497","authenticated-orcid":false,"given":"Nariman","family":"Naderi","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0642-4341","authenticated-orcid":false,"given":"Zahra","family":"Atf","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4271-8611","authenticated-orcid":false,"given":"Peter R.","family":"Lewis","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1681-0994","authenticated-orcid":false,"given":"Aref Mahjoub","family":"far","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9295-9283","authenticated-orcid":false,"given":"Seyed Amir Ahmad","family":"Safavi-Naini","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6900-5596","authenticated-orcid":false,"given":"Ali","family":"Soroush","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,10,10]]},"reference":[{"issue":"1","key":"5_CR1","doi-asserted-by":"publisher","first-page":"41","DOI":"10.1038\/s41746-024-01029-4","volume":"7","author":"L Wang","year":"2024","unstructured":"Wang, L., et al.: Prompt engineering in consistency and reliability with the evidence-based guideline for LLMs. npj Digit. Med. 7(1), 41 (2024). https:\/\/doi.org\/10.1038\/s41746-024-01029-4","journal-title":"npj Digit. Med."},{"key":"5_CR2","doi-asserted-by":"publisher","unstructured":"Jin, D., Pan, E., Oufattole, N., Weng, W.-H., Fang, H., Szolovits, P.: What disease does this patient have? A large-scale open domain question answering dataset from medical exams, arXiv: arXiv:2009.13081 (2020).https:\/\/doi.org\/10.48550\/arXiv.2009.13081","DOI":"10.48550\/arXiv.2009.13081"},{"key":"5_CR3","doi-asserted-by":"publisher","unstructured":"Kadavath, S., et al.: Language models (Mostly) know what they know. arXiv: arXiv:2207.05221 (2022). https:\/\/doi.org\/10.48550\/arXiv.2207.05221","DOI":"10.48550\/arXiv.2207.05221"},{"key":"5_CR4","doi-asserted-by":"publisher","unstructured":"Xiong, M., et al.: Can LLMs express their uncertainty? An empirical evaluation of confidence elicitation in LLMs, arXiv: arXiv:2306.13063 (2024). https:\/\/doi.org\/10.48550\/arXiv.2306.13063","DOI":"10.48550\/arXiv.2306.13063"},{"key":"5_CR5","unstructured":"Agrawal, A., Suzgun, M., Mackey, L., Kalai, A.T.: Do language models know when they\u2019re hallucinating references?"},{"key":"5_CR6","unstructured":"Yadkori, Y.A., Kuzborskij, I., Gy\u00f6rgy, A., Szepesv\u00e1ri, C.: To believe or not to believe your LLM: iterative prompting for estimating epistemic uncertainty"},{"key":"5_CR7","doi-asserted-by":"publisher","unstructured":"Errica, F., Siracusano, G., Sanvito, D., Bifulco, R.: What did i do wrong? Quantifying llms\u2019 sensitivity and consistency to prompt engineering. arXiv: arXiv:2406.12334 (2025). https:\/\/doi.org\/10.48550\/arXiv.2406.12334","DOI":"10.48550\/arXiv.2406.12334"},{"key":"5_CR8","doi-asserted-by":"publisher","unstructured":"Azimi, I., Qi, M., Wang, L., Rahmani, A.M., Li, Y.: Accuracy and consistency of LLMs in the registered dietitian exam: the impact of prompt engineering and knowledge retrieval arXiv: arXiv:2408.02964 (2024). https:\/\/doi.org\/10.48550\/arXiv.2408.02964","DOI":"10.48550\/arXiv.2408.02964"},{"key":"5_CR9","doi-asserted-by":"publisher","unstructured":"Yang, A., Chen, C., Pitas, K.: Just rephrase it! Uncertainty estimation in closed-source language models via multiple rephrased queries, arXiv: arXiv:2405.13907. (2024). https:\/\/doi.org\/10.48550\/arXiv.2405.13907","DOI":"10.48550\/arXiv.2405.13907"},{"key":"5_CR10","doi-asserted-by":"publisher","unstructured":"Becker, E., Soatto, S.: Cycles of thought: measuring LLM confidence through stable explanations. arXiv: arXiv:2406.03441 (2024). https:\/\/doi.org\/10.48550\/arXiv.2406.03441","DOI":"10.48550\/arXiv.2406.03441"},{"key":"5_CR11","doi-asserted-by":"publisher","unstructured":"Liu, S., et al.: Can LLMs learn uncertainty on their own? expressing uncertainty effectively in a self-training manner. In: Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing, pp. 21635\u201321645. Association for Computational Linguistics, Miami, Florida, USA (2024). https:\/\/doi.org\/10.18653\/v1\/2024.emnlp-main.1205","DOI":"10.18653\/v1\/2024.emnlp-main.1205"},{"key":"5_CR12","doi-asserted-by":"publisher","unstructured":"Atf, Z., Lewis, P.R.: Is trust correlated with explainability in AI? A meta-analysis. IEEE Trans. Technol. Soc. 1\u20138 (2025). https:\/\/doi.org\/10.1109\/TTS.2025.3558448","DOI":"10.1109\/TTS.2025.3558448"},{"issue":"4","key":"5_CR13","doi-asserted-by":"publisher","first-page":"66","DOI":"10.1109\/MTS.2023.3340238","volume":"42","author":"Z Atf","year":"2023","unstructured":"Atf, Z., Lewis, P.R.: Human centricity in the relationship between explainability and trust in AI. IEEE Technol. Soc. Mag. 42(4), 66\u201376 (2023). https:\/\/doi.org\/10.1109\/MTS.2023.3340238","journal-title":"IEEE Technol. Soc. Mag."},{"key":"5_CR14","doi-asserted-by":"publisher","unstructured":"Tonolini, F., Aletras, N., Massiah, J., Kazai, G.: Bayesian prompt ensembles: model uncertainty estimation for black-box large language models. In: Findings of the Association for Computational Linguistics ACL 2024, Bangkok, Thailand and virtual meeting: Association for Computational Linguistics, pp. 12229\u201312272 (2024). https:\/\/doi.org\/10.18653\/v1\/2024.findings-acl.728","DOI":"10.18653\/v1\/2024.findings-acl.728"},{"key":"5_CR15","doi-asserted-by":"publisher","unstructured":"Ling, C., et al.: Uncertainty quantification for in-context learning of large language models. In: Proceedings of the 2024 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 1: Long Papers), pp. 3357\u20133370. Association for Computational Linguistics, Mexico City, Mexico (2024). https:\/\/doi.org\/10.18653\/v1\/2024.naacl-long.184","DOI":"10.18653\/v1\/2024.naacl-long.184"},{"key":"5_CR16","doi-asserted-by":"publisher","unstructured":"Ailem, M., Marazopoulou, K., Siska, C., Bono, J.: Examining the robustness of LLM evaluation to the distributional assumptions of benchmarks. arXiv: arXiv:2404.16966 (2024). https:\/\/doi.org\/10.48550\/arXiv.2404.16966","DOI":"10.48550\/arXiv.2404.16966"},{"key":"5_CR17","doi-asserted-by":"publisher","unstructured":"Xu, T., et al.: SaySelf: teaching LLMs to express confidence with self-reflective rationales. arXiv: arXiv:2405.20974 (2024). https:\/\/doi.org\/10.48550\/arXiv.2405.20974","DOI":"10.48550\/arXiv.2405.20974"},{"key":"5_CR18","doi-asserted-by":"crossref","unstructured":"Savage, T., et al.: Large language model uncertainty proxies\u202f: discrimination and calibration for medical diagnosis and treatment. J. Am. Med. Inform. Assoc. 1\u201311 (2024)","DOI":"10.1093\/jamia\/ocae254"},{"issue":"23","key":"5_CR19","doi-asserted-by":"publisher","first-page":"4581","DOI":"10.3390\/electronics13234581","volume":"13","author":"H Wu","year":"2024","unstructured":"Wu, H., Hong, H., Sun, L., Bai, X., Pu, M.: Harnessing response consistency for superior LLM performance: the promise and peril of answer-augmented prompting. Electronics 13(23), 4581 (2024). https:\/\/doi.org\/10.3390\/electronics13234581","journal-title":"Electronics"},{"issue":"2","key":"5_CR20","doi-asserted-by":"publisher","first-page":"14","DOI":"10.1007\/s11023-024-09664-2","volume":"34","author":"PR Lewis","year":"2024","unstructured":"Lewis, P.R., Sarkadi, \u015e: Reflective artificial intelligence. Mind. Mach. 34(2), 14 (2024). https:\/\/doi.org\/10.1007\/s11023-024-09664-2","journal-title":"Mind. Mach."},{"key":"5_CR21","unstructured":"Atf, Z., et al.: The challenge of uncertainty quantification of large language models in medicine. arXiv preprint arXiv:2504.05278 (2025). https:\/\/arxiv.org\/abs\/2504.05278"}],"container-title":["Lecture Notes in Computer Science","Explainable, Trustworthy, and Responsible AI and Multi-Agent Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-032-01399-6_5","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,10,9]],"date-time":"2025-10-09T12:51:02Z","timestamp":1760014262000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-032-01399-6_5"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,10]]},"ISBN":["9783032013989","9783032013996"],"references-count":21,"URL":"https:\/\/doi.org\/10.1007\/978-3-032-01399-6_5","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,10,10]]},"assertion":[{"value":"10 October 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"EXTRAAMAS","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Workshop on Explainable, Trustworthy, and Responsible AI and Multi-Agent Systems","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Detroit, MI","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"USA","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"19 May 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"20 May 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"7","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"extraamas2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/extraamas.ehealth.hevs.ch\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}