{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,5]],"date-time":"2025-12-05T21:42:06Z","timestamp":1764970926860,"version":"3.46.0"},"publisher-location":"New York, NY, USA","reference-count":26,"publisher":"ACM","content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,10,11]]},"DOI":"10.1145\/3768322.3769017","type":"proceedings-article","created":{"date-parts":[[2025,12,5]],"date-time":"2025-12-05T21:39:41Z","timestamp":1764970781000},"page":"1-8","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Promises and Pitfalls of Large Language Models use to interpret Healthcare Guidelines"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-6643-4333","authenticated-orcid":false,"given":"Raghav","family":"Awasthi","sequence":"first","affiliation":[{"name":"BrainXAI Research, BrainX,LLC., Cleveland, OH, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0000-1052-5546","authenticated-orcid":false,"given":"Shreya","family":"Mishra","sequence":"additional","affiliation":[{"name":"BrainXAI Research, BrainX,LLC., Cleveland, OH, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0000-8170-2861","authenticated-orcid":false,"given":"Ashish","family":"Atreja","sequence":"additional","affiliation":[{"name":"BrainXAI Research, BrainX,LLC., Cleveland, OH, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1283-0514","authenticated-orcid":false,"given":"Charumathi","family":"Raghu Subramanian","sequence":"additional","affiliation":[{"name":"BrainXAI Research, BrainX,LLC., Cleveland, OH, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6398-6047","authenticated-orcid":false,"given":"Moises","family":"Auron","sequence":"additional","affiliation":[{"name":"BrainXAI Research, BrainX,LLC., Cleveland, OH, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-3692-7254","authenticated-orcid":false,"given":"Nishant","family":"Singh","sequence":"additional","affiliation":[{"name":"BrainXAI Research, BrainX,LLC., Cleveland, OH, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6283-3049","authenticated-orcid":false,"given":"Jacek","family":"B. Cywinski","sequence":"additional","affiliation":[{"name":"BrainXAI Research, BrainX,LLC., Cleveland, OH, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0455-2892","authenticated-orcid":false,"given":"Francis","family":"Papay","sequence":"additional","affiliation":[{"name":"BrainXAI Research, BrainX,LLC., Cleveland, OH, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1961-5577","authenticated-orcid":false,"given":"Kamal","family":"Maheshwar","sequence":"additional","affiliation":[{"name":"BrainXAI Research, BrainX,LLC., Cleveland, OH, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9083-891X","authenticated-orcid":false,"given":"Ashish","family":"K Khanna","sequence":"additional","affiliation":[{"name":"BrainXAI Research, BrainX,LLC., Cleveland, OH, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3777-8767","authenticated-orcid":false,"given":"Piyush","family":"Mathur","sequence":"additional","affiliation":[{"name":"BrainXAI Research, BrainX,LLC., Cleveland, OH, USA"}]}],"member":"320","published-online":{"date-parts":[[2025,12,5]]},"reference":[{"key":"e_1_3_2_2_1_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.jacc.2019.03.010"},{"key":"e_1_3_2_2_2_1","volume-title":"MedRXIV (2023)","author":"Awasthi Raghav","year":"2023","unstructured":"Raghav Awasthi, Shreya Mishra, Dwarikanath Mahapatra, Ashish Khanna, Kamal Maheshwari, Jacek Cywinski, Frank Papay, and Piyush Mathur. 2023. HumanELY: Human evaluation of LLM yield, using a novel web-based evaluation tool. MedRXIV (2023), 2023\u201312."},{"key":"e_1_3_2_2_3_1","doi-asserted-by":"publisher","DOI":"10.1001\/jama.282.15.1458"},{"key":"e_1_3_2_2_4_1","volume-title":"AI Will See You Now\": How Do ChatGPT-4 Treatment Recommendations Align With Orthopaedic Clinical Practice Guidelines? Clinical Orthopaedics and Related Research\u00ae","author":"Dagher Tanios","year":"2022","unstructured":"Tanios Dagher, Emma P Dwyer, Hayden P Baker, Senthooran Kalidoss, and Jason A Strelzow. 2022. \"Dr. AI Will See You Now\": How Do ChatGPT-4 Treatment Recommendations Align With Orthopaedic Clinical Practice Guidelines? Clinical Orthopaedics and Related Research\u00ae (2022), 10\u20131097."},{"key":"e_1_3_2_2_5_1","doi-asserted-by":"crossref","first-page":"1965","DOI":"10.1001\/jama.2021.6238","article-title":"Screening for colorectal cancer: US Preventive Services Task Force recommendation statement","volume":"325","author":"Davidson Karina W","year":"2021","unstructured":"Karina W Davidson, Michael J Barry, Carol M Mangione, Michael Cabana, Aaron B Caughey, Esa M Davis, Katrina E Donahue, Chyke A Doubeni, Alex H Krist, Martha Kubik, et al. 2021. Screening for colorectal cancer: US Preventive Services Task Force recommendation statement. Jama 325, 19 (2021), 1965\u20131977.","journal-title":"Jama"},{"key":"e_1_3_2_2_6_1","doi-asserted-by":"crossref","first-page":"736","DOI":"10.1001\/jama.2021.12531","article-title":"Screening for prediabetes and type 2 diabetes: US Preventive Services Task Force recommendation statement","volume":"326","author":"Davidson Karina W","year":"2021","unstructured":"Karina W Davidson, Michael J Barry, Carol M Mangione, Michael Cabana, Aaron B Caughey, Esa M Davis, Katrina E Donahue, Chyke A Doubeni, Alex H Krist, Martha Kubik, et al. 2021. Screening for prediabetes and type 2 diabetes: US Preventive Services Task Force recommendation statement. Jama 326, 8 (2021), 736\u2013743.","journal-title":"Jama"},{"key":"e_1_3_2_2_7_1","volume-title":"GPT-4 for information retrieval and comparison of medical oncology guidelines. Nejm Ai 1, 6","author":"Ferber Dyke","year":"2024","unstructured":"Dyke Ferber, Isabella C Wiest, Georg W\u00f6lflein, Matthias P Ebert, Gernot Beutel, Jan-Niklas Eckardt, Daniel Truhn, Christoph Springfeld, Dirk J\u00e4ger, and Jakob Nikolas Kather. 2024. GPT-4 for information retrieval and comparison of medical oncology guidelines. Nejm Ai 1, 6 (2024), AIcs2300235."},{"key":"e_1_3_2_2_8_1","volume-title":"Flesch-Kincaid readability test. Retrieved October 26, 3","author":"Flesch Rudolf","year":"2007","unstructured":"Rudolf Flesch. 2007. Flesch-Kincaid readability test. Retrieved October 26, 3 (2007), 2007."},{"key":"e_1_3_2_2_9_1","doi-asserted-by":"crossref","first-page":"474","DOI":"10.1159\/000539658","article-title":"Utilizing ChatGPT to facilitate referrals for fetal echocardiography","volume":"51","author":"Kopylov Lital Gordin","year":"2024","unstructured":"Lital Gordin Kopylov, Itai Goldrat, Ron Maymon, Ran Svirsky, Yifat Wiener, and Eyal Klang. 2024. Utilizing ChatGPT to facilitate referrals for fetal echocardiography. Fetal Diagnosis and Therapy 51, 5 (2024), 474\u2013477.","journal-title":"Fetal Diagnosis and Therapy"},{"key":"e_1_3_2_2_10_1","first-page":"S14","article-title":"Toward evidence-based quality improvement: Evidence (and its limitations) of the effectiveness of guideline dissemination and implementation strategies 1966\u20131998","volume":"21","author":"Grimshaw Jeremy","year":"2006","unstructured":"Jeremy Grimshaw, Martin Eccles, Ruth Thomas, Graeme MacLennan, Craig Ramsay, Cynthia Fraser, and Luke Vale. 2006. Toward evidence-based quality improvement: Evidence (and its limitations) of the effectiveness of guideline dissemination and implementation strategies 1966\u20131998. Journal of general internal medicine 21, S2 (2006), S14\u2013S20.","journal-title":"Journal of general internal medicine"},{"key":"e_1_3_2_2_11_1","volume-title":"The ethics of ChatGPT in medicine and healthcare: a systematic review on Large Language Models (LLMs). NPJ digital medicine 7, 1","author":"Haltaufderheide Joschka","year":"2024","unstructured":"Joschka Haltaufderheide and Robert Ranisch. 2024. The ethics of ChatGPT in medicine and healthcare: a systematic review on Large Language Models (LLMs). NPJ digital medicine 7, 1 (2024), 183."},{"key":"e_1_3_2_2_12_1","volume-title":"Mingyu Lu, Kumail Alhamoud, Jimin Mun, Cristina Grau, Minseok Jung, Rodrigo R Gameiro, et al.","author":"Kim Yubin","year":"2025","unstructured":"Yubin Kim, Hyewon Jeong, Shen Chen, Shuyue Stella Li, Mingyu Lu, Kumail Alhamoud, Jimin Mun, Cristina Grau, Minseok Jung, Rodrigo R Gameiro, et al. 2025. Medical hallucination in foundation models and their impact on healthcare. medRxiv. (2025)."},{"key":"e_1_3_2_2_13_1","doi-asserted-by":"crossref","first-page":"1314","DOI":"10.1016\/j.hlc.2024.03.005","article-title":"Appropriateness of ChatGPT in answering heart failure related questions","volume":"33","author":"King Ryan C","year":"2024","unstructured":"Ryan C King, Jamil S Samaan, Yee Hui Yeo, Behram Mody, Dawn M Lombardo, and Roxana Ghashghaei. 2024. Appropriateness of ChatGPT in answering heart failure related questions. Heart, Lung and Circulation 33, 9 (2024), 1314\u20131318.","journal-title":"Heart, Lung and Circulation"},{"key":"e_1_3_2_2_14_1","volume-title":"Optimization of hepatological clinical guidelines interpretation by large language models: a retrieval augmented generation-based framework. NPJ digital medicine 7, 1","author":"Kresevic Simone","year":"2024","unstructured":"Simone Kresevic, Mauro Giuffr\u00e8, Milos Ajcevic, Agostino Accardo, Lory S Croc\u00e8, and Dennis L Shung. 2024. Optimization of hepatological clinical guidelines interpretation by large language models: a retrieval augmented generation-based framework. NPJ digital medicine 7, 1 (2024), 102."},{"key":"e_1_3_2_2_15_1","unstructured":"Patrick Lewis Ethan Perez Aleksandra Piktus Fabio Petroni Vladimir Karpukhin Naman Goyal Heinrich K\u00fcttler Mike Lewis Wen-tau Yih Tim Rockt\u00e4schel et al. 2020. Retrieval-augmented generation for knowledge-intensive nlp tasks. Advances in neural information processing systems 33 (2020) 9459\u20139474."},{"key":"e_1_3_2_2_16_1","doi-asserted-by":"publisher","DOI":"10.1186\/s12911-024-02656-3"},{"key":"e_1_3_2_2_17_1","volume-title":"Christopher Burr, Josh Cowls, Indra Joshi, Mariarosaria Taddeo, and Luciano Floridi.","author":"Morley Jessica","year":"2020","unstructured":"Jessica Morley, Caio CV Machado, Christopher Burr, Josh Cowls, Indra Joshi, Mariarosaria Taddeo, and Luciano Floridi. 2020. The ethics of AI in health care: a mapping review. Social science & medicine 260 (2020), 113172."},{"key":"e_1_3_2_2_18_1","volume-title":"Proceedings of the 2015 conference of the north american chapter of the association for computational linguistics: Demonstrations. 96\u2013100","author":"Napolitano Diane","year":"2015","unstructured":"Diane Napolitano, Kathleen M Sheehan, and Robert Mundkowsky. 2015. Online readability and text complexity analysis with TextEvaluator. In Proceedings of the 2015 conference of the north american chapter of the association for computational linguistics: Demonstrations. 96\u2013100."},{"key":"e_1_3_2_2_19_1","doi-asserted-by":"crossref","unstructured":"Cheng Peng Xi Yang Aokun Chen Kaleb E Smith Nima PourNejatian Anthony B Costa Cheryl Martin Mona G Flores Ying Zhang Tanja Magoc et al. 2023. A study of generative large language model for medical research and healthcare. NPJ digital medicine 6 1 (2023) 210.","DOI":"10.1038\/s41746-023-00958-w"},{"key":"e_1_3_2_2_20_1","doi-asserted-by":"crossref","first-page":"111756","DOI":"10.1016\/j.ejrad.2024.111756","article-title":"Enhancing chatbot performance for imaging recommendations: Leveraging GPT-4 and context-awareness for trustworthy clinical guidance","volume":"181","author":"Rau Alexander","year":"2024","unstructured":"Alexander Rau, Fabian Bamberg, Anna Fink, Phuong Hien Tran, Marco Reisert, and Maximilian F Russe. 2024. Enhancing chatbot performance for imaging recommendations: Leveraging GPT-4 and context-awareness for trustworthy clinical guidance. European Journal of Radiology 181 (2024), 111756.","journal-title":"European Journal of Radiology"},{"key":"e_1_3_2_2_21_1","volume-title":"Proceedings of workshop on natural language processing for improving textual accessibility. Citeseer, 14\u201322","author":"\u0160tajner Sanja","year":"2012","unstructured":"Sanja \u0160tajner, Richard Evans, Constantin Orasan, and Ruslan Mitkov. 2012. What can readability measures really tell us about text complexity. In Proceedings of workshop on natural language processing for improving textual accessibility. Citeseer, 14\u201322."},{"key":"e_1_3_2_2_22_1","volume-title":"High-performance medicine: the convergence of human and artificial intelligence. Nature medicine 25, 1","author":"Topol Eric J","year":"2019","unstructured":"Eric J Topol. 2019. High-performance medicine: the convergence of human and artificial intelligence. Nature medicine 25, 1 (2019), 44\u201356."},{"key":"e_1_3_2_2_23_1","volume-title":"Llama: Open and efficient foundation language models. arXiv preprint arXiv:2302.13971","author":"Touvron Hugo","year":"2023","unstructured":"Hugo Touvron, Thibaut Lavril, Gautier Izacard, Xavier Martinet, Marie-Anne Lachaux, Timoth\u00e9e Lacroix, Baptiste Rozi\u00e8re, Naman Goyal, Eric Hambro, Faisal Azhar, et al. 2023. Llama: Open and efficient foundation language models. arXiv preprint arXiv:2302.13971 (2023)."},{"key":"e_1_3_2_2_24_1","volume-title":"MARIA SOLE Pellegrino, et al","author":"Vanacore Amalia","year":"2017","unstructured":"Amalia Vanacore, MARIA SOLE Pellegrino, et al. 2017. Benchmarking Rater Agreement Indices: Statistical Properties and Power Analysis. ENBIS-17 PROGRAMME AND ABSTRACTS (2017), 100\u2013100."},{"key":"e_1_3_2_2_25_1","doi-asserted-by":"crossref","first-page":"3615","DOI":"10.1038\/s41467-025-58551-6","article-title":"An automated framework for assessing how well LLMs cite relevant medical references","volume":"16","author":"Wu Kevin","year":"2025","unstructured":"Kevin Wu, Eric Wu, Kevin Wei, Angela Zhang, Allison Casasola, Teresa Nguyen, Sith Riantawan, Patricia Shi, Daniel Ho, and James Zou. 2025. An automated framework for assessing how well LLMs cite relevant medical references. Nature Communications 16, 1 (2025), 3615.","journal-title":"Nature Communications"},{"key":"e_1_3_2_2_26_1","volume-title":"Bertscore: Evaluating text generation with bert. arXiv preprint arXiv:1904.09675","author":"Zhang Tianyi","year":"2019","unstructured":"Tianyi Zhang, Varsha Kishore, Felix Wu, Kilian Q Weinberger, and Yoav Artzi. 2019. Bertscore: Evaluating text generation with bert. arXiv preprint arXiv:1904.09675 (2019)."}],"event":{"name":"BCB Companion '25: Companion Proceedings of the 16th ACM International Conference on Bioinformatics, Computational Biology and Health Informatics","location":"Element Philadelphia Downtown Philadelphia PA USA","acronym":"BCB Companion '25","sponsor":["SIGBio ACM Special Interest Group on Bioinformatics"]},"container-title":["Companion Proceedings of the 16th ACM International Conference on Bioinformatics, Computational Biology and Health Informatics"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3768322.3769017","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,5]],"date-time":"2025-12-05T21:40:26Z","timestamp":1764970826000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3768322.3769017"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,11]]},"references-count":26,"alternative-id":["10.1145\/3768322.3769017","10.1145\/3768322"],"URL":"https:\/\/doi.org\/10.1145\/3768322.3769017","relation":{},"subject":[],"published":{"date-parts":[[2025,10,11]]},"assertion":[{"value":"2025-12-05","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}