{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,24]],"date-time":"2026-04-24T07:48:52Z","timestamp":1777016932766,"version":"3.51.4"},"publisher-location":"New York, NY, USA","reference-count":49,"publisher":"ACM","funder":[{"DOI":"10.13039\/100000002","name":"NIH (National Institutes of Health)","doi-asserted-by":"publisher","award":["R01NS133142"],"award-info":[{"award-number":["R01NS133142"]}],"id":[{"id":"10.13039\/100000002","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000002","name":"NIH (National Institutes of Health)","doi-asserted-by":"publisher","award":["R01HD101246"],"award-info":[{"award-number":["R01HD101246"]}],"id":[{"id":"10.13039\/100000002","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,12,17]]},"DOI":"10.1145\/3799830.3799848","type":"proceedings-article","created":{"date-parts":[[2026,4,24]],"date-time":"2026-04-24T06:45:08Z","timestamp":1777013108000},"page":"164-172","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["LLMs for Causal Reasoning in Medicine? A Call for Caution"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-8604-3890","authenticated-orcid":false,"given":"Saurabh","family":"Mathur","sequence":"first","affiliation":[{"name":"TU Darmstadt, Darmstadt, Germany"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-8562-9840","authenticated-orcid":false,"given":"Ranveer","family":"Singh","sequence":"additional","affiliation":[{"name":"University of Texas at Dallas, Richardson, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6405-5487","authenticated-orcid":false,"given":"Michael","family":"Skinner","sequence":"additional","affiliation":[{"name":"University of Texas at Dallas, Richardson, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6769-0793","authenticated-orcid":false,"given":"Predrag","family":"Radivojac","sequence":"additional","affiliation":[{"name":"Northeastern University, Boston, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8379-0743","authenticated-orcid":false,"given":"David M.","family":"Haas","sequence":"additional","affiliation":[{"name":"Indiana University School of Medicine, Indianapolis, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7676-1346","authenticated-orcid":false,"given":"Lakshmi","family":"Raman","sequence":"additional","affiliation":[{"name":"University of Texas Southwestern Medical Center, Dallas, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-2849-2126","authenticated-orcid":false,"given":"Sriraam","family":"Natarajan","sequence":"additional","affiliation":[{"name":"University of Texas at Dallas, Richardson, USA"}]}],"member":"320","published-online":{"date-parts":[[2026,4,23]]},"reference":[{"key":"e_1_3_3_2_2_2","doi-asserted-by":"crossref","unstructured":"S. Acid and L.\u00a0M. De\u00a0Campos. 2003. Searching for Bayesian network structures in the space of restricted acyclic partially directed graphs. JAIR 18 (May 2003) 445\u2013490.","DOI":"10.1613\/jair.1061"},{"key":"e_1_3_3_2_3_2","unstructured":"Bang An Shiyue Zhang and Mark Dredze. 2025. RAG LLMs are Not Safer: A Safety Analysis of Retrieval-Augmented Generation for Large Language Models. arXiv:https:\/\/arXiv.org\/abs\/2504.18041\u00a0[cs.CL] https:\/\/arxiv.org\/abs\/2504.18041"},{"key":"e_1_3_3_2_4_2","unstructured":"Anthropic. 2023. Claude: An AI Assistant. https:\/\/www.anthropic.com\/claude. Accessed: 2025-05-26."},{"key":"e_1_3_3_2_5_2","first-page":"52","volume-title":"Uncertainty in Artificial Intelligence","author":"Buntine Wray","year":"1991","unstructured":"Wray Buntine. 1991. Theory refinement on Bayesian networks. In Uncertainty in Artificial Intelligence. Elsevier, 52\u201360."},{"key":"e_1_3_3_2_6_2","doi-asserted-by":"crossref","unstructured":"Benjamin Chin-Yee and Ross Upshur. 2018. Clinical judgement in the era of big data and predictive analytics. Journal of Evaluation in Clinical Practice 24 3 (2018) 638\u2013645.","DOI":"10.1111\/jep.12852"},{"key":"e_1_3_3_2_7_2","doi-asserted-by":"crossref","unstructured":"Jurgen\u00a0AHR Claassen Dick\u00a0HJ Thijssen Ronney\u00a0B Panerai and Frank\u00a0M Faraci. 2021. Regulation of cerebral blood flow in humans: physiology and clinical implications of autoregulation. Physiological reviews 101 4 (2021) 1487\u20131559.","DOI":"10.1152\/physrev.00022.2020"},{"key":"e_1_3_3_2_8_2","unstructured":"DeepSeek-AI Aixin Liu Bei Feng Bing Xue et\u00a0al. 2025. DeepSeek-V3 Technical Report. arXiv:https:\/\/arXiv.org\/abs\/2412.19437\u00a0[cs.CL] https:\/\/arxiv.org\/abs\/2412.19437"},{"key":"e_1_3_3_2_9_2","doi-asserted-by":"crossref","unstructured":"Aaron\u00a0J Fried Spencer\u00a0D Dorn William\u00a0J Leland Emily Mullen Donna\u00a0M Williams Aimee\u00a0K Zaas Jack MacGuire and Debra\u00a0L Bynum. 2024. Large language models in internal medicine residency: current use and attitudes among internal medicine residents. Discover Artificial Intelligence 4 1 (2024) 70.","DOI":"10.1007\/s44163-024-00173-w"},{"key":"e_1_3_3_2_10_2","doi-asserted-by":"crossref","unstructured":"Clark Glymour and Madelyn\u00a0R Glymour. 2014. Commentary: race and sex are causes. Epidemiology 25 4 (2014) 488\u2013490.","DOI":"10.1097\/EDE.0000000000000122"},{"key":"e_1_3_3_2_11_2","doi-asserted-by":"publisher","unstructured":"Ruocheng Guo Lu Cheng Jundong Li P.\u00a0Richard Hahn and Huan Liu. 2020. A Survey of Learning Causality with Data: Problems and Methods. ACM Comput. Surv. 53 4 Article 75 (July 2020) 37\u00a0pages. 10.1145\/3397269","DOI":"10.1145\/3397269"},{"key":"e_1_3_3_2_12_2","doi-asserted-by":"crossref","unstructured":"David Heckerman Dan Geiger and David\u00a0M Chickering. 1995. Learning Bayesian networks: The combination of knowledge and statistical data. Machine learning 20 3 (1995) 197\u2013243.","DOI":"10.1023\/A:1022623210503"},{"key":"e_1_3_3_2_13_2","unstructured":"Ari Holtzman Jan Buys Li Du Maxwell Forbes and Yejin Choi. 2020. The Curious Case of Neural Text Degeneration. arXiv:https:\/\/arXiv.org\/abs\/1904.09751\u00a0[cs.CL] https:\/\/arxiv.org\/abs\/1904.09751"},{"key":"e_1_3_3_2_14_2","doi-asserted-by":"crossref","unstructured":"Dominik Janzing and Bernhard Sch\u00f6lkopf. 2010. Causal inference using the algorithmic Markov condition. IEEE Transactions on Information Theory 56 10 (2010) 5168\u20135194.","DOI":"10.1109\/TIT.2010.2060095"},{"key":"e_1_3_3_2_15_2","doi-asserted-by":"crossref","unstructured":"Subbarao Kambhampati. 2024. Can large language models reason and plan? Annals of the New York Academy of Sciences 1534 1 (2024) 15\u201318.","DOI":"10.1111\/nyas.15125"},{"key":"e_1_3_3_2_16_2","doi-asserted-by":"crossref","unstructured":"Mert Karabacak and Konstantinos Margetis. 2023. Embracing large language models for medical applications: opportunities and challenges. Cureus 15 5 (2023).","DOI":"10.7759\/cureus.39305"},{"key":"e_1_3_3_2_17_2","doi-asserted-by":"crossref","unstructured":"Neville\u00a0Kenneth Kitson Anthony\u00a0C Constantinou Zhigao Guo Yang Liu and Kiattikun Chobtham. 2023. A survey of Bayesian Network structure learning. Artificial Intelligence Review 56 8 (2023) 8721\u20138814.","DOI":"10.1007\/s10462-022-10351-w"},{"key":"e_1_3_3_2_18_2","doi-asserted-by":"crossref","unstructured":"Isaac\u00a0S Kohane. 2024. Compared with what? Measuring AI against the health care we have. New England Journal of Medicine 391 17 (2024) 1564\u20131566.","DOI":"10.1056\/NEJMp2404691"},{"key":"e_1_3_3_2_19_2","doi-asserted-by":"publisher","DOI":"10.5555\/1795555"},{"key":"e_1_3_3_2_20_2","doi-asserted-by":"crossref","unstructured":"Benjamin Kuipers and Jerome\u00a0P Kassirer. 1984. Causal reasoning in medicine: analysis of a protocol. Cognitive Science 8 4 (1984) 363\u2013385.","DOI":"10.1207\/s15516709cog0804_3"},{"key":"e_1_3_3_2_21_2","doi-asserted-by":"publisher","DOI":"10.1016\/B978-1-4832-1451-1.50034-2"},{"key":"e_1_3_3_2_22_2","unstructured":"Patrick Lewis Ethan Perez Aleksandra Piktus Fabio Petroni Vladimir Karpukhin Naman Goyal Heinrich K\u00fcttler Mike Lewis Wen-tau Yih Tim Rockt\u00e4schel et\u00a0al. 2020. Retrieval-augmented generation for knowledge-intensive nlp tasks. Advances in neural information processing systems 33 (2020) 9459\u20139474."},{"key":"e_1_3_3_2_23_2","unstructured":"Fenglin Liu Hongjian Zhou Boyang Gu Xinyu Zou Jinfa Huang Jinge Wu Yiru Li Sam\u00a0S Chen Yining Hua Peilin Zhou et\u00a0al. 2025. Application of large language models in medicine. Nature Reviews Bioengineering (2025) 1\u201320."},{"key":"e_1_3_3_2_24_2","doi-asserted-by":"crossref","unstructured":"Renqian Luo Liai Sun Yingce Xia Tao Qin Sheng Zhang Hoifung Poon and Tie-Yan Liu. 2022. BioGPT: generative pre-trained transformer for biomedical text generation and mining. Briefings in bioinformatics 23 6 (2022).","DOI":"10.1093\/bib\/bbac409"},{"key":"e_1_3_3_2_25_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-66538-7_29"},{"key":"e_1_3_3_2_26_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-95841-0_48"},{"key":"e_1_3_3_2_27_2","doi-asserted-by":"crossref","unstructured":"Liam\u00a0G McCoy Arjun\u00a0K Manrai and Adam Rodman. 2024. Large language models and the degradation of the medical record. The New England journal of medicine 391 17 (2024) 1561\u20131564.","DOI":"10.1056\/NEJMp2405999"},{"key":"e_1_3_3_2_28_2","doi-asserted-by":"crossref","unstructured":"R\u00a0Thomas McCoy Shunyu Yao Dan Friedman Mathew\u00a0D Hardy and Thomas\u00a0L Griffiths. 2024. Embers of autoregression show how large language models are shaped by the problem they are trained to solve. Proceedings of the National Academy of Sciences 121 41 (2024) e2322420121.","DOI":"10.1073\/pnas.2322420121"},{"key":"e_1_3_3_2_29_2","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v37i8.26100"},{"key":"e_1_3_3_2_30_2","volume-title":"Levenshtein Distance: Information theory, Computer science, String (computer science), String metric, Damerau?Levenshtein distance, Spell checker, Hamming distance","author":"Miller Frederic\u00a0P.","year":"2009","unstructured":"Frederic\u00a0P. Miller, Agnes\u00a0F. Vandome, and John McBrewster. 2009. Levenshtein Distance: Information theory, Computer science, String (computer science), String metric, Damerau?Levenshtein distance, Spell checker, Hamming distance. Alpha Press."},{"key":"e_1_3_3_2_31_2","unstructured":"Shervin Minaee Tomas Mikolov Narjes Nikzad Meysam Chenaghlu Richard Socher Xavier Amatriain and Jianfeng Gao. 2024. Large language models: A survey. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2402.06196 (2024)."},{"key":"e_1_3_3_2_32_2","unstructured":"Raymond\u00a0J Mooney and Jude\u00a0W Shavlik. 2021. A Recap of Early Work on Theory and Knowledge Refinement."},{"key":"e_1_3_3_2_33_2","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v39i27.35083"},{"key":"e_1_3_3_2_34_2","unstructured":"OpenAI. 2025. ChatGPT: GPT-4o Language Model. https:\/\/openai.com\/chatgpt."},{"key":"e_1_3_3_2_35_2","doi-asserted-by":"publisher","DOI":"10.1017\/CBO9780511803161"},{"key":"e_1_3_3_2_36_2","doi-asserted-by":"crossref","unstructured":"Jonas Peters and Peter B\u00fchlmann. 2015. Structural Intervention Distance for Evaluating Causal Graphs. Neural Computation 27 3 (2015) 771\u2013799.","DOI":"10.1162\/NECO_a_00708"},{"key":"e_1_3_3_2_37_2","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2021\/277"},{"key":"e_1_3_3_2_38_2","doi-asserted-by":"crossref","unstructured":"Lawrence Rabiner and Biinghwang Juang. 1986. An introduction to hidden Markov models. ieee assp magazine 3 1 (1986) 4\u201316.","DOI":"10.1109\/MASSP.1986.1165342"},{"key":"e_1_3_3_2_39_2","doi-asserted-by":"crossref","unstructured":"Pedro Sanchez Jeremy\u00a0P Voisey Tian Xia Hannah\u00a0I Watson Alison\u00a0Q O\u2019Neil and Sotirios\u00a0A Tsaftaris. 2022. Causal machine learning for healthcare and precision medicine. Royal Society Open Science 9 8 (2022) 220638.","DOI":"10.1098\/rsos.220638"},{"key":"e_1_3_3_2_40_2","doi-asserted-by":"crossref","unstructured":"Ilan\u00a0S Schwartz Katherine\u00a0E Link Roxana Daneshjou and Nicol\u00e1s Cort\u00e9s-Penfield. 2024. Black box warning: large language models and the future of infectious diseases consultation. Clinical infectious diseases 78 4 (2024) 860\u2013866.","DOI":"10.1093\/cid\/ciad633"},{"key":"e_1_3_3_2_41_2","doi-asserted-by":"crossref","unstructured":"Michael\u00a0A Shwe Blackford Middleton David\u00a0E Heckerman Max Henrion Eric\u00a0J Horvitz Harold\u00a0P Lehmann and Gregory\u00a0F Cooper. 1991. Probabilistic diagnosis using a reformulation of the INTERNIST-1\/QMR knowledge base. Methods of information in Medicine 30 04 (1991) 241\u2013255.","DOI":"10.1055\/s-0038-1634846"},{"key":"e_1_3_3_2_42_2","doi-asserted-by":"crossref","unstructured":"Karan Singhal Tao Tu Juraj Gottweis Rory Sayres Ellery Wulczyn Mohamed Amin Le Hou Kevin Clark Stephen\u00a0R Pfohl Heather Cole-Lewis et\u00a0al. 2025. Toward expert-level medical question answering with large language models. Nature Medicine 31 3 (2025) 943\u2013950.","DOI":"10.1038\/s41591-024-03423-7"},{"key":"e_1_3_3_2_43_2","volume-title":"Causation, prediction, and search","author":"Spirtes Peter","year":"2000","unstructured":"Peter Spirtes, Clark\u00a0N Glymour, and Richard Scheines. 2000. Causation, prediction, and search. MIT press."},{"key":"e_1_3_3_2_44_2","unstructured":"Gemini Team Rohan Anil Sebastian Borgeaud Jean-Baptiste Alayrac Jiahui Yu et\u00a0al. 2025. Gemini: A Family of Highly Capable Multimodal Models. arXiv:https:\/\/arXiv.org\/abs\/2312.11805\u00a0[cs.CL] https:\/\/arxiv.org\/abs\/2312.11805"},{"key":"e_1_3_3_2_45_2","unstructured":"Hugo Touvron Thibaut Lavril Gautier Izacard Xavier Martinet Marie-Anne Lachaux Timoth\u00e9e Lacroix Baptiste Rozi\u00e8re Naman Goyal Eric Hambro Faisal Azhar Aur\u00e9lien Rodriguez Armand Joulin Edouard Grave and Guillaume Lample. 2023. LLaMA: Open and Efficient Foundation Language Models. https:\/\/arxiv.org\/abs\/2302.13971"},{"key":"e_1_3_3_2_46_2","unstructured":"Ashish Vaswani Noam Shazeer Niki Parmar Jakob Uszkoreit Llion Jones Aidan\u00a0N Gomez \u0141ukasz Kaiser and Illia Polosukhin. 2017. Attention is all you need. Advances in neural information processing systems 30 (2017)."},{"key":"e_1_3_3_2_47_2","first-page":"3412","volume-title":"International Conference on Artificial Intelligence and Statistics","author":"Wahl Jonas","year":"2025","unstructured":"Jonas Wahl and Jakob Runge. 2025. Separation-Based Distance Measures for Causal Graphs. In International Conference on Artificial Intelligence and Statistics. PMLR, 3412\u20133420."},{"key":"e_1_3_3_2_48_2","doi-asserted-by":"crossref","unstructured":"Razeghi Yasaman Robert Logan\u00a0IV Gardner Matt and S Sameer. 2022. Impact of pretraining term frequencies on few-shot numerical reasoning. Findings of the Association for Computational Linguistics: EMNLP 2022 (2022) 840\u2013854.","DOI":"10.18653\/v1\/2022.findings-emnlp.59"},{"key":"e_1_3_3_2_49_2","doi-asserted-by":"crossref","unstructured":"Alessio Zanga Elif Ozkirimli and Fabio Stella. 2022. A survey on causal discovery: theory and practice. International Journal of Approximate Reasoning 151 (2022) 101\u2013129.","DOI":"10.1016\/j.ijar.2022.09.004"},{"key":"e_1_3_3_2_50_2","unstructured":"Matej Ze\u010devi\u0107 Moritz Willig Devendra\u00a0Singh Dhami and Kristian Kersting. 2023. Causal Parrots: Large Language Models May Talk Causality But Are Not Causal. Transactions on Machine Learning Research (2023)."}],"event":{"name":"CODS 2025: 13th ACM IKDD International Conference on Data Science","location":"Pune India","acronym":"CODS 2025"},"container-title":["Proceedings of the 13th ACM IKDD International Conference on Data Science"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3799830.3799848","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,4,24]],"date-time":"2026-04-24T07:18:40Z","timestamp":1777015120000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3799830.3799848"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,12,17]]},"references-count":49,"alternative-id":["10.1145\/3799830.3799848","10.1145\/3799830"],"URL":"https:\/\/doi.org\/10.1145\/3799830.3799848","relation":{},"subject":[],"published":{"date-parts":[[2025,12,17]]},"assertion":[{"value":"2026-04-23","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}