{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,31]],"date-time":"2026-01-31T10:48:01Z","timestamp":1769856481999,"version":"3.49.0"},"reference-count":32,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2025,6,10]],"date-time":"2025-06-10T00:00:00Z","timestamp":1749513600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0"},{"start":{"date-parts":[[2025,6,10]],"date-time":"2025-06-10T00:00:00Z","timestamp":1749513600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["npj Digit. Med."],"DOI":"10.1038\/s41746-025-01646-7","type":"journal-article","created":{"date-parts":[[2025,6,10]],"date-time":"2025-06-10T22:08:33Z","timestamp":1749593313000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Empirical evaluation of artificial intelligence distillation techniques for ascertaining cancer outcomes from electronic health records"],"prefix":"10.1038","volume":"8","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-4249-0311","authenticated-orcid":false,"given":"Irbaz Bin","family":"Riaz","sequence":"first","affiliation":[]},{"given":"Syed Arsalan Ahmed","family":"Naqvi","sequence":"additional","affiliation":[]},{"given":"Noman","family":"Ashraf","sequence":"additional","affiliation":[]},{"given":"Gordon J.","family":"Harris","sequence":"additional","affiliation":[]},{"given":"Kenneth L.","family":"Kehl","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,6,10]]},"reference":[{"key":"1646_CR1","doi-asserted-by":"publisher","DOI":"10.1038\/s41467-021-27358-6","volume":"12","author":"KL Kehl","year":"2021","unstructured":"Kehl, K. L. et al. Artificial intelligence-aided clinical annotation of a large multi-cancer genomic dataset. Nat. Commun. 12, 7304 (2021).","journal-title":"Nat. Commun."},{"key":"1646_CR2","doi-asserted-by":"publisher","first-page":"680","DOI":"10.1200\/CCI.20.00020","volume":"4","author":"KL Kehl","year":"2020","unstructured":"Kehl, K. L. et al. Natural language processing to ascertain cancer outcomes from medical oncologist notes. JCO Clin. Cancer Inf. 4, 680\u2013690 (2020).","journal-title":"JCO Clin. Cancer Inf."},{"key":"1646_CR3","doi-asserted-by":"publisher","first-page":"1421","DOI":"10.1001\/jamaoncol.2019.1800","volume":"5","author":"KL Kehl","year":"2019","unstructured":"Kehl, K. L. et al. Assessment of deep natural language processing in ascertaining oncologic outcomes from radiology reports. JAMA Oncol. 5, 1421\u20131429 (2019).","journal-title":"JAMA Oncol."},{"key":"1646_CR4","doi-asserted-by":"publisher","DOI":"10.1186\/s12911-021-01533-7","volume":"21","author":"A Casey","year":"2021","unstructured":"Casey, A. et al. A systematic review of natural language processing applied to radiology reports. BMC Med. Inform. Decis. Mak. 21, 179 (2021).","journal-title":"BMC Med. Inform. Decis. Mak."},{"key":"1646_CR5","doi-asserted-by":"publisher","first-page":"1657","DOI":"10.1093\/jamia\/ocad133","volume":"30","author":"RSYC Tan","year":"2023","unstructured":"Tan, R. S. Y. C. et al. Inferring cancer disease response from radiology reports using large language models with data augmentation and prompting. J. Am. Med. Inform. Assoc. 30, 1657\u20131664 (2023).","journal-title":"J. Am. Med. Inform. Assoc."},{"key":"1646_CR6","doi-asserted-by":"publisher","first-page":"59","DOI":"10.1158\/2159-8290.CD-20-0419","volume":"11","author":"KC Arbour","year":"2021","unstructured":"Arbour, K. C. et al. Deep learning to estimate RECIST in patients with NSCLC treated with PD-1 blockade. Cancer Discov. 11, 59\u201367 (2021).","journal-title":"Cancer Discov."},{"key":"1646_CR7","doi-asserted-by":"publisher","first-page":"228","DOI":"10.1016\/j.ejca.2008.10.026","volume":"45","author":"EA Eisenhauer","year":"2009","unstructured":"Eisenhauer, E. A. et al. New response evaluation criteria in solid tumours: revised RECIST guideline (version 1.1). Eur. J. Cancer 45, 228\u2013247 (2009).","journal-title":"Eur. J. Cancer"},{"key":"1646_CR8","unstructured":"Office for Civil Right, H. Std. for privacy of individually identifiable health information. Final rule. Federal Register (2002)."},{"key":"1646_CR9","unstructured":"Health Insurance Portability and Accountability Act (HIPAA) Privacy Rule. https:\/\/www.hhs.gov\/hipaa\/index.html."},{"key":"1646_CR10","doi-asserted-by":"publisher","DOI":"10.1038\/s41598-023-48034-3","volume":"13","author":"J Hartley","year":"2023","unstructured":"Hartley, J., Sanchez, P. P., Haider, F. & Tsaftaris, S. A. Neural networks memorise personal information from one sample. Sci. Rep. 13, 21366 (2023).","journal-title":"Sci. Rep."},{"key":"1646_CR11","unstructured":"Neel, S. & Chang, P. Privacy issues in large language models: a survey. arXiv preprint arXiv:2312.06717 (2023)."},{"key":"1646_CR12","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3501813","volume":"13","author":"RS Antunes","year":"2022","unstructured":"Antunes, R. S., Andr\u00e9 da Costa, C., K\u00fcderle, A., Yari, I. A. & Eskofier, B. Federated learning for healthcare: systematic review and architecture proposal. ACM Trans. Intell. Syst. Technol. (TIST) 13, 1\u201323 (2022).","journal-title":"ACM Trans. Intell. Syst. Technol. (TIST)"},{"key":"1646_CR13","doi-asserted-by":"publisher","first-page":"3048","DOI":"10.1109\/TPAMI.2021.3055564","volume":"44","author":"L Wang","year":"2021","unstructured":"Wang, L. & Yoon, K.-J. Knowledge distillation and student-teacher learning for visual intelligence: a review and new outlooks. IEEE Trans. Pattern Anal. Mach. Intell. 44, 3048\u20133068 (2021).","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"1646_CR14","unstructured":"Papernot, N. et al. Scalable private learning with pate. arXiv preprint arXiv:1802.08908 (2018)."},{"key":"1646_CR15","doi-asserted-by":"publisher","DOI":"10.1038\/s41467-024-54071-x","volume":"15","author":"KL Kehl","year":"2024","unstructured":"Kehl, K. L. et al. Shareable artificial intelligence to extract cancer outcomes from electronic health records for precision oncology research. Nat. Commun. 15, 9787 (2024).","journal-title":"Nat. Commun."},{"key":"1646_CR16","doi-asserted-by":"crossref","unstructured":"Abadi, M. et al. Deep learning with differential privacy. In Proc. ACM SIGSAC Conference on Computer and Communications Security (ACM, 2016).","DOI":"10.1145\/2976749.2978318"},{"key":"1646_CR17","unstructured":"Jagielski, M. et al. Students parrot their teachers: membership inference on model distillation. In Proc. 37th International Conference on Neural Information Processing Systems (NIPS \u201823). 44382\u201344397 (Curran Associates Inc., Red Hook, NY, USA, 2023)."},{"key":"1646_CR18","doi-asserted-by":"publisher","first-page":"3732","DOI":"10.1109\/TNNLS.2019.2934906","volume":"31","author":"T Matiisen","year":"2019","unstructured":"Matiisen, T., Oliver, A., Cohen, T. & Schulman, J. Teacher\u2013student curriculum learning. IEEE Trans. Neural Netw. Learn. Syst. 31, 3732\u20133740 (2019).","journal-title":"IEEE Trans. Neural Netw. Learn. Syst."},{"key":"1646_CR19","doi-asserted-by":"publisher","unstructured":"Zhao, J. et al. Multistage collaborative knowledge distillation from a large language model for semi-supervised sequence generation. In Proc. 62nd Annual Meeting of the Association for Computational Linguistics. Vol. 1, Long Papers (eds Ku, L. W., Martins, A. & Srikumar, V.) 14201\u201314214 (Association for Computational Linguistics, 2024). https:\/\/doi.org\/10.18653\/v1\/2024.acl-long.766.","DOI":"10.18653\/v1\/2024.acl-long.766"},{"key":"1646_CR20","doi-asserted-by":"crossref","unstructured":"Ribeiro, M. T., Singh, S. & Guestrin, C. In Proc. 22nd ACM SIGKDD International Conference on Knowledge Discovery and Data Mining. 1135\u20131144 (ACM, 2016).","DOI":"10.1145\/2939672.2939778"},{"key":"1646_CR21","doi-asserted-by":"publisher","DOI":"10.1016\/j.compbiomed.2023.106848","volume":"158","author":"N Khalid","year":"2023","unstructured":"Khalid, N., Qayyum, A., Bilal, M., Al-Fuqaha, A. & Qadir, J. Privacy-preserving artificial intelligence in healthcare: techniques and applications. Comput. Biol. Med. 158, 106848 (2023).","journal-title":"Comput. Biol. Med."},{"key":"1646_CR22","doi-asserted-by":"publisher","first-page":"339","DOI":"10.1016\/j.neunet.2022.06.022","volume":"153","author":"R Venugopal","year":"2022","unstructured":"Venugopal, R. et al. Privacy preserving generative adversarial networks to model electronic health records. Neural Netw. 153, 339\u2013348 (2022).","journal-title":"Neural Netw."},{"key":"1646_CR23","doi-asserted-by":"publisher","first-page":"251","DOI":"10.1080\/08839514.2020.1718343","volume":"34","author":"V Yogarajan","year":"2020","unstructured":"Yogarajan, V., Pfahringer, B. & Mayo, M. A review of automatic end-to-end de-identification: is high accuracy the only metric? Appl. Artif. Intell. 34, 251\u2013269 (2020).","journal-title":"Appl. Artif. Intell."},{"key":"1646_CR24","doi-asserted-by":"publisher","first-page":"S31","DOI":"10.4103\/sja.SJA_543_18","volume":"13","author":"S Cuschieri","year":"2019","unstructured":"Cuschieri, S. The STROBE guidelines. Saudi J. Anaesth. 13, S31\u2013s34 (2019).","journal-title":"Saudi J. Anaesth."},{"key":"1646_CR25","doi-asserted-by":"publisher","unstructured":"Nawrot, P. et al. Hierarchical transformers are more efficient language models. In Findings of the Association for Computational Linguistics: NAACL 2022 (eds Carpuat, M., de Marneffe M. C. & Ruiz, M. IV) 1559\u20131571 (Association for Computational Linguistics, 2022). https:\/\/doi.org\/10.18653\/v1\/2022.findings-naacl.117.","DOI":"10.18653\/v1\/2022.findings-naacl.117"},{"key":"1646_CR26","doi-asserted-by":"publisher","unstructured":"Alsentzer, E. et al. Publicly available clinical BERT embeddings. In Proc. 2nd Clinical Natural Language Processing Workshop (eds Rumshisky, A., Roberts, K., Bethard, S. & Naumann, T.). 72\u201378 (Association for Computational Linguistics, 2019). https:\/\/doi.org\/10.18653\/v1\/W19-1909.","DOI":"10.18653\/v1\/W19-1909"},{"key":"1646_CR27","unstructured":"Paszke, A. et al. PyTorch: an imperative style, high-performance deep learning library. In Proc. 33rd International Conference on Neural Information Processing Systems, 8026\u20138037 (Red Hook, NY, USA, Curran Associates Inc, 2019)."},{"key":"1646_CR28","doi-asserted-by":"publisher","DOI":"10.1038\/s41597-022-01899-x","volume":"10","author":"AEW Johnson","year":"2023","unstructured":"Johnson, A. E. W. et al. MIMIC-IV, a freely accessible electronic health record dataset. Sci. Data 10, 1 (2023).","journal-title":"Sci. Data"},{"key":"1646_CR29","unstructured":"Merity, S., Xiong, C., Bradbury, J. & Socher, R. Pointer sentinel mixture models. arXiv preprint arXiv:1609.07843 (2016)."},{"key":"1646_CR30","unstructured":"Wiki-text-103, Hugging Face. https:\/\/huggingface.co\/datasets\/Salesforce\/wikitext."},{"key":"1646_CR31","unstructured":"Reimers, N., Freire, P., Becquin, G., Espejel, O. & Gante, J. Sentence-Transformers\/All-MiniLM-L6-v2 Hugging Face. Available online: https:\/\/huggingface.co\/sentence-transformers\/all-MiniLM-L6-v2 (accessed on 8 February, 2025)."},{"key":"1646_CR32","doi-asserted-by":"crossref","unstructured":"Carlini, N. et al. Membership inference attacks from first principles. In Proc. IEEE Symposium on Security and Privacy (SP), 1897\u20131914 (IEEE, 2021).","DOI":"10.1109\/SP46214.2022.9833649"}],"container-title":["npj Digital Medicine"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/www.nature.com\/articles\/s41746-025-01646-7.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/www.nature.com\/articles\/s41746-025-01646-7","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/www.nature.com\/articles\/s41746-025-01646-7.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,10]],"date-time":"2025-06-10T22:08:36Z","timestamp":1749593316000},"score":1,"resource":{"primary":{"URL":"https:\/\/www.nature.com\/articles\/s41746-025-01646-7"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,6,10]]},"references-count":32,"journal-issue":{"issue":"1","published-online":{"date-parts":[[2025,12]]}},"alternative-id":["1646"],"URL":"https:\/\/doi.org\/10.1038\/s41746-025-01646-7","relation":{},"ISSN":["2398-6352"],"issn-type":[{"value":"2398-6352","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,6,10]]},"assertion":[{"value":"9 July 2024","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"12 April 2025","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"10 June 2025","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"I.B.R., S.A.A.N., N.A., K.L.K. do not report any disclosures relevant to this work. G.J.H. serves as the co-founder, chief science officer, and board member, Yunu, Inc and serves on the scientific advisory board, Fovia, Inc.","order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Competing interests"}},{"value":"This study was approved by the Institutional Review Board at Dana-Farber\/Harvard Cancer Center (IRB protocol #16-360; Strategic and Analytic Approach to Collecting Clinical and Outcomes Data and Derived Molecular Features for Annotating Profile Results). There was a waiver of informed consent given the minimal risk to patients of the medical record review and the large cohort size, which would have precluded re-approaching each patient individually for this specific study.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethical approval"}}],"article-number":"347"}}