{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,26]],"date-time":"2026-03-26T11:23:51Z","timestamp":1774524231302,"version":"3.50.1"},"reference-count":30,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2026,2,19]],"date-time":"2026-02-19T00:00:00Z","timestamp":1771459200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0"},{"start":{"date-parts":[[2026,3,26]],"date-time":"2026-03-26T00:00:00Z","timestamp":1774483200000},"content-version":"vor","delay-in-days":35,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0"}],"funder":[{"name":"Fondation AP-HP"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["BMC Med Inform Decis Mak"],"DOI":"10.1186\/s12911-026-03360-0","type":"journal-article","created":{"date-parts":[[2026,2,19]],"date-time":"2026-02-19T09:48:04Z","timestamp":1771494484000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Quantifying the effects of pseudonymisation on epidemiological research reliability: a tailored evaluation using a clinical data warehouse"],"prefix":"10.1186","volume":"26","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-2550-9773","authenticated-orcid":false,"given":"Ariel","family":"Cohen","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yannick","family":"Jacob","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6373-8956","authenticated-orcid":false,"given":"Gilles","family":"Chatellier","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7616-2709","authenticated-orcid":false,"given":"Charline","family":"Jean","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Beno\u00eet","family":"Playe","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Alexandre","family":"Mouchet","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6166-149X","authenticated-orcid":false,"given":"Etienne","family":"Audureau","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4057-416X","authenticated-orcid":false,"given":"Antoine","family":"Boutet","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6413-5188","authenticated-orcid":false,"given":"Romain","family":"Bey","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2026,2,19]]},"reference":[{"key":"3360_CR1","unstructured":"Datta S, Posada J, Olson G, Li W, O\u2019Reilly C, Balraj D, et al. A new paradigm for accelerating clinical data science at Stanford Medicine [Internet]. arXiv; 2020 [cited 2025 Mar 21]. Available from: http:\/\/arxiv.org\/abs\/2003.10534."},{"key":"3360_CR2","doi-asserted-by":"crossref","unstructured":"De Kok JWTM, De La Hoz M\u00c1A, De Jong Y, Brokke V, Elbers PWG, Thoral P, et al. A guide to sharing open healthcare data under the general data protection regulation. Sci Data. 2023;10(1):404.","DOI":"10.1038\/s41597-023-02256-2"},{"key":"3360_CR3","doi-asserted-by":"crossref","unstructured":"Tannier X, Wajsb\u00fcrt P, Calliger A, Dura B, Mouchet A, Hilka M, et al. Development and validation of a natural Language processing algorithm to pseudonymize documents in the context of a clinical data warehouse. Methods Inf Med [Internet]. 2024; Available from: https:\/\/hal.science\/hal-04752779.","DOI":"10.1055\/s-0044-1778693"},{"issue":"6221","key":"3360_CR4","doi-asserted-by":"publisher","first-page":"536","DOI":"10.1126\/science.1256297","volume":"347","author":"YA De Montjoye","year":"2015","unstructured":"De Montjoye YA, Radaelli L, Singh VK, Pentland A. Sandy. Unique in the shopping mall: on the reidentifiability of credit card metadata. Science. 2015;347(6221):536\u20139.","journal-title":"Science"},{"key":"3360_CR5","first-page":"2017082801","volume":"2017","author":"L Sweeney","year":"2017","unstructured":"Sweeney L, Yoo JS, Perovich L, Boronow KE, Brown P, Brody JG. Re-identification risks in HIPAA safe harbor data: A study of data from one environmental health study. Technol Sci. 2017;2017:2017082801.","journal-title":"Technol Sci"},{"issue":"8","key":"3360_CR6","doi-asserted-by":"publisher","first-page":"e2218605120","DOI":"10.1073\/pnas.2218605120","volume":"120","author":"T Dick","year":"2023","unstructured":"Dick T, Dwork C, Kearns M, Liu T, Roth A, Vietri G, et al. Confidence-ranked reconstruction of census microdata from published statistics. Proc Natl Acad Sci. 2023;120(8):e2218605120.","journal-title":"Proc Natl Acad Sci"},{"issue":"3","key":"3360_CR7","doi-asserted-by":"publisher","first-page":"322","DOI":"10.1136\/jamia.2009.002725","volume":"17","author":"G Loukides","year":"2010","unstructured":"Loukides G, Denny JC, Malin B. The disclosure of diagnosis codes can breach research participants\u2019 privacy. J Am Med Inf Assoc. 2010;17(3):322\u20137.","journal-title":"J Am Med Inf Assoc"},{"issue":"1","key":"3360_CR8","doi-asserted-by":"publisher","first-page":"180298","DOI":"10.1038\/sdata.2018.298","volume":"6","author":"BP Hejblum","year":"2019","unstructured":"Hejblum BP, Weber GM, Liao KP, Palmer NP, Churchill S, Shadick NA, et al. Probabilistic record linkage of de-identified research datasets with discrepancies using diagnosis codes. Sci Data. 2019;6(1):180298.","journal-title":"Sci Data"},{"issue":"6117","key":"3360_CR9","doi-asserted-by":"publisher","first-page":"262","DOI":"10.1126\/science.339.6117.262","volume":"339","author":"J Bohannon","year":"2013","unstructured":"Bohannon J. Genetics. Genealogy databases enable naming of anonymous DNA donors. Science. 2013;339(6117):262.","journal-title":"Science"},{"issue":"6117","key":"3360_CR10","doi-asserted-by":"publisher","first-page":"321","DOI":"10.1126\/science.1229566","volume":"339","author":"M Gymrek","year":"2013","unstructured":"Gymrek M, McGuire AL, Golan D, Halperin E, Erlich Y. Identifying personal genomes by surname inference. Science. 2013;339(6117):321\u20134.","journal-title":"Science"},{"issue":"1","key":"3360_CR11","doi-asserted-by":"publisher","first-page":"1376","DOI":"10.1038\/srep01376","volume":"3","author":"YA De Montjoye","year":"2013","unstructured":"De Montjoye YA, Hidalgo CA, Verleysen M, Blondel VD. Unique in the crowd: the privacy bounds of human mobility. Sci Rep. 2013;3(1):1376.","journal-title":"Sci Rep"},{"issue":"5","key":"3360_CR12","doi-asserted-by":"publisher","first-page":"1612","DOI":"10.3390\/ijerph17051612","volume":"17","author":"A Pika","year":"2020","unstructured":"Pika A, Wynn MT, Budiono S, Ter Hofstede AHM, Van Der Aalst WMP, Reijers HA. Privacy-Preserving process mining in healthcare. Int J Environ Res Public Health. 2020;17(5):1612.","journal-title":"Int J Environ Res Public Health"},{"issue":"1","key":"3360_CR13","doi-asserted-by":"publisher","first-page":"37","DOI":"10.1038\/s41746-023-00771-5","volume":"6","author":"M Guillaudeux","year":"2023","unstructured":"Guillaudeux M, Rousseau O, Petot J, Bennis Z, Dein CA, Goronflot T, et al. Patient-centric synthetic data generation, no reason to risk re-identification in biomedical data analysis. Npj Digit Med. 2023;6(1):37.","journal-title":"Npj Digit Med"},{"key":"3360_CR14","doi-asserted-by":"crossref","unstructured":"Kamdje Wabo G, Prasser F, Gierend K, Siegel F, Ganslandt T. Data quality\u2013 and utility-compliant anonymization of common data model\u2013harmonized electronic health record data: protocol for a scoping review. JMIR Res Protoc. 2023;12:e46471.","DOI":"10.2196\/46471"},{"issue":"1","key":"3360_CR15","doi-asserted-by":"publisher","first-page":"147","DOI":"10.1186\/s12911-024-02545-9","volume":"24","author":"E Im","year":"2024","unstructured":"Im E, Kim H, Lee H, Jiang X, Kim JH. Exploring the tradeoff between data privacy and utility with a clinical data analysis use case. BMC Med Inf Decis Mak. 2024;24(1):147.","journal-title":"BMC Med Inf Decis Mak"},{"issue":"2","key":"3360_CR16","doi-asserted-by":"publisher","first-page":"e0000735","DOI":"10.1371\/journal.pdig.0000735","volume":"4","author":"P D Pau","year":"2025","unstructured":"Pau D, Bachot C, Monteil C, Vinet L, Boucher M, Sella N, et al. Comparison of anonymization techniques regarding statistical reproducibility. Armengol De La Hoz M\u00c1, editor. PLOS Digit Health. 2025 Feb 3;4(2):e0000735.","journal-title":"PLOS Digit Health"},{"issue":"4","key":"3360_CR17","doi-asserted-by":"publisher","first-page":"608","DOI":"10.1017\/S0950268811001208","volume":"140","author":"AL Soilly","year":"2012","unstructured":"Soilly AL, Ferdynus C, Desplanches O, Grimaldi M, Gouyon JB. Paediatric intensive care admissions for respiratory syncytial virus bronchiolitis in france: results of a retrospective survey and evaluation of the validity of a medical information system programme. Epidemiol Infect. 2012;140(4):608\u201316.","journal-title":"Epidemiol Infect"},{"issue":"1","key":"3360_CR18","doi-asserted-by":"publisher","first-page":"117","DOI":"10.1186\/s13613-021-00884-8","volume":"11","author":"M Fartoukh","year":"2021","unstructured":"Fartoukh M, Voiriot G, Gu\u00e9rin L, Ricard JD, Combes A, Faure M, et al. Seasonal burden of severe influenza virus infection in the critically ill patients, using the assistance Publique-H\u00f4pitaux de Paris clinical data warehouse: a pilot study. Ann Intensive Care. 2021;11(1):117.","journal-title":"Ann Intensive Care"},{"issue":"11","key":"3360_CR19","doi-asserted-by":"publisher","first-page":"3680","DOI":"10.1007\/s11695-019-04053-6","volume":"29","author":"A Lazzati","year":"2019","unstructured":"Lazzati A, Chatellier G, Katsahian S. Readmissions after bariatric surgery in France, 2013\u20132016: a nationwide study on administrative data. Obes Surg. 2019;29(11):3680\u20139.","journal-title":"Obes Surg"},{"issue":"12","key":"3360_CR20","doi-asserted-by":"publisher","first-page":"1988","DOI":"10.1002\/ijc.34675","volume":"153","author":"E Kempf","year":"2023","unstructured":"Kempf E, Priou S, Lam\u00e9 G, Laurent A, Gu\u00e9vel E, Tzedakis S, et al. No changes in clinical presentation, treatment strategies and survival of pancreatic cancer cases during the SARS-COV \u20102 outbreak: A retrospective multicenter cohort study on real\u2010world data. Int J Cancer. 2023;153(12):1988\u201396.","journal-title":"Int J Cancer"},{"key":"3360_CR21","doi-asserted-by":"crossref","unstructured":"Jean C, Paillaud E, Boudou-Rouquette P, Martinez-Tapia C, Pamoukdjian F, Hag\u00e8ge M, et al. Hospital care trajectories of older adults with cancer and the associated clinical profiles. Oncologist. 2024;oyae301.","DOI":"10.1101\/2024.02.02.24302125"},{"issue":"1","key":"3360_CR22","doi-asserted-by":"publisher","first-page":"456","DOI":"10.1038\/s41597-022-01561-6","volume":"9","author":"ME Ferr\u00e3o","year":"2022","unstructured":"Ferr\u00e3o ME, Prata P, Fazendeiro P. Utility-driven assessment of anonymized data via clustering. Sci Data. 2022;9(1):456.","journal-title":"Sci Data"},{"issue":"1","key":"3360_CR23","doi-asserted-by":"publisher","first-page":"3069","DOI":"10.1038\/s41467-019-10933-3","volume":"10","author":"L Rocher","year":"2019","unstructured":"Rocher L, Hendrickx JM, De Montjoye YA. Estimating the success of re-identifications in incomplete datasets using generative models. Nat Commun. 2019;10(1):3069.","journal-title":"Nat Commun"},{"issue":"1","key":"3360_CR24","doi-asserted-by":"publisher","first-page":"49","DOI":"10.1038\/s41746-025-01431-6","volume":"8","author":"N Sella","year":"2025","unstructured":"Sella N, Guinot F, Lagrange N, Albou LP, Desponds J, Isambert H. Preserving information while respecting privacy through an information theoretic framework for synthetic health data generation. Npj Digit Med. 2025;8(1):49.","journal-title":"Npj Digit Med"},{"issue":"6","key":"3360_CR25","doi-asserted-by":"publisher","first-page":"1280","DOI":"10.1093\/jamia\/ocae069","volume":"31","author":"T Petit-Jean","year":"2024","unstructured":"Petit-Jean T, G\u00e9rardin C, Berthelot E, Chatellier G, Frank M, Tannier X, et al. Collaborative and privacy-enhancing workflows on a clinical data warehouse: an example developing natural Language processing pipelines to detect medical conditions. J Am Med Inf Assoc. 2024;31(6):1280\u201390.","journal-title":"J Am Med Inf Assoc"},{"issue":"6","key":"3360_CR26","doi-asserted-by":"publisher","first-page":"367","DOI":"10.1136\/medethics-2019-105472","volume":"46","author":"E Ford","year":"2020","unstructured":"Ford E, Oswald M, Hassan L, Bozentko K, Nenadic G, Cassell J. Should free-text data in electronic medical records be shared for research? A citizens\u2019 jury study in the UK. J Med Ethics. 2020;46(6):367\u201377.","journal-title":"J Med Ethics"},{"issue":"6415","key":"3360_CR27","doi-asserted-by":"publisher","first-page":"690","DOI":"10.1126\/science.aau4832","volume":"362","author":"Y Erlich","year":"2018","unstructured":"Erlich Y, Shor T, Pe\u2019er I, Carmi S. Identity inference of genomic data using long-range Familial searches. Science. 2018;362(6415):690\u20134.","journal-title":"Science"},{"issue":"3","key":"3360_CR28","doi-asserted-by":"publisher","first-page":"e112","DOI":"10.2196\/jmir.7763","volume":"20","author":"MP Tully","year":"2018","unstructured":"Tully MP, Bozentko K, Clement S, Hunn A, Hassan L, Norris R, et al. Investigating the extent to which patients should control access to patient records for research: A deliberative process using citizens\u2019 juries. J Med Internet Res. 2018;20(3):e112.","journal-title":"J Med Internet Res"},{"key":"3360_CR29","doi-asserted-by":"crossref","unstructured":"Lebrun T, B\u00e9ziaud L, Allard T, Boutet A, Gambs S, Maouche M. Synthetic data: generate avatar data on demand. In: Barhamgi M, Wang H, Wang X, editors. Web Information Systems Engineering \u2013 WISE 2024 [Internet]. Singapore: Springer Nature Singapore; 2025 [cited 2025 Jan 17]. p. 193\u2013203. (Lecture Notes in Computer Science; vol. 15440). Available from: https:\/\/link.springer.com\/10.1007\/978-981-96-0576-7_15.","DOI":"10.1007\/978-981-96-0576-7_15"},{"issue":"2","key":"3360_CR30","doi-asserted-by":"publisher","first-page":"e157","DOI":"10.1016\/S2589-7500(24)00196-1","volume":"7","author":"A Arora","year":"2025","unstructured":"Arora A, Wagner SK, Carpenter R, Jena R, Keane PA. The urgent need to accelerate synthetic data privacy frameworks for medical research. Lancet Digit Health. 2025;7(2):e157\u201360.","journal-title":"Lancet Digit Health"}],"container-title":["BMC Medical Informatics and Decision Making"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/article\/10.1186\/s12911-026-03360-0","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1186\/s12911-026-03360-0.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1186\/s12911-026-03360-0.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,3,26]],"date-time":"2026-03-26T10:46:22Z","timestamp":1774521982000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1186\/s12911-026-03360-0"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,2,19]]},"references-count":30,"journal-issue":{"issue":"1","published-online":{"date-parts":[[2026,12]]}},"alternative-id":["3360"],"URL":"https:\/\/doi.org\/10.1186\/s12911-026-03360-0","relation":{},"ISSN":["1472-6947"],"issn-type":[{"value":"1472-6947","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026,2,19]]},"assertion":[{"value":"7 July 2025","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"27 January 2026","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"19 February 2026","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The research was performed in accordance with relevant guidelines and regulations and approved by the institutional review board (IRB) of the Greater Paris University Hospitals (IRB00011591), administrative decision CSE23-13_EDS-PRIVACY. The Clinical Data warehouse of the Greater Paris University Hospitals and its IRB have been authorised by the CNIL (Commission Nationale de l\u2019Informatique et des Libert\u00e9s \u2013 French data protection authority) since January 19, 2017 (authorisation no. 1980120). This research was performed in accordance with the Declaration of Helsinki. French regulation does not require the patient\u2019s written consent for this type of research. In accordance with the European General Data Protection Regulation (GDPR) all patients were informed and those who opted out of the secondary use of their data for research were excluded from the study.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethics approval and consent to participate"}},{"value":"Not applicable.","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Consent for publication"}},{"value":"The authors declare no competing interests.","order":4,"name":"Ethics","group":{"name":"EthicsHeading","label":"Competing interests"}}],"article-number":"87"}}