{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,5]],"date-time":"2026-01-05T15:23:46Z","timestamp":1767626626322,"version":"3.44.0"},"reference-count":32,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2025,7,15]],"date-time":"2025-07-15T00:00:00Z","timestamp":1752537600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0"},{"start":{"date-parts":[[2025,7,15]],"date-time":"2025-07-15T00:00:00Z","timestamp":1752537600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Discov Artif Intell"],"DOI":"10.1007\/s44163-025-00384-9","type":"journal-article","created":{"date-parts":[[2025,7,15]],"date-time":"2025-07-15T17:05:17Z","timestamp":1752599117000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Synthetic data generation in genomic cancer medicine: a review of global research trends in the last ten years"],"prefix":"10.1007","volume":"5","author":[{"given":"Valentina","family":"De Nicol\u00f3","sequence":"first","affiliation":[]},{"given":"Maria","family":"Frasca","sequence":"additional","affiliation":[]},{"given":"Agnese","family":"Graziosi","sequence":"additional","affiliation":[]},{"given":"Gianluca","family":"Gazzaniga","sequence":"additional","affiliation":[]},{"given":"Davide La","family":"Torre","sequence":"additional","affiliation":[]},{"given":"Arianna","family":"Pani","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,7,15]]},"reference":[{"issue":"1","key":"384_CR1","doi-asserted-by":"publisher","first-page":"26","DOI":"10.1186\/s40246-022-00396-x","volume":"16","author":"WS Alharbi","year":"2022","unstructured":"Alharbi WS, Rashid M. A review of deep learning applications in human genomics using next-generation sequencing data. Hum Genomics. 2022;16(1):26.","journal-title":"Hum Genomics"},{"key":"384_CR2","doi-asserted-by":"publisher","DOI":"10.1186\/s40537-023-00727-2","author":"L Alzubaidi","year":"2023","unstructured":"Alzubaidi L, Bai J, Al-Sabaawi A, Santamar\u00eda J, Albahri AS, Al-dabbagh BSN, Fadhel MA, Manoufali M, Zhang J, Al-Timemy AH, Duan Y, Abdullah A, Farhan L, Lu Y, Gupta A, Albu F, Abbosh A, Gu Y. A survey on deep learning tools dealing with data scarcity: definitions, challenges, solutions, tips, and applications. J Big Data. 2023. https:\/\/doi.org\/10.1186\/s40537-023-00727-2.","journal-title":"J Big Data"},{"issue":"3","key":"384_CR3","first-page":"245","volume":"6","author":"BS Ballew","year":"2009","unstructured":"Ballew BS. Elsevier\u2019s Scopus\u00ae database. J Electron Resour Med Libr. 2009;6(3):245\u201352.","journal-title":"J Electron Resour Med Libr"},{"issue":"8","key":"384_CR4","doi-asserted-by":"publisher","first-page":"087002","DOI":"10.1289\/EHP4812","volume":"127","author":"D Chan","year":"2019","unstructured":"Chan D, Shao X, Dumargne M-C, Aarabi M, Simon M-M, Kwan T, Bailey JL, Robaire B, Kimmins S, Gabriel MCS, et al. Customized methylc-capture sequencing to evaluate variation in the human sperm DNA methylome representative of altered folate metabolism. Environ Health Persp. 2019;127(8):087002.","journal-title":"Environ Health Persp"},{"issue":"3","key":"384_CR5","doi-asserted-by":"publisher","first-page":"251","DOI":"10.1016\/j.cels.2017.08.014","volume":"5","author":"TE Chan","year":"2017","unstructured":"Chan TE, Stumpf MPH, Babtie AC. Gene regulatory network inference from single-cell data using multivariate information measures. Cell Syst. 2017;5(3):251\u201367.","journal-title":"Cell Syst"},{"issue":"6","key":"384_CR6","doi-asserted-by":"publisher","first-page":"493","DOI":"10.1038\/s41551-021-00751-8","volume":"5","author":"RJ Chen","year":"2021","unstructured":"Chen RJ, Lu MY, Chen TY, Williamson DFK, Mahmood F. Synthetic data in machine learning for medicine and healthcare. Nat Biomed Eng. 2021;5(6):493\u20137.","journal-title":"Nat Biomed Eng"},{"key":"384_CR7","unstructured":"Choi E, Biswal S, Malin B, Duke J, Stewart WF, Sun J. Generating multi-label discrete patient records using generative adversarial networks. In: Machine learning for healthcare conference, 2017. p. 286\u2013305. PMLR."},{"issue":"1","key":"384_CR8","first-page":"22","volume":"1","author":"S Das","year":"2024","unstructured":"Das S, Mazumder S, Alam N, Vernekar M, Dam A, Bhowmick AK, Hajra S, Das JK, Basu B. Precision oncology in the era of genomics and artificial intelligence. J Curr Oncol Trends. 2024;1(1):22\u201330.","journal-title":"J Curr Oncol Trends"},{"issue":"17","key":"384_CR9","doi-asserted-by":"publisher","first-page":"i671","DOI":"10.1093\/bioinformatics\/bty589","volume":"34","author":"M El-Kebir","year":"2018","unstructured":"El-Kebir M. SPhyR: tumor phylogeny estimation from single-cell sequencing data under loss and error. Bioinformatics. 2018;34(17):i671\u20139.","journal-title":"Bioinformatics"},{"issue":"6","key":"384_CR10","doi-asserted-by":"publisher","first-page":"409","DOI":"10.1038\/nrg3723","volume":"15","author":"Y Erlich","year":"2014","unstructured":"Erlich Y, Narayanan A. Routes for breaching and protecting genetic privacy. Nat Rev Genet. 2014;15(6):409\u201321.","journal-title":"Nat Rev Genet"},{"issue":"7","key":"384_CR11","doi-asserted-by":"publisher","first-page":"e102451","DOI":"10.1371\/journal.pone.0102451","volume":"9","author":"CK Fisher","year":"2014","unstructured":"Fisher CK, Mehta P. Identifying keystone species in the human gut microbiome from metagenomic timeseries using sparse linear regression. PLoS One. 2014;9(7):e102451.","journal-title":"PLoS One"},{"key":"384_CR12","doi-asserted-by":"publisher","first-page":"10","DOI":"10.1007\/s44257-024-00017-y","volume":"2","author":"M Frasca","year":"2024","unstructured":"Frasca M, La Torre D, Repetto M, De Nicol\u00f2 V, Pravettoni G, Cutica I. Artificial intelligence applications to genomic data in cancer research: a review of recent trends and emerging areas. Discov Anal. 2024;2:10. https:\/\/doi.org\/10.1007\/s44257-024-00017-y.","journal-title":"Discov Anal"},{"issue":"5","key":"384_CR13","doi-asserted-by":"publisher","first-page":"128","DOI":"10.1007\/s10462-024-10768-5","volume":"57","author":"I Ghebrehiwet","year":"2024","unstructured":"Ghebrehiwet I, Zaki N, Damseh R, Mohamad MS. Revolutionizing personalized medicine with generative AI: a systematic review. Artif Intell Rev. 2024;57(5):128.","journal-title":"Artif Intell Rev"},{"issue":"3","key":"384_CR14","doi-asserted-by":"publisher","first-page":"427","DOI":"10.1093\/jamia\/ocaa196","volume":"28","author":"MA Haendel","year":"2021","unstructured":"Haendel MA, Chute CG, Bennett TD, Eichmann DA, Guinney J, Kibbe WA, Payne PRO, Pfaff ER, Robinson PN, Saltz JH, et al. The national Covid cohort collaborative (N3C): rationale, design, infrastructure, and deployment. J Am Med Informatics Assoc. 2021;28(3):427\u201343.","journal-title":"J Am Med Informatics Assoc"},{"key":"384_CR15","doi-asserted-by":"publisher","first-page":"e2300045","DOI":"10.1200\/CCI.23.00045","volume":"7","author":"F Jacobs","year":"2023","unstructured":"Jacobs F, D\u2019Amico S, Benvenuti C, Gaudio M, Saltalamacchia G, Miggiano C, De Sanctis R, Porta MGD, Santoro A, Zambelli A. Opportunities and challenges of synthetic data generation in oncology. JCO Clin Cancer Inform. 2023;7:e2300045.","journal-title":"JCO Clin Cancer Inform"},{"issue":"6","key":"384_CR16","doi-asserted-by":"publisher","first-page":"e0269097","DOI":"10.1371\/journal.pone.0269097","volume":"17","author":"Y Jiang","year":"2022","unstructured":"Jiang Y, Mosquera L, Jiang B, Kong L, El Emam K. Measuring re-identification risk using a synthetic estimator to enable data sharing. PLOS One. 2022;17(6):e0269097. https:\/\/doi.org\/10.1371\/journal.pone.0269097.","journal-title":"PLOS One"},{"issue":"1","key":"384_CR17","doi-asserted-by":"publisher","first-page":"e000167","DOI":"10.1136\/bmjmed-2022-000167","volume":"1","author":"T Kokosi","year":"2022","unstructured":"Kokosi T, Harron K. Synthetic data in medical research. BMJ Med. 2022;1(1):e000167. https:\/\/doi.org\/10.1136\/bmjmed-2022-000167.","journal-title":"BMJ Med"},{"issue":"8","key":"384_CR18","doi-asserted-by":"publisher","first-page":"1287","DOI":"10.1101\/gr.209973.116","volume":"27","author":"ML Leung","year":"2017","unstructured":"Leung ML, Davis A, Gao R, Casasent A, Wang Y, Sei E, Vilar E, Maru D, Kopetz S, Navin NE. Single-cell DNA sequencing reveals a late-dissemination model in metastatic colorectal cancer. Genome Res. 2017;27(8):1287\u201399.","journal-title":"Genome Res"},{"issue":"1","key":"384_CR19","doi-asserted-by":"publisher","first-page":"204","DOI":"10.1186\/s13059-023-03039-z","volume":"24","author":"W Li","year":"2023","unstructured":"Li W, Kim M, Zhang K, Chen H, Jiang X, Harmanci A. COLLAGENE enables privacy-aware federated and collaborative genomic data analysis. Genome Biol. 2023;24(1):204.","journal-title":"Genome Biol"},{"issue":"15","key":"384_CR20","doi-asserted-by":"publisher","first-page":"e89","DOI":"10.1093\/nar\/gky423","volume":"46","author":"W Li","year":"2018","unstructured":"Li W, Li Q, Kang S, Same M, Zhou Y, Sun C, Liu C-C, Matsuoka L, Sher L, Wong WH, et al. Cancerdetector: ultrasensitive and non-invasive cancer detection at the resolution of individual reads using cell-free dna methylation sequencing data. Nucl Acids Res. 2018;46(15):e89\u2013e89.","journal-title":"Nucl Acids Res"},{"key":"384_CR21","doi-asserted-by":"crossref","unstructured":"Patki N, Wedge R, Veeramachaneni K. The synthetic data vault. In: 2016 IEEE international conference on data science and advanced analytics (DSAA). p. 399\u2013410. IEEE 2016.","DOI":"10.1109\/DSAA.2016.49"},{"issue":"1","key":"384_CR22","doi-asserted-by":"publisher","first-page":"105","DOI":"10.1093\/bib\/bbv118","volume":"18","author":"K Shameer","year":"2017","unstructured":"Shameer K, Badgeley MA, Miotto R, Glicksberg BS, Morgan JW, Dudley JT. Translational bioinformatics in the era of real-time biomedical, health care and wellness data streams. Brief Bioinform. 2017;18(1):105\u201324.","journal-title":"Brief Bioinform"},{"issue":"11","key":"384_CR23","doi-asserted-by":"publisher","first-page":"e1004574","DOI":"10.1371\/journal.pcbi.1004574","volume":"11","author":"W-M Song","year":"2015","unstructured":"Song W-M, Zhang B. Multiscale embedded gene co-expression network analysis. PLoS Comput Biol. 2015;11(11):e1004574.","journal-title":"PLoS Comput Biol"},{"key":"384_CR24","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1186\/s12992-017-0319-4","volume":"14","author":"WM Sweileh","year":"2018","unstructured":"Sweileh WM. Research trends on human trafficking: a bibliometric analysis using Scopus database. Global Health. 2018;14:1\u201312.","journal-title":"Global Health"},{"issue":"5","key":"384_CR25","doi-asserted-by":"publisher","first-page":"828","DOI":"10.1016\/j.cell.2017.08.007","volume":"170","author":"A Torkamani","year":"2017","unstructured":"Torkamani A, Andersen KG, Steinhubl SR, Topol EJ. High-definition medicine. Cell. 2017;170(5):828\u201343. https:\/\/doi.org\/10.1016\/j.cell.2017.08.007.","journal-title":"Cell"},{"key":"384_CR26","doi-asserted-by":"publisher","first-page":"7","DOI":"10.3389\/frma.2017.00007","volume":"2","author":"B Vargas-Quesada","year":"2017","unstructured":"Vargas-Quesada B, Chinchilla-Rodr\u00edguez Z, Rodriguez N. Identification and visualization of the intellectual structure in graphene research. Front Res Metrics Anal. 2017;2:7.","journal-title":"Front Res Metrics Anal"},{"issue":"7","key":"384_CR27","doi-asserted-by":"publisher","first-page":"1181","DOI":"10.1093\/bioinformatics\/bty750","volume":"35","author":"H Wang","year":"2019","unstructured":"Wang H, Lengerich BJ, Aragam B, Xing EP. Precision lasso: accounting for correlations and linear dependencies in high-dimensional genomic data. Bioinformatics. 2019;35(7):1181\u20137.","journal-title":"Bioinformatics"},{"issue":"1","key":"384_CR28","doi-asserted-by":"publisher","first-page":"198","DOI":"10.1186\/s12859-023-05262-8","volume":"24","author":"M Wysocka","year":"2023","unstructured":"Wysocka M, Wysocki O, Zufferey M, Landers D, Freitas A. A systematic review of biologically-informed deep learning models for cancer: fundamental trends for encoding and interpreting oncology data. BMC Bioinform. 2023;24(1):198.","journal-title":"BMC Bioinform"},{"issue":"1","key":"384_CR29","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1093\/bioinformatics\/btv544","volume":"32","author":"Z Yang","year":"2016","unstructured":"Yang Z, Michailidis G. A non-negative matrix factorization method for detecting modules in heterogeneous omics multi-modal data. Bioinformatics. 2016;32(1):1\u20138.","journal-title":"Bioinformatics"},{"issue":"2","key":"384_CR30","doi-asserted-by":"publisher","first-page":"e1009303","DOI":"10.1371\/journal.pgen.1009303","volume":"17","author":"B Yelmen","year":"2021","unstructured":"Yelmen B, Decelle A, Ongaro L, Marnetto D, Tallec C, Montinaro F, Furtlehner C, Pagani L, Jay F. Creating artificial human genomes using generative neural networks. PLOS Genet. 2021;17(2):e1009303. https:\/\/doi.org\/10.1371\/journal.pgen.1009303.","journal-title":"PLOS Genet"},{"issue":"4","key":"384_CR31","doi-asserted-by":"publisher","first-page":"967","DOI":"10.1002\/asi.23437","volume":"67","author":"J Zhang","year":"2016","unstructured":"Zhang J, Qi Y, Zheng F, Long C, Zuxun L, Duan Z. Comparing keywords plus of wos and author keywords: a case study of patient adherence research. J Assoc Inf Sci Technol. 2016;67(4):967\u201372.","journal-title":"J Assoc Inf Sci Technol"},{"issue":"1","key":"384_CR32","doi-asserted-by":"publisher","first-page":"12","DOI":"10.1038\/s41588-018-0295-5","volume":"51","author":"J Zou","year":"2019","unstructured":"Zou J, Huss M, Abid A, Mohammadi P, Torkamani A, Telenti Amalio. A primer on deep learning in genomics. Nat Genet. 2019;51(1):12\u20138.","journal-title":"Nat Genet"}],"container-title":["Discover Artificial Intelligence"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s44163-025-00384-9.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s44163-025-00384-9\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s44163-025-00384-9.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,7]],"date-time":"2025-09-07T11:03:13Z","timestamp":1757242993000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s44163-025-00384-9"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,7,15]]},"references-count":32,"journal-issue":{"issue":"1","published-online":{"date-parts":[[2025,12]]}},"alternative-id":["384"],"URL":"https:\/\/doi.org\/10.1007\/s44163-025-00384-9","relation":{},"ISSN":["2731-0809"],"issn-type":[{"type":"electronic","value":"2731-0809"}],"subject":[],"published":{"date-parts":[[2025,7,15]]},"assertion":[{"value":"12 November 2024","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"18 June 2025","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"15 July 2025","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"Not applicable.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethics approval and consent to participate"}},{"value":"The authors declare no conflict of interest related to this study.","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Competing interests"}}],"article-number":"148"}}