{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,18]],"date-time":"2026-03-18T16:19:29Z","timestamp":1773850769288,"version":"3.50.1"},"reference-count":71,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2025,11,1]],"date-time":"2025-11-01T00:00:00Z","timestamp":1761955200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2025,11,1]],"date-time":"2025-11-01T00:00:00Z","timestamp":1761955200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/legal\/tdmrep-license"},{"start":{"date-parts":[[2025,10,19]],"date-time":"2025-10-19T00:00:00Z","timestamp":1760832000000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"DOI":"10.13039\/501100000873","name":"Queen&apos;s University Belfast","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100000873","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Journal of Biomedical Informatics"],"published-print":{"date-parts":[[2025,11]]},"DOI":"10.1016\/j.jbi.2025.104932","type":"journal-article","created":{"date-parts":[[2025,10,18]],"date-time":"2025-10-18T16:18:10Z","timestamp":1760804290000},"page":"104932","update-policy":"https:\/\/doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":0,"special_numbering":"C","title":["Towards a Biological Evaluation Framework for Oversampling (BEFO) gene expression data"],"prefix":"10.1016","volume":"171","author":[{"ORCID":"https:\/\/orcid.org\/0009-0001-5062-4247","authenticated-orcid":false,"given":"Kevin","family":"Fee","sequence":"first","affiliation":[]},{"given":"Suneil","family":"Jain","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8583-8635","authenticated-orcid":false,"given":"Ross G.","family":"Murphy","sequence":"additional","affiliation":[]},{"given":"Anna","family":"Jurek-Loughrey","sequence":"additional","affiliation":[]}],"member":"78","reference":[{"key":"10.1016\/j.jbi.2025.104932_b1","doi-asserted-by":"crossref","first-page":"2683","DOI":"10.3389\/fmicb.2019.02683","article-title":"Differential expression of signaling pathway genes associated with aflatoxin reduction quantitative trait loci in maize (Zea mays L.)","volume":"10","author":"Parish","year":"2019","journal-title":"Front. Microbiol."},{"issue":"1","key":"10.1016\/j.jbi.2025.104932_b2","doi-asserted-by":"crossref","first-page":"9403","DOI":"10.1038\/s41598-021-88875-4","article-title":"Synthetic single cell RNA sequencing data from small pilot studies using deep generative models","volume":"11","author":"Treppner","year":"2021","journal-title":"Sci. Rep."},{"issue":"6","key":"10.1016\/j.jbi.2025.104932_b3","doi-asserted-by":"crossref","first-page":"2670","DOI":"10.1109\/JBHI.2022.3196697","article-title":"Synthetic patient data generation and evaluation in disease prediction using small and imbalanced datasets","volume":"27","author":"Rodriguez-Almeida","year":"2023","journal-title":"IEEE J. Biomed. Heal. Informatics"},{"key":"10.1016\/j.jbi.2025.104932_b4","doi-asserted-by":"crossref","first-page":"43","DOI":"10.1007\/s12038-022-00278-3","article-title":"Leveraging deep learning algorithms for synthetic data generation to design and analyze biological networks","volume":"47","author":"Achuthan","year":"2022","journal-title":"J. Biosci."},{"key":"10.1016\/j.jbi.2025.104932_b5","unstructured":"Amber J. Barton, Jennifer Hill, Andrew J. Pollard, Christoph J. Blohmke, Frontiers | transcriptomics in human challenge models."},{"issue":"9","key":"10.1016\/j.jbi.2025.104932_b6","doi-asserted-by":"crossref","first-page":"1452","DOI":"10.1038\/s12276-020-0422-0","article-title":"Single-cell transcriptomics in cancer: computational challenges and opportunities","volume":"52","author":"Fan","year":"2020","journal-title":"Exp. Mol. Med."},{"issue":"1","key":"10.1016\/j.jbi.2025.104932_b7","doi-asserted-by":"crossref","first-page":"93","DOI":"10.1038\/s10038-020-0763-5","article-title":"Interpretation of omics data analyses","volume":"66","author":"Yamada","year":"2021","journal-title":"J. Hum. Genet."},{"key":"10.1016\/j.jbi.2025.104932_b8","doi-asserted-by":"crossref","DOI":"10.1016\/j.neulet.2023.137609","article-title":"A multiclass extreme gradient boosting model for evaluation of transcriptomic biomarkers in alzheimer\u2019s disease prediction","volume":"821","author":"Zhang","year":"2024","journal-title":"Neurosci. Lett."},{"issue":"1","key":"10.1016\/j.jbi.2025.104932_b9","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1186\/1471-2105-4-5","article-title":"Genomic data sampling and its effect on classification performance assessment","volume":"4","author":"Azuaje","year":"2003","journal-title":"BMC Bioinformatics"},{"key":"10.1016\/j.jbi.2025.104932_b10","doi-asserted-by":"crossref","DOI":"10.1016\/j.knosys.2019.06.034","article-title":"Improving interpolation-based oversampling for imbalanced data learning","volume":"187","author":"Zhu","year":"2020","journal-title":"Knowl.-Based Syst."},{"key":"10.1016\/j.jbi.2025.104932_b11","series-title":"The harms of class imbalance corrections for machine learning based prediction models: a simulation study","author":"Carriero","year":"2024"},{"key":"10.1016\/j.jbi.2025.104932_b12","article-title":"A framework of rebalancing imbalanced healthcare data for rare events\u2019 classification: A case of look-alike sound-alike mix-up incident detection","volume":"2018","author":"Zhao","year":"2018","journal-title":"J. Heal. Eng."},{"issue":"11","key":"10.1016\/j.jbi.2025.104932_b13","doi-asserted-by":"crossref","DOI":"10.1371\/journal.pone.0259227","article-title":"An oversampling method for multi-class imbalanced data based on composite weights","volume":"16","author":"Deng","year":"2021","journal-title":"PLoS One"},{"key":"10.1016\/j.jbi.2025.104932_b14","doi-asserted-by":"crossref","first-page":"321","DOI":"10.1613\/jair.953","article-title":"SMOTE: Synthetic minority over-sampling technique","volume":"16","author":"Chawla","year":"2002","journal-title":"J. Artificial Intelligence Res."},{"key":"10.1016\/j.jbi.2025.104932_b15","series-title":"2019 IEEE 6th International Conference on Engineering Technologies and Applied Sciences","first-page":"1","article-title":"Modified adaptive synthetic SMOTE to improve classification performance in imbalanced datasets","author":"Gameng","year":"2019"},{"key":"10.1016\/j.jbi.2025.104932_b16","doi-asserted-by":"crossref","first-page":"438","DOI":"10.1016\/j.ins.2021.03.041","article-title":"A novel oversampling technique for class-imbalanced learning based on SMOTE and natural neighbors","volume":"565","author":"Li","year":"2021","journal-title":"Inform. Sci."},{"key":"10.1016\/j.jbi.2025.104932_b17","series-title":"Advances in Intelligent Computing","first-page":"878","article-title":"Borderline-SMOTE: A new over-sampling method in imbalanced data sets learning","author":"Han","year":"2005"},{"key":"10.1016\/j.jbi.2025.104932_b18","series-title":"ADASYN: Adaptive synthetic sampling approach for imbalanced learning","first-page":"1322","author":"He","year":"2008"},{"key":"10.1016\/j.jbi.2025.104932_b19","article-title":"Generative adversarial nets","volume":"vol. 27","author":"Goodfellow","year":"2014"},{"key":"10.1016\/j.jbi.2025.104932_b20","series-title":"Auto-encoding variational Bayes","author":"Kingma","year":"2013"},{"key":"10.1016\/j.jbi.2025.104932_b21","series-title":"Signature informed sampling for transcriptomic data","author":"Janakarajan","year":"2023"},{"key":"10.1016\/j.jbi.2025.104932_b22","first-page":"1310","article-title":"Addressing class imbalance of health data: A systematic literature review on modified synthetic minority oversampling technique (SMOTE) strategies","volume":"8","author":"Hairani","year":"2024","journal-title":"JOIV Int. J. Informatics Vis."},{"key":"10.1016\/j.jbi.2025.104932_b23","unstructured":"Mode collapse in generative adversarial networks: An overview | IEEE conference publication | IEEE Xplore."},{"key":"10.1016\/j.jbi.2025.104932_b24","series-title":"Understanding posterior collapse in generative latent variable models","author":"Lucas","year":"2019"},{"key":"10.1016\/j.jbi.2025.104932_b25","series-title":"On the influence of several factors on pathway enrichment analysis","author":"Mubeen","year":"2022"},{"key":"10.1016\/j.jbi.2025.104932_b26","series-title":"Towards biologically plausible and private gene expression data generation","author":"Chen","year":"2024"},{"key":"10.1016\/j.jbi.2025.104932_b27","series-title":"Proceedings of the 39th International Conference on Machine Learning","first-page":"290","article-title":"How faithful is your synthetic data? Sample-level metrics for evaluating and auditing generative models","author":"Alaa","year":"2022","ISSN":"https:\/\/id.crossref.org\/issn\/2640-3498","issn-type":"print"},{"issue":"4","key":"10.1016\/j.jbi.2025.104932_b28","doi-asserted-by":"crossref","first-page":"590","DOI":"10.55730\/1300-0632.4089","article-title":"Multi-label voice disorder classification using raw waveforms","volume":"32","author":"Di\u0307\u015fken","year":"2024","journal-title":"Turk. J. Electr. Eng. Comput. Sci."},{"key":"10.1016\/j.jbi.2025.104932_b29","series-title":"Deep clustering via center-oriented margin free-triplet loss for skin lesion detection in highly imbalanced datasets","author":"Ozturk","year":"2022"},{"key":"10.1016\/j.jbi.2025.104932_b30","series-title":"Weakly Supervised-Based Oversampling for High Imbalance and High Dimensionality Data Classification","author":"Qian","year":"2020"},{"issue":"1","key":"10.1016\/j.jbi.2025.104932_b31","doi-asserted-by":"crossref","first-page":"106","DOI":"10.1186\/1471-2105-14-106","article-title":"SMOTE for high-dimensional class-imbalanced data","volume":"14","author":"Blagus","year":"2013","journal-title":"BMC Bioinformatics"},{"key":"10.1016\/j.jbi.2025.104932_b32","series-title":"2018 4th International Conference on Science and Technology","first-page":"1","article-title":"Adaptive synthetic-nominal (ADASYN-N) and adaptive synthetic-KNN (ADASYN-KNN) for multiclass imbalance learning on laboratory test data","author":"Kurniawati","year":"2018"},{"key":"10.1016\/j.jbi.2025.104932_b33","series-title":"Predictive modelling of cardiac disease: Enhancing accuracy through machine learning algorithms and borderline-SMOTE technique","author":"Kumar","year":"2024"},{"issue":"1","key":"10.1016\/j.jbi.2025.104932_b34","doi-asserted-by":"crossref","first-page":"1259","DOI":"10.1038\/s41467-021-21466-z","article-title":"Ensembled deep learning model outperforms human experts in diagnosing biliary atresia from sonographic gallbladder images","volume":"12","author":"Zhou","year":"2021","journal-title":"Nat. Commun."},{"issue":"2","key":"10.1016\/j.jbi.2025.104932_b35","doi-asserted-by":"crossref","DOI":"10.1016\/j.ipm.2022.103235","article-title":"RGAN-EL: A GAN and ensemble learning-based hybrid approach for imbalanced data classification","volume":"60","author":"Ding","year":"2023","journal-title":"Inf. Process. Manage."},{"key":"10.1016\/j.jbi.2025.104932_b36","series-title":"Proceedings of the 34th International Conference on Machine Learning","first-page":"2391","article-title":"Adversarial variational Bayes: Unifying variational autoencoders and generative adversarial networks","author":"Mescheder","year":"2017","ISSN":"https:\/\/id.crossref.org\/issn\/2640-3498","issn-type":"print"},{"key":"10.1016\/j.jbi.2025.104932_b37","series-title":"GAN oversampling to boost accuracy of COVID-19 mortality prediction model","author":"Adiboina","year":"2020"},{"issue":"2","key":"10.1016\/j.jbi.2025.104932_b38","doi-asserted-by":"crossref","first-page":"279","DOI":"10.1016\/S1016-8478(23)17582-0","article-title":"Clustering approaches to identifying gene expression patterns from DNA microarray data","volume":"25","author":"Do","year":"2008","journal-title":"Mol. Cells"},{"key":"10.1016\/j.jbi.2025.104932_b39","series-title":"Posterior collapse and latent variable non-identifiability","author":"Wang","year":"2023"},{"key":"10.1016\/j.jbi.2025.104932_b40","series-title":"Conditional wasserstein GAN-based oversampling of tabular data for imbalanced learning","author":"Engelmann","year":"2020"},{"issue":"1","key":"10.1016\/j.jbi.2025.104932_b41","doi-asserted-by":"crossref","first-page":"156","DOI":"10.1016\/j.eng.2018.11.018","article-title":"Wasserstein GAN-based small-sample augmentation for new-generation artificial intelligence: A case study of cancer-staging data in biology","volume":"5","author":"Liu","year":"2019","journal-title":"Engineering"},{"key":"10.1016\/j.jbi.2025.104932_b42","series-title":"2023 10th International Conference on Signal Processing and Integrated Networks","first-page":"627","article-title":"A new GAN-based data augmentation method for handling class imbalance in credit card fraud detection","author":"Strelcenia","year":"2023","ISSN":"https:\/\/id.crossref.org\/issn\/2688-769X","issn-type":"print"},{"key":"10.1016\/j.jbi.2025.104932_b43","series-title":"BSGAN: A novel oversampling technique for imbalanced pattern recognitions","author":"Ahsan","year":"2023"},{"key":"10.1016\/j.jbi.2025.104932_b44","doi-asserted-by":"crossref","first-page":"30655","DOI":"10.1109\/ACCESS.2022.3158977","article-title":"SMOTified-GAN for class imbalanced pattern classification problems","volume":"10","author":"Sharma","year":"2022","journal-title":"IEEE Access"},{"issue":"6","key":"10.1016\/j.jbi.2025.104932_b45","article-title":"Hdwgcna identifies co-expression networks in high-dimensional transcriptomics data","volume":"3","author":"Morabito","year":"2023","journal-title":"Cell Rep. Methods"},{"issue":"7","key":"10.1016\/j.jbi.2025.104932_b46","doi-asserted-by":"crossref","first-page":"1940","DOI":"10.3390\/pr11071940","article-title":"Dimension reduction and classifier-based feature selection for oversampled gene expression data and cancer classification","volume":"11","author":"Petinrin","year":"2023","journal-title":"Processes"},{"key":"10.1016\/j.jbi.2025.104932_b47","doi-asserted-by":"crossref","DOI":"10.3389\/fgene.2020.598118","article-title":"RNA based approaches to profile oncogenic pathways from low quantity samples to drive precision oncology strategies","volume":"11","author":"van de Stolpe","year":"2021","journal-title":"Front. Genet."},{"issue":"7","key":"10.1016\/j.jbi.2025.104932_b48","doi-asserted-by":"crossref","first-page":"196","DOI":"10.1186\/s12859-019-2733-5","article-title":"Generalized gene co-expression analysis via subspace clustering using low-rank representation","volume":"20","author":"Wang","year":"2019","journal-title":"BMC Bioinformatics"},{"issue":"1","key":"10.1016\/j.jbi.2025.104932_b49","doi-asserted-by":"crossref","first-page":"9854","DOI":"10.1038\/s41598-022-13796-9","article-title":"Use of a graph neural network to the weighted gene co-expression network analysis of Korean native cattle","volume":"12","author":"Lee","year":"2022","journal-title":"Sci. Rep."},{"key":"10.1016\/j.jbi.2025.104932_b50","series-title":"Evolution of Translational Omics: Lessons Learned and the Path Forward","article-title":"Omics-based clinical discovery: Science, technology, and applications","author":"Micheel","year":"2012"},{"issue":"1","key":"10.1016\/j.jbi.2025.104932_b51","doi-asserted-by":"crossref","first-page":"81","DOI":"10.1186\/s12859-022-04609-x","article-title":"Distance correlation application to gene co-expression network analysis","volume":"23","author":"Hou","year":"2022","journal-title":"BMC Bioinformatics"},{"key":"10.1016\/j.jbi.2025.104932_b52","unstructured":"Jennifer Chang, WGCNA gene correlation network analysis."},{"key":"10.1016\/j.jbi.2025.104932_b53","series-title":"GLOBECOM 2020 - 2020 IEEE Global Communications Conference","first-page":"01","article-title":"Oversampling algorithm based on reinforcement learning in imbalanced problems","author":"Zhou","year":"2020","ISSN":"https:\/\/id.crossref.org\/issn\/2576-6813","issn-type":"print"},{"key":"10.1016\/j.jbi.2025.104932_b54","doi-asserted-by":"crossref","first-page":"150","DOI":"10.1016\/j.neucom.2018.01.063","article-title":"The gradual resampling ensemble for mining imbalanced data streams with concept drift","volume":"286","author":"Ren","year":"2018","journal-title":"Neurocomputing"},{"key":"10.1016\/j.jbi.2025.104932_b55","series-title":"2024 IEEE 12th International Conference on Intelligent Systems","first-page":"1","article-title":"Concept drift complexity for assessing sampling-induced concept drift in class-imbalanced data streams","author":"Hajmohammed","year":"2024"},{"issue":"1","key":"10.1016\/j.jbi.2025.104932_b56","doi-asserted-by":"crossref","first-page":"215","DOI":"10.1093\/annonc\/mdx637","article-title":"Validation of a metastatic assay using biopsies to improve risk stratification in patients with prostate cancer treated with radical radiation therapy","volume":"29","author":"Jain","year":"2018","journal-title":"Ann. Oncol."},{"key":"10.1016\/j.jbi.2025.104932_b57","series-title":"The cancer genome atlas pan-cancer analysis project - PubMed","author":"Weinstein","year":"2013"},{"key":"10.1016\/j.jbi.2025.104932_b58","article-title":"Gene expression omnibus: NCBI gene expression and hybridization array data repository","author":"R","year":"2002","journal-title":"PubMed"},{"issue":"6","key":"10.1016\/j.jbi.2025.104932_b59","doi-asserted-by":"crossref","first-page":"883","DOI":"10.55730\/1300-0632.4108","article-title":"Lgformer: Informer-based personalized modeling for blood glucose prediction","volume":"32","author":"Yuewei","year":"2024","journal-title":"Turk. J. Electr. Eng. Comput. Sci."},{"issue":"1","key":"10.1016\/j.jbi.2025.104932_b60","doi-asserted-by":"crossref","first-page":"6","DOI":"10.1186\/s12864-019-6413-7","article-title":"The advantages of the Matthews correlation coefficient (MCC) over F1 score and accuracy in binary classification evaluation","volume":"21","author":"Chicco","year":"2020","journal-title":"BMC Genomics"},{"key":"10.1016\/j.jbi.2025.104932_b61","doi-asserted-by":"crossref","first-page":"237","DOI":"10.4137\/BBI.S38316","article-title":"Clustering algorithms: Their application to gene expression data","volume":"10","author":"Oyelade","year":"2016","journal-title":"Bioinform. Biology Insights"},{"key":"10.1016\/j.jbi.2025.104932_b62","series-title":"Gene expression data analysis using hellinger correlation in weighted gene co-expression networks (WGCNA) - ScienceDirect","author":"Tianjiao Zhang","year":"2022"},{"issue":"18","key":"10.1016\/j.jbi.2025.104932_b63","doi-asserted-by":"crossref","first-page":"8546","DOI":"10.3390\/app11188546","article-title":"Selecting the suitable resampling strategy for imbalanced data classification regarding dataset properties","volume":"11","author":"Kraiem","year":"2021","journal-title":"Appl. Sci."},{"key":"10.1016\/j.jbi.2025.104932_b64","doi-asserted-by":"crossref","first-page":"13686","DOI":"10.1109\/ACCESS.2025.3531662","article-title":"Imbalanced data problem in machine learning: A review","volume":"13","author":"Altalhan","year":"2025","journal-title":"IEEE Access"},{"key":"10.1016\/j.jbi.2025.104932_b65","doi-asserted-by":"crossref","DOI":"10.1016\/j.asoc.2019.105662","article-title":"An empirical comparison and evaluation of minority oversampling techniques on a large number of imbalanced datasets","volume":"83","author":"Kov\u00e1cs","year":"2019","journal-title":"Appl. Soft Comput."},{"key":"10.1016\/j.jbi.2025.104932_b66","doi-asserted-by":"crossref","DOI":"10.1016\/j.infsof.2021.106742","article-title":"The impact of the distance metric and measure on SMOTE-based techniques in software defect prediction","volume":"142","author":"Feng","year":"2022","journal-title":"Inf. Softw. Technol."},{"key":"10.1016\/j.jbi.2025.104932_b67","series-title":"Investigating the synthetic minority class oversampling technique (SMOTE) on an imbalanced cardiovascular disease (CVD) dataset","author":"Apostolopoulos","year":"2020"},{"issue":"10","key":"10.1016\/j.jbi.2025.104932_b68","doi-asserted-by":"crossref","first-page":"273","DOI":"10.1007\/s10462-024-10884-2","article-title":"Handling imbalanced medical datasets: review of a decade of research","volume":"57","author":"Salmi","year":"2024","journal-title":"Artif. Intell. Rev."},{"key":"10.1016\/j.jbi.2025.104932_b69","series-title":"Stop oversampling for class imbalance learning: A critical review","author":"Hassanat","year":"2022"},{"key":"10.1016\/j.jbi.2025.104932_b70","series-title":"2023 3rd International Conference on Computer Science, Electronic Information Engineering and Intelligent Control Technology","first-page":"67","article-title":"Deep reinforcement learning and Dempster-Shafer theory: A unified approach to imbalanced classification","author":"Gan","year":"2023"},{"key":"10.1016\/j.jbi.2025.104932_b71","series-title":"Alignment helps make the most of multimodal data","author":"Arnold","year":"2025"}],"container-title":["Journal of Biomedical Informatics"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S1532046425001613?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S1532046425001613?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2026,3,18]],"date-time":"2026-03-18T10:14:22Z","timestamp":1773828862000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S1532046425001613"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,11]]},"references-count":71,"alternative-id":["S1532046425001613"],"URL":"https:\/\/doi.org\/10.1016\/j.jbi.2025.104932","relation":{},"ISSN":["1532-0464"],"issn-type":[{"value":"1532-0464","type":"print"}],"subject":[],"published":{"date-parts":[[2025,11]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"Towards a Biological Evaluation Framework for Oversampling (BEFO) gene expression data","name":"articletitle","label":"Article Title"},{"value":"Journal of Biomedical Informatics","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.jbi.2025.104932","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2025 The Authors. Published by Elsevier Inc.","name":"copyright","label":"Copyright"}],"article-number":"104932"}}