{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,16]],"date-time":"2026-04-16T09:31:19Z","timestamp":1776331879159,"version":"3.50.1"},"reference-count":44,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2024,9,4]],"date-time":"2024-09-04T00:00:00Z","timestamp":1725408000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0"},{"start":{"date-parts":[[2024,9,4]],"date-time":"2024-09-04T00:00:00Z","timestamp":1725408000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0"}],"funder":[{"name":"The Science Research Project of Education Department of Liaoning Province","award":["LJKZ0765"],"award-info":[{"award-number":["LJKZ0765"]}]},{"name":"The Science and Technology Planning Project of Liaoning Province","award":["2021JH4\/10200008"],"award-info":[{"award-number":["2021JH4\/10200008"]}]},{"name":"The Science Research Project of Shenyang City","award":["23-506-3-01-21"],"award-info":[{"award-number":["23-506-3-01-21"]}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["BioData Mining"],"DOI":"10.1186\/s13040-024-00384-y","type":"journal-article","created":{"date-parts":[[2024,9,4]],"date-time":"2024-09-04T07:07:45Z","timestamp":1725433665000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":22,"title":["Processing imbalanced medical data at the data level with assisted-reproduction data as an example"],"prefix":"10.1186","volume":"17","author":[{"given":"Junliang","family":"Zhu","sequence":"first","affiliation":[]},{"given":"Shaowei","family":"Pu","sequence":"additional","affiliation":[]},{"given":"Jiaji","family":"He","sequence":"additional","affiliation":[]},{"given":"Dongchao","family":"Su","sequence":"additional","affiliation":[]},{"given":"Weijie","family":"Cai","sequence":"additional","affiliation":[]},{"given":"Xueying","family":"Xu","sequence":"additional","affiliation":[]},{"given":"Hongbo","family":"Liu","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,9,4]]},"reference":[{"issue":"13","key":"384_CR1","doi-asserted-by":"publisher","first-page":"1317","DOI":"10.1001\/jama.2017.18391","volume":"319","author":"AL Beam","year":"2018","unstructured":"Beam AL, Kohane IS. Big data and machine learning in health care. JAMA. 2018;319(13):1317\u201318.","journal-title":"JAMA"},{"issue":"3","key":"384_CR2","doi-asserted-by":"publisher","first-page":"1134","DOI":"10.1021\/acssensors.3c02670","volume":"9","author":"S Lu","year":"2024","unstructured":"Lu S, Yang J, Gu Y, He D, Wu H, Sun W, et al. Advances in machine learning processing of big data from disease diagnosis sensors. ACS Sens. 2024;9(3):1134\u201348.","journal-title":"ACS Sens"},{"key":"384_CR3","doi-asserted-by":"publisher","first-page":"50","DOI":"10.1016\/j.ins.2022.12.046","volume":"624","author":"SN Shi","year":"2023","unstructured":"Shi SN, Li J, Zhu D, Yang F, Xu Y. A hybrid imbalanced classification model based on data density. Inf Sci. 2023;624:50\u201367.","journal-title":"Inf Sci"},{"key":"384_CR4","doi-asserted-by":"publisher","first-page":"106087","DOI":"10.1016\/j.knosys.2020.106087","volume":"203","author":"JK Zhao","year":"2020","unstructured":"Zhao JK, Jin J, Chen S, Zhang RF, Yu BL, Liu QF. A weighted hybrid ensemble method for classifying imbalanced data. Knowl-Based Syst. 2020;203:106087.","journal-title":"Knowl-Based Syst"},{"issue":"2","key":"384_CR5","doi-asserted-by":"publisher","first-page":"224","DOI":"10.7763\/IJMLC.2013.V3.307","volume":"3","author":"MM Rahman","year":"2013","unstructured":"Rahman MM, Davis DN. Addressing the class imbalance problem in medical datasets. Int J Mach Learn Comput. 2013;3(2):224.","journal-title":"Int J Mach Learn Comput"},{"key":"384_CR6","doi-asserted-by":"publisher","first-page":"103089","DOI":"10.1016\/j.jbi.2018.12.003","volume":"90","author":"S Fotouhi","year":"2019","unstructured":"Fotouhi S, Asadi S, Kattan MW. A comprehensive data level analysis for cancer diagnosis on imbalanced data. J Biomed Inf. 2019;90:103089.","journal-title":"J Biomed Inf"},{"key":"384_CR7","doi-asserted-by":"publisher","first-page":"102289","DOI":"10.1016\/j.artmed.2022.102289","volume":"128","author":"MM Ahsan","year":"2022","unstructured":"Ahsan MM, Siddique Z. Machine learning-based heart disease diagnosis: a systematic literature review. Artif Intell Med. 2022;128:102289.","journal-title":"Artif Intell Med"},{"issue":"1","key":"384_CR8","doi-asserted-by":"publisher","first-page":"116","DOI":"10.1186\/s12911-024-02521-3","volume":"24","author":"G Drouard","year":"2024","unstructured":"Drouard G, Mykk\u00e4nen J, Heiskanen J, Pohjonen J, Ruohonen S, Pahkala K, et al. Exploring machine learning strategies for predicting cardiovascular disease risk factors from multi-omic data. BMC Med Inf Decis Mak. 2024;24(1):116.","journal-title":"BMC Med Inf Decis Mak"},{"key":"384_CR9","doi-asserted-by":"publisher","first-page":"e44081","DOI":"10.2196\/44081","volume":"25","author":"Y Ren","year":"2023","unstructured":"Ren Y, Wu D, Tong Y, L\u00f3pez-DeFede A, Gareau S. Issue of data imbalance on low birthweight baby outcomes prediction and associated risk factors identification: establishment of benchmarking key machine learning models with data rebalancing strategies. J Med Internet Res. 2023;25:e44081.","journal-title":"J Med Internet Res"},{"issue":"9","key":"384_CR10","doi-asserted-by":"publisher","first-page":"6390","DOI":"10.1109\/TNNLS.2021.3136503","volume":"34","author":"D Dablain","year":"2023","unstructured":"Dablain D, Krawczyk B, Chawla NV. DeepSMOTE: fusing deep learning and SMOTE for imbalanced data. IEEE Trans Neural Netw Learn Syst. 2023;34(9):6390\u2013404.","journal-title":"IEEE Trans Neural Netw Learn Syst"},{"key":"384_CR11","doi-asserted-by":"publisher","first-page":"110415","DOI":"10.1016\/j.asoc.2023.110415","volume":"143","author":"S Rezvani","year":"2023","unstructured":"Rezvani S, Wang X. A broad review on class imbalance learning techniques. Appl Soft Comput. 2023;143:110415.","journal-title":"Appl Soft Comput"},{"key":"384_CR12","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1016\/j.csda.2017.01.005","volume":"111","author":"J Gong","year":"2017","unstructured":"Gong J, Kim H, RHSBoost. Improving classification performance in imbalance data. Comput Stat Data An. 2017;111:1\u201313.","journal-title":"Comput Stat Data An"},{"issue":"13","key":"384_CR13","doi-asserted-by":"publisher","first-page":"3257","DOI":"10.1080\/02664763.2021.1939662","volume":"49","author":"L Zhang","year":"2022","unstructured":"Zhang L, Geisler T, Ray H, Xie Y. Improving logistic regression on the imbalanced data by a novel penalized log-likelihood function. J Appl Stat. 2022;49(13):3257\u201377.","journal-title":"J Appl Stat"},{"key":"384_CR14","doi-asserted-by":"publisher","first-page":"119893","DOI":"10.1016\/j.ins.2023.119893","volume":"655","author":"G Charizanos","year":"2024","unstructured":"Charizanos G, Demirhan H, \u0130\u00e7en D. A Monte Carlo fuzzy logistic regression framework against imbalance and separation. Inf Sci. 2024;655:119893.","journal-title":"Inf Sci"},{"issue":"10","key":"384_CR15","doi-asserted-by":"publisher","first-page":"3708","DOI":"10.1007\/s11227-015-1541-6","volume":"72","author":"J Li","year":"2016","unstructured":"Li J, Fong S, Mohammed S, Fiaidhi J. Improving the classification performance of biological imbalanced datasets by swarm optimization algorithms. J Supercomput. 2016;72(10):3708\u201328.","journal-title":"J Supercomput"},{"key":"384_CR16","doi-asserted-by":"publisher","first-page":"176","DOI":"10.1016\/j.neunet.2020.06.026","volume":"130","author":"KH Kim","year":"2020","unstructured":"Kim KH, Sohn SY. Hybrid neural network with cost-sensitive support vector machine for class-imbalanced multimodal data. Neural Netw. 2020;130:176\u201384.","journal-title":"Neural Netw"},{"key":"384_CR17","first-page":"1","volume":"72","author":"Z Wang","year":"2023","unstructured":"Wang Z, Zheng M, Liu PX. A novel classification method based on stacking ensemble for imbalanced problems. IEEE Trans Instrum Meas. 2023;72:1\u201313.","journal-title":"IEEE Trans Instrum Meas"},{"key":"384_CR18","doi-asserted-by":"publisher","first-page":"108511","DOI":"10.1016\/j.patcog.2021.108511","volume":"124","author":"S Maldonado","year":"2022","unstructured":"Maldonado S, Vairetti C, Fernandez A, Herrera F. FW-SMOTE: a feature-weighted oversampling approach for imbalanced classification. Pattern Recognit. 2022;124:108511.","journal-title":"Pattern Recognit"},{"issue":"2","key":"384_CR19","doi-asserted-by":"publisher","first-page":"1269","DOI":"10.1109\/TCYB.2020.3000754","volume":"52","author":"WWY Ng","year":"2022","unstructured":"Ng WWY, Xu S, Zhang J, Tian X, Rong TW, Kwong S. Hashing-based undersampling ensemble for imbalanced pattern classification problems. IEEE Trans Cybern. 2022;52(2):1269\u201379.","journal-title":"IEEE Trans Cybern"},{"key":"384_CR20","doi-asserted-by":"publisher","first-page":"232","DOI":"10.1016\/j.neucom.2020.04.075","volume":"407","author":"P Peng","year":"2020","unstructured":"Peng P, Zhang W, Zhang Y, Xu YY, Wang HW, Zhang HM. Cost sensitive active learning using bidirectional gated recurrent neural networks for imbalanced fault diagnosis. Neurocomputing. 2020;407:232\u201345.","journal-title":"Neurocomputing"},{"key":"384_CR21","doi-asserted-by":"publisher","first-page":"113232","DOI":"10.1016\/j.eswa.2020.113232","volume":"147","author":"VH Alves Ribeiro","year":"2020","unstructured":"Alves Ribeiro VH, Reynoso-Meza G. Ensemble learning by means of a multi-objective optimization design approach for dealing with imbalanced data sets. Expert Syst Appl. 2020;147:113232.","journal-title":"Expert Syst Appl"},{"issue":"21","key":"384_CR22","doi-asserted-by":"publisher","first-page":"e7140","DOI":"10.1002\/cpe.7140","volume":"34","author":"B Parlak","year":"2022","unstructured":"Parlak B. Class-index corpus-index measure: a novel feature selection method for imbalanced text data. CONCURR COMP-PRACT E. 2022;34(21):e7140.","journal-title":"CONCURR COMP-PRACT E"},{"issue":"1","key":"384_CR23","doi-asserted-by":"publisher","first-page":"121","DOI":"10.1186\/s12859-020-3411-3","volume":"21","author":"GH Fu","year":"2020","unstructured":"Fu GH, Wu YJ, Zong MJ, Pan J. Hellinger distance-based stable sparse feature selection for high-dimensional class-imbalanced data. BMC Bioinformatics. 2020;21(1):121.","journal-title":"BMC Bioinformatics"},{"issue":"1","key":"384_CR24","doi-asserted-by":"publisher","first-page":"15","DOI":"10.1186\/s13040-023-00330-4","volume":"16","author":"T Kosolwattana","year":"2023","unstructured":"Kosolwattana T, Liu C, Hu R, Han S, Chen H, Lin Y. A self-inspected adaptive SMOTE algorithm (SASMOTE) for highly imbalanced data classification in healthcare. BioData Min. 2023;16(1):15.","journal-title":"BioData Min"},{"issue":"1","key":"384_CR25","doi-asserted-by":"publisher","first-page":"49","DOI":"10.1186\/s13040-021-00283-6","volume":"14","author":"J Beinecke","year":"2021","unstructured":"Beinecke J, Heider D. Gaussian noise up-sampling is better suited than SMOTE and ADASYN for clinical decision making. BioData Min. 2021;14(1):49.","journal-title":"BioData Min"},{"key":"384_CR26","doi-asserted-by":"publisher","first-page":"269","DOI":"10.1007\/978-3-319-58965-7_19","volume":"718","author":"B Parlak","year":"2018","unstructured":"Parlak B, Uysal AK. On feature weighting and selection for medical document classification. In developments and advances in intelligent systems and applications. Stud Comput Intell. 2018;718:269\u201382.","journal-title":"Stud Comput Intell"},{"key":"384_CR27","doi-asserted-by":"publisher","first-page":"1274","DOI":"10.1016\/j.csbj.2024.03.016","volume":"23","author":"J Labory","year":"2024","unstructured":"Labory J, Njomgue-Fotso E, Bottini S. Benchmarking feature selection and feature extraction methods to improve the performances of machine-learning algorithms for patient classification using metabolomics biomedical data. Comput Struct Biotechnol J. 2024;23:1274\u201387.","journal-title":"Comput Struct Biotechnol J"},{"issue":"1","key":"384_CR28","doi-asserted-by":"publisher","first-page":"59","DOI":"10.1177\/0165551521991037","volume":"49","author":"B Parlak","year":"2023","unstructured":"Parlak B, Uysal AK. A novel filter feature selection method for text classification: extensive feature selector. J Inf Sci. 2023;49(1):59\u201378.","journal-title":"J Inf Sci"},{"key":"384_CR29","doi-asserted-by":"publisher","first-page":"107222","DOI":"10.1016\/j.knosys.2021.107222","volume":"227","author":"N Moniz","year":"2021","unstructured":"Moniz N, Monteiro H. No free lunch in imbalanced learning. Knowl-Based Syst. 2021;227:107222.","journal-title":"Knowl-Based Syst"},{"issue":"5","key":"384_CR30","doi-asserted-by":"publisher","first-page":"429","DOI":"10.3233\/IDA-2002-6504","volume":"6","author":"N Japkowicz","year":"2002","unstructured":"Japkowicz N, Stephen S. The class imbalance problem: a systematic study. Intell Data Anal. 2002;6(5):429\u201349.","journal-title":"Intell Data Anal"},{"issue":"1","key":"384_CR31","doi-asserted-by":"publisher","first-page":"20","DOI":"10.1145\/1007730.1007735","volume":"6","author":"GE Batista","year":"2004","unstructured":"Batista GE, A P A, Prati RC, Monard MC. A study of the behavior of several methods for balancing machine learning training data. ACM SIGKDD Explor Newsl. 2004;6(1):20\u20139.","journal-title":"ACM SIGKDD Explor Newsl"},{"key":"384_CR32","unstructured":"Vimalraj S, Porkodi Dr R. A review on handling imbalanced data. 2018 International Conference on Current Trends towards Converging Technologies (ICCTCT). IEEE, 2018."},{"key":"384_CR33","doi-asserted-by":"publisher","first-page":"113504","DOI":"10.1016\/j.eswa.2020.113504","volume":"158","author":"JN Wei","year":"2020","unstructured":"Wei JN, Huang HS, Yao LG, Hu Y, Fan QS, Huang D. NI-MWMOTE: an improving noise-immunity majority weighted minority oversampling technique for imbalanced classification problems. Expert Syst Appl. 2020;158:113504.","journal-title":"Expert Syst Appl"},{"issue":"4","key":"384_CR34","doi-asserted-by":"publisher","first-page":"104","DOI":"10.4236\/jilsa.2015.74010","volume":"7","author":"M Beckmann","year":"2015","unstructured":"Beckmann M, Ebecken NFF, De Lima BSLP. A KNN undersampling approach for data balancing. JILSA. 2015;7(4):104.","journal-title":"JILSA"},{"issue":"1","key":"384_CR35","doi-asserted-by":"publisher","first-page":"20","DOI":"10.1145\/1007730.1007735","volume":"6","author":"GE Batista","year":"2004","unstructured":"Batista GE, Prati RC, Monard MC. A study of the behavior of several methods for balancing machine learning training data. ACM SIGKDD Explor Newsl. 2004;6(1):20\u20139.","journal-title":"ACM SIGKDD Explor Newsl"},{"issue":"1","key":"384_CR36","doi-asserted-by":"publisher","first-page":"16","DOI":"10.1186\/1756-0381-6-16","volume":"6","author":"M Nakamura","year":"2013","unstructured":"Nakamura M, Kajiwara Y, Otsuka A, Kimura H. LVQ-SMOTE - learning vector quantization based synthetic minority over-sampling technique for biomedical data. BioData Min. 2013;6(1):16.","journal-title":"BioData Min"},{"key":"384_CR37","doi-asserted-by":"publisher","first-page":"37","DOI":"10.1186\/s13040-016-0117-1","volume":"9","author":"J Li","year":"2016","unstructured":"Li J, Fong S, Sung Y, Cho K, Wong R, Wong KKL. Adaptive swarm cluster-based dynamic multi-objective synthetic minority oversampling technique algorithm for tackling binary imbalanced datasets in biomedical data classification. BioData Min. 2016;9:37.","journal-title":"BioData Min"},{"issue":"1","key":"384_CR38","doi-asserted-by":"publisher","first-page":"e0296107","DOI":"10.1371\/journal.pone.0296107","volume":"19","author":"RM Munshi","year":"2024","unstructured":"Munshi RM. Novel ensemble learning approach with SVM-imputed ADASYN features for enhanced cervical cancer prediction. PLoS ONE. 2024;19(1):e0296107.","journal-title":"PLoS ONE"},{"key":"384_CR39","doi-asserted-by":"publisher","first-page":"84","DOI":"10.1016\/j.jtbi.2017.03.031","volume":"422","author":"C Jia","year":"2017","unstructured":"Jia C, Zuo Y, S-SulfPred:. A sensitive predictor to capture S-sulfenylation sites based on a resampling one-sided selection undersampling-synthetic minority oversampling technique. J Theor Biol. 2017;422:84\u20139.","journal-title":"J Theor Biol"},{"key":"384_CR40","doi-asserted-by":"publisher","first-page":"3","DOI":"10.1016\/j.patrec.2016.10.006","volume":"93","author":"D Devi","year":"2017","unstructured":"Devi D, Biswas SK, Purkayastha B. Redundancy-driven modified Tomek-link based undersampling: a solution to class imbalance. Pattern Recognit Lett. 2017;93:3\u201312.","journal-title":"Pattern Recognit Lett"},{"issue":"002","key":"384_CR41","doi-asserted-by":"publisher","first-page":"205","DOI":"10.1007\/s11704-009-0027-1","volume":"3","author":"L Zhou","year":"2009","unstructured":"Zhou L, Lai KK. Benchmarking binary classification models on data sets with different degrees of imbalance. Front Comput Sci Chi. 2009;3(002):205\u201316.","journal-title":"Front Comput Sci Chi"},{"key":"384_CR42","doi-asserted-by":"publisher","first-page":"106420","DOI":"10.1016\/j.cmpb.2021.106420","volume":"211","author":"H Yang","year":"2021","unstructured":"Yang H, Li XX, Cao HY, Cui YH, Luo YH, Liu JC. Using machine learning methods to predict hepatic encephalopathy in cirrhotic patients with unbalanced data. Comput Meth Prog Bio. 2021;211:106420.","journal-title":"Comput Meth Prog Bio"},{"key":"384_CR43","doi-asserted-by":"publisher","first-page":"70","DOI":"10.1016\/j.ins.2022.02.038","volume":"595","author":"AM Zhang","year":"2022","unstructured":"Zhang AM, Yu HL, Huan ZJ, Yang XB, Zheng S, Gao S. SMOTE-RkNN: a hybrid re-sampling method based on SMOTE and reverse k-nearest neighbors. Inf Sci. 2022;595:70\u201388.","journal-title":"Inf Sci"},{"key":"384_CR44","doi-asserted-by":"publisher","first-page":"114986","DOI":"10.1016\/j.eswa.2021.114986","volume":"178","author":"A \u00d6zdemir","year":"2021","unstructured":"\u00d6zdemir A, Polat K, Alhudhaif A. Classification of imbalanced hyperspectral images using SMOTE-based deep learning methods. Expert Syst Appl. 2021;178:114986.","journal-title":"Expert Syst Appl"}],"container-title":["BioData Mining"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1186\/s13040-024-00384-y.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1186\/s13040-024-00384-y\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1186\/s13040-024-00384-y.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,9,4]],"date-time":"2024-09-04T08:08:04Z","timestamp":1725437284000},"score":1,"resource":{"primary":{"URL":"https:\/\/biodatamining.biomedcentral.com\/articles\/10.1186\/s13040-024-00384-y"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,9,4]]},"references-count":44,"journal-issue":{"issue":"1","published-online":{"date-parts":[[2024,12]]}},"alternative-id":["384"],"URL":"https:\/\/doi.org\/10.1186\/s13040-024-00384-y","relation":{},"ISSN":["1756-0381"],"issn-type":[{"value":"1756-0381","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,9,4]]},"assertion":[{"value":"21 June 2024","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"27 August 2024","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"4 September 2024","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The ethical matters of biomedical research involving humans in this project meet the requirements of the Declaration of Helsinki and the Measures for Ethical Review of Life Science and Medical Research Involving Humans. The Medical Ethics Review Committee of Jiangxi Provincial Maternal and Child Health Hospital approved the implementation of this project according to the research plan (SZYX-202305). Signed informed consent was obtained from all patients before enrollment.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethics approval and consent to participate"}},{"value":"All authors have read and agreed to the published version of the manuscript.","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Consent for publication"}},{"value":"The authors declare no competing interests.","order":4,"name":"Ethics","group":{"name":"EthicsHeading","label":"Competing interests"}}],"article-number":"29"}}