{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,8]],"date-time":"2026-05-08T00:31:27Z","timestamp":1778200287032,"version":"3.51.4"},"reference-count":72,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2022,11,9]],"date-time":"2022-11-09T00:00:00Z","timestamp":1667952000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2022,11,9]],"date-time":"2022-11-09T00:00:00Z","timestamp":1667952000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100003593","name":"Conselho Nacional de Desenvolvimento Cient\u00edfico e Tecnol\u00f3gico","doi-asserted-by":"publisher","award":["306395\/2017-7"],"award-info":[{"award-number":["306395\/2017-7"]}],"id":[{"id":"10.13039\/501100003593","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100002322","name":"Coordena\u00e7\u00e3o de Aperfei\u00e7oamento de Pessoal de N\u00edvel Superior","doi-asserted-by":"publisher","award":["001"],"award-info":[{"award-number":["001"]}],"id":[{"id":"10.13039\/501100002322","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Knowl Inf Syst"],"published-print":{"date-parts":[[2023,1]]},"DOI":"10.1007\/s10115-022-01772-8","type":"journal-article","created":{"date-parts":[[2022,11,9]],"date-time":"2022-11-09T15:02:52Z","timestamp":1668006172000},"page":"31-57","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":169,"title":["Imbalanced data preprocessing techniques for machine learning: a systematic mapping study"],"prefix":"10.1007","volume":"65","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-9178-2380","authenticated-orcid":false,"given":"Vitor","family":"Werner\u00a0de\u00a0Vargas","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9775-5842","authenticated-orcid":false,"given":"Jorge Arthur","family":"Schneider\u00a0Aranda","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7929-1984","authenticated-orcid":false,"given":"Ricardo","family":"dos Santos\u00a0Costa","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9599-2476","authenticated-orcid":false,"given":"Paulo Ricardo","family":"da Silva\u00a0Pereira","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0358-2056","authenticated-orcid":false,"given":"Jorge Luis","family":"Vict\u00f3ria\u00a0Barbosa","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2022,11,9]]},"reference":[{"key":"1772_CR1","doi-asserted-by":"publisher","first-page":"145435","DOI":"10.1109\/ACCESS.2019.2945034","volume":"7","author":"C Zhang","year":"2019","unstructured":"Zhang C, Zhou Y, Deng Y (2019) VCOS: a novel synergistic oversampling algorithm in binary imbalance classification. IEEE Access 7:145435\u2013145443. https:\/\/doi.org\/10.1109\/ACCESS.2019.2945034","journal-title":"IEEE Access"},{"key":"1772_CR2","doi-asserted-by":"publisher","DOI":"10.1016\/j.jbi.2018.12.003","volume":"90","author":"S Fotouhi","year":"2019","unstructured":"Fotouhi S, Asadi S, Kattan MW (2019) A comprehensive data level analysis for cancer diagnosis on imbalanced data. J Biomed Inform 90:103089. https:\/\/doi.org\/10.1016\/j.jbi.2018.12.003","journal-title":"J Biomed Inform"},{"issue":"2\u20134","key":"1772_CR3","doi-asserted-by":"publisher","first-page":"376","DOI":"10.1504\/IJIIDS.2020.109463","volume":"13","author":"G Rekha","year":"2020","unstructured":"Rekha G, Krishna Reddy V, Tyagi AK (2020) An Earth mover\u2019s distance-based undersampling approach for handling class-imbalanced data. Int J Intell Inf Database Syst 13(2\u20134):376\u2013392. https:\/\/doi.org\/10.1504\/IJIIDS.2020.109463","journal-title":"Int J Intell Inf Database Syst"},{"key":"1772_CR4","doi-asserted-by":"publisher","unstructured":"Wong GY, Leung FHF, Ling SH (2014) A novel evolutionary preprocessing method based on over-sampling and under-sampling for imbalanced datasets. In: IECON 2013\u201439th annual conference of the IEEE industrial electronics society, pp. 2354\u20132359. IEEE, Vienna, Austria. https:\/\/doi.org\/10.1109\/IECON.2013.6699499","DOI":"10.1109\/IECON.2013.6699499"},{"issue":"4","key":"1772_CR5","doi-asserted-by":"publisher","first-page":"633","DOI":"10.1007\/s10111-017-0447-x","volume":"19","author":"J Zhang","year":"2017","unstructured":"Zhang J, Cui X, Li J, Wang R (2017) Imbalanced classification of mental workload using a cost-sensitive majority weighted minority oversampling strategy. Cogn Technol Work 19(4):633\u2013653. https:\/\/doi.org\/10.1007\/s10111-017-0447-x","journal-title":"Cogn Technol Work"},{"key":"1772_CR6","doi-asserted-by":"publisher","unstructured":"Dong Y, Wang X (2011) A new over-sampling approach: random-SMOTE for learning from imbalanced data sets. In: KSEM 2011: 5th international conference on knowledge science, engineering and management, pp. 343\u2013352. Springer, Irvine, USA. https:\/\/doi.org\/10.1007\/978-3-642-25975-3_30","DOI":"10.1007\/978-3-642-25975-3_30"},{"key":"1772_CR7","doi-asserted-by":"publisher","unstructured":"Zhao SX, Wang XL, Yue QS (2020) A novel mixed sampling algorithm for imbalanced data based on XGBoost. In: CWSN 2020: 14th China conference on wireless sensor networks, pp 181\u2013196. Springer, Dunhuang, China. https:\/\/doi.org\/10.1007\/978-981-33-4214-9_14","DOI":"10.1007\/978-981-33-4214-9_14"},{"issue":"4","key":"1772_CR8","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3343440","volume":"52","author":"H Kaur","year":"2019","unstructured":"Kaur H, Pannu HS, Malhi AK (2019) A systematic review on imbalanced data challenges in machine learning: applications and solutions. ACM Comput Surv 52(4):1\u201336. https:\/\/doi.org\/10.1145\/3343440","journal-title":"ACM Comput Surv"},{"issue":"6","key":"1772_CR9","doi-asserted-by":"publisher","first-page":"479","DOI":"10.1049\/iet-sen.2018.5193","volume":"13","author":"EA Felix","year":"2019","unstructured":"Felix EA, Lee SP (2019) Systematic literature review of preprocessing techniques for imbalanced data. IET Softw 13(6):479\u2013496. https:\/\/doi.org\/10.1049\/iet-sen.2018.5193","journal-title":"IET Softw"},{"key":"1772_CR10","doi-asserted-by":"publisher","unstructured":"Spelmen VS, Porkodi R (2018) A review on handling imbalanced data. In: 2018 international conference on current trends towards converging technologies (ICCTCT), pp 1\u201311. IEEE, Coimbatore, India. https:\/\/doi.org\/10.1109\/ICCTCT.2018.8551020","DOI":"10.1109\/ICCTCT.2018.8551020"},{"issue":"4","key":"1772_CR11","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1002\/eng2.12298","volume":"3","author":"S Susan","year":"2020","unstructured":"Susan S, Kumar A (2020) The balancing trick: optimized sampling of imbalanced datasets\u2014a brief survey of the recent State of the Art. Eng Rep 3(4):1\u201324. https:\/\/doi.org\/10.1002\/eng2.12298","journal-title":"Eng Rep"},{"key":"1772_CR12","doi-asserted-by":"publisher","unstructured":"Shakeel F, Sabhitha AS, Sharma S (2017) Exploratory review on class imbalance problem: an overview. In: 2017 8th international conference on computing, communication and networking technologies (ICCCNT), pp 1\u20138. IEEE, Delhi, India. https:\/\/doi.org\/10.1109\/ICCCNT.2017.8204150","DOI":"10.1109\/ICCCNT.2017.8204150"},{"key":"1772_CR13","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1186\/s40537-019-0192-5","volume":"6","author":"JM Johnson","year":"2019","unstructured":"Johnson JM, Khoshgoftaar TM (2019) Survey on deep learning with class imbalance. J Big Data 6:1\u201354. https:\/\/doi.org\/10.1186\/s40537-019-0192-5","journal-title":"J Big Data"},{"key":"1772_CR14","doi-asserted-by":"publisher","first-page":"679","DOI":"10.1007\/s10044-014-0392-8","volume":"17","author":"Q Li","year":"2014","unstructured":"Li Q, Mao Y (2014) A review of boosting methods for imbalanced data classification. Pattern Anal Appl 17:679\u2013693. https:\/\/doi.org\/10.1007\/s10044-014-0392-8","journal-title":"Pattern Anal Appl"},{"key":"1772_CR15","doi-asserted-by":"publisher","unstructured":"Buda M, Maki A, Mazurowski MA (2018) A systematic study of the class imbalance problem in convolutional neural networks. Neural Netw 106:249\u2013259 arXiv:1710.05381. https:\/\/doi.org\/10.1016\/j.neunet.2018.07.011","DOI":"10.1016\/j.neunet.2018.07.011"},{"issue":"1","key":"1772_CR16","doi-asserted-by":"publisher","first-page":"111","DOI":"10.1007\/s42786-020-00020-3","volume":"4","author":"S Bhatore","year":"2020","unstructured":"Bhatore S, Mohan L, Reddy YR (2020) Machine learning techniques for credit risk evaluation: a systematic literature review. J Bank Financ Technol 4(1):111\u2013138. https:\/\/doi.org\/10.1007\/s42786-020-00020-3","journal-title":"J Bank Financ Technol"},{"issue":"10","key":"1772_CR17","doi-asserted-by":"publisher","DOI":"10.1016\/j.jstrokecerebrovasdis.2020.105162","volume":"29","author":"MS Sirsat","year":"2020","unstructured":"Sirsat MS, Ferm\u00e9 E, C\u00e2mara J (2020) Machine learning for brain stroke: a review. J Stroke Cerebrovasc Dis 29(10):105162. https:\/\/doi.org\/10.1016\/j.jstrokecerebrovasdis.2020.105162","journal-title":"J Stroke Cerebrovasc Dis"},{"key":"1772_CR18","doi-asserted-by":"publisher","unstructured":"Thanoun MY, Yaseen MT (2020) A comparative study of Parkinson disease diagnosis in machine learning. In: ICAAI 2020: 2020 the 4th international conference on advances in artificial intelligence, pp 23\u201328. ACM, New York, USA. https:\/\/doi.org\/10.1145\/3441417.3441425","DOI":"10.1145\/3441417.3441425"},{"key":"1772_CR19","doi-asserted-by":"publisher","DOI":"10.1007\/s12559-020-09813-6","author":"G Chugh","year":"2021","unstructured":"Chugh G, Kumar S, Singh N (2021) Survey on machine learning and deep learning applications in breast cancer diagnosis. Cogn Comput. https:\/\/doi.org\/10.1007\/s12559-020-09813-6","journal-title":"Cogn Comput"},{"key":"1772_CR20","doi-asserted-by":"publisher","first-page":"15209","DOI":"10.1007\/s11042-018-7044-8","volume":"79","author":"U Ishtiaq","year":"2020","unstructured":"Ishtiaq U, Abdul Kareem S, Abdullah ERMF, Mujtaba G, Jahangir R, Ghafoor HY (2020) Diabetic retinopathy detection through artificial intelligent techniques: a review and open issues. Multimed Tools Appl 79:15209\u201315252. https:\/\/doi.org\/10.1007\/s11042-018-7044-8","journal-title":"Multimed Tools Appl"},{"key":"1772_CR21","doi-asserted-by":"publisher","first-page":"134","DOI":"10.1016\/j.patcog.2018.05.014","volume":"83","author":"Z Hu","year":"2018","unstructured":"Hu Z, Tang J, Wang Z, Zhang K, Zhang L, Sun Q (2018) Deep learning for image-based cancer detection and diagnosis\u2014a survey. Pattern Recogn 83:134\u2013149. https:\/\/doi.org\/10.1016\/j.patcog.2018.05.014","journal-title":"Pattern Recogn"},{"key":"1772_CR22","doi-asserted-by":"publisher","DOI":"10.1016\/j.cmpb.2020.105635","volume":"195","author":"H Benhar","year":"2020","unstructured":"Benhar H, Idri A, Fern\u00e1ndez-Alem\u00e1n JL (2020) Data preprocessing for heart disease classification: a systematic literature review. Comput Methods Programs Biomed 195:105635. https:\/\/doi.org\/10.1016\/j.cmpb.2020.105635","journal-title":"Comput Methods Programs Biomed"},{"key":"1772_CR23","doi-asserted-by":"publisher","first-page":"69","DOI":"10.1016\/j.cmpb.2018.05.007","volume":"162","author":"A Idri","year":"2018","unstructured":"Idri A, Benhar H, Fern\u00e1ndez-Alem\u00e1n JL, Kadi I (2018) A systematic map of medical data preprocessing in knowledge discovery. Comput Methods Programs Biomed 162:69\u201385. https:\/\/doi.org\/10.1016\/j.cmpb.2018.05.007","journal-title":"Comput Methods Programs Biomed"},{"key":"1772_CR24","doi-asserted-by":"publisher","DOI":"10.1016\/j.ymssp.2019.106587","volume":"138","author":"Y Lei","year":"2020","unstructured":"Lei Y, Yang B, Jiang X, Jia F, Li N, Nandi AK (2020) Applications of machine learning to machine fault diagnosis: a review and roadmap. Mech Syst Signal Process 138:106587. https:\/\/doi.org\/10.1016\/j.ymssp.2019.106587","journal-title":"Mech Syst Signal Process"},{"key":"1772_CR25","doi-asserted-by":"publisher","DOI":"10.1016\/j.isatra.2021.02.042","author":"T Zhang","year":"2021","unstructured":"Zhang T, Chen J, Li F, Zhang K, Lv H, He S, Xu E (2021) Intelligent fault diagnosis of machines with small and imbalanced data: a state-of-the-art review and possible extensions. ISA Trans. https:\/\/doi.org\/10.1016\/j.isatra.2021.02.042","journal-title":"ISA Trans"},{"key":"1772_CR26","doi-asserted-by":"publisher","unstructured":"Amarasinghe T, Aponso A, Krishnarajah N (2018) Critical analysis of machine learning based approaches for fraud detection in financial transactions. In: ICMLT\u201918: Proceedings of the 2018 international conference on machine learning technologies, pp 12\u201317. ACM, New York, USA. https:\/\/doi.org\/10.1145\/3231884.3231894","DOI":"10.1145\/3231884.3231894"},{"key":"1772_CR27","doi-asserted-by":"publisher","unstructured":"Priscilla CV, Prabha DP (2019) Credit card fraud detection: a systematic review. In: Proceedings of the first international conference on innovative computing and cutting-edge technologies (ICICCT 2019), pp 290\u2013303. Springer, Istanbul, Turkey. https:\/\/doi.org\/10.1007\/978-3-030-38501-9_29","DOI":"10.1007\/978-3-030-38501-9_29"},{"issue":"3","key":"1772_CR28","doi-asserted-by":"publisher","first-page":"161","DOI":"10.1049\/iet-sen.2017.0148","volume":"12","author":"Z Li","year":"2018","unstructured":"Li Z, Jing XY, Zhu X (2018) Progress on approaches to software defect prediction. IET Softw 12(3):161\u2013175. https:\/\/doi.org\/10.1049\/iet-sen.2017.0148","journal-title":"IET Softw"},{"key":"1772_CR29","doi-asserted-by":"publisher","DOI":"10.1016\/j.eswa.2021.114595","volume":"172","author":"SK Pandey","year":"2021","unstructured":"Pandey SK, Mishra RB, Tripathi AK (2021) Machine learning based methods for software fault prediction: a survey. Expert Syst Appl 172:114595. https:\/\/doi.org\/10.1016\/j.eswa.2021.114595","journal-title":"Expert Syst Appl"},{"key":"1772_CR30","doi-asserted-by":"publisher","first-page":"504","DOI":"10.1016\/j.asoc.2014.11.023","volume":"27","author":"R Malhotra","year":"2015","unstructured":"Malhotra R (2015) A systematic review of machine learning techniques for software fault prediction. Appl Soft Comput 27:504\u2013518. https:\/\/doi.org\/10.1016\/j.asoc.2014.11.023","journal-title":"Appl Soft Comput"},{"issue":"7","key":"1772_CR31","doi-asserted-by":"publisher","first-page":"10206","DOI":"10.1016\/j.eswa.2009.02.037","volume":"36","author":"TS Guzella","year":"2009","unstructured":"Guzella TS, Caminhas WM (2009) A review of machine learning approaches to Spam filtering. Expert Syst Appl 36(7):10206\u201310222. https:\/\/doi.org\/10.1016\/j.eswa.2009.02.037","journal-title":"Expert Syst Appl"},{"issue":"8","key":"1772_CR32","doi-asserted-by":"publisher","first-page":"792","DOI":"10.1016\/j.infsof.2010.03.006","volume":"52","author":"B Kitchenham","year":"2010","unstructured":"Kitchenham B, Pretorius R, Budgen D, Brereton OP, Turner M, Niazi M, Linkman S (2010) Systematic literature reviews in software engineering\u2014a tertiary study. Inf Softw Technol 52(8):792\u2013805. https:\/\/doi.org\/10.1016\/j.infsof.2010.03.006","journal-title":"Inf Softw Technol"},{"issue":"1","key":"1772_CR33","doi-asserted-by":"publisher","first-page":"76","DOI":"10.3163\/1536-5050.104.1.013","volume":"104","author":"ID Cooper","year":"2016","unstructured":"Cooper ID (2016) What is a \u201cmapping study?\u2019\u2019. J Med Libr Assoc 104(1):76\u201378. https:\/\/doi.org\/10.3163\/1536-5050.104.1.013","journal-title":"J Med Libr Assoc"},{"key":"1772_CR34","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1016\/j.infsof.2015.03.007","volume":"64","author":"K Petersen","year":"2015","unstructured":"Petersen K, Vakkalanka S, Kuzniarz L (2015) Guidelines for conducting systematic mapping studies in software engineering: an update. Inf Softw Technol 64:1\u201318. https:\/\/doi.org\/10.1016\/j.infsof.2015.03.007","journal-title":"Inf Softw Technol"},{"key":"1772_CR35","doi-asserted-by":"publisher","first-page":"199503","DOI":"10.1109\/access.2020.3034588","volume":"8","author":"LG De Almeida","year":"2020","unstructured":"De Almeida LG, Souza ADD, Kuehne BT, Gomes OSM (2020) Data analysis techniques in vehicle communication networks: systematic mapping of literature. IEEE Access 8:199503\u2013199512. https:\/\/doi.org\/10.1109\/access.2020.3034588","journal-title":"IEEE Access"},{"issue":"3","key":"1772_CR36","doi-asserted-by":"publisher","first-page":"3609","DOI":"10.1109\/JSYST.2020.2980896","volume":"14","author":"RDA Silva","year":"2020","unstructured":"Silva RDA, Braga RTV (2020) Simulating systems-of-systems with agent-based modeling: a systematic literature review. IEEE Syst J 14(3):3609\u20133617. https:\/\/doi.org\/10.1109\/JSYST.2020.2980896","journal-title":"IEEE Syst J"},{"issue":"3","key":"1772_CR37","doi-asserted-by":"publisher","first-page":"83","DOI":"10.1145\/1273445.1273458","volume":"37","author":"S Keshav","year":"2007","unstructured":"Keshav S (2007) How to read a paper. ACM SIGCOMM Comput Commun Rev 37(3):83\u201384. https:\/\/doi.org\/10.1145\/1273445.1273458","journal-title":"ACM SIGCOMM Comput Commun Rev"},{"issue":"1","key":"1772_CR38","doi-asserted-by":"publisher","first-page":"7","DOI":"10.1016\/j.artmed.2005.03.002","volume":"37","author":"G Cohen","year":"2006","unstructured":"Cohen G, Hilario M, Sax H, Hugonnet S, Geissbuhler A (2006) Learning from imbalanced data in surveillance of nosocomial infection. Artif Intell Med 37(1):7\u201318. https:\/\/doi.org\/10.1016\/j.artmed.2005.03.002","journal-title":"Artif Intell Med"},{"issue":"8","key":"1772_CR39","doi-asserted-by":"publisher","first-page":"1900131","DOI":"10.1002\/minf.201900131","volume":"39","author":"YO Lee","year":"2020","unstructured":"Lee YO, Kim YJ (2020) The effect of resampling on data-imbalanced conditions for prediction towards nuclear receptor profiling using deep learning. Mol Inf 39(8):1900131. https:\/\/doi.org\/10.1002\/minf.201900131","journal-title":"Mol Inf"},{"issue":"5","key":"1772_CR40","doi-asserted-by":"publisher","first-page":"6911","DOI":"10.1007\/s11042-020-10024-2","volume":"80","author":"A Mahadevan","year":"2021","unstructured":"Mahadevan A, Arock M (2021) A class imbalance-aware review rating prediction using hybrid sampling and ensemble learning. Multimed Tools Appl 80(5):6911\u20136938. https:\/\/doi.org\/10.1007\/s11042-020-10024-2","journal-title":"Multimed Tools Appl"},{"issue":"2","key":"1772_CR41","doi-asserted-by":"publisher","first-page":"685","DOI":"10.18517\/ijaseit.9.2.8615","volume":"9","author":"Z Rustam","year":"2019","unstructured":"Rustam Z, Utami DA, Hidayat R, Pandelaki J, Nugroho WA (2019) Hybrid preprocessing method for support vector machine for classification of imbalanced cerebral infarction datasets. Int J Adv Sci Eng Inf Technol 9(2):685\u2013691. https:\/\/doi.org\/10.18517\/ijaseit.9.2.8615","journal-title":"Int J Adv Sci Eng Inf Technol"},{"key":"1772_CR42","doi-asserted-by":"publisher","unstructured":"Chang Q, Lin S, Liu X (2019) Stacked-SVM: a dynamic SVM framework for telephone fraud identification from imbalanced CDRs. In: ACAI 2019: proceedings of the 2019 2nd international conference on algorithms, computing and artificial intelligence, vol 9, pp 112\u2013120. ACM, New York, USA. https:\/\/doi.org\/10.1145\/3377713.3377735","DOI":"10.1145\/3377713.3377735"},{"issue":"12","key":"1772_CR43","doi-asserted-by":"publisher","first-page":"3687","DOI":"10.1007\/s13042-019-00953-2","volume":"10","author":"X Han","year":"2019","unstructured":"Han X, Cui R, Lan Y, Kang Y, Deng J, Jia N (2019) A Gaussian mixture model based combined resampling algorithm for classification of imbalanced credit data sets. Int J Mach Learn Cybern 10(12):3687\u20133699. https:\/\/doi.org\/10.1007\/s13042-019-00953-2","journal-title":"Int J Mach Learn Cybern"},{"issue":"7","key":"1772_CR44","doi-asserted-by":"publisher","first-page":"1060","DOI":"10.1057\/jors.2012.120","volume":"64","author":"AI Marqu\u00e9s","year":"2013","unstructured":"Marqu\u00e9s AI, Garc\u00eda V, S\u00e1nchez JS (2013) On the suitability of resampling techniques for the class imbalance problem in credit scoring. J Oper Res Soc 64(7):1060\u20131070. https:\/\/doi.org\/10.1057\/jors.2012.120","journal-title":"J Oper Res Soc"},{"key":"1772_CR45","doi-asserted-by":"publisher","unstructured":"Pereira RM, Bertolini D, Teixeira LO, Silla CN, Costa YMG (2020) COVID-19 identification in chest X-ray images on flat and hierarchical classification scenarios. Comput Methods Programs Biomed 194:105532 arXiv:2004.05835. https:\/\/doi.org\/10.1016\/j.cmpb.2020.105532","DOI":"10.1016\/j.cmpb.2020.105532"},{"key":"1772_CR46","doi-asserted-by":"publisher","unstructured":"Vu L, Van Tra D, Nguyen QU (2016) Learning from imbalanced data for encrypted traffic identification problem. In: SoICT\u201916: proceedings of the seventh symposium on information and communication technology, pp 147\u2013152. ACM, New York, USA. https:\/\/doi.org\/10.1145\/3011077.3011132","DOI":"10.1145\/3011077.3011132"},{"key":"1772_CR47","doi-asserted-by":"publisher","unstructured":"Shamsudin H, Yusof UK, Jayalakshmi A, Akmal\u00a0Khalid MN (2020) Combining oversampling and undersampling techniques for imbalanced classification: a comparative study using credit card fraudulent transaction dataset. In: 2020 IEEE 16th international conference on control and automation (ICCA), pp 803\u2013808. IEEE, Singapore. https:\/\/doi.org\/10.1109\/ICCA51439.2020.9264517","DOI":"10.1109\/ICCA51439.2020.9264517"},{"key":"1772_CR48","doi-asserted-by":"publisher","unstructured":"Haldar S, Mukherjee R, Chakraborty P, Banerjee S, Chaudhury S, Chatterjee S (2019) Improved epilepsy detection method by addressing class imbalance problem. In: 2018 IEEE 9th annual information technology, electronics and mobile communication conference (IEMCON), pp 934\u2013939. IEEE, Vancouver, BC, Canada. https:\/\/doi.org\/10.1109\/IEMCON.2018.8614826","DOI":"10.1109\/IEMCON.2018.8614826"},{"key":"1772_CR49","doi-asserted-by":"publisher","first-page":"120","DOI":"10.1016\/j.neucom.2018.04.090","volume":"343","author":"R Malhotra","year":"2019","unstructured":"Malhotra R, Kamal S (2019) An empirical study to investigate oversampling methods for improving software defect prediction using imbalanced data. Neurocomputing 343:120\u2013140. https:\/\/doi.org\/10.1016\/j.neucom.2018.04.090","journal-title":"Neurocomputing"},{"issue":"4","key":"1772_CR50","doi-asserted-by":"publisher","first-page":"1581","DOI":"10.1007\/s11219-020-09525-y","volume":"28","author":"R Malhotra","year":"2020","unstructured":"Malhotra R, Lata K (2020) An empirical study on predictability of software maintainability using imbalanced data. Softw Qual J 28(4):1581\u20131614. https:\/\/doi.org\/10.1007\/s11219-020-09525-y","journal-title":"Softw Qual J"},{"key":"1772_CR51","doi-asserted-by":"publisher","DOI":"10.1007\/s12559-019-09657-9","author":"J Ma","year":"2019","unstructured":"Ma J, Afolabi DO, Ren J, Zhen A (2019) Predicting seminal quality via imbalanced learning with evolutionary safe-level synthetic minority over-sampling technique. Cogn Comput. https:\/\/doi.org\/10.1007\/s12559-019-09657-9","journal-title":"Cogn Comput"},{"issue":"6","key":"1772_CR52","doi-asserted-by":"publisher","first-page":"2694","DOI":"10.1118\/1.4948499","volume":"43","author":"S Yan","year":"2016","unstructured":"Yan S, Qian W, Guan Y, Zheng B (2016) Improving lung cancer prognosis assessment by incorporating synthetic minority oversampling technique and score fusion method. Med Phys 43(6):2694\u20132703. https:\/\/doi.org\/10.1118\/1.4948499","journal-title":"Med Phys"},{"key":"1772_CR53","doi-asserted-by":"publisher","unstructured":"Purnami SW, Trapsilasiwi RK (2017) SMOTE-least square support vector machine for classification of multiclass imbalanced data. In: ICMLC 2017: proceedings of the 9th international conference on machine learning and computing, pp 107\u2013111. ACM, New York, USA. https:\/\/doi.org\/10.1145\/3055635.3056581","DOI":"10.1145\/3055635.3056581"},{"key":"1772_CR54","doi-asserted-by":"publisher","unstructured":"Dewi C, Firdaus\u00a0Mahmudy W, Arifando R, Kusuma\u00a0Arbawa Y, Labique\u00a0Ahmadie B, Labique B (2020) Improve performance of extreme learning machine in classification of patchouli varieties with imbalanced class. In: SIET\u201920: proceedings of the 5th international conference on sustainable information engineering and technology, pp 16\u201322. ACM, New York, USA. https:\/\/doi.org\/10.1145\/3427423.3427424","DOI":"10.1145\/3427423.3427424"},{"issue":"3","key":"1772_CR55","doi-asserted-by":"publisher","first-page":"774","DOI":"10.1109\/TCBB.2018.2871674","volume":"16","author":"X Zhang","year":"2019","unstructured":"Zhang X, Lin X, Zhao J, Huang Q, Xu X (2019) Efficiently predicting hot spots in PPIs by combining random forest and synthetic minority over-sampling technique. IEEE\/ACM Trans Comput Biol Bioinf 16(3):774\u2013781. https:\/\/doi.org\/10.1109\/TCBB.2018.2871674","journal-title":"IEEE\/ACM Trans Comput Biol Bioinf"},{"issue":"2","key":"1772_CR56","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1111\/exsy.12363","volume":"36","author":"A Gici\u0107","year":"2018","unstructured":"Gici\u0107 A, Subasi A (2018) Credit scoring for a microcredit data set using the synthetic minority oversampling technique and ensemble classifiers. Expert Syst 36(2):1\u201322. https:\/\/doi.org\/10.1111\/exsy.12363","journal-title":"Expert Syst"},{"issue":"4","key":"1772_CR57","doi-asserted-by":"publisher","first-page":"1325","DOI":"10.1109\/TDEI.2019.008034","volume":"26","author":"V Tra","year":"2019","unstructured":"Tra V, Duong BP, Kim JM (2019) Improving diagnostic performance of a power transformer using an adaptive over-sampling method for imbalanced data. IEEE Trans Dielectr Electr Insul 26(4):1325\u20131333. https:\/\/doi.org\/10.1109\/TDEI.2019.008034","journal-title":"IEEE Trans Dielectr Electr Insul"},{"key":"1772_CR58","doi-asserted-by":"publisher","first-page":"106595","DOI":"10.1016\/j.ijepes.2020.106595","volume":"126 Part A","author":"N Jiang","year":"2021","unstructured":"Jiang N, Li N (2021) A wind turbine frequent principal fault detection and localization approach with imbalanced data using an improved synthetic oversampling technique. Int J Electr Power Energy Syst 126 Part A:106595. https:\/\/doi.org\/10.1016\/j.ijepes.2020.106595","journal-title":"Int J Electr Power Energy Syst"},{"issue":"1","key":"1772_CR59","doi-asserted-by":"publisher","first-page":"31","DOI":"10.1007\/s13748-019-00197-9","volume":"9","author":"H Faris","year":"2020","unstructured":"Faris H, Abukhurma R, Almanaseer W, Saadeh M, Mora AM, Castillo PA, Aljarah I (2020) Improving financial bankruptcy prediction in a highly imbalanced class distribution using oversampling and ensemble learning: a case from the Spanish market. Prog Artif Intell 9(1):31\u201353. https:\/\/doi.org\/10.1007\/s13748-019-00197-9","journal-title":"Prog Artif Intell"},{"issue":"5","key":"1772_CR60","doi-asserted-by":"publisher","first-page":"1067","DOI":"10.1007\/s10796-020-10031-6","volume":"22","author":"S Smiti","year":"2020","unstructured":"Smiti S, Soui M (2020) Bankruptcy prediction using deep learning approach based on borderline SMOTE. Inf Syst Front 22(5):1067\u20131083. https:\/\/doi.org\/10.1007\/s10796-020-10031-6","journal-title":"Inf Syst Front"},{"key":"1772_CR61","doi-asserted-by":"publisher","DOI":"10.1016\/j.eswax.2019.100003","volume":"1","author":"J Jiang","year":"2019","unstructured":"Jiang J, Zhang H, Pi D, Dai C (2019) A novel multi-module neural network system for imbalanced heartbeats classification. Expert Syst Appl X 1:100003. https:\/\/doi.org\/10.1016\/j.eswax.2019.100003","journal-title":"Expert Syst Appl X"},{"key":"1772_CR62","doi-asserted-by":"publisher","first-page":"49","DOI":"10.1016\/j.jbi.2015.09.012","volume":"58","author":"MS Santos","year":"2015","unstructured":"Santos MS, Abreu PH, Garc\u00eda-Laencina PJ, Sim\u00e3o A, Carvalho A (2015) A new cluster-based oversampling method for improving survival prediction of hepatocellular carcinoma patients. J Biomed Inform 58:49\u201359. https:\/\/doi.org\/10.1016\/j.jbi.2015.09.012","journal-title":"J Biomed Inform"},{"key":"1772_CR63","doi-asserted-by":"publisher","unstructured":"Tashkandi A, Wiese L (2019) A hybrid machine learning approach for improving mortality risk prediction on imbalanced data. In: iiWAS2019: proceedings of the 21st international conference on information integration and web-based applications and services, pp 83\u201392. ACM, New York, USA. https:\/\/doi.org\/10.1145\/3366030.3366040","DOI":"10.1145\/3366030.3366040"},{"key":"1772_CR64","doi-asserted-by":"publisher","unstructured":"Zhou Q, Sun B, Song Y, Li S (2020) K-means clustering based undersampling for lower back pain data. In: ICBDT 2020: proceedings of the 2020 3rd international conference on big data technologies, pp 53\u201357. ACM, New York, USA. https:\/\/doi.org\/10.1145\/3422713.3422725","DOI":"10.1145\/3422713.3422725"},{"key":"1772_CR65","doi-asserted-by":"publisher","first-page":"70111","DOI":"10.1109\/ACCESS.2020.2986356","volume":"8","author":"Q Liu","year":"2020","unstructured":"Liu Q, Ma G, Cheng C (2020) Data fusion generative adversarial network for multi-class imbalanced fault diagnosis of rotating machinery. IEEE Access 8:70111\u201370124. https:\/\/doi.org\/10.1109\/ACCESS.2020.2986356","journal-title":"IEEE Access"},{"key":"1772_CR66","doi-asserted-by":"publisher","unstructured":"Gangwar AK, Ravi V (2019) WiP: generative adversarial network for oversampling data in credit card fraud detection. In: ICISS 2019: 15th international conference on information systems security, vol 11952, pp 123\u2013134. Springer, Hyderabad, India. https:\/\/doi.org\/10.1007\/978-3-030-36945-3","DOI":"10.1007\/978-3-030-36945-3"},{"key":"1772_CR67","doi-asserted-by":"publisher","DOI":"10.1016\/j.enbuild.2019.109689","volume":"210","author":"K Yan","year":"2020","unstructured":"Yan K, Huang J, Shen W, Ji Z (2020) Unsupervised learning for fault detection and diagnosis of air handling units. Energy Build 210:109689. https:\/\/doi.org\/10.1016\/j.enbuild.2019.109689","journal-title":"Energy Build"},{"issue":"11","key":"1772_CR68","doi-asserted-by":"publisher","first-page":"2209","DOI":"10.1049\/iet-gtd.2019.1388","volume":"14","author":"H Wang","year":"2020","unstructured":"Wang H, Ye W (2020) Transient stability evaluation model based on SSDAE with imbalanced correction. IET Gener Transm Distrib 14(11):2209\u20132216. https:\/\/doi.org\/10.1049\/iet-gtd.2019.1388","journal-title":"IET Gener Transm Distrib"},{"key":"1772_CR69","doi-asserted-by":"publisher","DOI":"10.1016\/j.artmed.2020.101815","volume":"104","author":"N Nnamoko","year":"2020","unstructured":"Nnamoko N, Korkontzelos I (2020) Efficient treatment of outliers and class imbalance for diabetes prediction. Artif Intell Med 104:101815. https:\/\/doi.org\/10.1016\/j.artmed.2020.101815","journal-title":"Artif Intell Med"},{"key":"1772_CR70","doi-asserted-by":"publisher","first-page":"35","DOI":"10.1016\/j.cose.2016.12.004","volume":"69","author":"S Liu","year":"2017","unstructured":"Liu S, Wang Y, Zhang J, Chen C, Xiang Y (2017) Addressing the class imbalance problem in Twitter spam detection using ensemble learning. Comput Secur 69:35\u201349. https:\/\/doi.org\/10.1016\/j.cose.2016.12.004","journal-title":"Comput Secur"},{"key":"1772_CR71","doi-asserted-by":"publisher","unstructured":"Filho AH, Concatto F, Nau J, Prado HAD, Imhof DO, Ferneda E (2019) Imbalanced learning techniques for improving the performance of statistical models in automated essay scoring. In: Knowledge-based and intelligent information & engineering systems: proceedings of the 23rd international conference KES2019, vol 159, pp 764\u2013773. Elsevier B.V., Budapest, Hungary. https:\/\/doi.org\/10.1016\/j.procs.2019.09.235","DOI":"10.1016\/j.procs.2019.09.235"},{"key":"1772_CR72","doi-asserted-by":"publisher","first-page":"16","DOI":"10.1016\/j.knosys.2012.12.007","volume":"41","author":"L Zhou","year":"2013","unstructured":"Zhou L (2013) Performance of corporate bankruptcy prediction models on imbalanced dataset: the effect of sampling methods. Knowl Based Syst 41:16\u201325. https:\/\/doi.org\/10.1016\/j.knosys.2012.12.007","journal-title":"Knowl Based Syst"}],"container-title":["Knowledge and Information Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10115-022-01772-8.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10115-022-01772-8\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10115-022-01772-8.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,4,9]],"date-time":"2025-04-09T10:11:11Z","timestamp":1744193471000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10115-022-01772-8"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,11,9]]},"references-count":72,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2023,1]]}},"alternative-id":["1772"],"URL":"https:\/\/doi.org\/10.1007\/s10115-022-01772-8","relation":{"is-referenced-by":[{"id-type":"doi","id":"10.1007\/s42452-025-08033-7","asserted-by":"object"}]},"ISSN":["0219-1377","0219-3116"],"issn-type":[{"value":"0219-1377","type":"print"},{"value":"0219-3116","type":"electronic"}],"subject":[],"published":{"date-parts":[[2022,11,9]]},"assertion":[{"value":"8 October 2021","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"27 September 2022","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"2 October 2022","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"9 November 2022","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare that they have no known competing financial interests or personal relationships that could have appeared to influence the work reported in this paper.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Competing interests"}},{"value":"Not applicable.","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethics approval"}},{"value":"Not applicable.","order":4,"name":"Ethics","group":{"name":"EthicsHeading","label":"Consent to participate"}},{"value":"Not applicable.","order":5,"name":"Ethics","group":{"name":"EthicsHeading","label":"Consent for publication"}},{"value":"Not applicable.","order":6,"name":"Ethics","group":{"name":"EthicsHeading","label":"Code availability"}}]}}