{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,27]],"date-time":"2026-02-27T04:30:12Z","timestamp":1772166612548,"version":"3.50.1"},"reference-count":46,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2025,6,3]],"date-time":"2025-06-03T00:00:00Z","timestamp":1748908800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0"},{"start":{"date-parts":[[2025,6,3]],"date-time":"2025-06-03T00:00:00Z","timestamp":1748908800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["J Big Data"],"DOI":"10.1186\/s40537-025-01188-5","type":"journal-article","created":{"date-parts":[[2025,6,3]],"date-time":"2025-06-03T12:03:48Z","timestamp":1748952228000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Self-organizing maps to evaluate optimal strategies for balancing binary class distributions: a methodological approach"],"prefix":"10.1186","volume":"12","author":[{"given":"Alberto","family":"Nogales","sequence":"first","affiliation":[]},{"given":"Diego","family":"Guadalupe","sequence":"additional","affiliation":[]},{"given":"\u00c1lvaro J.","family":"Garc\u00eda-Tejedor","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,6,3]]},"reference":[{"key":"1188_CR1","doi-asserted-by":"publisher","first-page":"1173","DOI":"10.1007\/978-3-319-16486-1_116","volume-title":"New contributions in information systems and technologies","author":"J P\u00e9rez","year":"2015","unstructured":"P\u00e9rez J, Iturbide E, Olivares V, Hidalgo M, Almanza N, Mart\u00ednez A. A data preparation methodology in data mining applied to mortality population databases. In: Rocha A, Correia AM, Costanzo S, Reis LP, editors. New contributions in information systems and technologies. Cham: Springer International Publishing; 2015. p. 1173\u201382."},{"key":"1188_CR2","doi-asserted-by":"publisher","first-page":"1367","DOI":"10.1109\/TPAMI.2018.2832629","volume":"41","author":"Q Dong","year":"2019","unstructured":"Dong Q, Gong S, Zhu X. Imbalanced deep learning by minority class incremental rectification. IEEE Trans Pattern Anal Mach Intell. 2019;41:1367\u201381.","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"1188_CR3","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1186\/s40537-019-0192-5","volume":"6","author":"JM Johnson","year":"2019","unstructured":"Johnson JM, Khoshgoftaar TM. Survey on deep learning with class imbalance. J Big Data. 2019;6:1\u201354.","journal-title":"J Big Data"},{"key":"1188_CR4","doi-asserted-by":"publisher","first-page":"59","DOI":"10.1007\/BF00337288","volume":"43","author":"T Kohonen","year":"1982","unstructured":"Kohonen T. Self-organized formation of topologically correct feature maps. Biol Cybern. 1982;43:59\u201369.","journal-title":"Biol Cybern"},{"key":"1188_CR5","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1016\/S0925-2312(98)00030-7","volume":"21","author":"T Kohonen","year":"1998","unstructured":"Kohonen T. The self-organizing map. Neurocomputing. 1998;21:1\u20136.","journal-title":"Neurocomputing"},{"key":"1188_CR6","doi-asserted-by":"publisher","first-page":"107","DOI":"10.1007\/978-3-540-39804-2_12","volume-title":"Knowledge discovery in databases: PKDD 2003","author":"NV Chawla","year":"2003","unstructured":"Chawla NV, Lazarevic A, Hall LO, Bowyer KW. SMOTEBoost: improving prediction of the minority class in boosting. In: Lavra\u010d N, Gamberger D, Todorovski L, Blockeel H, editors. Knowledge discovery in databases: PKDD 2003. Berlin, Heidelberg: Springer; 2003. p. 107\u201319."},{"key":"1188_CR7","doi-asserted-by":"crossref","unstructured":"Junsomboon N, Phienthrakul T. Combining over-sampling and under-sampling techniques for imbalance dataset. In: Proceedings of the 9th international conference on machine learning and computing. Singapore: Association for Computing Machinery. 2017. p. 243\u20137.","DOI":"10.1145\/3055635.3056643"},{"key":"1188_CR8","first-page":"276","volume":"2018","author":"S Choirunnisa","year":"2018","unstructured":"Choirunnisa S, Lianto J. Hybrid method of undersampling and oversampling for handling imbalanced data. In, international seminar on research of information technology and intelligent systems (ISRITI). Yogyakart, Indonesia IEEE. 2018;2018:276\u201380.","journal-title":"Yogyakart, Indonesia IEEE"},{"key":"1188_CR9","unstructured":"Wainer J, Franceschinell RA. An empirical evaluation of imbalanced data strategies from a practitioner\u2019s point of view. arXiv preprint arXiv:181007168. 2018."},{"key":"1188_CR10","unstructured":"Costa AJ, Santos MS, Soares C, Abreu PH. Analysis of imbalance strategies recommendation using a meta-learning approach. In: 7th ICML workshop on automated machine learning (AutoML-ICML2020): ICML; 2020. p. 1\u201310."},{"key":"1188_CR11","doi-asserted-by":"publisher","first-page":"191","DOI":"10.1016\/j.dss.2009.07.011","volume":"48","author":"A Sun","year":"2009","unstructured":"Sun A, Lim E-P, Liu Y. On strategies for imbalanced text classification using SVM: a comparative study. Decis Support Syst. 2009;48:191\u2013201.","journal-title":"Decis Support Syst"},{"key":"1188_CR12","volume-title":"Intelligent computing theories","author":"G Goel","year":"2013","unstructured":"Goel G, Maguire L, Li Y, McLoone S. Evaluation of sampling methods for learning from imbalanced data. In: Huang DS, Bevilacqua V, Figueroa JC, Premaratne P, editors. Intelligent computing theories. Berlin: Springer; 2013."},{"key":"1188_CR13","doi-asserted-by":"crossref","unstructured":"Shamsudin H, Yusof UK, Jayalakshmi A, Khalid MNA. Combining oversampling and undersampling techniques for imbalanced classification: a comparative study using credit card fraudulent transaction dataset. In: 2020 IEEE 16th international conference on control & automation (ICCA). Singapore: IEEE; 2020. p. 803\u20138.","DOI":"10.1109\/ICCA51439.2020.9264517"},{"key":"1188_CR14","doi-asserted-by":"crossref","unstructured":"A Gosain S Sardana. 2017. Handling class imbalance problem using oversampling techniques: a review. In, international conference on advances in computing, communications and informatics (ICACCI) Udupi, India: IEEE 2017 79 85","DOI":"10.1109\/ICACCI.2017.8125820"},{"key":"1188_CR15","doi-asserted-by":"publisher","first-page":"8546","DOI":"10.3390\/app11188546","volume":"11","author":"MS Kraiem","year":"2021","unstructured":"Kraiem MS, S\u00e1nchez-Hern\u00e1ndez F, Moreno-Garc\u00eda MN. Selecting the suitable resampling strategy for imbalanced data classification regarding dataset properties an approach based on association models. Appl Sci. 2021;11:8546.","journal-title":"Appl Sci"},{"key":"1188_CR16","doi-asserted-by":"publisher","first-page":"54","DOI":"10.3390\/info14010054","volume":"14","author":"T Wongvorachan","year":"2023","unstructured":"Wongvorachan T, He S, Bulut O. A comparison of undersampling, oversampling, and SMOTE methods for dealing with imbalanced classification in educational data mining. Information. 2023;14:54.","journal-title":"Information"},{"key":"1188_CR17","doi-asserted-by":"publisher","first-page":"87","DOI":"10.1186\/s40537-024-00943-4","volume":"11","author":"M Mujahid","year":"2024","unstructured":"Mujahid M, K\u0131na E, Rustam F, Villar MG, Alvarado ES, Diez IDLT, et al. Data oversampling and imbalanced datasets: an investigation of performance for machine learning and feature engineering. J Big Data. 2024;11:87.","journal-title":"J Big Data"},{"key":"1188_CR18","doi-asserted-by":"publisher","first-page":"14050","DOI":"10.1109\/ACCESS.2024.3357091","volume":"12","author":"M Alamri","year":"2024","unstructured":"Alamri M, Ykhlef M. Hybrid undersampling and oversampling for handling imbalanced credit card data. IEEE Access. 2024;12:14050\u201360.","journal-title":"IEEE Access"},{"key":"1188_CR19","doi-asserted-by":"publisher","first-page":"3351","DOI":"10.3390\/math12213351","volume":"12","author":"F Parrales-Bravo","year":"2024","unstructured":"Parrales-Bravo F, Caicedo-Quiroz R, Tolozano-Benitez E, G\u00f3mez-Rodr\u00edguez V, Cevallos-Torres L, Charco-Aguirre J, et al. OUCH: oversampling and undersampling cannot help improve accuracy in our bayesian classifiers that predict preeclampsia. Mathematics. 2024;12:3351.","journal-title":"Mathematics"},{"key":"1188_CR20","doi-asserted-by":"publisher","DOI":"10.1088\/1755-1315\/58\/1\/012031","volume":"58","author":"B Santoso","year":"2017","unstructured":"Santoso B, Wijayanto H, Notodiputro KA, Sartono B. Synthetic over sampling methods for handling class imbalanced problems: a review. IOP Conf Ser: Earth Environ Sci. 2017;58: 012031.","journal-title":"IOP Conf Ser: Earth Environ Sci"},{"key":"1188_CR21","doi-asserted-by":"publisher","first-page":"7","DOI":"10.1186\/s40537-023-00857-7","volume":"11","author":"C Yang","year":"2024","unstructured":"Yang C, Fridgeirsson EA, Kors JA, Reps JM, Rijnbeek PR. Impact of random oversampling and random undersampling on the performance of prediction models developed using observational health data. J Big Data. 2024;11:7.","journal-title":"J Big Data"},{"key":"1188_CR22","doi-asserted-by":"publisher","first-page":"3947","DOI":"10.1109\/TNNLS.2019.2947658","volume":"31","author":"H Yu","year":"2020","unstructured":"Yu H, Lu J, Zhang G. Online topology learning by a Gaussian membership-based self-organizing incremental neural network. IEEE Trans Neural Netw Learn Syst. 2020;31:3947\u201361.","journal-title":"IEEE Trans Neural Netw Learn Syst"},{"key":"1188_CR23","doi-asserted-by":"publisher","first-page":"412","DOI":"10.1109\/TFUZZ.2020.3039681","volume":"30","author":"H Yu","year":"2022","unstructured":"Yu H, Lu J, Zhang G. Topology learning-based fuzzy random neural networks for streaming data regression. IEEE Trans Fuzzy Syst. 2022;30:412\u201325.","journal-title":"IEEE Trans Fuzzy Syst"},{"key":"1188_CR24","doi-asserted-by":"publisher","first-page":"1048","DOI":"10.2991\/ijcis.d.200721.001","volume":"13","author":"JJ Winston","year":"2020","unstructured":"Winston JJ, Turker GF, Kose U, Hemanth DJ. Novel optimization based hybrid self-organizing map classifiers for iris image recognition. Int J Comput Intell Syst. 2020;13:1048\u201358.","journal-title":"Int J Comput Intell Syst"},{"key":"1188_CR25","doi-asserted-by":"publisher","first-page":"195","DOI":"10.1023\/A:1007452223027","volume":"30","author":"M Kubat","year":"1998","unstructured":"Kubat M, Holte RC, Matwin S. Machine learning for the detection of oil spills in satellite radar images. Mach Learn. 1998;30:195\u2013215.","journal-title":"Mach Learn"},{"key":"1188_CR26","first-page":"105","volume":"2011","author":"L Tong","year":"2011","unstructured":"Tong L, Yongquan L, Weijian NA. A hybrid strategy for imbalanced classification. In, 3rd symposium on web society. Port Elizabeth: IEEE. 2011;2011:105\u201310.","journal-title":"Port Elizabeth: IEEE"},{"key":"1188_CR27","doi-asserted-by":"publisher","first-page":"137","DOI":"10.1007\/s10462-024-10759-6","volume":"57","author":"W Chen","year":"2024","unstructured":"Chen W, Yang K, Yu Z, Shi Y, Chen CLP. A survey on imbalanced learning: latest research, applications and future directions. Artif Intell Rev. 2024;57:137.","journal-title":"Artif Intell Rev"},{"key":"1188_CR28","doi-asserted-by":"publisher","DOI":"10.1016\/j.chemosphere.2022.137671","volume":"314","author":"HE Elzain","year":"2023","unstructured":"Elzain HE, Chung SY, Venkatramanan S, Selvam S, Ahemd HA, Seo YK, et al. Novel machine learning algorithms to predict the groundwater vulnerability index to nitrate pollution at two levels of modeling. Chemosphere. 2023;314: 137671.","journal-title":"Chemosphere"},{"key":"1188_CR29","doi-asserted-by":"publisher","DOI":"10.1371\/journal.pone.0254030","volume":"16","author":"H Wang","year":"2021","unstructured":"Wang H, Liu X. Undersampling bankruptcy prediction: Taiwan bankruptcy data. PLoS ONE. 2021;16: e0254030.","journal-title":"PLoS ONE"},{"key":"1188_CR30","doi-asserted-by":"publisher","first-page":"321","DOI":"10.1613\/jair.953","volume":"16","author":"NV Chawla","year":"2002","unstructured":"Chawla NV, Bowyer KW, Hall LO, Kegelmeyer WP. SMOTE: synthetic minority over-sampling technique. J Artif Intell Res. 2002;16:321\u201357.","journal-title":"J Artif Intell Res"},{"key":"1188_CR31","first-page":"1322","volume":"2008","author":"H He","year":"2008","unstructured":"He H, Bai Y, Garcia EA, Li S. ADASYN: adaptive synthetic sampling approach for imbalanced learning. In IEEE international joint conference on neural networks (IEEE world congress on computational intelligence). Hong Kong: IEEE. 2008;2008:1322\u20138.","journal-title":"Hong Kong: IEEE"},{"key":"1188_CR32","doi-asserted-by":"crossref","unstructured":"Han H, Wang WY, Mao BH. Borderline-SMOTE: a new over-sampling method in imbalanced data sets learning. In: International Conference on Intelligent Computing; 2005. p. 878\u201387.","DOI":"10.1007\/11538059_91"},{"key":"1188_CR33","unstructured":"Nguyen HM, Cooper EW, Kamei K. Borderline over-sampling for imbalanced data classification. In: Proceedings: fifth international workshop on computational intelligence & applications; 2009. p. 24\u20139."},{"key":"1188_CR34","unstructured":"Last F, Douzas G, Bacao F. Oversampling for imbalanced learning based on K-means and smote. arXiv preprint arXiv:171100837. 2017."},{"key":"1188_CR35","doi-asserted-by":"publisher","DOI":"10.1109\/TSMC.1976.4309523","author":"I Tomek","year":"1976","unstructured":"Tomek I. An experiment with the edited nearest-neighbor rule. IEEE Trans Syst Man Cybern. 1976. https:\/\/doi.org\/10.1109\/TSMC.1976.4309523.","journal-title":"IEEE Trans Syst Man Cybern."},{"key":"1188_CR36","doi-asserted-by":"publisher","DOI":"10.1109\/TSMC.1972.4309137","author":"DL Wilson","year":"1972","unstructured":"Wilson DL. Asymptotic properties of nearest neighbor rules using edited data. IEEE Trans Syst Man Cybern. 1972. https:\/\/doi.org\/10.1109\/TSMC.1972.4309137.","journal-title":"IEEE Trans Syst Man Cybern."},{"key":"1188_CR37","doi-asserted-by":"publisher","first-page":"515","DOI":"10.1109\/TIT.1968.1054155","volume":"14","author":"P Hart","year":"1968","unstructured":"Hart P. The condensed nearest neighbor rule (Corresp.). IEEE Trans Inf Theory. 1968;14:515\u20136.","journal-title":"IEEE Trans Inf Theory"},{"key":"1188_CR38","doi-asserted-by":"publisher","first-page":"63","DOI":"10.1007\/3-540-48229-6_9","volume-title":"Artificial intelligence in medicine","author":"J Laurikkala","year":"2001","unstructured":"Laurikkala J. Improving identification of difficult small classes by balancing class distribution. In: Quaglini S, Barahona P, Andreassen S, editors. Artificial intelligence in medicine. Berlin: Springer; 2001. p. 63\u20136."},{"key":"1188_CR39","unstructured":"Kubat M, Matwin S. Addressing the curse of imbalanced training sets: one-sided selection. In: Proceedings of the fourteenth international conference on machine learning (ICML 1997). Nashville, Tennessee: Morgan Kaufmann. 1997. 179\u201386."},{"key":"1188_CR40","doi-asserted-by":"publisher","first-page":"37","DOI":"10.1111\/j.1469-8137.1912.tb05611.x","volume":"11","author":"P Jaccard","year":"1912","unstructured":"Jaccard P. The distribution of the flora in the alpine zone. New Phytol. 1912;11:37\u201350.","journal-title":"New Phytol"},{"key":"1188_CR41","doi-asserted-by":"crossref","unstructured":"Sa\u0142abun W, Shekhovtsov A. An innovative drastic metric for ranking similarity in decision-making problems. In: Proceedings of the 18th conference on computer science and intelligence systems. Warsaw, Poland: ACSIS; 2023. p. 731\u20138.","DOI":"10.15439\/2023F6502"},{"key":"1188_CR42","doi-asserted-by":"publisher","first-page":"533","DOI":"10.1038\/323533a0","volume":"323","author":"DE Rumelhart","year":"1986","unstructured":"Rumelhart DE, Hinton GE, Williams RJ. Learning representations by back-propagating errors. Nature. 1986;323:533\u20136.","journal-title":"Nature"},{"key":"1188_CR43","doi-asserted-by":"publisher","DOI":"10.1016\/j.simpa.2022.100280","volume":"12","author":"\u00c1J Garc\u00eda-Tejedor","year":"2022","unstructured":"Garc\u00eda-Tejedor \u00c1J, Nogales A. An open-source python library for self-organizing-maps. Softw Impacts. 2022;12: 100280.","journal-title":"Softw Impacts"},{"key":"1188_CR44","first-page":"281","volume":"13","author":"J Bergstra","year":"2012","unstructured":"Bergstra J, Bengio Y. Random search for hyper-parameter optimization. J Mach Learn Res. 2012;13:281\u2013305.","journal-title":"J Mach Learn Res"},{"key":"1188_CR45","doi-asserted-by":"publisher","first-page":"105","DOI":"10.1007\/978-3-319-03674-8_10","volume-title":"Advance trends in soft computing","author":"M Khalilia","year":"2014","unstructured":"Khalilia M, Popescu M. Topology preservation in fuzzy self-organizing maps. In: Jamshidi M, Kreinovich V, Kacprzyk J, editors. Advance trends in soft computing. Cham: Springer International Publishing; 2014. p. 105\u201314."},{"key":"1188_CR46","doi-asserted-by":"publisher","first-page":"15849","DOI":"10.1073\/pnas.1903070116","volume":"116","author":"M Belkin","year":"2019","unstructured":"Belkin M, Hsu D, Ma S, Mandal S. Reconciling modern machine-learning practice and the classical bias-variance trade-off. Proc Natl Acad Sci U S A. 2019;116:15849\u201354.","journal-title":"Proc Natl Acad Sci U S A"}],"container-title":["Journal of Big Data"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1186\/s40537-025-01188-5.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1186\/s40537-025-01188-5\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1186\/s40537-025-01188-5.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,3]],"date-time":"2025-06-03T12:04:00Z","timestamp":1748952240000},"score":1,"resource":{"primary":{"URL":"https:\/\/journalofbigdata.springeropen.com\/articles\/10.1186\/s40537-025-01188-5"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,6,3]]},"references-count":46,"journal-issue":{"issue":"1","published-online":{"date-parts":[[2025,12]]}},"alternative-id":["1188"],"URL":"https:\/\/doi.org\/10.1186\/s40537-025-01188-5","relation":{"has-preprint":[{"id-type":"doi","id":"10.21203\/rs.3.rs-5559968\/v1","asserted-by":"object"}]},"ISSN":["2196-1115"],"issn-type":[{"value":"2196-1115","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,6,3]]},"assertion":[{"value":"1 December 2024","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"12 May 2025","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"3 June 2025","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"Not applicable.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethics approval and consent to participate"}},{"value":"Not applicable.","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Consent for publication"}},{"value":"The authors declare no competing interests.","order":4,"name":"Ethics","group":{"name":"EthicsHeading","label":"Competing interests"}}],"article-number":"141"}}