{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,11]],"date-time":"2026-03-11T07:43:05Z","timestamp":1773214985160,"version":"3.50.1"},"publisher-location":"Cham","reference-count":33,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783032061171","type":"print"},{"value":"9783032061188","type":"electronic"}],"license":[{"start":{"date-parts":[[2025,9,29]],"date-time":"2025-09-29T00:00:00Z","timestamp":1759104000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,9,29]],"date-time":"2025-09-29T00:00:00Z","timestamp":1759104000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2026]]},"DOI":"10.1007\/978-3-032-06118-8_15","type":"book-chapter","created":{"date-parts":[[2025,9,28]],"date-time":"2025-09-28T11:23:36Z","timestamp":1759058616000},"page":"247-264","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Harnessing Mixed Features for\u00a0Imbalance Data Oversampling: Application to\u00a0Bank Customers Scoring"],"prefix":"10.1007","author":[{"given":"Abdoulaye","family":"Sakho","sequence":"first","affiliation":[]},{"given":"Emmanuel","family":"Malherbe","sequence":"additional","affiliation":[]},{"given":"Carl-Erik","family":"Gauthier","sequence":"additional","affiliation":[]},{"given":"Erwan","family":"Scornet","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,9,29]]},"reference":[{"key":"15_CR1","doi-asserted-by":"crossref","unstructured":"Athey, S., Tibshirani, J., Wager, S.: Generalized random forests (2019)","DOI":"10.1214\/18-AOS1709"},{"issue":"1","key":"15_CR2","first-page":"1063","volume":"13","author":"G Biau","year":"2012","unstructured":"Biau, G.: Analysis of a random forests model. J. Mach. Learn. Res. 13(1), 1063\u20131095 (2012)","journal-title":"J. Mach. Learn. Res."},{"key":"15_CR3","doi-asserted-by":"publisher","first-page":"5","DOI":"10.1023\/A:1010933404324","volume":"45","author":"L Breiman","year":"2001","unstructured":"Breiman, L.: Random forests. Mach. Learn. 45, 5\u201332 (2001)","journal-title":"Mach. Learn."},{"key":"15_CR4","unstructured":"Cao, K., Wei, C., Gaidon, A., Arechiga, N., Ma, T.: Learning imbalanced datasets with label-distribution-aware margin loss. Adv. Neural Inf. Process. Syst. 32 (2019)"},{"key":"15_CR5","doi-asserted-by":"crossref","unstructured":"Chawla, N.V., Bowyer, K.W., Hall, L.O., Kegelmeyer, W.P.: SMOTE: synthetic minority over-sampling technique. Journal of Artificial Intelligence Research (2002)","DOI":"10.1613\/jair.953"},{"key":"15_CR6","doi-asserted-by":"crossref","unstructured":"Chen, Y., Wiesel, A., Eldar, Y.C., Hero, A.O.: Shrinkage algorithms for MMSE covariance estimation. IEEE Trans. Sig. Process. 58(10) (2010)","DOI":"10.1109\/TSP.2010.2053029"},{"key":"15_CR7","doi-asserted-by":"crossref","unstructured":"Fern\u00e1ndez, A., Garc\u00eda, S., Galar, M., Prati, R.C., Krawczyk, B., Herrera, F.: Learning from imbalanced data sets, vol.\u00a010. Springer (2018)","DOI":"10.1007\/978-3-319-98074-4"},{"key":"15_CR8","doi-asserted-by":"publisher","first-page":"77","DOI":"10.1016\/j.procs.2018.07.211","volume":"126","author":"M Garchery","year":"2018","unstructured":"Garchery, M., Granitzer, M.: On the influence of categorical features in ranking anomalies using mixed data. Procedia Comput. Sci. 126, 77\u201386 (2018)","journal-title":"Procedia Comput. Sci."},{"key":"15_CR9","first-page":"507","volume":"35","author":"L Grinsztajn","year":"2022","unstructured":"Grinsztajn, L., Oyallon, E., Varoquaux, G.: Why do tree-based models still outperform deep learning on typical tabular data? Adv. Neural. Inf. Process. Syst. 35, 507\u2013520 (2022)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"15_CR10","doi-asserted-by":"crossref","unstructured":"Han, H., Wang, W.Y., Mao, B.H.: Borderline-smote: a new over-sampling method in imbalanced data sets learning. In: International Conference on Intelligent Computing, pp. 878\u2013887. Springer (2005)","DOI":"10.1007\/11538059_91"},{"key":"15_CR11","unstructured":"Hassan, A.K.I., Abraham, A.: Modeling insurance fraud detection using imbalanced data classification. In: Advances in Nature and Biologically Inspired Computing: Proceedings of the 7th World Congress on Nature and Biologically Inspired Computing (NaBIC2015) in Pietermaritzburg, South Africa, held December 01-03, 2015, pp. 117\u2013127. Springer (2016)"},{"key":"15_CR12","unstructured":"He, H., Bai, Y., Garcia, E.A., Li, S.: ADASYN: adaptive synthetic sampling approach for imbalanced learning. In: 2008 IEEE International Joint Conference on Neural Networks (IEEE World Congress on Computational Intelligence). IEEE (2008)"},{"key":"15_CR13","unstructured":"Ke, G., et al.: LightGBM: a highly efficient gradient boosting decision tree. Adv. Neural Inf. Process. Syst. 30 (2017)"},{"key":"15_CR14","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1186\/1472-6947-11-51","volume":"11","author":"M Khalilia","year":"2011","unstructured":"Khalilia, M., Chakraborty, S., Popescu, M.: Predicting disease risks from highly imbalanced data using random forest. BMC Med. Inform. Decis. Mak. 11, 1\u201313 (2011)","journal-title":"BMC Med. Inform. Decis. Mak."},{"key":"15_CR15","unstructured":"Kotelnikov, A., Baranchuk, D., Rubachev, I., Babenko, A.: TabDDPM: modelling tabular data with diffusion models. In: International Conference on Machine Learning, pp. 17564\u201317579. PMLR (2023)"},{"issue":"2","key":"15_CR16","doi-asserted-by":"publisher","first-page":"365","DOI":"10.1016\/S0047-259X(03)00096-4","volume":"88","author":"O Ledoit","year":"2004","unstructured":"Ledoit, O., Wolf, M.: A well-conditioned estimator for large-dimensional covariance matrices. J. Multivar. Anal. 88(2), 365\u2013411 (2004)","journal-title":"J. Multivar. Anal."},{"key":"15_CR17","doi-asserted-by":"publisher","first-page":"277","DOI":"10.1007\/s001800050018","volume":"14","author":"SS Lee","year":"1999","unstructured":"Lee, S.S.: Regularization in skewed binary classification. Comput. Stat. 14, 277\u2013292 (1999)","journal-title":"Comput. Stat."},{"issue":"2","key":"15_CR18","doi-asserted-by":"publisher","first-page":"165","DOI":"10.1016\/S0167-9473(99)00095-X","volume":"34","author":"SS Lee","year":"2000","unstructured":"Lee, S.S.: Noisy replication in skewed binary classification. Comput. Stat. Data Anal. 34(2), 165\u2013191 (2000)","journal-title":"Comput. Stat. Data Anal."},{"key":"15_CR19","doi-asserted-by":"crossref","unstructured":"Li, K., et\u00a0al.: SEFraud: graph-based self-explainable fraud detection via interpretative mask learning. In: Proceedings of the 30th ACM SIGKDD Conference on Knowledge Discovery and Data Mining, pp. 5329\u20135338 (2024)","DOI":"10.1145\/3637528.3671534"},{"key":"15_CR20","doi-asserted-by":"crossref","unstructured":"Lin, T.Y., Goyal, P., Girshick, R., He, K., Doll\u00e1r, P.: Focal loss for dense object detection. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 2980\u20132988 (2017)","DOI":"10.1109\/ICCV.2017.324"},{"key":"15_CR21","doi-asserted-by":"publisher","first-page":"92","DOI":"10.1007\/s10618-012-0295-5","volume":"28","author":"G Menardi","year":"2014","unstructured":"Menardi, G., Torelli, N.: Training and assessing classification rules with imbalanced data. Data Min. Knowl. Disc. 28, 92\u2013122 (2014)","journal-title":"Data Min. Knowl. Disc."},{"issue":"1","key":"15_CR22","doi-asserted-by":"publisher","first-page":"18","DOI":"10.3390\/asi4010018","volume":"4","author":"M Mukherjee","year":"2021","unstructured":"Mukherjee, M., Khushi, M.: SMOTE-ENC: a novel smote-based method to generate synthetic data for nominal and continuous features. Appl. Syst. Innovation 4(1), 18 (2021)","journal-title":"Appl. Syst. Innovation"},{"key":"15_CR23","doi-asserted-by":"crossref","unstructured":"Nguyen, N.N., Duong, A.T.: Comparison of two main approaches for handling imbalanced data in churn prediction problem. J. Adv. Inf. Technol. 12(1) (2021)","DOI":"10.12720\/jait.12.1.29-35"},{"key":"15_CR24","first-page":"2825","volume":"12","author":"F Pedregosa","year":"2011","unstructured":"Pedregosa, F., et al.: Scikit-learn: machine learning in Python. J. Mach. Learn. Res. 12, 2825\u20132830 (2011)","journal-title":"J. Mach. Learn. Res."},{"key":"15_CR25","unstructured":"Sakho, A., Malherbe, E., Scornet, E.: Do we need rebalancing strategies? A theoretical and empirical study around smote and its variants. arXiv:2402.03819 (2024)"},{"key":"15_CR26","doi-asserted-by":"crossref","unstructured":"Scott, D.W.: Multivariate density estimation: theory, practice, and visualization. John Wiley & Sons (2015)","DOI":"10.1002\/9781118575574"},{"key":"15_CR27","doi-asserted-by":"crossref","unstructured":"Spelmen, V.S., Porkodi, R.: A review on handling imbalanced data. In: 2018 international conference on current trends towards converging technologies (ICCTCT), pp. 1\u201311. IEEE (2018)","DOI":"10.1109\/ICCTCT.2018.8551020"},{"issue":"12","key":"15_CR28","doi-asserted-by":"publisher","first-page":"1213","DOI":"10.1145\/7902.7906","volume":"29","author":"C Stanfill","year":"1986","unstructured":"Stanfill, C., Waltz, D.: Toward memory-based reasoning. Commun. ACM 29(12), 1213\u20131228 (1986)","journal-title":"Commun. ACM"},{"key":"15_CR29","unstructured":"Stocksieker, S., Pommeret, D., Charpentier, A.: Generalized oversampling for learning from imbalanced datasets and associated theory: Application in regression"},{"key":"15_CR30","doi-asserted-by":"crossref","unstructured":"Sun, Y., Wong, A.K., Kamel, M.S.: Classification of imbalanced data: a review. Int. J. Pattern Recogn. Artif. Intell. 23(04) (2009)","DOI":"10.1142\/S0218001409007326"},{"key":"15_CR31","doi-asserted-by":"crossref","unstructured":"Tang, B., He, H.: KernelADASYN: kernel based adaptive synthetic data generation for imbalanced learning. In: IEEE Congress on Evolutionary Computation (2015)","DOI":"10.1109\/CEC.2015.7256954"},{"key":"15_CR32","unstructured":"Xu, L., Skoularidou, M., Cuesta-Infante, A., Veeramachaneni, K.: Modeling tabular data using conditional GAN. Adv. Neural Inf. Process. Syst. 32 (2019)"},{"key":"15_CR33","unstructured":"Zhyli: Prediction of churning credit card customers [data set] (2020)"}],"container-title":["Lecture Notes in Computer Science","Machine Learning and Knowledge Discovery in Databases. Applied Data Science Track"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-032-06118-8_15","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,28]],"date-time":"2025-09-28T11:23:43Z","timestamp":1759058623000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-032-06118-8_15"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,9,29]]},"ISBN":["9783032061171","9783032061188"],"references-count":33,"URL":"https:\/\/doi.org\/10.1007\/978-3-032-06118-8_15","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,9,29]]},"assertion":[{"value":"29 September 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECML PKDD","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Joint European Conference on Machine Learning and Knowledge Discovery in Databases","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Porto","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Portugal","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"15 September 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"19 September 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ecml2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/ecmlpkdd.org\/2025\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}