{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,1]],"date-time":"2026-05-01T16:20:23Z","timestamp":1777652423365,"version":"3.51.4"},"reference-count":69,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2025,12,9]],"date-time":"2025-12-09T00:00:00Z","timestamp":1765238400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0"},{"start":{"date-parts":[[2025,12,9]],"date-time":"2025-12-09T00:00:00Z","timestamp":1765238400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0"}],"funder":[{"name":"Ministry of Science and ICT (MSIT) ; National Research Foundation of Korea"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["J Big Data"],"DOI":"10.1186\/s40537-025-01313-4","type":"journal-article","created":{"date-parts":[[2025,12,9]],"date-time":"2025-12-09T14:33:07Z","timestamp":1765290787000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":16,"title":["Accuracy, precision, recall, f1-score, or MCC? empirical evidence from advanced statistics, ML, and XAI for evaluating business predictive models"],"prefix":"10.1186","volume":"12","author":[{"given":"Khaled Mahmud","family":"Sujon","sequence":"first","affiliation":[]},{"given":"Rohayanti","family":"Hassan","sequence":"additional","affiliation":[]},{"given":"Kwonhue","family":"Choi","sequence":"additional","affiliation":[]},{"given":"Md Abdus","family":"Samad","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,12,9]]},"reference":[{"issue":"11","key":"1313_CR1","first-page":"1","volume":"161","author":"S Bhatia","year":"2017","unstructured":"Bhatia S, Sharma P, Burman R, Hazari S, Hande R. Credit scoring using machine learning techniques. Int J Comput Appl. 2017;161(11):1\u20134.","journal-title":"Int J Comput Appl"},{"key":"1313_CR2","doi-asserted-by":"publisher","DOI":"10.1371\/journal.pone.0118432","author":"T Saito","year":"2015","unstructured":"Saito T, Rehmsmeier M. The precision-recall plot is more informative than the roc plot when evaluating binary classifiers on imbalanced datasets. PLoS ONE. 2015. https:\/\/doi.org\/10.1371\/journal.pone.0118432.","journal-title":"PLoS ONE"},{"key":"1313_CR3","doi-asserted-by":"publisher","first-page":"4039","DOI":"10.1609\/aaai.v33i01.33014039","volume":"33","author":"B Juba","year":"2019","unstructured":"Juba B, Le HS. Precision-recall versus accuracy and the role of large data sets. Proceedings of the AAAI Conference on Artificial Intelligence. 2019;33:4039\u201348.","journal-title":"Proceedings of the AAAI Conference on Artificial Intelligence"},{"key":"1313_CR4","doi-asserted-by":"publisher","DOI":"10.1371\/journal.pone.0177678","author":"S Boughorbel","year":"2017","unstructured":"Boughorbel S, Jarray F, El-Anbari M. Optimal classifier for imbalanced data using matthews correlation coefficient metric. PLoS ONE. 2017. https:\/\/doi.org\/10.1371\/journal.pone.0177678.","journal-title":"PLoS ONE"},{"key":"1313_CR5","doi-asserted-by":"publisher","DOI":"10.21629\/jsee.2019.06.12","author":"X Xu","year":"2019","unstructured":"Xu X, Chen W, Sun Y. Over-sampling algorithm for imbalanced data classification. JSEE. 2019. https:\/\/doi.org\/10.21629\/jsee.2019.06.12.","journal-title":"JSEE"},{"key":"1313_CR6","doi-asserted-by":"publisher","unstructured":"Shaer L, Kanj R, Joshi R. Data imbalance handling approaches for accurate statistical modeling and yield analysis of memory designs. 2019 IEEE International Symposium on Circuits and Systems (ISCAS), 1\u20135 2019 https:\/\/doi.org\/10.1109\/ISCAS.2019.8702731","DOI":"10.1109\/ISCAS.2019.8702731"},{"key":"1313_CR7","doi-asserted-by":"publisher","unstructured":"Wu X, Huang F, Huang H. Fast stochastic recursive momentum methods for imbalanced data mining. 2022 IEEE International Conference on Data Mining (ICDM), 578\u2013587 (2022) https:\/\/doi.org\/10.1109\/ICDM54844.2022.00068","DOI":"10.1109\/ICDM54844.2022.00068"},{"issue":"3\u20132","key":"1313_CR8","doi-asserted-by":"publisher","first-page":"1599","DOI":"10.62527\/joiv.8.3-2.2449","volume":"8","author":"KM Sujon","year":"2024","unstructured":"Sujon KM, Hassan R, Khairudin AR, Moi SH, Shafie MLM, Saringat Z, et al. The effects of imbalanced datasets on machine learning algorithms in predicting student performance. JOIV: International Journal on Informatics Visualization. 2024;8(3\u20132):1599\u2013605.","journal-title":"JOIV: International Journal on Informatics Visualization"},{"key":"1313_CR9","doi-asserted-by":"crossref","unstructured":"Japkowicz N. Assessment metrics for imbalanced learning. Imbalanced learning: Foundations, algorithms, and applications, 2013;187\u2013206","DOI":"10.1002\/9781118646106.ch8"},{"issue":"1","key":"1313_CR10","doi-asserted-by":"publisher","first-page":"27","DOI":"10.1016\/j.patrec.2008.08.010","volume":"30","author":"C Ferri","year":"2009","unstructured":"Ferri C, Hern\u00e1ndez-Orallo J, Modroiu R. An experimental comparison of performance measures for classification. Pattern Recogn Lett. 2009;30(1):27\u201338.","journal-title":"Pattern Recogn Lett"},{"key":"1313_CR11","unstructured":"Kubat M. Addressing the curse of imbalanced training sets: one-sided selection. In: Proceedings of the 14th International Conference on Machine Learning, pp. 179\u2013186 (1997). Morgan Kaufmann"},{"key":"1313_CR12","doi-asserted-by":"publisher","unstructured":"Diallo R, Edalo C, Awe OO. Machine learning evaluation of imbalanced health data: A comparative analysis of balanced accuracy, mcc, and f1 score. In: Awe, O.O., Vance, E.A. (eds.) Practical Statistical Learning and Data Science Methods: Case Studies from LISA 2020 Global Network, USA. STEAM-H: Science, Technology, Engineering, Agriculture, Mathematics & Health, pp. 283\u2013312. Springer, Cham (2024). https:\/\/doi.org\/10.1007\/978-3-031-72215-8_12","DOI":"10.1007\/978-3-031-72215-8_12"},{"key":"1313_CR13","doi-asserted-by":"publisher","first-page":"186","DOI":"10.1504\/IJICT.2018.10011701","volume":"13","author":"M Zareapoor","year":"2018","unstructured":"Zareapoor M, Shamsolmoali P. Boosting prediction performance on imbalanced dataset. Int J Inf Commun Technol. 2018;13:186\u201395. https:\/\/doi.org\/10.1504\/IJICT.2018.10011701.","journal-title":"Int J Inf Commun Technol"},{"key":"1313_CR14","doi-asserted-by":"publisher","first-page":"222","DOI":"10.15623\/IJRET.2014.0310034","volume":"03","author":"M Imran","year":"2014","unstructured":"Imran M, Qyser A, Ali SS, Kumar V, Jah M, Malla N. An overview on data mining designed for imbalanced datasets. International Journal of Research in Engineering and Technology. 2014;03:222\u20135. https:\/\/doi.org\/10.15623\/IJRET.2014.0310034.","journal-title":"International Journal of Research in Engineering and Technology"},{"key":"1313_CR15","doi-asserted-by":"publisher","unstructured":"Chakraborty T. Imbalanced ensemble classifier for learning from imbalanced business school dataset. International Journal of Mathematical, Engineering and Management Sciences 2018; https:\/\/doi.org\/10.33889\/IJMEMS.2019.4.4-068","DOI":"10.33889\/IJMEMS.2019.4.4-068"},{"key":"1313_CR16","doi-asserted-by":"publisher","first-page":"3043","DOI":"10.1007\/S00542-019-04566-1","volume":"26","author":"Z Lee","year":"2020","unstructured":"Lee Z, Lee C-Y, Chou S-T, Ma W-P, Ye F, Chen Z. A hybrid system for imbalanced data mining. Microsyst Technol. 2020;26:3043\u20137. https:\/\/doi.org\/10.1007\/S00542-019-04566-1.","journal-title":"Microsyst Technol"},{"key":"1313_CR17","doi-asserted-by":"publisher","unstructured":"Syaripudin A, Khodra ML. A comparison for handling imbalanced datasets. 2014 International Conference of Advanced Informatics: Concept, Theory and Application (ICAICTA), 293\u2013298 (2014) https:\/\/doi.org\/10.1109\/ICAICTA.2014.7005957","DOI":"10.1109\/ICAICTA.2014.7005957"},{"key":"1313_CR18","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-04663-7_4","author":"S Vluymans","year":"2018","unstructured":"Vluymans S. Learning from imbalanced data. Dealing with Imbalanced and Weakly Labelled Data in Machine Learning using Fuzzy and Rough Set Methods. 2018. https:\/\/doi.org\/10.1007\/978-3-030-04663-7_4.","journal-title":"Dealing with Imbalanced and Weakly Labelled Data in Machine Learning using Fuzzy and Rough Set Methods"},{"key":"1313_CR19","doi-asserted-by":"publisher","unstructured":"Yan Y, Liu Y, Shyu M, Chen M. Utilizing concept correlations for effective imbalanced data classification. Proceedings of the 2014 IEEE 15th International Conference on Information Reuse and Integration (IEEE IRI 2014), 561\u2013568 (2014) https:\/\/doi.org\/10.1109\/IRI.2014.7051939","DOI":"10.1109\/IRI.2014.7051939"},{"key":"1313_CR20","doi-asserted-by":"publisher","first-page":"111","DOI":"10.1016\/j.dss.2018.06.011","volume":"112","author":"D Veganzones","year":"2018","unstructured":"Veganzones D, S\u00e9verin E. An investigation of bankruptcy prediction in imbalanced datasets. Decis Support Syst. 2018;112:111\u201324. https:\/\/doi.org\/10.1016\/j.dss.2018.06.011.","journal-title":"Decis Support Syst"},{"key":"1313_CR21","doi-asserted-by":"publisher","DOI":"10.1002\/eng2.12298","author":"S Susan","year":"2020","unstructured":"Susan S, Kumar A. The balancing trick: optimized sampling of imbalanced datasets-a brief survey of the recent state of the art. Eng Rep (Hoboken). 2020. https:\/\/doi.org\/10.1002\/eng2.12298.","journal-title":"Eng Rep (Hoboken)"},{"key":"1313_CR22","doi-asserted-by":"publisher","first-page":"83","DOI":"10.1016\/j.ins.2020.12.006","volume":"553","author":"VH Barella","year":"2021","unstructured":"Barella VH, Garcia LPF, Souto MD, Lorena AC, Carvalho A. Assessing the data complexity of imbalanced datasets. Inf Sci. 2021;553:83\u2013109. https:\/\/doi.org\/10.1016\/j.ins.2020.12.006.","journal-title":"Inf. Sci."},{"key":"1313_CR23","doi-asserted-by":"publisher","first-page":"139","DOI":"10.1109\/TPAMI.2019.2929166","volume":"43","author":"F Wu","year":"2021","unstructured":"Wu F, Jing X, Shan S, Zuo W, Yang J-y. Multiset feature learning for highly imbalanced data classification. IEEE Trans Pattern Anal Mach Intell. 2021;43:139\u201356.","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"1313_CR24","doi-asserted-by":"publisher","first-page":"118","DOI":"10.1016\/J.ESWA.2019.04.011","volume":"129","author":"X Tao","year":"2019","unstructured":"Tao X, Li Q, Ren C, Guo W, Li C, He Q, et al. Real-value negative selection over-sampling for imbalanced data set learning. Expert Syst Appl. 2019;129:118\u201334. https:\/\/doi.org\/10.1016\/J.ESWA.2019.04.011.","journal-title":"Expert Syst Appl"},{"key":"1313_CR25","doi-asserted-by":"publisher","DOI":"10.4018\/978-1-5225-2255-3.CH159","author":"L Mathews","year":"2019","unstructured":"Mathews L, Hari S. Learning from imbalanced data. Advances in Computer and Electrical Engineering. 2019. https:\/\/doi.org\/10.4018\/978-1-5225-2255-3.CH159.","journal-title":"Advances in Computer and Electrical Engineering"},{"key":"1313_CR26","doi-asserted-by":"publisher","DOI":"10.1016\/j.knosys.2020.106087","volume":"203","author":"J Zhao","year":"2020","unstructured":"Zhao J, Jin J, Chen S, Zhang R, Yu B, Liu Q. A weighted hybrid ensemble method for classifying imbalanced data. Knowl-Based Syst. 2020;203:106087. https:\/\/doi.org\/10.1016\/j.knosys.2020.106087.","journal-title":"Knowl-Based Syst"},{"key":"1313_CR27","first-page":"27","volume":"3","author":"M Bekkar","year":"2013","unstructured":"Bekkar M, Djemaa H, Alitouche TA. Evaluation measures for models assessment over imbalanced data sets. Journal of Information Engineering and Applications. 2013;3:27\u201338.","journal-title":"Journal of Information Engineering and Applications"},{"key":"1313_CR28","doi-asserted-by":"publisher","unstructured":"Basha SJ, Madala S, Vivek K, Kumar ES, Ammannamma T. A review on imbalanced data classification techniques. 2022 International Conference on Advanced Computing Technologies and Applications (ICACTA), 1\u20136 (2022) https:\/\/doi.org\/10.1109\/ICACTA54488.2022.9753392","DOI":"10.1109\/ICACTA54488.2022.9753392"},{"key":"1313_CR29","doi-asserted-by":"publisher","first-page":"17","DOI":"10.1016\/j.infsof.2017.07.004","volume":"92","author":"MM \u00d6zt\u00fcrk","year":"2017","unstructured":"\u00d6zt\u00fcrk MM. Which type of metrics are useful to deal with class imbalance in software defect prediction? Inf Softw Technol. 2017;92:17\u201329.","journal-title":"Inf Softw Technol"},{"issue":"3","key":"1313_CR30","first-page":"1447","volume":"40","author":"A Cruz Huayanay","year":"2025","unstructured":"Cruz Huayanay A, Baz\u00e1n JL, Russo CM. Performance of evaluation metrics for classification in imbalanced data. Comput Stat. 2025;40(3):1447\u201373.","journal-title":"Comput Stat"},{"issue":"8","key":"1313_CR31","doi-asserted-by":"publisher","first-page":"861","DOI":"10.1016\/j.patrec.2005.10.010","volume":"27","author":"T Fawcett","year":"2006","unstructured":"Fawcett T. An introduction to roc analysis. Pattern Recognit Lett. 2006;27(8):861\u201374.","journal-title":"Pattern Recognit Lett"},{"key":"1313_CR32","doi-asserted-by":"publisher","first-page":"66","DOI":"10.1016\/j.knosys.2014.01.021","volume":"59","author":"V Garc\u00eda","year":"2014","unstructured":"Garc\u00eda V, Mollineda RA, S\u00e1nchez JS. A bias correction function for classification performance assessment in two-class imbalanced problems. Knowledge-Based Systems. 2014;59:66\u201374.","journal-title":"Knowledge-Based Systems"},{"key":"1313_CR33","doi-asserted-by":"crossref","unstructured":"Jim\u00e9nez-Navarro M, Troncoso-Garc\u00eda A, Troncoso A, Mart\u00ednez-\u00c1lvarez F, Mart\u00ednez-Ballesteros M. Explainable deep learning with embedded feature selection for electricity demand forecasting. In: 2024 International Conference on Smart Systems and Technologies (SST), pp. 153\u2013158 (2024). IEEE","DOI":"10.1109\/SST61991.2024.10755283"},{"key":"1313_CR34","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2025.3540513","author":"AR Troncoso-Garcia","year":"2025","unstructured":"Troncoso-Garcia AR, Martinez-Ballesteros M, Martinez-Alvarez F, Troncoso A. A new metric based on association rules to assess feature-attribution explainability techniques for time series forecasting. IEEE Trans Pattern Anal Mach Intell. 2025. https:\/\/doi.org\/10.1109\/TPAMI.2025.3540513.","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"1313_CR35","doi-asserted-by":"publisher","first-page":"2930","DOI":"10.1016\/j.procs.2022.09.351","volume":"207","author":"A Troncoso-Garc\u00eda","year":"2022","unstructured":"Troncoso-Garc\u00eda A, Mart\u00ednez-Ballesteros M, Mart\u00ednez-\u00c1lvarez F, Troncoso A. Explainable machine learning for sleep apnea prediction. Procedia Comput Sci. 2022;207:2930\u20139.","journal-title":"Procedia Comput Sci"},{"key":"1313_CR36","doi-asserted-by":"crossref","unstructured":"Kadir MA, Mosavi A, Sonntag D. Evaluation metrics for xai: A review, taxonomy, and practical applications. In: 2023 IEEE 27th International Conference on Intelligent Engineering Systems (INES), pp. 000111\u2013000124 (2023). IEEE","DOI":"10.1109\/INES59282.2023.10297629"},{"key":"1313_CR37","unstructured":"Wong T-T, Chung P-C. A consistency analysis on four evaluation metrics for classifying imbalanced data. Knowledge and Information Systems, 2025; 1\u201318"},{"key":"1313_CR38","doi-asserted-by":"publisher","first-page":"135300","DOI":"10.1109\/ACCESS.2024.3462434","volume":"12","author":"K Mahmud Sujon","year":"2024","unstructured":"Mahmud Sujon K, Binti Hassan R, Tusnia Towshi Z, Othman MA, Abdus Samad M, Choi K. When to use standardization and normalization: empirical evidence from machine learning models and xai. IEEE Access. 2024;12:135300\u201314. https:\/\/doi.org\/10.1109\/ACCESS.2024.3462434.","journal-title":"IEEE Access"},{"issue":"3","key":"1313_CR39","doi-asserted-by":"publisher","first-page":"235","DOI":"10.1214\/ss\/1042727940","volume":"17","author":"RJ Bolton","year":"2002","unstructured":"Bolton RJ, Hand DJ. Statistical fraud detection: a review. Stat Sci. 2002;17(3):235\u201355.","journal-title":"Stat Sci"},{"key":"1313_CR40","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1186\/1472-6947-11-51","volume":"11","author":"M Khalilia","year":"2011","unstructured":"Khalilia M, Chakraborty S, Popescu M. Predicting disease risks from highly imbalanced data using random forest. BMC Med Inform Decis Mak. 2011;11:1\u201313.","journal-title":"BMC Med Inform Decis Mak"},{"key":"1313_CR41","doi-asserted-by":"crossref","unstructured":"Gilmore E, Estivill-Castro V, Hexel R. More interpretable decision trees. In: Hybrid Artificial Intelligent Systems: 16th International Conference, HAIS 2021, Bilbao, Spain, September 22\u201324, 2021, Proceedings 16, pp. 280\u2013292 (2021). Springer","DOI":"10.1007\/978-3-030-86271-8_24"},{"key":"1313_CR42","doi-asserted-by":"crossref","unstructured":"Chen T, Guestrin C. Xgboost: A scalable tree boosting system. In: Proceedings of the 22nd Acm Sigkdd International Conference on Knowledge Discovery and Data Mining, 2016;pp. 785\u2013794","DOI":"10.1145\/2939672.2939785"},{"issue":"1","key":"1313_CR43","doi-asserted-by":"publisher","DOI":"10.1186\/s40537-024-00973-y","volume":"11","author":"RK Halder","year":"2024","unstructured":"Halder RK, Uddin MN, Uddin MA, Aryal S, Khraisat A. Enhancing k-nearest neighbor algorithm: a comprehensive review and performance analysis of modifications. J Big Data. 2024;11(1):113.","journal-title":"J Big Data"},{"key":"1313_CR44","unstructured":"Brownlee J. Failure of classification accuracy for imbalanced class distributions. Machine Learning Mastery. 2020;31."},{"issue":"1","key":"1313_CR45","first-page":"33","volume":"1","author":"SM Najem","year":"2021","unstructured":"Najem SM, Kadeem SM. A survey on fraud detection techniques in e-commerce. Tech-Knowledge. 2021;1(1):33\u201347.","journal-title":"Tech-Knowledge"},{"issue":"3","key":"1313_CR46","doi-asserted-by":"publisher","first-page":"477","DOI":"10.1108\/EL-12-2019-0287","volume":"38","author":"M Zeynali Tazehkandi","year":"2020","unstructured":"Zeynali Tazehkandi M, Nowkarizi M. Three approaches to measuring recall on the web: a systematic review. Electron Libr. 2020;38(3):477\u201392.","journal-title":"Electron Libr"},{"key":"1313_CR47","doi-asserted-by":"crossref","unstructured":"Jeni LA, Cohn JF, De\u00a0La\u00a0Torre F. Facing imbalanced data\u2013recommendations for the use of performance metrics. In: 2013 Humaine Association Conference on Affective Computing and Intelligent Interaction, pp. 245\u2013251 (2013). IEEE","DOI":"10.1109\/ACII.2013.47"},{"key":"1313_CR48","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1186\/s12864-019-6413-7","volume":"21","author":"D Chicco","year":"2020","unstructured":"Chicco D, Jurman G. The advantages of the matthews correlation coefficient (MCC) over f1 score and accuracy in binary classification evaluation. BMC Genomics. 2020;21:1\u201313.","journal-title":"BMC Genomics"},{"issue":"23","key":"1313_CR49","doi-asserted-by":"publisher","DOI":"10.3390\/electronics11234019","volume":"11","author":"M Alsulmi","year":"2022","unstructured":"Alsulmi M. From ranking search results to managing investment portfolios: exploring rank-based approaches for portfolio stock selection. Electronics. 2022;11(23):4019.","journal-title":"Electronics"},{"key":"1313_CR50","doi-asserted-by":"publisher","first-page":"146876","DOI":"10.1109\/ACCESS.2019.2945907","volume":"7","author":"Y Alsubaie","year":"2019","unstructured":"Alsubaie Y, El Hindi K, Alsalman H. Cost-sensitive prediction of stock price direction: selection of technical indicators. IEEE Access. 2019;7:146876\u201392.","journal-title":"IEEE Access"},{"issue":"4","key":"1313_CR51","doi-asserted-by":"publisher","DOI":"10.3390\/s23042333","volume":"23","author":"S Szeghalmy","year":"2023","unstructured":"Szeghalmy S, Fazekas A. A comparative study of the use of stratified cross-validation and distribution-balanced stratified cross-validation in imbalanced learning. Sensors. 2023;23(4):2333.","journal-title":"Sensors"},{"key":"1313_CR52","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1007\/s00265-020-02916-y","volume":"74","author":"MQ Pembury Smith","year":"2020","unstructured":"Pembury Smith MQ, Ruxton GD. Effective use of the mcnemar test. Behav Ecol Sociobiol. 2020;74:1\u20139.","journal-title":"Behav Ecol Sociobiol"},{"issue":"3","key":"1313_CR53","doi-asserted-by":"publisher","first-page":"1001","DOI":"10.25300\/MISQ\/2018\/13587","volume":"42","author":"MI Aguirre-Urreta","year":"2018","unstructured":"Aguirre-Urreta MI, R\u00f6nkk\u00f6 M. Statistical inference with plsc using bootstrap confidence intervals. MIS Q. 2018;42(3):1001\u201310.","journal-title":"MIS Q"},{"issue":"16","key":"1313_CR54","doi-asserted-by":"publisher","DOI":"10.3390\/ijerph191610213","volume":"19","author":"B Wi\u0119ckowska","year":"2022","unstructured":"Wi\u0119ckowska B, Kubiak KB, J\u00f3\u017awiak P, Moryson W, Stawi\u0144ska-Witoszy\u0144ska B. Cohen\u2019s kappa coefficient as a measure to assess classification improvement following the addition of a new marker to a regression model. Int J Environ Res Public Health. 2022;19(16):10213.","journal-title":"Int J Environ Res Public Health"},{"key":"1313_CR55","doi-asserted-by":"crossref","unstructured":"Chase\u00a0Lipton Z, Elkan C, Narayanaswamy B. Thresholding classifiers to maximize f1 score. arXiv e-prints, 2014; 1402","DOI":"10.1007\/978-3-662-44851-9_15"},{"key":"1313_CR56","doi-asserted-by":"publisher","DOI":"10.3389\/fonc.2023.1177225","volume":"13","author":"J Jiang","year":"2023","unstructured":"Jiang J, Jiang X, Xu L, Zhang Y, Zheng Y, Kong D. Noise-robustness test for ultrasound breast nodule neural network models as medical devices. Front Oncol. 2023;13:1177225.","journal-title":"Front Oncol"},{"issue":"6","key":"1313_CR57","doi-asserted-by":"publisher","first-page":"2576","DOI":"10.3758\/s13428-021-01587-5","volume":"53","author":"U Knief","year":"2021","unstructured":"Knief U, Forstmeier W. Violating the normality assumption may be the lesser of two evils. Behav Res Methods. 2021;53(6):2576\u201390.","journal-title":"Behav Res Methods"},{"key":"1313_CR58","doi-asserted-by":"publisher","unstructured":"Yeh I-C, Lien C-h. Default of Credit Card Clients Dataset. UCI Machine Learning Repository 2009; https:\/\/doi.org\/10.24432\/C55S3H.","DOI":"10.24432\/C55S3H"},{"issue":"1","key":"1313_CR59","doi-asserted-by":"publisher","first-page":"1","DOI":"10.56705\/ijaimi.v2i1.137","volume":"2","author":"H Azis","year":"2024","unstructured":"Azis H. Assessing the performance of logistic regression in heart disease detection through 5-fold cross-validation. International Journal of Artificial Intelligence in Medical Issues. 2024;2(1):1\u201311.","journal-title":"International Journal of Artificial Intelligence in Medical Issues"},{"key":"1313_CR60","doi-asserted-by":"crossref","unstructured":"Maina DG, Moso JC, Gikunda PK. Detecting fraud in motor insurance claims using xgboost algorithm with smote. In: 2023 International Conference on Information and Communication Technology for Development for Africa (ICT4DA), 2023;pp. 61\u201366 . IEEE","DOI":"10.1109\/ICT4DA59526.2023.10302229"},{"key":"1313_CR61","doi-asserted-by":"crossref","unstructured":"Basak S, Huber M. Evolutionary feature scaling in k-nearest neighbors based on label dispersion minimization. In: 2020 IEEE International Conference on Systems, Man, and Cybernetics (SMC), 2020; pp. 928\u2013935. IEEE","DOI":"10.1109\/SMC42975.2020.9282834"},{"key":"1313_CR62","doi-asserted-by":"crossref","unstructured":"Kusa W, Peikos G, Staudinger M, Lipani A, Hanbury A. Normalised precision at fixed recall for evaluating tar. In: Proceedings of the 2024 ACM SIGIR International Conference on Theory of Information Retrieval, 2024; pp. 43\u201349","DOI":"10.1145\/3664190.3672532"},{"issue":"10","key":"1313_CR63","doi-asserted-by":"publisher","DOI":"10.1371\/journal.pone.0291908","volume":"18","author":"GM Foody","year":"2023","unstructured":"Foody GM. Challenges in the real world use of classification accuracy metrics: from recall and precision to the matthews correlation coefficient. PLoS ONE. 2023;18(10):0291908.","journal-title":"PLoS ONE"},{"key":"1313_CR64","doi-asserted-by":"publisher","DOI":"10.1016\/j.knosys.2024.112143","volume":"300","author":"J Tang","year":"2024","unstructured":"Tang J, Li Y, Hou Z, Fu S, Tian Y. Robust two-stage instance-level cost-sensitive learning method for class imbalance problem. Knowledge-Based Systems. 2024;300:112143.","journal-title":"Knowledge-Based Systems"},{"key":"1313_CR65","doi-asserted-by":"publisher","DOI":"10.1371\/journal.pone.0087752","author":"X Chen","year":"2014","unstructured":"Chen X, Chen P. A comparison of four methods for the analysis of n-of-1 trials. PLoS ONE. 2014. https:\/\/doi.org\/10.1371\/journal.pone.0087752.","journal-title":"PLoS ONE"},{"key":"1313_CR66","doi-asserted-by":"publisher","DOI":"10.1371\/journal.pdig.0000290","author":"M Owusu-Adjei","year":"2023","unstructured":"Owusu-Adjei M, Hayfron-Acquah JB, Frimpong T, Abdul-Salaam G. Imbalanced class distribution and performance evaluation metrics: a systematic review of prediction accuracy for determining model performance in healthcare systems. PLoS Digit Health. 2023. https:\/\/doi.org\/10.1371\/journal.pdig.0000290.","journal-title":"PLoS Digit Health"},{"key":"1313_CR67","doi-asserted-by":"publisher","unstructured":"Wardhani NWS, Rochayani MY, Iriany A, Sulistyono A, Lestantyo P. Cross-validation metrics for evaluating classification performance on imbalanced data. 2019 International Conference on Computer, Control, Informatics and its Applications (IC3INA), 14\u201318 (2019) https:\/\/doi.org\/10.1109\/IC3INA48034.2019.8949568","DOI":"10.1109\/IC3INA48034.2019.8949568"},{"key":"1313_CR68","doi-asserted-by":"publisher","unstructured":"Explaining xgboost predictions with shap value. A comprehensive guide to interpreting decision tree-based models. New Trends in Computer Sciences. 2023. https:\/\/doi.org\/10.3846\/ntcs.2023.17901.","DOI":"10.3846\/ntcs.2023.17901"},{"key":"1313_CR69","unstructured":"Mokhtari KE, Higdon BP, Ba\u015far A. Interpreting financial time series with shap values. In: Proceedings of the 29th Annual International Conference on Computer Science and Software Engineering, 2019;pp. 166\u2013172"}],"container-title":["Journal of Big Data"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1186\/s40537-025-01313-4.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1186\/s40537-025-01313-4\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1186\/s40537-025-01313-4.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,10]],"date-time":"2025-12-10T03:01:36Z","timestamp":1765335696000},"score":1,"resource":{"primary":{"URL":"https:\/\/journalofbigdata.springeropen.com\/articles\/10.1186\/s40537-025-01313-4"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,12,9]]},"references-count":69,"journal-issue":{"issue":"1","published-online":{"date-parts":[[2025,12]]}},"alternative-id":["1313"],"URL":"https:\/\/doi.org\/10.1186\/s40537-025-01313-4","relation":{},"ISSN":["2196-1115"],"issn-type":[{"value":"2196-1115","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,12,9]]},"assertion":[{"value":"11 June 2025","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"8 October 2025","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"9 December 2025","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare no competing interests.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Competing interests"}}],"article-number":"268"}}