{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,18]],"date-time":"2026-03-18T21:15:38Z","timestamp":1773868538988,"version":"3.50.1"},"reference-count":37,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2025,7,17]],"date-time":"2025-07-17T00:00:00Z","timestamp":1752710400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0"},{"start":{"date-parts":[[2025,7,17]],"date-time":"2025-07-17T00:00:00Z","timestamp":1752710400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["J Big Data"],"DOI":"10.1186\/s40537-025-01217-3","type":"journal-article","created":{"date-parts":[[2025,7,17]],"date-time":"2025-07-17T15:04:58Z","timestamp":1752764698000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":6,"title":["Enhancing fraud detection in imbalanced motor insurance datasets using CP-SMOTE and Random Under-Sampling"],"prefix":"10.1186","volume":"12","author":[{"given":"Pornpawee","family":"Komsrimorakot","sequence":"first","affiliation":[]},{"given":"Thitirat","family":"Siriborvornratanakul","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,7,17]]},"reference":[{"key":"1217_CR1","doi-asserted-by":"publisher","DOI":"10.1111\/jori.12452","author":"D Banulescu-Radu","year":"2023","unstructured":"Banulescu-Radu D, Yankol\u2010Schalck M. Practical guideline to efficiently detect insurance fraud in the era of machine learning: A household insurance case. J Risk Insurance. 2023. https:\/\/doi.org\/10.1111\/jori.12452.","journal-title":"J Risk Insurance"},{"key":"1217_CR2","doi-asserted-by":"publisher","DOI":"10.1109\/icicct.2018.8473034","author":"G Kowshalya","year":"2018","unstructured":"Kowshalya G, Nandhini M. Predicting fraudulent claims in automobile insurance. 2018 Second Int Conf Inventive Communication Comput Technol (ICICCT). 2018. https:\/\/doi.org\/10.1109\/icicct.2018.8473034.","journal-title":"2018 Second Int Conf Inventive Communication Comput Technol (ICICCT)"},{"issue":"1","key":"1217_CR3","doi-asserted-by":"publisher","first-page":"58","DOI":"10.1016\/j.jfds.2016.03.001","volume":"2","author":"K Nian","year":"2016","unstructured":"Nian K, Zhang H, Tayal A, Coleman T, Li Y. Auto insurance fraud detection using unsupervised spectral ranking for anomaly. J Finance Data Sci. 2016;2(1):58\u201375. https:\/\/doi.org\/10.1016\/j.jfds.2016.03.001","journal-title":"The Journal of Finance and Data Science"},{"key":"1217_CR4","unstructured":"Mordor Intelligence. (2024). Motor Insurance Market in Thailand - Companies & Statistics. Mordorintelligence.com. https:\/\/www.mordorintelligence.com\/industry-reports\/thailand-motor-insurance-market"},{"key":"1217_CR5","doi-asserted-by":"publisher","first-page":"1012","DOI":"10.1016\/j.procs.2023.10.610","volume":"227","author":"Prediction of Health Insurance Claims Using Logistic Regression and XGBoost Methods","year":"2023","unstructured":"Permai SD, Herdianto K. Prediction of Health Insurance Claims Using Logistic Regression and XGBoost Methods. Procedia Comput Sci. 2023;227:1012\u20139. https:\/\/doi.org\/10.1016\/j.procs.2023.10.610","journal-title":"Procedia Comput Sci"},{"key":"1217_CR6","doi-asserted-by":"publisher","unstructured":"Vorobyev I. Fraud risk assessment in car insurance using claims graph features in machine learning. Expert Syst Appl. 2024;124109\u2013124109. https:\/\/doi.org\/10.1016\/j.eswa.2024.124109.","DOI":"10.1016\/j.eswa.2024.124109"},{"key":"1217_CR7","doi-asserted-by":"publisher","first-page":"200294","DOI":"10.1016\/j.iswa.2023.200294","volume":"20","author":"Q Lu","year":"2023","unstructured":"Lu Q, Fu C, Nan K, Fang Y, Xu J, Liu J, Bellotti AG, Lee BG. Chinese corporate fraud risk assessment with machine learning. Intell Syst Appl. 2023;20:200294. https:\/\/doi.org\/10.1016\/j.iswa.2023.200294.","journal-title":"Intell Syst Appl"},{"key":"1217_CR8","doi-asserted-by":"publisher","first-page":"104","DOI":"10.1016\/j.procs.2020.06.014","volume":"173","author":"S Bagga","year":"2020","unstructured":"Bagga S, Goyal A, Gupta N, Goyal A. Credit card fraud detection using pipeling and ensemble learning. Procedia Comput Sci. 2020;173:104\u201312. https:\/\/doi.org\/10.1016\/j.procs.2020.06.014.","journal-title":"Procedia Comput Sci"},{"issue":"16","key":"1217_CR9","doi-asserted-by":"publisher","first-page":"321","DOI":"10.1613\/jair.953","volume":"16","author":"NV Chawla","year":"2002","unstructured":"Chawla NV, Bowyer KW, Hall LO, Kegelmeyer WP. SMOTE: synthetic minority Over-sampling technique. J Artif Intell Res. 2002;16(16):321\u201357. https:\/\/doi.org\/10.1613\/jair.953.","journal-title":"J Artif Intell Res"},{"key":"1217_CR10","doi-asserted-by":"publisher","first-page":"103089","DOI":"10.1016\/j.jbi.2018.12.003","volume":"90","author":"S Fotouhi","year":"2019","unstructured":"Fotouhi S, Asadi S, Kattan MW. A comprehensive data level analysis for cancer diagnosis on imbalanced data. J Biomed Inform. 2019;90:103089. https:\/\/doi.org\/10.1016\/j.jbi.2018.12.003.","journal-title":"J Biomed Inform"},{"key":"1217_CR11","doi-asserted-by":"publisher","unstructured":"Chen LS, Chen JC. Using Data Mining Methods to Detect Medical Fraud. Procs. of the International Conference on Management of e-Commerce and e-Government (ICMECG). 2020. https:\/\/doi.org\/10.1145\/3409891.3409902","DOI":"10.1145\/3409891.3409902"},{"issue":"1","key":"1217_CR12","doi-asserted-by":"publisher","first-page":"20","DOI":"10.1145\/1007730.1007735","volume":"6","author":"GEAPA Batista","year":"2004","unstructured":"Batista GEAPA, Prati RC, Monard MC. A study of the behavior of several methods for balancing machine learning training data. ACM SIGKDD Explorations Newsl. 2004;6(1):20\u20139. https:\/\/doi.org\/10.1145\/1007730.1007735.","journal-title":"ACM SIGKDD Explorations Newsl"},{"key":"1217_CR13","doi-asserted-by":"publisher","unstructured":"He H, Bai Y, Garcia EA, Li S. ADASYN: Adaptive synthetic sampling approach for imbalanced learning. IEEE IJCNN. 2008. https:\/\/doi.org\/10.1109\/IJCNN.2008.4633969","DOI":"10.1109\/IJCNN.2008.4633969"},{"key":"1217_CR14","doi-asserted-by":"publisher","unstructured":"Sun Z, Ying W, Zhang W, Gong S. Under sampling method based on minority class density for imbalanced data. Expert Syst. Appl. 2024;249:123328\u2013123328. https:\/\/doi.org\/10.1016\/j.eswa.2024.123328","DOI":"10.1016\/j.eswa.2024.123328"},{"key":"1217_CR15","doi-asserted-by":"publisher","first-page":"5816","DOI":"10.1109\/ACCESS.2023.3236794","volume":"11","author":"Y Bao","year":"2023","unstructured":"Bao Y, Yang S. Two novel SMOTE methods for solving imbalanced classification problems. IEEE Access. 2023;11:5816\u201323. https:\/\/doi.org\/10.1109\/ACCESS.2023.3236794.","journal-title":"IEEE Access"},{"issue":"4","key":"1217_CR16","doi-asserted-by":"publisher","first-page":"2079","DOI":"10.1109\/tsmc.2023.3335241","volume":"54","author":"S Yan","year":"2024","unstructured":"Yan S, Zhao Z, Liu S, Zhou M. BO-SMOTE: A novel Bayesian-Optimization-Based synthetic minority oversampling technique. IEEE Trans Syst Man Cybernetics Syst. 2024;54(4):2079\u201391. https:\/\/doi.org\/10.1109\/tsmc.2023.3335241.","journal-title":"IEEE Trans Syst Man Cybernetics Syst"},{"key":"1217_CR17","doi-asserted-by":"publisher","first-page":"544","DOI":"10.1016\/j.patrec.2019.10.029","volume":"128","author":"F Xiong","year":"2019","unstructured":"DKT, BGP, Xiong F. Auto-detection of epileptic seizure events using deep neural network with different feature scaling techniques. Pattern Recognit Lett. 2019;128:544\u201350. https:\/\/doi.org\/10.1016\/j.patrec.2019.10.029","journal-title":"Pattern Recognit Lett"},{"key":"1217_CR18","doi-asserted-by":"publisher","DOI":"10.1080\/1540496x.2020.1825935","author":"L Yu","year":"2020","unstructured":"Yu L, Zhou R, Chen R, Lai KK. Missing Data Preprocessing in Credit Classification: One-Hot Encoding or Imputation?. Emerg Markets Finance Trade. 2020;58(2):1\u201311. https:\/\/doi.org\/10.1080\/1540496x.2020.1825935"},{"issue":"1","key":"1217_CR19","doi-asserted-by":"publisher","first-page":"4039","DOI":"10.1609\/aaai.v33i01.33014039","volume":"33","author":"B Juba","year":"2019","unstructured":"Juba B, Le HS. Precision-Recall versus accuracy and the role of large data sets. Proc AAAI Conf Artif Intell. 2019;33(1):4039\u201348. https:\/\/doi.org\/10.1609\/aaai.v33i01.33014039.","journal-title":"Proc AAAI Conf Artif Intell"},{"issue":"13","key":"1217_CR20","doi-asserted-by":"publisher","first-page":"2577","DOI":"10.3390\/rs13132577","volume":"13","author":"M Lin","year":"2021","unstructured":"Lin M, Zhu X, Hua T, Tang X, Tu G, Chen X. Detection of ionospheric scintillation based on XGBoost model improved by SMOTE-ENN technique. Remote Sens. 2021;13(13):2577. https:\/\/doi.org\/10.3390\/rs13132577.","journal-title":"Remote Sens"},{"key":"1217_CR21","doi-asserted-by":"publisher","unstructured":"Nishat MM, Faisal F, Ratul IJ, Al-Monsur A, Ar-Rafi AM, Nasrullah SM, et al. A comprehensive investigation of the performances of different machine learning classifiers with SMOTE-ENN oversampling technique and hyperparameter optimization for imbalanced heart failure dataset. Sci. Program. 2022;1:3649406. https:\/\/doi.org\/10.1155\/2022\/3649406","DOI":"10.1155\/2022\/3649406"},{"key":"1217_CR22","doi-asserted-by":"publisher","DOI":"10.30630\/joiv.7.1.1069","author":"H Hairani","year":"2023","unstructured":"Hairani H, Anggrawan A, Priyanto D. Improvement performance of the random forest method on unbalanced diabetes data classification using smote-tomek link. JOIV. 2023; 7(1). https:\/\/doi.org\/10.30630\/joiv.7.1.1069"},{"key":"1217_CR23","doi-asserted-by":"publisher","DOI":"10.1007\/s11042-022-13434-6","author":"A Abd El-Naby","year":"2022","unstructured":"Abd El-Naby A, Hemdan EE-D, El-Sayed A. An efficient fraud detection framework with credit card imbalanced data in financial services. Multimedia Tools Appl. 2022. https:\/\/doi.org\/10.1007\/s11042-022-13434-6.","journal-title":"Multimedia Tools Appl"},{"key":"1217_CR24","doi-asserted-by":"publisher","unstructured":"Mahesh KP, Ajrouz SA, Areeckal AS. Praveen Mahesh K, Afrouz AS, Shaju Areeckal A. Detection of fraudulent credit card transactions: A comparative analysis of data sampling and classification techniques. Journal of Physics: Conf. Ser. 2022;2161(1):012072. https:\/\/doi.org\/10.1088\/1742-6596\/2161\/1\/012072","DOI":"10.1088\/1742-6596\/2161\/1\/012072"},{"key":"1217_CR25","doi-asserted-by":"publisher","unstructured":"Lu C, Lin S, Liu X, Shi H. Telecom Fraud Identification Based on ADASYN and Random Forest. International Conference on Computer and Communication Systems (ICCCS). 2020. https:\/\/doi.org\/10.1109\/icccs49078.2020.9118521","DOI":"10.1109\/icccs49078.2020.9118521"},{"key":"1217_CR26","doi-asserted-by":"publisher","DOI":"10.1109\/miticon.2016.8025244","author":"A Charleonnan","year":"2016","unstructured":"Charleonnan A. Credit card fraud detection using RUS and MRN algorithms. 2016 Manage Innov Technol Int Conf (MITicon). 2016. https:\/\/doi.org\/10.1109\/miticon.2016.8025244.","journal-title":"2016 Manage Innov Technol Int Conf (MITicon)"},{"key":"1217_CR27","unstructured":"Bahrami B, Hosseini Shirvani M. Prediction and Diagnosis of Heart Disease by Data Mining Techniques. J Multidisciplinary Eng Sci Technol (JMEST). 2015;2(2). https:\/\/www.jmest.org\/wp-content\/uploads\/JMESTN42350475.pdf"},{"key":"1217_CR28","doi-asserted-by":"publisher","unstructured":"Singh A, Jain A, Biable SE. Financial Fraud Detection Approach based on Firefly optimization algorithm and support Vector machine. Appl Comput Intell Soft Comput. 2022;1\u201310. https:\/\/doi.org\/10.1155\/2022\/1468015","DOI":"10.1155\/2022\/1468015"},{"key":"1217_CR29","doi-asserted-by":"publisher","DOI":"10.1109\/icmla.2015.80","author":"C Cody","year":"2015","unstructured":"Cody C, Ford V, Siraj A. Decision tree learning for fraud detection in consumer energy consumption. 2015 IEEE 14th Int Conf Mach Learn Appl (ICMLA). 2015. https:\/\/doi.org\/10.1109\/icmla.2015.80.","journal-title":"2015 IEEE 14th Int Conf Mach Learn Appl (ICMLA)"},{"key":"1217_CR30","unstructured":"Vehicle insurance claim Fraud Detection. (2021, December 20). Kaggle. https:\/\/www.kaggle.com\/datasets\/shivamb\/vehicle-claim-fraud-detection"},{"key":"1217_CR31","unstructured":"Auto insurance claims data. (2018, August 20). Kaggle. https:\/\/www.kaggle.com\/datasets\/buntyshah\/auto-insurance-claims-data"},{"key":"1217_CR32","unstructured":"Fraudulent Claims Detection Dataset. (2024, March 11). Kaggle. https:\/\/www.kaggle.com\/datasets\/ravalsmit\/fraudulent-claims-detection-dataset"},{"issue":"1","key":"1217_CR33","doi-asserted-by":"publisher","first-page":"11","DOI":"10.54216\/JAIM.090102","volume":"9","author":"SK Kumar","year":"2025","unstructured":"Kumar SK, Kumar M. Predictive analysis of groundwater resources using random forest regression. J Artif Intell Metaheuristics. 2025;9(1):11\u20139. https:\/\/doi.org\/10.54216\/JAIM.090102.","journal-title":"J Artif Intell Metaheuristics"},{"issue":"2","key":"1217_CR34","doi-asserted-by":"publisher","first-page":"47","DOI":"10.54216\/MOR.030205","volume":"3","author":"M Elshabrawy","year":"2025","unstructured":"Elshabrawy M. A review on waste management techniques for sustainable energy production. Metaheuristic Optim Rev. 2025;3(2):47\u201358. https:\/\/doi.org\/10.54216\/MOR.030205.","journal-title":"Metaheuristic Optim Rev"},{"key":"1217_CR35","doi-asserted-by":"publisher","first-page":"695","DOI":"10.1007\/s11540-024-09760-x","volume":"68","author":"SA Alzakari","year":"2025","unstructured":"Alzakari SA, Alhussan AA, Qenawy AST, Others. Early detection of potato disease using an enhanced convolutional neural network\u2013long short-term memory deep learning model. Potato Res. 2025;68:695\u2013713. https:\/\/doi.org\/10.1007\/s11540-024-09760-x.","journal-title":"Potato Res"},{"key":"1217_CR36","doi-asserted-by":"publisher","first-page":"759","DOI":"10.1007\/s11540-024-09753-w","volume":"68","author":"E-SM El-Kenawy","year":"2025","unstructured":"El-Kenawy E-SM, Alhussan AA, Khodadadi N, Mirjalili S, Eid MM. Predicting potato crop yield with machine learning and deep learning for sustainable agriculture. Potato Res. 2025;68:759\u201392. https:\/\/doi.org\/10.1007\/s11540-024-09753-w.","journal-title":"Potato Res"},{"key":"1217_CR37","doi-asserted-by":"publisher","first-page":"122147","DOI":"10.1016\/j.eswa.2023.122147","volume":"238","author":"E-SM El-Kenawy","year":"2024","unstructured":"El-Kenawy E-SM, Khodadadi N, Mirjalili S, Abdelhamid AA, Eid MM, Ibrahim A. Greylag Goose optimization: Nature-inspired optimization algorithm. Expert Syst Appl. 2024;238:122147. https:\/\/doi.org\/10.1016\/j.eswa.2023.122147.","journal-title":"Expert Syst Appl"}],"container-title":["Journal of Big Data"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1186\/s40537-025-01217-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1186\/s40537-025-01217-3\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1186\/s40537-025-01217-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,7,17]],"date-time":"2025-07-17T15:04:59Z","timestamp":1752764699000},"score":1,"resource":{"primary":{"URL":"https:\/\/journalofbigdata.springeropen.com\/articles\/10.1186\/s40537-025-01217-3"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,7,17]]},"references-count":37,"journal-issue":{"issue":"1","published-online":{"date-parts":[[2025,12]]}},"alternative-id":["1217"],"URL":"https:\/\/doi.org\/10.1186\/s40537-025-01217-3","relation":{"references":[{"id-type":"doi","id":"10.1080\/1540496x.2020.1825935","asserted-by":"subject"},{"id-type":"doi","id":"10.30630\/joiv.7.1.1069","asserted-by":"subject"}]},"ISSN":["2196-1115"],"issn-type":[{"value":"2196-1115","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,7,17]]},"assertion":[{"value":"28 December 2024","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"21 June 2025","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"17 July 2025","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare no competing interests.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Competing interests"}}],"article-number":"172"}}