{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,31]],"date-time":"2025-12-31T20:14:56Z","timestamp":1767212096066,"version":"3.37.3"},"reference-count":39,"publisher":"Springer Science and Business Media LLC","issue":"5","license":[{"start":{"date-parts":[[2023,6,22]],"date-time":"2023-06-22T00:00:00Z","timestamp":1687392000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,6,22]],"date-time":"2023-06-22T00:00:00Z","timestamp":1687392000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["SN COMPUT. SCI."],"DOI":"10.1007\/s42979-023-01880-4","type":"journal-article","created":{"date-parts":[[2023,6,23]],"date-time":"2023-06-23T01:01:49Z","timestamp":1687482109000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":6,"title":["Exploring Maximum Tree Depth and Random Undersampling in Ensemble Trees to Optimize the Classification of Imbalanced Big Data"],"prefix":"10.1007","volume":"4","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-0699-3042","authenticated-orcid":false,"suffix":"III","given":"John T.","family":"Hancock","sequence":"first","affiliation":[]},{"given":"Taghi M.","family":"Khoshgoftaar","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2023,6,22]]},"reference":[{"key":"1880_CR1","unstructured":"The Centers for Medicare and Medicaid Services: Medicare Physician & Other Practitioners: by provider and service. 2021. https:\/\/data.cms.gov\/provider-summary-by-type-of-service\/medicare-physician-other-practitioners\/medicare-physician-other-practitioners-by-provider-and-service. Accessed 9 May 2022."},{"key":"1880_CR2","unstructured":"The Centers for Medicare and Medicaid Services: Medicare Part D Prescribers: by provider and drug. 2021. https:\/\/data.cms.gov\/provider-summary-by-type-of-service\/medicare-part-d-prescribers\/medicare-part-d-prescribers-by-provider-and-drug. Accessed 18 Feb 2022."},{"key":"1880_CR3","doi-asserted-by":"crossref","unstructured":"Centers for Medicare and Medicaid Services: 2019 Estimated Improper Payment Rates for Centers for Medicare & Medicaid Services (CMS) Programs. 2019. https:\/\/www.cms.gov\/newsroom\/fact-sheets\/2019-estimated-improper-payment-rates-centers-medicare-medicaid-services-cms-programs.  Accessed 1 Mar 2022.","DOI":"10.37573\/9781585284474.001"},{"key":"1880_CR4","unstructured":"Civil Division, U.S. Department of Justice: Fraud Statistics, Overview. 2020. https:\/\/www.justice.gov\/opa\/press-release\/file\/1354316\/download. Accessed 18 Jan 2022."},{"issue":"10","key":"1880_CR5","first-page":"27","volume":"3","author":"M Bekkar","year":"2013","unstructured":"Bekkar M, Djemaa HK, Alitouche TA. Evaluation measures for models assessment over imbalanced data sets. J Inf Eng Appl. 2013;3(10):27\u201338.","journal-title":"J Inf Eng Appl."},{"key":"1880_CR6","doi-asserted-by":"crossref","unstructured":"Hancock J, Khoshgoftaar TM. Optimizing ensemble trees for big data healthcare fraud detection. In: 2022 IEEE 23rd international conference on information reuse and integration for data science (IRI); 2022. IEEE. p. 243\u201349","DOI":"10.1109\/IRI54793.2022.00061"},{"key":"1880_CR7","first-page":"1","volume":"31","author":"L Prokhorenkova","year":"2018","unstructured":"Prokhorenkova L, Gusev G, Vorobev A, Dorogush AV, Gulin A. Catboost: unbiased boosting with categorical features. Adv Neural Inf Process Syst. 2018;31:1\u201311.","journal-title":"Adv Neural Inf Process Syst."},{"key":"1880_CR8","doi-asserted-by":"crossref","unstructured":"Chen T, Guestrin C. Xgboost: a scalable tree boosting system. Proceedings of the 22nd ACM SIGKDD international conference on knowledge discovery and data mining-KDD \u201916; 2016.","DOI":"10.1145\/2939672.2939785"},{"issue":"1","key":"1880_CR9","doi-asserted-by":"publisher","first-page":"5","DOI":"10.1023\/A:1010933404324","volume":"45","author":"L Breiman","year":"2001","unstructured":"Breiman L. Random forests. Mach Learn. 2001;45(1):5\u201332.","journal-title":"Mach Learn"},{"issue":"1","key":"1880_CR10","doi-asserted-by":"publisher","first-page":"3","DOI":"10.1007\/s10994-006-6226-1","volume":"63","author":"P Geurts","year":"2006","unstructured":"Geurts P, Ernst D, Wehenkel L. Extremely randomized trees. Mach Learn. 2006;63(1):3\u201342.","journal-title":"Mach Learn"},{"key":"1880_CR11","doi-asserted-by":"crossref","unstructured":"Bauder RA, Khoshgoftaar TM, Hasanin T. Data sampling approaches with severely imbalanced big data for medicare fraud detection. In: 2018 IEEE 30th international conference on tools with artificial intelligence (ICTAI); 2018. IEEE. p. 137\u201342","DOI":"10.1109\/ICTAI.2018.00030"},{"key":"1880_CR12","unstructured":"The Centers for Medicare and Medicaid Services: Medicare Durable Medical Equipment, Devices & Supplies: by Referring Provider and Service. 2021. https:\/\/data.cms.gov\/provider-summary-by-type-of-service\/medicare-durable-medical-equipment-devices-supplies\/medicare-durable-medical-equipment-devices-supplies-by-referring-provider-and-service.  Accessed 18 Jan 2022"},{"issue":"1","key":"1880_CR13","first-page":"191","volume":"41","author":"S Le Cessie","year":"1992","unstructured":"Le Cessie S, Van Houwelingen JC. Ridge estimators in logistic regression. J R Stat Soc Ser C (Appl Stat). 1992;41(1):191\u2013201.","journal-title":"J R Stat Soc Ser C (Appl Stat)"},{"issue":"1","key":"1880_CR14","first-page":"1235","volume":"17","author":"X Meng","year":"2016","unstructured":"Meng X, Bradley J, Yavuz B, Sparks E, Venkataraman S, Liu D, Freeman J, Tsai D, Amde M, Owen S. Mllib: machine learning in apache spark. J Mach Learn Res. 2016;17(1):1235\u201341.","journal-title":"J Mach Learn Res"},{"issue":"11","key":"1880_CR15","doi-asserted-by":"publisher","first-page":"56","DOI":"10.1145\/2934664","volume":"59","author":"M Zaharia","year":"2016","unstructured":"Zaharia M, Xin RS, Wendell P, Das T, Armbrust M, Dave A, Meng X, Rosen J, Venkataraman S, Franklin MJ. Apache spark: a unified engine for big data processing. Commun ACM. 2016;59(11):56\u201365.","journal-title":"Commun ACM"},{"key":"1880_CR16","doi-asserted-by":"crossref","unstructured":"Han H, Wang W-Y, Mao B-H. Borderline-smote: a new over-sampling method in imbalanced data sets learning. In: International conference on intelligent computing; 2005. Springer. p. 878\u2013887","DOI":"10.1007\/11538059_91"},{"key":"1880_CR17","doi-asserted-by":"publisher","first-page":"16568","DOI":"10.1109\/ACCESS.2017.2738069","volume":"5","author":"W Lin","year":"2017","unstructured":"Lin W, Wu Z, Lin L, Wen A, Li J. An ensemble random forest algorithm for insurance big data analysis. IEEE Access. 2017;5:16568\u201375.","journal-title":"IEEE Access"},{"key":"1880_CR18","doi-asserted-by":"publisher","first-page":"112","DOI":"10.1016\/j.ins.2014.03.043","volume":"285","author":"S Del R\u00edo","year":"2014","unstructured":"Del R\u00edo S, L\u00f3pez V, Ben\u00edtez JM, Herrera F. On the use of mapreduce for imbalanced big data using random forest. Inf Sci. 2014;285:112\u201337.","journal-title":"Inf Sci"},{"issue":"1","key":"1880_CR19","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1186\/s40537-019-0232-1","volume":"6","author":"VM Herrera","year":"2019","unstructured":"Herrera VM, Khoshgoftaar TM, Villanustre F, Furht B. Random forest implementation and optimization for big data analytics on lexisnexis\u20195s high performance computing cluster platform. J Big Data. 2019;6(1):1\u201336.","journal-title":"J Big Data"},{"key":"1880_CR20","doi-asserted-by":"publisher","first-page":"28","DOI":"10.1016\/j.bdr.2017.07.003","volume":"9","author":"R Genuer","year":"2017","unstructured":"Genuer R, Poggi J-M, Tuleau-Malot C, Villa-Vialaneix N. Random forests for big data. Big Data Res. 2017;9:28\u201346.","journal-title":"Big Data Res"},{"issue":"2","key":"1880_CR21","first-page":"159","volume":"10","author":"MA Fauzan","year":"2018","unstructured":"Fauzan MA, Murfi H. The accuracy of xgboost for insurance claim prediction. Int J Adv Soft Comput Appl. 2018;10(2):159\u201371.","journal-title":"Int J Adv Soft Comput Appl"},{"issue":"3","key":"1880_CR22","doi-asserted-by":"publisher","first-page":"52","DOI":"10.1109\/MIS.2020.2972533","volume":"35","author":"H Li","year":"2020","unstructured":"Li H, Cao Y, Li S, Zhao J, Sun Y. Xgboost model and its application to personal credit evaluation. IEEE Intell Syst. 2020;35(3):52\u201361.","journal-title":"IEEE Intell Syst"},{"key":"1880_CR23","doi-asserted-by":"crossref","unstructured":"XingFen W, Xiangbin Y, Yangchun M. Research on user consumption behavior prediction based on improved xgboost algorithm. In: 2018 IEEE international conference on big data (Big Data); 2018. IEEE. p. 4169\u2013175.","DOI":"10.1109\/BigData.2018.8622235"},{"key":"1880_CR24","doi-asserted-by":"crossref","unstructured":"Johnson JM, Khoshgoftaar TM. Deep learning and data sampling with imbalanced big data. In: 2019 IEEE 20th international conference on information reuse and integration for data science (IRI); 2019. IEEE. p. 175\u201383.","DOI":"10.1109\/IRI.2019.00038"},{"key":"1880_CR25","unstructured":"LEIE: Office of Inspector General Leie Downloadable Databases. [Online]. https:\/\/oig.hhs.gov\/exclusions\/index.asp. Accessed 12 Apr 2022"},{"issue":"1","key":"1880_CR26","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1186\/s40537-018-0138-3","volume":"5","author":"M Herland","year":"2018","unstructured":"Herland M, Khoshgoftaar TM, Bauder RA. Big data fraud detection using multiple medicare data sources. J Big Data. 2018;5(1):1\u201321.","journal-title":"J Big Data"},{"key":"1880_CR27","unstructured":"The Centers for Medicare and Medicaid Services: Medicare Physician & Other Practitioners: by Provider and Service Data Dictionary. 2021. https:\/\/data.cms.gov\/resources\/medicare-physician-other-practitioners-by-provider-and-service-data-dictionary. Accessed 28 Jan 2022."},{"key":"1880_CR28","unstructured":"The Centers for Medicare and Medicaid Services: Medicare Part D Prescribers: by provider and drug data dictionary. 2021. https:\/\/data.cms.gov\/resources\/medicare-part-d-prescribers-by-provider-and-drug-data-dictionary. Accessed 4 May 2022."},{"issue":"2","key":"1880_CR29","doi-asserted-by":"publisher","first-page":"123","DOI":"10.1007\/BF00058655","volume":"24","author":"L Breiman","year":"1996","unstructured":"Breiman L. Bagging predictors. Mach Learn. 1996;24(2):123\u201340.","journal-title":"Mach Learn"},{"key":"1880_CR30","doi-asserted-by":"crossref","unstructured":"Hancock J, Khoshgoftaar TM. Performance of catboost and xgboost in medicare fraud detection. In: 2020 19th IEEE international conference on machine learning and applications (ICMLA); 2020. IEEE. p. 572\u201379.","DOI":"10.1109\/ICMLA51294.2020.00095"},{"key":"1880_CR31","doi-asserted-by":"publisher","first-page":"1189","DOI":"10.1214\/aos\/1013203451","volume":"29","author":"JH Friedman","year":"2001","unstructured":"Friedman JH. Greedy function approximation: a gradient boosting machine. Ann Stat. 2001;29:1189\u2013232.","journal-title":"Ann Stat."},{"issue":"1","key":"1880_CR32","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1186\/s40537-020-00369-8","volume":"7","author":"JT Hancock","year":"2020","unstructured":"Hancock JT, Khoshgoftaar TM. Catboost for big data: an interdisciplinary review. J Big Data. 2020;7(1):1\u201345.","journal-title":"J Big Data"},{"key":"1880_CR33","unstructured":"Van Rossum G, Drake F. Python 3 reference manual createspace. Scotts Valley; 2009."},{"key":"1880_CR34","first-page":"2825","volume":"12","author":"F Pedregosa","year":"2011","unstructured":"Pedregosa F, Varoquaux G, Gramfort A, Michel V, Thirion B, Grisel O, Blondel M, Prettenhofer P, Weiss R, Dubourg V. Scikit-learn: machine learning in python. J Mach Learn Res. 2011;12:2825\u201330.","journal-title":"J Mach Learn Res"},{"key":"1880_CR35","doi-asserted-by":"crossref","unstructured":"Johnson JM, Khoshgoftaar TM. Hcpcs2vec: Healthcare procedure embeddings for medicare fraud prediction. In: 2020 IEEE 6th international conference on collaboration and internet computing (CIC); 2020. IEEE. p. 145\u201352.","DOI":"10.1109\/CIC50333.2020.00026"},{"issue":"1","key":"1880_CR36","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1186\/s40537-020-00305-w","volume":"7","author":"JT Hancock","year":"2020","unstructured":"Hancock JT, Khoshgoftaar TM. Survey on categorical data for neural networks. J Big Data. 2020;7(1):1\u201341.","journal-title":"J Big Data"},{"key":"1880_CR37","unstructured":"Parameters. Yandex Corporation. https:\/\/catboost.ai\/en\/docs\/references\/training-parameters\/common. Accessed 09 July 2022"},{"key":"1880_CR38","unstructured":"XGBoost Parameters. XGBoost Developers. https:\/\/xgboost.readthedocs.io\/en\/stable\/parameter.html. Accessed 09 July 2022."},{"issue":"6","key":"1880_CR39","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1007\/s42979-022-01348-x","volume":"3","author":"JT Hancock","year":"2022","unstructured":"Hancock JT, Khoshgoftaar TM. Hyperparameter tuning for medicare fraud detection in big data. SN Comput Sci. 2022;3(6):1\u201313.","journal-title":"SN Comput Sci"}],"container-title":["SN Computer Science"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s42979-023-01880-4.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s42979-023-01880-4\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s42979-023-01880-4.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,6,23]],"date-time":"2023-06-23T19:10:45Z","timestamp":1687547445000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s42979-023-01880-4"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,6,22]]},"references-count":39,"journal-issue":{"issue":"5","published-online":{"date-parts":[[2023,9]]}},"alternative-id":["1880"],"URL":"https:\/\/doi.org\/10.1007\/s42979-023-01880-4","relation":{},"ISSN":["2661-8907"],"issn-type":[{"type":"electronic","value":"2661-8907"}],"subject":[],"published":{"date-parts":[[2023,6,22]]},"assertion":[{"value":"22 December 2022","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"8 May 2023","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"22 June 2023","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"On behalf of all authors, the corresponding author states that there is no conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of Interest"}}],"article-number":"462"}}