{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,26]],"date-time":"2026-03-26T04:50:49Z","timestamp":1774500649363,"version":"3.50.1"},"reference-count":34,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2023,8,21]],"date-time":"2023-08-21T00:00:00Z","timestamp":1692576000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,8,21]],"date-time":"2023-08-21T00:00:00Z","timestamp":1692576000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61872324"],"award-info":[{"award-number":["61872324"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Computing"],"published-print":{"date-parts":[[2024,1]]},"DOI":"10.1007\/s00607-023-01206-5","type":"journal-article","created":{"date-parts":[[2023,8,21]],"date-time":"2023-08-21T17:02:00Z","timestamp":1692637320000},"page":"29-55","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":3,"title":["Feature reduction of unbalanced data classification based on density clustering"],"prefix":"10.1007","volume":"106","author":[{"given":"Zhen-Fei","family":"Wang","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Pei-Yao","family":"Yuan","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zhong-Ya","family":"Cao","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7742-4985","authenticated-orcid":false,"given":"Li-Ying","family":"Zhang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2023,8,21]]},"reference":[{"key":"1206_CR1","doi-asserted-by":"publisher","DOI":"10.1016\/j.eswa.2019.112866","volume":"140","author":"D Devarriya","year":"2020","unstructured":"Devarriya D, Gulati C, Mansharamani V, Sakalle A, Bhardwaj A (2020) Unbalanced breast cancer data classification using novel fitness functions in genetic programming. Expert Syst Appl 140:112866. https:\/\/doi.org\/10.1016\/j.eswa.2019.112866","journal-title":"Expert Syst Appl"},{"issue":"10","key":"1206_CR2","doi-asserted-by":"publisher","first-page":"2776","DOI":"10.1109\/JBHI.2020.3012383","volume":"24","author":"J Bridge","year":"2020","unstructured":"Bridge J, Meng Y, Zhao Y, Du Y, Zhao M, Sun R, Zheng Y (2020) Introducing the gev activation function for highly unbalanced data to develop covid-19 diagnostic models. IEEE J Biomed Health Inform 24(10):2776\u20132786","journal-title":"IEEE J Biomed Health Inform"},{"key":"1206_CR3","doi-asserted-by":"publisher","DOI":"10.1016\/j.cie.2019.106266","volume":"140","author":"D Gan","year":"2020","unstructured":"Gan D, Shen J, An B, Xu M, Liu N (2020) Integrating tanbn with cost sensitive classification algorithm for imbalanced data in medical diagnosis. Comput Ind Eng 140:106266","journal-title":"Comput Ind Eng"},{"key":"1206_CR4","doi-asserted-by":"crossref","unstructured":"Btoush E, Zhou X, Gururaian R, Chan K, Tao X (2021) A survey on credit card fraud detection techniques in banking industry for cyber security. In: 2021 8th international conference on behavioral and social computing (BESC). IEEE, pp 1\u20137","DOI":"10.1109\/BESC53957.2021.9635559"},{"key":"1206_CR5","doi-asserted-by":"publisher","first-page":"448","DOI":"10.1016\/j.ins.2017.12.030","volume":"479","author":"U Fiore","year":"2019","unstructured":"Fiore U, De Santis A, Perla F, Zanetti P, Palmieri F (2019) Using generative adversarial networks for improving classification effectiveness in credit card fraud detection. Inf Sci 479:448\u2013455","journal-title":"Inf Sci"},{"key":"1206_CR6","doi-asserted-by":"publisher","DOI":"10.1016\/j.eswa.2021.114750","volume":"175","author":"Z Li","year":"2021","unstructured":"Li Z, Huang M, Liu G, Jiang C (2021) A hybrid method with dynamic weighted entropy for handling the problem of class imbalance with overlap in credit card fraud detection. Expert Syst Appl 175:114750","journal-title":"Expert Syst Appl"},{"issue":"7","key":"1206_CR7","doi-asserted-by":"publisher","first-page":"6248","DOI":"10.1109\/TIE.2020.2994868","volume":"68","author":"Q Shi","year":"2020","unstructured":"Shi Q, Zhang H (2020) Fault diagnosis of an autonomous vehicle with an improved svm algorithm subject to unbalanced datasets. IEEE Trans Ind Electron 68(7):6248\u20136256","journal-title":"IEEE Trans Ind Electron"},{"key":"1206_CR8","doi-asserted-by":"publisher","first-page":"152","DOI":"10.1016\/j.isatra.2021.02.042","volume":"119","author":"T Zhang","year":"2022","unstructured":"Zhang T, Chen J, Li F, Zhang K, Lv H, He S, Xu E (2022) Intelligent fault diagnosis of machines with small and imbalanced data: a state-of-the-art review and possible extensions. ISA Trans 119:152\u2013171","journal-title":"ISA Trans"},{"issue":"2","key":"1206_CR9","doi-asserted-by":"publisher","first-page":"407","DOI":"10.1007\/s10845-020-01579-w","volume":"32","author":"J Luo","year":"2021","unstructured":"Luo J, Huang J, Li H (2021) A case study of conditional deep convolutional generative adversarial networks in machine fault diagnosis. J Intell Manuf 32(2):407\u2013425","journal-title":"J Intell Manuf"},{"key":"1206_CR10","doi-asserted-by":"publisher","first-page":"268","DOI":"10.1016\/j.eswa.2017.03.057","volume":"81","author":"D Agnihotri","year":"2017","unstructured":"Agnihotri D, Verma K, Tripathi P (2017) Variable global feature selection scheme for automatic classification of text documents. Expert Syst Appl 81:268\u2013281","journal-title":"Expert Syst Appl"},{"key":"1206_CR11","doi-asserted-by":"crossref","unstructured":"Christensen R (2018) Analysis of variance, design, and regression: linear modeling for unbalanced data","DOI":"10.1201\/9781315370095"},{"key":"1206_CR12","doi-asserted-by":"crossref","unstructured":"Liu X, Li N, Liu S, Wang J, Zhang N, Zheng X, Leung K-S, Cheng L (2019) Normalization methods for the analysis of unbalanced transcriptome data: a review. Front Bioeng Biotechnol 358","DOI":"10.3389\/fbioe.2019.00358"},{"key":"1206_CR13","doi-asserted-by":"publisher","first-page":"321","DOI":"10.1613\/jair.953","volume":"16","author":"NV Chawla","year":"2002","unstructured":"Chawla NV, Bowyer KW, Hall LO, Kegelmeyer WP (2002) Smote: synthetic minority over-sampling technique. J Artif Intell Res 16:321\u2013357","journal-title":"J Artif Intell Res"},{"key":"1206_CR14","doi-asserted-by":"publisher","DOI":"10.1016\/j.eswa.2021.116051","volume":"188","author":"D Liang","year":"2022","unstructured":"Liang D, Yi B, Cao W, Zheng Q (2022) Exploring ensemble oversampling method for imbalanced keyword extraction learning in policy text based on three-way decisions and smote. Expert Syst Appl 188:116051","journal-title":"Expert Syst Appl"},{"key":"1206_CR15","doi-asserted-by":"publisher","first-page":"3","DOI":"10.1016\/j.patrec.2016.10.006","volume":"93","author":"D Devi","year":"2017","unstructured":"Devi D, Purkayastha B et al (2017) Redundancy-driven modified tomek-link based undersampling: a solution to class imbalance. Pattern Recogn Lett 93:3\u201312","journal-title":"Pattern Recogn Lett"},{"key":"1206_CR16","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2020.107262","volume":"102","author":"M Koziarski","year":"2020","unstructured":"Koziarski M (2020) Radial-based undersampling for imbalanced data classification. Pattern Recogn 102:107262","journal-title":"Pattern Recogn"},{"key":"1206_CR17","doi-asserted-by":"publisher","first-page":"591","DOI":"10.1016\/j.ins.2022.02.004","volume":"593","author":"L Sun","year":"2022","unstructured":"Sun L, Zhang J, Ding W, Xu J (2022) Feature reduction for imbalanced data classification using similarity-based feature clustering with adaptive weighted k-nearest neighbors. Inf Sci 593:591\u2013613","journal-title":"Inf Sci"},{"key":"1206_CR18","unstructured":"Quinlan JR (2014) C4. 5: programs for machine learning"},{"issue":"2","key":"1206_CR19","doi-asserted-by":"publisher","first-page":"103","DOI":"10.1023\/A:1007413511361","volume":"29","author":"P Domingos","year":"1997","unstructured":"Domingos P, Pazzani M (1997) On the optimality of the simple Bayesian classifier under zero-one loss. Mach Learn 29(2):103\u2013130","journal-title":"Mach Learn"},{"key":"1206_CR20","doi-asserted-by":"crossref","unstructured":"Vapnik V (1999) The nature of statistical learning theory","DOI":"10.1007\/978-1-4757-3264-1"},{"issue":"2","key":"1206_CR21","doi-asserted-by":"publisher","first-page":"513","DOI":"10.1109\/TSMCB.2011.2168604","volume":"42","author":"G-B Huang","year":"2011","unstructured":"Huang G-B, Zhou H, Ding X, Zhang R (2011) Extreme learning machine for regression and multiclass classification. IEEE Trans Syst Man Cybern Part B (Cybern) 42(2):513\u2013529","journal-title":"IEEE Trans Syst Man Cybern Part B (Cybern)"},{"key":"1206_CR22","unstructured":"Elkan C (2001) The foundations of cost-sensitive learning. In: International joint conference on artificial intelligence, vol 17. Lawrence Erlbaum Associates Ltd, pp 973\u2013978"},{"issue":"3","key":"1206_CR23","first-page":"604","volume":"30","author":"QI Zhixin","year":"2019","unstructured":"Zhixin QI, Hongzhi ZXWANG (2019) Cost-sensitive decision tree induction on dirty data. J Softw 30(3):604","journal-title":"J Softw"},{"issue":"05","key":"1206_CR24","doi-asserted-by":"publisher","first-page":"44","DOI":"10.15938\/j.jhust.2021.05.006","volume":"26","author":"YSG Zhou","year":"2021","unstructured":"Zhou YSG (2021) Double cost sensitive random forest algorithm. J Harbin Univ Sci Technol 26(05):44\u201350. https:\/\/doi.org\/10.15938\/j.jhust.2021.05.006","journal-title":"J Harbin Univ Sci Technol"},{"key":"1206_CR25","doi-asserted-by":"publisher","first-page":"303","DOI":"10.1016\/S0169-7161(04)24011-1","volume":"24","author":"CD Sutton","year":"2005","unstructured":"Sutton CD (2005) Classification and regression trees, bagging, and boosting. Handb Stat 24:303\u2013329","journal-title":"Handb Stat"},{"issue":"2","key":"1206_CR26","doi-asserted-by":"publisher","first-page":"165","DOI":"10.60090\/kar.v2i2.589.165-178","volume":"2","author":"HP Koapaha","year":"2021","unstructured":"Koapaha HP, Ananto N (2021) Bagging based ensemble analysis in handling unbalanced data on classification modeling. Klabat Account Rev 2(2):165\u2013178","journal-title":"Klabat Account Rev"},{"key":"1206_CR27","doi-asserted-by":"crossref","unstructured":"Thakkar HK, Desai A, Ghosh S, Singh P, Sharma G (2022) Clairvoyant: adaboost with cost-enabled cost-sensitive classifier for customer churn prediction. Comput Intell Neurosci 2022","DOI":"10.1155\/2022\/9028580"},{"key":"1206_CR28","doi-asserted-by":"crossref","unstructured":"Chen X-w, Wasikowski M (2008) Fast: a roc-based feature selection metric for small samples and imbalanced data classification problems. In: Proceedings of the 14th ACM SIGKDD international conference on knowledge discovery and data mining, pp 124\u2013132","DOI":"10.1145\/1401890.1401910"},{"key":"1206_CR29","doi-asserted-by":"crossref","unstructured":"Van\u00a0Hulse J, Khoshgoftaar TM, Napolitano A, Wald R (2009) Feature selection with high-dimensional imbalanced data. In: 2009 IEEE international conference on data mining workshops. IEEE, pp 507\u2013514","DOI":"10.1109\/ICDMW.2009.35"},{"issue":"12","key":"1206_CR30","doi-asserted-by":"publisher","first-page":"5343","DOI":"10.1109\/TIP.2015.2479560","volume":"24","author":"Z Li","year":"2015","unstructured":"Li Z, Tang J (2015) Unsupervised feature selection via nonnegative spectral analysis and redundancy control. IEEE Trans Image Process 24(12):5343\u20135355","journal-title":"IEEE Trans Image Process"},{"issue":"6","key":"1206_CR31","doi-asserted-by":"publisher","first-page":"5845","DOI":"10.3233\/JIFS-181665","volume":"36","author":"A Nagpal","year":"2019","unstructured":"Nagpal A, Singh V (2019) Feature selection from high dimensional data based on iterative qualitative mutual information. J Intell Fuzzy Syst 36(6):5845\u20135856","journal-title":"J Intell Fuzzy Syst"},{"issue":"1","key":"1206_CR32","doi-asserted-by":"publisher","first-page":"139","DOI":"10.1109\/TPAMI.2019.2929166","volume":"43","author":"X-Y Jing","year":"2019","unstructured":"Jing X-Y, Zhang X, Zhu X, Wu F, You X, Gao Y, Shan S, Yang J-Y (2019) Multiset feature learning for highly imbalanced data classification. IEEE Trans Pattern Anal Mach Intell 43(1):139\u2013156","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"1206_CR33","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2020.107625","volume":"110","author":"J Saha","year":"2021","unstructured":"Saha J, Mukherjee J (2021) Cnak: cluster number assisted k-means. Pattern Recogn 110:107625","journal-title":"Pattern Recogn"},{"key":"1206_CR34","unstructured":"Krogh A, Vedelsby J (1994) Neural network ensembles, cross validation, and active learning. In: Advances in neural information processing systems 7"}],"container-title":["Computing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00607-023-01206-5.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00607-023-01206-5\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00607-023-01206-5.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,1,17]],"date-time":"2024-01-17T17:41:00Z","timestamp":1705513260000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00607-023-01206-5"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,8,21]]},"references-count":34,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2024,1]]}},"alternative-id":["1206"],"URL":"https:\/\/doi.org\/10.1007\/s00607-023-01206-5","relation":{},"ISSN":["0010-485X","1436-5057"],"issn-type":[{"value":"0010-485X","type":"print"},{"value":"1436-5057","type":"electronic"}],"subject":[],"published":{"date-parts":[[2023,8,21]]},"assertion":[{"value":"24 May 2022","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"22 July 2023","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"21 August 2023","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"I declare that the authors have no competing or other interests that could be used to affect the findings and\/or conclusions described in this paper.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}},{"value":"Not.","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethics approval"}},{"value":"Yes.","order":4,"name":"Ethics","group":{"name":"EthicsHeading","label":"Consent to participate"}},{"value":"Yes.","order":5,"name":"Ethics","group":{"name":"EthicsHeading","label":"Consent for publication"}}]}}