{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,13]],"date-time":"2026-05-13T21:43:12Z","timestamp":1778708592633,"version":"3.51.4"},"reference-count":45,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2017,12,1]],"date-time":"2017-12-01T00:00:00Z","timestamp":1512086400000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/creativecommons.org\/licenses\/by\/4.0"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["J Big Data"],"published-print":{"date-parts":[[2017,12]]},"DOI":"10.1186\/s40537-017-0108-1","type":"journal-article","created":{"date-parts":[[2017,12,28]],"date-time":"2017-12-28T01:17:57Z","timestamp":1514423877000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":8,"title":["Improved classification of large imbalanced data sets using rationalized technique: Updated Class Purity Maximization Over_Sampling Technique (UCPMOT)"],"prefix":"10.1186","volume":"4","author":[{"given":"Sachin S.","family":"Patil","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Shefali P.","family":"Sonavane","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2017,12,28]]},"reference":[{"key":"108_CR1","doi-asserted-by":"crossref","first-page":"97","DOI":"10.1109\/TKDE.2013.109","volume":"26","author":"X Wu","year":"2014","unstructured":"Wu X, et al. Data mining with big data. IEEE Trans Knowl Data Eng. 2014;26:97\u2013107.","journal-title":"IEEE Trans Knowl Data Eng"},{"key":"108_CR2","doi-asserted-by":"crossref","first-page":"137","DOI":"10.1016\/j.ijinfomgt.2014.10.007","volume":"35","author":"A Gandomi","year":"2015","unstructured":"Gandomi A, Haider M. Beyond the hype: big data concepts, methods, and analytics. Int J Inform Manag. 2015;35:137\u201344.","journal-title":"Int J Inform Manag"},{"key":"108_CR3","unstructured":"Agrawal D, et al. Challenges and opportunity with big data. A community white paper developed by leading researchers across the United States; 2012. p. 1\u201317."},{"key":"108_CR4","first-page":"674","volume":"5931","author":"W Zhao","year":"2009","unstructured":"Zhao W, Ma H, He Q. Parallel K-means clustering based on mapreduce. CloudCom LNCS. 2009;5931:674\u20139.","journal-title":"CloudCom LNCS"},{"key":"108_CR5","doi-asserted-by":"publisher","first-page":"514","DOI":"10.1109\/ACCESS.2014.2325029","volume":"2","author":"X Chen","year":"2014","unstructured":"Chen X, Lin X. Big data deep learning: challenges and perspectives. IEEE Access. 2014;2:514\u201325. https:\/\/doi.org\/10.1109\/ACCESS.2014.2325029 .","journal-title":"IEEE Access"},{"key":"108_CR6","unstructured":"Dandawate Y, et al. Big Data: challenges and opportunities, Infosys Labs Briefings\u2014Infosys Labs. DIALOG. 2013. http:\/\/www.infosys.com\/infosys-labs\/publications\/Documents\/bigdata-challenges-opportunities.pdf . Accessed Dec 2014."},{"key":"108_CR7","doi-asserted-by":"publisher","first-page":"113","DOI":"10.1016\/j.ins.2013.07.007","volume":"250","author":"V L\u00f3pez","year":"2013","unstructured":"L\u00f3pez V, et al. An insight into classification with imbalanced data: empirical results and current trends on using data intrinsic characteristics. J Inform Sci. 2013;250:113\u201341. https:\/\/doi.org\/10.1016\/j.ins.2013.07.007 .","journal-title":"J Inform Sci"},{"key":"108_CR8","doi-asserted-by":"crossref","first-page":"429","DOI":"10.3233\/IDA-2002-6504","volume":"6","author":"N Japkowicz","year":"2002","unstructured":"Japkowicz N, Stephen S. The class imbalance problem: a systematic study. ACM J Intell Data Anal. 2002;6:429\u201349.","journal-title":"ACM J Intell Data Anal"},{"key":"108_CR9","doi-asserted-by":"crossref","first-page":"1263","DOI":"10.1109\/TKDE.2008.239","volume":"21","author":"H He","year":"2009","unstructured":"He H, Garcia E. Learning from imbalanced data. IEEE Trans Knowl Data Eng. 2009;21:1263\u201384.","journal-title":"IEEE Trans Knowl Data Eng"},{"key":"108_CR10","doi-asserted-by":"publisher","first-page":"687","DOI":"10.1142\/S0218001409007326","volume":"23","author":"Y Sun","year":"2009","unstructured":"Sun Y, Wong A, Kamel M. Classification of imbalanced data: a review. Int J Pattern Recog A I. 2009;23:687\u2013719. https:\/\/doi.org\/10.1142\/S0218001409007326 .","journal-title":"Int J Pattern Recog A I"},{"key":"108_CR11","doi-asserted-by":"crossref","first-page":"220","DOI":"10.1016\/j.eswa.2016.12.035","volume":"73","author":"H Guo","year":"2017","unstructured":"Guo H, et al. Learning from class-imbalanced data: review of methods and applications. Elsevier Exp Syst Appl. 2017;73:220\u201339.","journal-title":"Elsevier Exp Syst Appl"},{"key":"108_CR12","doi-asserted-by":"publisher","first-page":"40","DOI":"10.1016\/j.ins.2011.01.026","volume":"229","author":"B Park","year":"2013","unstructured":"Park B, Oh S, Pedrycz W. The design of polynomial function-based neural network predictors for detection of software defects. J Inform Sci. 2013;229:40\u201357. https:\/\/doi.org\/10.1016\/j.ins.2011.01.026 .","journal-title":"J Inform Sci"},{"key":"108_CR13","doi-asserted-by":"publisher","unstructured":"Vorobeva A. Examining the performance of classification algorithms for imbalanced data sets in web author identification. In: IEEE open innovations association and seminar on information security and protection of information technology: FRUCT-ISPIT. 2016. p. 385\u2013390. https:\/\/doi.org\/10.1109\/fruct-ispit.2016.7561554 .","DOI":"10.1109\/fruct-ispit.2016.7561554"},{"key":"108_CR14","doi-asserted-by":"publisher","first-page":"112","DOI":"10.1016\/j.ins.2013.07.007","volume":"285","author":"S Rio","year":"2014","unstructured":"Rio S, Lopez V, Benitez J, Herrera F. On the use of MapReduce for imbalanced big data using Random Forest. J Inform Sci. 2014;285:112\u201337. https:\/\/doi.org\/10.1016\/j.ins.2013.07.007 .","journal-title":"J Inform Sci"},{"key":"108_CR15","doi-asserted-by":"publisher","first-page":"369","DOI":"10.1007\/s10586-014-0400-1","volume":"18","author":"H Jiang","year":"2015","unstructured":"Jiang H, ChenY Qiao Z. Scaling up MapReduce-based Big Data processing on multi-GPU systems. SpingerLink Clu Comp. 2015;18:369\u201383. https:\/\/doi.org\/10.1007\/s10586-014-0400-1 .","journal-title":"SpingerLink Clu Comp"},{"key":"108_CR16","doi-asserted-by":"publisher","DOI":"10.1109\/iadcc.2015.7154740","author":"M Nadaf","year":"2015","unstructured":"Nadaf M, Patil S. Performance evaluation of categorizing technical support requests using advanced K-means algorithm. IEEE Int Adv Com Conf. 2015. https:\/\/doi.org\/10.1109\/iadcc.2015.7154740 .","journal-title":"IEEE Int Adv Com Conf"},{"key":"108_CR17","doi-asserted-by":"publisher","DOI":"10.1109\/iadcc.2015.7154739","author":"R Bhagat","year":"2015","unstructured":"Bhagat R, Patil S. Enhanced SMOTE algorithm for classification of imbalanced BigData using Random Forest. IEEE Int Adv Com Conf. 2015. https:\/\/doi.org\/10.1109\/iadcc.2015.7154739 .","journal-title":"IEEE Int Adv Com Conf"},{"key":"108_CR18","doi-asserted-by":"publisher","first-page":"20","DOI":"10.1145\/1007730.1007735","volume":"6","author":"G Batista","year":"2004","unstructured":"Batista G, Prati R, Monard M. A study of the behaviour of several methods for balancing machine learning training data. ACM Sigkdd Expl Newslett. 2004;6:20\u20139. https:\/\/doi.org\/10.1145\/1007730.1007735 .","journal-title":"ACM Sigkdd Expl Newslett"},{"key":"108_CR19","doi-asserted-by":"crossref","first-page":"321","DOI":"10.1613\/jair.953","volume":"16","author":"N Chawla","year":"2002","unstructured":"Chawla N, Bowyer K, Hall L, Kegelmeyer W. SMOTE: synthetic minority over-sampling technique. J Artif Intell Res. 2002;16:321\u201357.","journal-title":"J Artif Intell Res"},{"key":"108_CR20","doi-asserted-by":"crossref","unstructured":"Han H, Wang W, Mao B. Borderline-SMOTE: a new over-sampling method in imbalanced data sets learning. In: Proceeding of the 2005 international conference on intelligence computing. 2005. p. 878\u201387.","DOI":"10.1007\/11538059_91"},{"key":"108_CR21","doi-asserted-by":"publisher","first-page":"475","DOI":"10.1007\/978-3-642-01307-2_43","volume-title":"Advances in knowledge discovery and data mining. PAKDD","author":"C Bunkhumpornpat","year":"2009","unstructured":"Bunkhumpornpat C, Sinapiromsaran K. Lursinsap C (2009) Safe-Level-Smote: safe-level-synthetic minority over-sampling technique for handling the class imbalanced problem. In: Theeramunkong T, Kijsirikul B, Cercone N, Ho TB, editors. Advances in knowledge discovery and data mining. PAKDD, vol. 5476. Berlin: Springer; 2009. p. 475\u201382. https:\/\/doi.org\/10.1007\/978-3-642-01307-2_43 ."},{"key":"108_CR22","doi-asserted-by":"publisher","unstructured":"He H, Bai Y, Garcia E, Li S. ADASYN: adaptive synthetic sampling approach for imbalanced learning. In: Proceedings of the 2008 IEEE international joint conference on neural networks. 2008. p. 1322\u20138. https:\/\/doi.org\/10.1109\/ijcnn.2008.4633969 .","DOI":"10.1109\/ijcnn.2008.4633969"},{"key":"108_CR23","doi-asserted-by":"publisher","first-page":"3","DOI":"10.1016\/j.knosys.2011.01.012","volume":"25","author":"S Garcia","year":"2012","unstructured":"Garcia S, Derrac J, Triguero I, Carmona C, Herrera F. Evolutionary-based selection of generalized instances for imbalanced classification. J Knowl Syst. 2012;25:3\u201312. https:\/\/doi.org\/10.1016\/j.knosys.2011.01.012 .","journal-title":"J Knowl Syst"},{"key":"108_CR24","doi-asserted-by":"publisher","DOI":"10.1155\/2013\/694809","author":"F Hu","year":"2013","unstructured":"Hu F, Li H. A novel boundary oversampling algorithm based on neighborhood rough set model: NRSBoundary-SMOTE. Math Probl Eng. 2013. https:\/\/doi.org\/10.1155\/2013\/694809 .","journal-title":"Math Probl Eng."},{"key":"108_CR25","first-page":"107","volume-title":"SMOTEBoost: improving prediction of the minority class in boosting","author":"N Chawla","year":"2003","unstructured":"Chawla N, Lazarevic A, Hall L, Bowyer K. SMOTEBoost: improving prediction of the minority class in boosting. Berlin: PKDD Springer; 2003. p. 107\u201319."},{"key":"108_CR26","first-page":"1783","volume":"8","author":"H Xiong","year":"2012","unstructured":"Xiong H, Yang Y, Zhao S. Local clustering ensemble learning method based on improved AdaBoost for rare class analysis. J Comput Inform Syst. 2012;8:1783\u201390.","journal-title":"J Comput Inform Syst"},{"key":"108_CR27","doi-asserted-by":"crossref","unstructured":"Alberto F, Jesus M, Herrera F. Multi-class imbalanced data-sets with linguistic fuzzy rule based classification systems based on pairwise learning. In: International conference on information processing and management of uncertainty in knowledge-based systems. Berlin: Springer; 2010. p. 89\u201398.","DOI":"10.1007\/978-3-642-14049-5_10"},{"key":"108_CR28","doi-asserted-by":"publisher","unstructured":"Han J, Liu Y, Sun X. A scalable random forest algorithm based on MapReduce. In: 4th IEEE international conference software engineering and service science (ICSESS); 2013. p. 849\u201352. https:\/\/doi.org\/10.1109\/icsess.2013.6615438 .","DOI":"10.1109\/icsess.2013.6615438"},{"key":"108_CR29","doi-asserted-by":"crossref","first-page":"251","DOI":"10.1016\/j.knosys.2016.05.048","volume":"106","author":"Z Zhang","year":"2016","unstructured":"Zhang Z, et al. Empowering one-vs-one decomposition with ensemble learning for multi-class imbalanced data. Elsevier Knowl Syst. 2016;106:251\u201363.","journal-title":"Elsevier Knowl Syst"},{"key":"108_CR30","doi-asserted-by":"publisher","first-page":"318","DOI":"10.1109\/TSM.2015.2445380","volume":"28","author":"J Kwak","year":"2015","unstructured":"Kwak J, Lee T, Kim C. An incremental clustering-based fault detection algorithm for class-imbalanced\u00a0process\u00a0data. IEEE Trans Semicond Manuf. 2015;28:318\u201328. https:\/\/doi.org\/10.1109\/TSM.2015.2445380 .","journal-title":"IEEE Trans Semicond Manuf"},{"key":"108_CR31","doi-asserted-by":"publisher","first-page":"50","DOI":"10.1109\/MIS.2016.27","volume":"31","author":"S Kim","year":"2016","unstructured":"Kim S, Kim H, Namkoong Y. Ordinal classification of\u00a0imbalanced\u00a0data\u00a0with application in emergency and disaster information services. IEEE Intell Syst. 2016;31:50\u20136. https:\/\/doi.org\/10.1109\/MIS.2016.27 .","journal-title":"IEEE Intell Syst"},{"key":"108_CR32","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1186\/s40537-016-0040-9","volume":"3","author":"M Chandak","year":"2016","unstructured":"Chandak M. Role of big-data in classification and novel class detection in data streams. J Big Data. 2016;3:1\u20139. https:\/\/doi.org\/10.1186\/s40537-016-0040-9 .","journal-title":"J Big Data."},{"key":"108_CR33","doi-asserted-by":"publisher","first-page":"966","DOI":"10.1016\/j.cor.2005.05.019","volume":"34","author":"D Li","year":"2007","unstructured":"Li D, Wu C, Tsai I, Lina Y. Using mega-trend-diffusion and artificial samples in small data set learning for early flexible manufacturing system scheduling knowledge. J Comput Oper Res. 2007;34:966\u201382. https:\/\/doi.org\/10.1016\/j.cor.2005.05.019 .","journal-title":"J Comput Oper Res"},{"key":"108_CR34","doi-asserted-by":"publisher","first-page":"405","DOI":"10.1109\/tkde.2012.232","volume":"26","author":"S Barua","year":"2014","unstructured":"Barua S, Islam M, Yao X, Murase K. MWMOTE\u2014majority weighted minority oversampling technique for imbalanced data set learning. IEEE Trans Knowl Data Eng. 2014;26:405\u201325. https:\/\/doi.org\/10.1109\/tkde.2012.232 .","journal-title":"IEEE Trans Knowl Data Eng"},{"key":"108_CR35","doi-asserted-by":"publisher","first-page":"251","DOI":"10.1007\/978-3-319-18038-0_20","volume-title":"Advances in knowledge discovery and data mining, PAKDD 2015","author":"X Ai","year":"2015","unstructured":"Ai X, Wu J, Sheng V, Zhao P, Yao Y, Cui Z. Immune centroids over-sampling method for multi-class classification. In: Cao T, Lim EP, Zhou ZH, Ho TB, Cheung D, Motoda H, editors. Advances in knowledge discovery and data mining, PAKDD 2015, vol. 9077. Berlin: Springer; 2015. p. 251\u201363. https:\/\/doi.org\/10.1007\/978-3-319-18038-0_20 ."},{"key":"108_CR36","doi-asserted-by":"publisher","unstructured":"Patil S, Sonavane S. Enhanced over_sampling techniques for handling imbalanced big data set classification. In: Data science and big data: an environment of computational intelligence (studies in big data). Berlin: Springer International Publishing; 2017. 24: 49\u201381. https:\/\/doi.org\/10.1007\/978-3-319-53474-9_3 .","DOI":"10.1007\/978-3-319-53474-9_3"},{"key":"108_CR37","doi-asserted-by":"publisher","unstructured":"Yoon K, Kwek S. An unsupervised learning approach to resolving the data imbalanced issue in supervised learning problems in functional genomics. In: IEEEHybrid Intelligent Systems, 2005. HIS\u201905. Fifth International Conference. 2005. p. 1\u20136. https:\/\/doi.org\/10.1109\/ichis.2005.23 .","DOI":"10.1109\/ichis.2005.23"},{"key":"108_CR38","doi-asserted-by":"crossref","first-page":"174","DOI":"10.1016\/j.ins.2016.09.038","volume":"384","author":"M Bach","year":"2017","unstructured":"Bach M, Werner A, \u017bywiec J, Pluskiewicz W. The study of under- and over-sampling methods\u2019 utility in analysis of highly\u00a0imbalanced\u00a0data\u00a0on osteoporosis. Inform Sci. 2017;384:174\u201390.","journal-title":"Inform Sci"},{"key":"108_CR39","doi-asserted-by":"publisher","unstructured":"Rivera W, Asparouhov O. Safe level OUPS for improving target concept learning in imbalanced data sets. In: Proceeding of the IEEE SoutheastCon. 2015. p. 1\u20138. https:\/\/doi.org\/10.1109\/secon.2015.7132940 .","DOI":"10.1109\/secon.2015.7132940"},{"key":"108_CR40","doi-asserted-by":"publisher","first-page":"731","DOI":"10.1007\/978-3-540-37256-1_89","volume-title":"Intelligent control and automation. Lecture Notes in Control and Information Sciences","author":"S Yen","year":"2006","unstructured":"Yen S, Lee Y. Under-sampling approaches for improving prediction of the minority class in an imbalanced dataset. In: Huang DS, Li K, Irwin GW, editors. Intelligent control and automation. Lecture Notes in Control and Information Sciences, vol. 344. Berlin: Springer; 2006. p. 731\u201340. https:\/\/doi.org\/10.1007\/978-3-540-37256-1_89 ."},{"key":"108_CR41","doi-asserted-by":"publisher","first-page":"664","DOI":"10.1007\/s10489-011-0287-y","volume":"36","author":"C Bunkhumpornpat","year":"2012","unstructured":"Bunkhumpornpat C, Sinapiromsaran K, Lursinsap C. DBSMOTE: density-based synthetic minority over-sampling technique. J Appl Intell. 2012;36:664\u201384. https:\/\/doi.org\/10.1007\/s10489-011-0287-y .","journal-title":"J Appl Intell"},{"key":"108_CR42","unstructured":"Machine Learning Repository, Center for Machine Learning and Intelligent Systems, US (NFS). https:\/\/archive.ics.uci.edu\/ml\/datasets.html . Accessed 28 Oct 2017."},{"key":"108_CR43","unstructured":"SImple Drug Operation mechanisms. Cauality Workbench, US (NSF under\u00a0Grant N0 ECCS-0725746). 2008. http:\/\/www.causality.inf.ethz.ch\/data\/SIDO.html . Accessed 28 Oct 2017."},{"key":"108_CR44","doi-asserted-by":"crossref","first-page":"302","DOI":"10.1109\/TETC.2014.2310485","volume":"2","author":"H Rong","year":"2014","unstructured":"Rong H, Wanchun D, Jianxun L. ClubCF: a clustering-based collaborative filtering approach for big data application. IEEE Trans Emerg Topics Comput. 2014;2:302\u201313.","journal-title":"IEEE Trans Emerg Topics Comput"},{"key":"108_CR45","doi-asserted-by":"crossref","first-page":"1","DOI":"10.2991\/itmr.2013.3.1.1","volume":"3","author":"M Weiss","year":"2013","unstructured":"Weiss M, Sari S, Noori N. Niche formation in the Mashup ecosystem. Tech Innov Manag Rev. 2013;3:1\u20136.","journal-title":"Tech Innov Manag Rev."}],"container-title":["Journal of Big Data"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1186\/s40537-017-0108-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1186\/s40537-017-0108-1\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1186\/s40537-017-0108-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,10,8]],"date-time":"2019-10-08T19:43:11Z","timestamp":1570563791000},"score":1,"resource":{"primary":{"URL":"https:\/\/journalofbigdata.springeropen.com\/articles\/10.1186\/s40537-017-0108-1"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2017,12]]},"references-count":45,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2017,12]]}},"alternative-id":["108"],"URL":"https:\/\/doi.org\/10.1186\/s40537-017-0108-1","relation":{},"ISSN":["2196-1115"],"issn-type":[{"value":"2196-1115","type":"electronic"}],"subject":[],"published":{"date-parts":[[2017,12]]},"article-number":"49"}}