{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,3]],"date-time":"2025-11-03T04:47:36Z","timestamp":1762145256871,"version":"3.37.3"},"reference-count":27,"publisher":"Springer Science and Business Media LLC","issue":"4","license":[{"start":{"date-parts":[[2017,5,15]],"date-time":"2017-05-15T00:00:00Z","timestamp":1494806400000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"funder":[{"DOI":"10.13039\/501100003329","name":"Ministerio de Econom\u00eda y Competitividad","doi-asserted-by":"publisher","award":["BES-2012-060450","TIN2014-57251-P"],"award-info":[{"award-number":["BES-2012-060450","TIN2014-57251-P"]}],"id":[{"id":"10.13039\/501100003329","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100003329","name":"Ministerio de Econom\u00eda y Competitividad","doi-asserted-by":"publisher","award":["TIN2016-81113-R","TIN2013-47210-P"],"award-info":[{"award-number":["TIN2016-81113-R","TIN2013-47210-P"]}],"id":[{"id":"10.13039\/501100003329","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100002878","name":"Consejer\u00eda de Econom\u00eda, Innovaci\u00f3n, Ciencia y Empleo, Junta de Andaluc\u00eda","doi-asserted-by":"publisher","award":["P12-TIC-2958"],"award-info":[{"award-number":["P12-TIC-2958"]}],"id":[{"id":"10.13039\/501100002878","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Prog Artif Intell"],"published-print":{"date-parts":[[2017,12]]},"DOI":"10.1007\/s13748-017-0128-2","type":"journal-article","created":{"date-parts":[[2017,5,15]],"date-time":"2017-05-15T16:43:18Z","timestamp":1494866598000},"page":"347-354","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":18,"title":["SMOTE-GPU: Big Data preprocessing on commodity hardware for imbalanced classification"],"prefix":"10.1007","volume":"6","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-0233-1554","authenticated-orcid":false,"given":"Pablo D.","family":"Guti\u00e9rrez","sequence":"first","affiliation":[]},{"given":"Miguel","family":"Lastra","sequence":"additional","affiliation":[]},{"given":"Jos\u00e9 M.","family":"Ben\u00edtez","sequence":"additional","affiliation":[]},{"given":"Francisco","family":"Herrera","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2017,5,15]]},"reference":[{"issue":"3","key":"128_CR1","doi-asserted-by":"crossref","first-page":"307","DOI":"10.1007\/s00500-008-0323-y","volume":"13","author":"J Alcal\u00e1-Fdez","year":"2009","unstructured":"Alcal\u00e1-Fdez, J., S\u00e1nchez, L., Garc\u00eda, S., del Jesus, M., Ventura, S., Garrell, J., Otero, J., Romero, C., Bacardit, J., Rivas, V., Fern\u00e1ndez, J., Herrera, F.: KEEL: a software tool to assess evolutionary algorithms for data mining problems. Soft Comput. 13(3), 307\u2013318 (2009)","journal-title":"Soft Comput."},{"unstructured":"Bache, K., Lichman, M.: UCI machine learning repository (2013). \n                        http:\/\/archive.ics.uci.edu\/ml","key":"128_CR2"},{"doi-asserted-by":"crossref","unstructured":"Baldi, P., Sadowski, P., Whiteson, D.: Searching for exotic particles in high-energy physics with deep learning. Nat. Commun. 5 (2014)","key":"128_CR3","DOI":"10.1038\/ncomms5308"},{"issue":"7","key":"128_CR4","doi-asserted-by":"crossref","first-page":"1145","DOI":"10.1016\/S0031-3203(96)00142-2","volume":"30","author":"AP Bradley","year":"1997","unstructured":"Bradley, A.P.: The use of the area under the ROC curve in the evaluation of machine learning algorithms. Pattern Recognit. 30(7), 1145\u20131159 (1997)","journal-title":"Pattern Recognit."},{"issue":"1","key":"128_CR5","first-page":"321","volume":"16","author":"NV Chawla","year":"2002","unstructured":"Chawla, N.V., Bowyer, K.W., Hall, L.O., Kegelmeyer, W.P.: Smote: synthetic minority over-sampling technique. J. Artif. Int. Res. 16(1), 321\u2013357 (2002)","journal-title":"J. Artif. Int. Res."},{"unstructured":"CUDA. \n                        http:\/\/www.nvidia.com\/object\/cuda_home_new.html\n                        \n                    . Accessed March 2017","key":"128_CR6"},{"issue":"1","key":"128_CR7","doi-asserted-by":"crossref","first-page":"107","DOI":"10.1145\/1327452.1327492","volume":"51","author":"J Dean","year":"2008","unstructured":"Dean, J., Ghemawat, S.: Mapreduce: simplified data processing on large clusters. Commun. ACM 51(1), 107\u2013113 (2008)","journal-title":"Commun. ACM"},{"unstructured":"ECBDL14 dataset: Protein structure prediction and contact map for the ECBDL2014 big data competition (2014). \n                        http:\/\/cruncher.ncl.ac.uk\/bdcomp\/","key":"128_CR8"},{"doi-asserted-by":"publisher","unstructured":"Fern\u00e1ndez, A., del R\u00edo, S., Chawla, N.V., Herrera, F.: An insight into imbalanced big data classification: outcomes and challenges. Complex Intell. Syst. (in press). doi:\n                        10.1007\/s40747-017-0037-9","key":"128_CR9","DOI":"10.1007\/s40747-017-0037-9"},{"unstructured":"Foundation, A.S.: Apache Mahout (2017). \n                        http:\/\/mahout.apache.org\/\n                        \n                    . Accessed March 2017","key":"128_CR10"},{"doi-asserted-by":"crossref","unstructured":"Guti\u00e9rrez, P.D., Lastra, M., Bacardit, J., Ben\u00edtez, J.M., Herrera, F.: GPU\u2013SME\u2013kNN: scalable and memory efficient \n                        $$k$$\n                        \n                            \n                                            \n                                k\n                            \n                        \n                    NN and lazy learning using GPUs. Inf. Sci. 373, 165\u2013182 (2016)","key":"128_CR11","DOI":"10.1016\/j.ins.2016.08.089"},{"issue":"1","key":"128_CR12","doi-asserted-by":"crossref","first-page":"62","DOI":"10.1109\/TIFS.2013.2291220","volume":"9","author":"PD Guti\u00e9rrez","year":"2014","unstructured":"Guti\u00e9rrez, P.D., Lastra, M., Herrera, F., Benitez, J.M.: A high performance fingerprint matching system for large databases based on GPU. IEEE Trans. Inf. Forensics Secur. 9(1), 62\u201371 (2014)","journal-title":"IEEE Trans. Inf. Forensics Secur."},{"issue":"9","key":"128_CR13","doi-asserted-by":"crossref","first-page":"1263","DOI":"10.1109\/TKDE.2008.239","volume":"21","author":"H He","year":"2009","unstructured":"He, H., Garcia, E.A.: Learning from imbalanced data. IEEE Trans. Knowl. Data Eng. 21(9), 1263\u20131284 (2009)","journal-title":"IEEE Trans. Knowl. Data Eng."},{"issue":"7","key":"128_CR14","doi-asserted-by":"crossref","first-page":"321","DOI":"10.1145\/366622.366644","volume":"4","author":"CAR Hoare","year":"1961","unstructured":"Hoare, C.A.R.: Algorithm 64: quicksort. Commun. ACM 4(7), 321 (1961)","journal-title":"Commun. ACM"},{"issue":"4","key":"128_CR15","doi-asserted-by":"crossref","first-page":"221","DOI":"10.1007\/s13748-016-0094-0","volume":"5","author":"B Krawczyk","year":"2016","unstructured":"Krawczyk, B.: Learning from imbalanced data: open challenges and future directions. Progr. Artif. Intell. 5(4), 221\u2013232 (2016)","journal-title":"Progr. Artif. Intell."},{"key":"128_CR16","doi-asserted-by":"crossref","first-page":"113","DOI":"10.1016\/j.ins.2013.07.007","volume":"250","author":"V L\u00f3pez","year":"2013","unstructured":"L\u00f3pez, V., Fern\u00e1ndez, A., Garc\u00eda, S., Palade, V., Herrera, F.: An insight into classification with imbalanced data: Empirical results and current trends on using data intrinsic characteristics. Inf. Sci. 250, 113\u2013141 (2013)","journal-title":"Inf. Sci."},{"issue":"3","key":"128_CR17","doi-asserted-by":"crossref","first-page":"4","DOI":"10.1109\/MIC.2012.50","volume":"16","author":"S Madden","year":"2012","unstructured":"Madden, S.: From databases to big data. IEEE Internet Comput. 16(3), 4\u20136 (2012)","journal-title":"IEEE Internet Comput."},{"issue":"34","key":"128_CR18","first-page":"1","volume":"17","author":"X Meng","year":"2016","unstructured":"Meng, X., Bradley, J., Yavuz, B., Sparks, E., Venkataraman, S., Liu, D., Freeman, J., Tsai, D., Amde, M., Owen, S., et al.: MLLIB: machine learning in apache spark. J. Mach. Learn. Res. 17(34), 1\u20137 (2016)","journal-title":"J. Mach. Learn. Res."},{"unstructured":"Owen, S., Anil, R., Dunning, T., Friedman, E.: Mahout in Action, Manning Publications Co., Greenwich, CT, USA, ISBN:1935182684, 9781935182689 (2011)","key":"128_CR19"},{"issue":"1","key":"128_CR20","doi-asserted-by":"crossref","first-page":"247","DOI":"10.1007\/s10115-014-0794-3","volume":"45","author":"RC Prati","year":"2015","unstructured":"Prati, R.C., Batista, G.E.A.P.A., Silva, D.F.: Class imbalance revisited: a new experimental setup to assess the performance of treatment methods. Knowl. Inf. Syst. 45(1), 247\u2013270 (2015)","journal-title":"Knowl. Inf. Syst."},{"key":"128_CR21","doi-asserted-by":"crossref","DOI":"10.1017\/CBO9781139058452","volume-title":"Mining of Massive Datasets","author":"A Rajaraman","year":"2011","unstructured":"Rajaraman, A., Ullman, J.: Mining of Massive Datasets. Cambridge University Press, Cambridge (2011)"},{"issue":"9","key":"128_CR22","doi-asserted-by":"crossref","first-page":"3878","DOI":"10.1021\/ct400314y","volume":"9","author":"R Salomon-Ferrer","year":"2013","unstructured":"Salomon-Ferrer, R., G\u00f6tz, A., Poole, D., Le Grand, S., Walker, R.: Routine microsecond molecular dynamics simulations with amber on GPUS. 2. Explicit solvent particle mesh ewald. J. Chem. Theory Comput. 9(9), 3878\u20133888 (2013)","journal-title":"J. Chem. Theory Comput."},{"unstructured":"Spark, A.: Machine Learning Library (MLlib) for Spark (2017). \n                        http:\/\/spark.apache.org\/docs\/latest\/mllib-guide.html\n                        \n                    . Accessed March 2017","key":"128_CR23"},{"key":"128_CR24","doi-asserted-by":"crossref","first-page":"69","DOI":"10.1016\/j.knosys.2015.05.027","volume":"87","author":"I Triguero","year":"2015","unstructured":"Triguero, I., del R\u00edo, S., L\u00f3pez, V., Bacardit, J., Ben\u00edtez, J.M., Herrera, F.: ROSEFW-RF: the winner algorithm for the ECBDL\u201914 big data competition\u2014an extremely imbalanced big data bioinformatics problem. Knowl. Based Syst. 87, 69\u201379 (2015)","journal-title":"Knowl. Based Syst."},{"key":"128_CR25","volume-title":"Hadoop: The Definitive Guide","author":"T White","year":"2015","unstructured":"White, T.: Hadoop: The Definitive Guide, 4th edn. O\u2019Reilly Media Inc, Sebastopol (2015)","edition":"4"},{"unstructured":"Zaharia, M., Chowdhury, M., Das, T., Dave, A., Ma, J., McCauley, M., Franklin, M.J., Shenker, S., Stoica, I.: Resilient distributed datasets: a fault-tolerant abstraction for in-memory cluster computing. In: Proceedings of the 9th USENIX Conference on Networked Systems Design and Implementation, pp. 1\u201314. USENIX Association (2012)","key":"128_CR26"},{"key":"128_CR27","volume-title":"Understanding Big Data: Analytics for Enterprise Class Hadoop and Streaming Data","author":"PC Zikopoulos","year":"2011","unstructured":"Zikopoulos, P.C., Eaton, C., deRoos, D., Deutsch, T., Lapis, G.: Understanding Big Data: Analytics for Enterprise Class Hadoop and Streaming Data, 1st edn. McGraw-Hill, New York (2011)","edition":"1"}],"container-title":["Progress in Artificial Intelligence"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s13748-017-0128-2\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s13748-017-0128-2.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s13748-017-0128-2.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2017,10,20]],"date-time":"2017-10-20T03:41:42Z","timestamp":1508470902000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s13748-017-0128-2"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2017,5,15]]},"references-count":27,"journal-issue":{"issue":"4","published-print":{"date-parts":[[2017,12]]}},"alternative-id":["128"],"URL":"https:\/\/doi.org\/10.1007\/s13748-017-0128-2","relation":{},"ISSN":["2192-6352","2192-6360"],"issn-type":[{"type":"print","value":"2192-6352"},{"type":"electronic","value":"2192-6360"}],"subject":[],"published":{"date-parts":[[2017,5,15]]}}}