{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,11]],"date-time":"2026-02-11T14:02:10Z","timestamp":1770818530713,"version":"3.50.1"},"publisher-location":"Cham","reference-count":34,"publisher":"Springer International Publishing","isbn-type":[{"value":"9783319313092","type":"print"},{"value":"9783319313115","type":"electronic"}],"license":[{"start":{"date-parts":[[2016,1,1]],"date-time":"2016-01-01T00:00:00Z","timestamp":1451606400000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2016]]},"DOI":"10.1007\/978-3-319-31311-5_7","type":"book-chapter","created":{"date-parts":[[2016,4,1]],"date-time":"2016-04-01T04:37:03Z","timestamp":1459485423000},"page":"157-171","source":"Crossref","is-referenced-by-count":5,"title":["Is Data Sampling Required When Using Random Forest for Classification on Imbalanced Bioinformatics Data?"],"prefix":"10.1007","author":[{"given":"David J.","family":"Dittman","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Taghi M.","family":"Khoshgoftaar","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Amri","family":"Napolitano","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2016,4,2]]},"reference":[{"key":"7_CR1","doi-asserted-by":"crossref","unstructured":"Abu Shanab, A., Khoshgoftaar, T.M., Wald, R., Napolitano, A.: Impact of noise and data sampling on stability of feature ranking techniques for biological datasets. In: 2012 IEEE International Conference on Information Reuse and Integration (IRI), pp. 415\u2013422, Aug 2012","DOI":"10.1109\/IRI.2012.6303039"},{"issue":"3","key":"7_CR2","doi-asserted-by":"crossref","first-page":"195","DOI":"10.2165\/00822942-200504030-00004","volume":"4","author":"Ali Al-Shahib","year":"2005","unstructured":"Al-Shahib, A., Breitling, R., Gilbert, D.: Feature selection and the class imbalance problem in predicting protein function from sequence. Appl. Bioinform. 4(3), 195\u2013203 (2005). \n                  http:\/\/www.ingentaconnect.com\/content\/adis\/abi\/2005\/00000004\/00000003\/art00004","journal-title":"Applied Bioinformatics"},{"key":"7_CR3","unstructured":"Berenson, M.L., Goldstein, M., Levine, D.: Intermediate Statistical Methods and Applications: A Computer Package Approach, 2nd edn. Prentice Hall (1983)"},{"key":"7_CR4","doi-asserted-by":"publisher","first-page":"5","DOI":"10.1023\/A:1010933404324","volume":"45","author":"L Breiman","year":"2001","unstructured":"Breiman, L.: Random forests. Mach. Learn. 45, 5\u201332 (2001)","journal-title":"Mach. Learn."},{"key":"7_CR5","doi-asserted-by":"crossref","unstructured":"Chen, X., Wasikowski, M.: Fast: a ROC-based feature selection metric for small samples and imbalanced data classification problems. In: Proceedings of the 14th ACM SIGKDD International Conference on Knowledge Discovery and Data Mining (KDD\u201908), pp. 124\u2013132. ACM, New York, NY (2008)","DOI":"10.1145\/1401890.1401910"},{"key":"7_CR6","doi-asserted-by":"crossref","unstructured":"Diaz-Uriarte, R., Alvarez de Andres, S.: Gene selection and classification of microarray data using random forest. BMC Bioinform. 7, 1\u201313 (2006)","DOI":"10.1186\/1471-2105-7-3"},{"key":"7_CR7","doi-asserted-by":"crossref","unstructured":"Dittman, D.J., Khoshgoftaar, T.M., Napolitano, A.: Selecting the appropriate data sampling approach for imbalanced and high-dimensional bioinformatics datasets. In: 2014 14th IEEE International Conference on Bioinformatics and Bioengineering (BIBE), pp. 304\u2013310 (2014)","DOI":"10.1109\/BIBE.2014.61"},{"key":"7_CR8","doi-asserted-by":"crossref","unstructured":"Dittman, D.J., Khoshgoftaar, T.M., Wald, R., Napolitano, A.: Random forest: a reliable tool for patient response prediction. In: Proceedings of the IEEE International Conference on Bioinformatics and Biomedicine (BIBM) Workshops, pp. 289\u2013296. BIBM (2011)","DOI":"10.1109\/BIBMW.2011.6112389"},{"key":"7_CR9","doi-asserted-by":"crossref","unstructured":"Dittman, D.J., Khoshgoftaar, T.M., Wald, R., Van Hulse, J.: Comparative analysis of dna microarray data through the use of feature selection techniques. In: Proceedings of the Ninth IEEE International Conference on Machine Learning and Applications (ICMLA), pp. 147\u2013152. ICMLA (2010)","DOI":"10.1109\/ICMLA.2010.29"},{"key":"7_CR10","unstructured":"Dittman, D.J., Khoshgoftaar, T.M., Napolitano, A.: Selecting the appropriate ensemble learning approach for balanced bioinformatics data. In: Florida Artificial Intelligence Research Society Conference, pp. 329\u2013334 (2015)"},{"key":"7_CR11","doi-asserted-by":"crossref","unstructured":"Dittman, D.J., Khoshgoftaar, T.M., Wald, R., Napolitano, A.: Simplifying the utilization of machine learning techniques for bioinformatics. In: 2013 12th International Conference on Machine Learning and Applications (ICMLA), pp. 396\u2013403 (2013)","DOI":"10.1109\/ICMLA.2013.155"},{"key":"7_CR12","unstructured":"Dittman, D.J., Khoshgoftaar, T.M., Wald, R., Napolitano, A.: Comparison of data sampling approaches for imbalanced bioinformatics data. In: 27th International Conference on Florida Artificial Intelligence Society (FLAIRS), pp. 268\u2013271 (2014)"},{"key":"7_CR13","doi-asserted-by":"crossref","unstructured":"Dittman, D.J., Khoshgoftaar, T.M., Napolitano, A.: The effect of data sampling when using random forest on imbalanced bioinformatics data. In: 2015 IEEE International Conference on Information Reuse and Integration (IRI), pp. 457\u2013463, Aug 2015","DOI":"10.1109\/IRI.2015.76"},{"issue":"8","key":"7_CR14","doi-asserted-by":"crossref","first-page":"861","DOI":"10.1016\/j.patrec.2005.10.010","volume":"27","author":"Tom Fawcett","year":"2006","unstructured":"Fawcett, T.: An introduction to ROC analysis. Pattern Recogn. Lett. 27(8), 861\u2013874 (2006). \n                  http:\/\/www.sciencedirect.com\/science\/article\/pii\/S016786550500303X","journal-title":"Pattern Recognition Letters"},{"issue":"6","key":"7_CR15","doi-asserted-by":"crossref","first-page":"1437","DOI":"10.1109\/TKDE.2003.1245283","volume":"15","author":"M.A. Hall","year":"2003","unstructured":"Hall, M.A., Holmes, G.: Benchmarking attribute selection techniques for discrete class data mining. IEEE Trans. Knowl. Data Eng. 15(6), 392\u2013398 (2003)","journal-title":"IEEE Transactions on Knowledge and Data Engineering"},{"issue":"18","key":"7_CR16","doi-asserted-by":"crossref","first-page":"1873","DOI":"10.1001\/jama.2011.593","volume":"305","author":"Christos Hatzis","year":"2011","unstructured":"Hatzis, C., Pusztai, L., Valero, V., et al.: A genomic predictor of response and survival following taxane-anthracycline chemotherapy for invasive breast cancer. JAMA 305(18), 1873\u20131881 (2011). \n                  http:\/\/dx.doi.org\/10.1001\/jama.2011.593","journal-title":"JAMA"},{"issue":"9","key":"7_CR17","doi-asserted-by":"publisher","first-page":"1263","DOI":"10.1109\/TKDE.2008.239","volume":"21","author":"H He","year":"2009","unstructured":"He, H., Garcia, E.A.: Learning from imbalanced data. IEEE Trans. Knowl. Data Eng. 21(9), 1263\u20131284 (2009)","journal-title":"IEEE Trans. Knowl. Data Eng."},{"key":"7_CR18","doi-asserted-by":"crossref","unstructured":"Khoshgoftaar, T.M., Dittman, D.J., Wald, R., Fazelpour, A.: First order statistics based feature selection: a diverse and powerful family of feature selection techniques. In: Proceedings of the Eleventh International Conference on Machine Learning and Applications (ICMLA): Health Informatics Workshop, pp. 151\u2013157. ICMLA (2012)","DOI":"10.1109\/ICMLA.2012.192"},{"key":"7_CR19","doi-asserted-by":"crossref","unstructured":"Khoshgoftaar, T.M., Wald, R., Dittman, D.J., Napolitano, A.: Classification performance of three approaches for combining data sampling and gene selection on bioinformatics data. In: 2014 14th IEEE International Conference on Information Reuse and Integration (IRI), pp. 315\u2013321 (2014)","DOI":"10.1109\/IRI.2014.7051906"},{"key":"7_CR20","doi-asserted-by":"crossref","unstructured":"Khoshgoftaar, T.M., Dittman, D.J., Wald, R., Awada, W.: A review of ensemble classification for dna microarrays data. In: 2013 IEEE 25th International Conference on Tools with Artificial Intelligence (ICTAI), pp. 381\u2013389. IEEE (2013)","DOI":"10.1109\/ICTAI.2013.64"},{"key":"7_CR21","doi-asserted-by":"crossref","unstructured":"Khoshgoftaar, T.M., Golawala, M., Van Hulse, J.: An empirical study of learning from imbalanced data using random forest. In: IEEE International Conference on Tools with Artificial Intelligence, pp. 310\u2013317 (2007)","DOI":"10.1109\/ICTAI.2007.46"},{"key":"7_CR22","first-page":"1137","volume":"14","author":"R Kohavi","year":"1995","unstructured":"Kohavi, R.: A study of cross-validation and bootstrap for accuracy estimation and model selection. IJCAI 14, 1137\u20131145 (1995)","journal-title":"IJCAI"},{"issue":"38","key":"7_CR23","doi-asserted-by":"crossref","first-page":"13550","DOI":"10.1073\/pnas.0506230102","volume":"102","author":"L. D. Miller","year":"2005","unstructured":"Miller, L.D., Smeds, J., George, J., Vega, V.B., Vergara, L., Ploner, A., Pawitan, Y., Hall, P., Klaar, S., Liu, E.T., Bergh, J.: An expression signature for p53 status in human breast cancer predicts mutation status, transcriptional effects, and patient survival. In: Proceedings of the National Academy of Sciences of the United States of America 102(38), 13550\u201313555 (2005). \n                  http:\/\/www.pnas.org\/content\/102\/38\/13550.abstract","journal-title":"Proceedings of the National Academy of Sciences"},{"key":"7_CR24","unstructured":"Pawitan, Y., Bjohle, J., Amler, L., Borg, A.L., Egyhazi, S., Hall, P., Han, X., Holmberg, L., Huang, F., Klaar, S., Liu, E., Miller, L., Nordgren, H., Ploner, A., Sandelin, K., Shaw, P., Smeds, J., Skoog, L., Wedren, S., Bergh, J.: Gene expression profiling spares early breast cancer patients from adjuvant therapy: derived and validated in two population-based cohorts. Breast Cancer Res. 7(6), R953\u2013R964 (2005). \n                  http:\/\/breast-cancer-research.com\/content\/7\/6\/R953"},{"issue":"7","key":"7_CR25","doi-asserted-by":"crossref","first-page":"2254","DOI":"10.1158\/1078-0432.CCR-06-2609","volume":"13","author":"M. Raponi","year":"2007","unstructured":"Raponi, M., Harousseau, J.L., Lancet, J.E., Lwenberg, B., Stone, R., Zhang, Y., Rackoff, W., Wang, Y., Atkins, D.: Identification of molecular predictors of response in a study of tipifarnib treatment in relapsed and refractory acute myelogenous leukemia. Clin. Cancer Res. 13(7), 2254\u20132260 (2007). \n                  http:\/\/clincancerres.aacrjournals.org\/content\/13\/7\/2254.abstract","journal-title":"Clinical Cancer Research"},{"issue":"21","key":"7_CR26","doi-asserted-by":"crossref","first-page":"5351","DOI":"10.1158\/1078-0432.CCR-10-1265","volume":"16","author":"Adel Tabchy","year":"2010","unstructured":"Tabchy, A., Valero, V., Vidaurre, T., Lluch, A., Gomez, H., Martin, M., Qi, Y., Barajas-Figueroa, L.J., Souchon, E., Coutant, C., Doimi, F.D., Ibrahim, N.K., Gong, Y., Hortobagyi, G.N., Hess, K.R., Symmans, W.F., Pusztai, L.: Evaluation of a 30-gene paclitaxel, fluorouracil, doxorubicin, and cyclophosphamide chemotherapy response predictor in a multicenter randomized trial in breast cancer. Clin. Cancer Res. 16(21), 5351\u20135361 (2010). \n                  http:\/\/clincancerres.aacrjournals.org\/content\/16\/21\/5351.abstract","journal-title":"Clinical Cancer Research"},{"key":"7_CR27","doi-asserted-by":"crossref","unstructured":"Van Hulse, J., Khoshgoftaar, T.M., Napolitano, A., Wald, R.: Feature selection with high-dimensional imbalanced data. In: 2009 IEEE International Conference on Data Mining Workshops, ICDMW\u201909, pp. 507\u2013514, Dec 2009","DOI":"10.1109\/ICDMW.2009.35"},{"key":"7_CR28","doi-asserted-by":"crossref","unstructured":"Van Hulse, J., Khoshgoftaar, T.M., Napolitano, A., Wald, R.: A comparative evaluation of feature ranking methods for high dimensional bioinformatics data. In: Proceedings of the IEEE International Conference on Information Reuse and Integration\u2014IRI\u201911, pp. 315\u2013320 (2011)","DOI":"10.1109\/IRI.2011.6009566"},{"key":"7_CR29","doi-asserted-by":"crossref","unstructured":"Wald, R., Khoshgoftaar, T.M., Dittman, D.J., Napolitano, A.: Random forest with 200 selected features: an optimal model for bioinformatics research. In: 2013 12th International Conference on Machine Learning and Applications (ICMLA), vol. 1, pp. 154\u2013160, Dec 2013","DOI":"10.1109\/ICMLA.2013.34"},{"key":"7_CR30","doi-asserted-by":"crossref","unstructured":"Wang, H., Khoshgoftaar, T.M., Van Hulse, J.: A comparative study of threshold-based feature selection techniques. In: 2010 IEEE International Conference on Granular Computing (GrC), pp. 499\u2013504 (2010)","DOI":"10.1109\/GrC.2010.104"},{"issue":"10","key":"7_CR31","doi-asserted-by":"crossref","first-page":"1388","DOI":"10.1109\/TKDE.2009.187","volume":"22","author":"Mike Wasikowski","year":"2010","unstructured":"Wasikowski, M., wen Chen, X.: Combating the small sample class imbalance problem using feature selection. IEEE Trans. Knowl. Data Eng. 22, 1388\u20131400 (2010)","journal-title":"IEEE Transactions on Knowledge and Data Engineering"},{"issue":"7","key":"7_CR32","doi-asserted-by":"crossref","first-page":"3370","DOI":"10.1158\/0008-5472.CAN-05-3834","volume":"66","author":"Toshiaki Watanabe","year":"2006","unstructured":"Watanabe, T., Komuro, Y., Kiyomatsu, T., Kanazawa, T., Kazama, Y., Tanaka, J., Tanaka, T., Yamamoto, Y., Shirane, M., Muto, T., Nagawa, H.: Prediction of sensitivity of rectal cancer cells in response to preoperative radiotherapy by DNA microarray analysis of gene expression profiles. Cancer Res. 66(7), 3370\u20133374 (2006). \n                  http:\/\/cancerres.aacrjournals.org\/content\/66\/7\/3370.abstract","journal-title":"Cancer Research"},{"key":"7_CR33","doi-asserted-by":"crossref","first-page":"315","DOI":"10.1613\/jair.1199","volume":"19","author":"GM Weiss","year":"2003","unstructured":"Weiss, G.M., Provost, F.J.: Learning when training data are costly: the effect of class distribution on tree induction. J. Artif. Intell. Res. (JAIR) 19, 315\u2013354 (2003)","journal-title":"J. Artif. Intell. Res. (JAIR)"},{"key":"7_CR34","doi-asserted-by":"crossref","unstructured":"Witten, I.H., Frank, E.: Data Mining: Practical Machine Learning Tools and Techniques, 3rd edn. Morgan Kaufmann (2011)","DOI":"10.1016\/B978-0-12-374856-0.00001-8"}],"container-title":["Advances in Intelligent Systems and Computing","Theoretical Information Reuse and Integration"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-319-31311-5_7","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,6,1]],"date-time":"2019-06-01T19:08:45Z","timestamp":1559416125000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-319-31311-5_7"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2016]]},"ISBN":["9783319313092","9783319313115"],"references-count":34,"URL":"https:\/\/doi.org\/10.1007\/978-3-319-31311-5_7","relation":{},"ISSN":["2194-5357","2194-5365"],"issn-type":[{"value":"2194-5357","type":"print"},{"value":"2194-5365","type":"electronic"}],"subject":[],"published":{"date-parts":[[2016]]}}}