{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,27]],"date-time":"2026-02-27T11:39:02Z","timestamp":1772192342232,"version":"3.50.1"},"reference-count":49,"publisher":"Springer Science and Business Media LLC","issue":"16","license":[{"start":{"date-parts":[[2023,10,27]],"date-time":"2023-10-27T00:00:00Z","timestamp":1698364800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,10,27]],"date-time":"2023-10-27T00:00:00Z","timestamp":1698364800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Multimed Tools Appl"],"DOI":"10.1007\/s11042-023-16788-7","type":"journal-article","created":{"date-parts":[[2023,10,27]],"date-time":"2023-10-27T16:02:36Z","timestamp":1698422556000},"page":"47627-47648","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":17,"title":["Software fault prediction with imbalanced datasets using SMOTE-Tomek sampling technique and Genetic Algorithm models"],"prefix":"10.1007","volume":"83","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-3090-1216","authenticated-orcid":false,"given":"Mansi","family":"Gupta","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9263-3072","authenticated-orcid":false,"given":"Kumar","family":"Rajnish","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0680-2691","authenticated-orcid":false,"given":"Vandana","family":"Bhattacharjee","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2023,10,27]]},"reference":[{"key":"16788_CR1","doi-asserted-by":"crossref","unstructured":"K. E. Bennin, J. W. Keung, and A. Monden, \u201cOn the relative value of data resampling approaches for software defect prediction,\u201d Empirical Software Eng, vol. 24, no. 2, pp. 602\u2013636, Jun. 2018.","DOI":"10.1007\/s10664-018-9633-6"},{"key":"16788_CR2","doi-asserted-by":"crossref","unstructured":"Pelayo L and Dick S (2007) \u201cApplying Novel Resampling Strategies To Software Defect Prediction,\u201d NAFIPS 2007 - 2007 Annual Meeting of the North American Fuzzy Information Processing Society","DOI":"10.1109\/NAFIPS.2007.383813"},{"issue":"1","key":"16788_CR3","first-page":"39","volume":"52","author":"Y Mahmood","year":"2021","unstructured":"Mahmood Y, Kama N, Azmi A, Khan AS, Ali M (2021) Software effort estimation accuracy prediction of machine learning techniques: A systematic performance evaluation. Softw: Pract Exp 52(1):39\u201365","journal-title":"Softw: Pract Exp"},{"key":"16788_CR4","doi-asserted-by":"crossref","unstructured":"A. Abid, M. T. Khan, and J. Iqbal, \u201cA review on fault detection and diagnosis techniques: basics and beyond,\u201d Artificial Intel Rev, vol. 54, no. 5, pp. 3639\u20133664, Nov. 2020.","DOI":"10.1007\/s10462-020-09934-2"},{"key":"16788_CR5","doi-asserted-by":"crossref","unstructured":"Gupta M, Rajnish K, Bhattarcharjee V (2020) \u201cPredicting Software Cost Through Entity\u2013Relationship Diagrams: An Empirical View,\u201d Lecture Notes in Electrical Engineering, pp. 561\u2013567","DOI":"10.1007\/978-981-15-7486-3_51"},{"issue":"5","key":"16788_CR6","doi-asserted-by":"publisher","first-page":"1164","DOI":"10.11591\/ijece.v5i5.pp1164-1173","volume":"5","author":"TMH Le","year":"2015","unstructured":"Le TMH, Nguyen TB, Khuat TT (2015) Survey on Mutation-based Test Data Generation. Int J Electric Comput Eng (IJECE) 5(5):1164\u20131173","journal-title":"Int J Electric Comput Eng (IJECE)"},{"key":"16788_CR7","first-page":"1","volume":"2016","author":"D Tomar","year":"2016","unstructured":"Tomar D, Agarwal S (2016) Prediction of Defective Software Modules Using Class Imbalance Learning. Appl Comput Intell Soft Comput 2016:1\u201312","journal-title":"Appl Comput Intell Soft Comput"},{"issue":"4","key":"16788_CR8","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3343440","volume":"52","author":"H Kaur","year":"2020","unstructured":"Kaur H, Pannu HS, Malhi AK (2020) A Systematic Review on Imbalanced Data Challenges in Machine Learning. ACM Comput Surv 52(4):1\u201336","journal-title":"ACM Comput Surv"},{"issue":"4","key":"16788_CR9","doi-asserted-by":"publisher","first-page":"3241","DOI":"10.11591\/ijece.v9i4.pp3241-3246","volume":"9","author":"TT Khuat","year":"2019","unstructured":"Khuat TT, Le MH (2019) Ensemble learning for software fault prediction problem with imbalanced data. Int J Electric Comput Eng (IJECE) 9(4):3241","journal-title":"Int J Electric Comput Eng (IJECE)"},{"key":"16788_CR10","doi-asserted-by":"publisher","first-page":"388","DOI":"10.1016\/j.infsof.2014.07.005","volume":"58","author":"IH Laradji","year":"2015","unstructured":"Laradji IH, Alshayeb M, Ghouti L (2015) Software defect prediction using ensemble learning on selected features. Inform Software Technol 58:388\u2013402","journal-title":"Inform Software Technol"},{"key":"16788_CR11","doi-asserted-by":"crossref","unstructured":"A. O. Balogun, S. Basri, S. Mahamad, S. J. Abdulkadir, L. F. Capretz, A. A. Imam, M. A. Almomani, V. E. Adeyemo, and G. Kumar, \u201cEmpirical Analysis of Rank Aggregation-Based Multi-Filter Feature Selection Methods in Software Defect Prediction,\u201d Electronics, vol. 10, no. 2, p. 179, Jan. 2021.","DOI":"10.3390\/electronics10020179"},{"key":"16788_CR12","doi-asserted-by":"crossref","unstructured":"M. Gupta, K. Rajnish, and V. Bhattacharjee, \u201cImpact of Parameter Tuning for Optimizing Deep Neural Network Models for Predicting Software Faults,\u201d Scientific Programm, vol. 2021, pp. 1\u201317, Jun. 2021.","DOI":"10.1155\/2021\/6662932"},{"issue":"5","key":"16788_CR13","first-page":"1121","volume":"51","author":"H Alsghaier","year":"2020","unstructured":"Alsghaier H, Akour M (2020) Software fault prediction using Whale algorithm with genetics algorithm. Software: Pract Exp 51(5):1121\u20131146","journal-title":"Software: Pract Exp"},{"issue":"6","key":"16788_CR14","doi-asserted-by":"publisher","first-page":"1923","DOI":"10.1007\/s00521-020-05035-x","volume":"33","author":"KM Hamdia","year":"2020","unstructured":"Hamdia KM, Zhuang X, Rabczuk T (2020) An efficient optimization approach for designing machine learning models based on genetic algorithm. Neural Comput Appl 33(6):1923\u20131933","journal-title":"Neural Comput Appl"},{"key":"16788_CR15","doi-asserted-by":"crossref","unstructured":"Sohail A (2021) Genetic Algorithms in the Fields of Artificial Intelligence and Data Sciences. Ann Data Sci","DOI":"10.1007\/s40745-021-00354-9"},{"key":"16788_CR16","doi-asserted-by":"crossref","unstructured":"Bal PR, Kumar S (2018) Cross project software defect prediction using extreme learning machine: an ensemble based study. In: ICSOFT, pp 354\u2013361","DOI":"10.5220\/0006886503200327"},{"key":"16788_CR17","doi-asserted-by":"crossref","unstructured":"Sohan MF, Kabir MA, Jabiullah MI, Rahman SSMM (2019) Revisiting the Class Imbalance Issue in Software Defect Prediction, 2019 International Conference on Electrical, Computer and Communication Engineering (ECCE)","DOI":"10.1109\/ECACE.2019.8679382"},{"key":"16788_CR18","doi-asserted-by":"crossref","unstructured":"R. Malhotra and S. Kamal, \u201cAn empirical study to investigate oversampling methods for improving software defect prediction using imbalanced data,\u201d Neurocomputing, vol. 343, pp. 120\u2013140, May 2019.","DOI":"10.1016\/j.neucom.2018.04.090"},{"key":"16788_CR19","doi-asserted-by":"crossref","unstructured":"Khuat TT, Le MH (2020) \u201cEvaluation of Sampling-Based Ensembles of Classifiers on Imbalanced Data for Software Defect Prediction Problems\u201d. SN Comput Sci 1(2)","DOI":"10.1007\/s42979-020-0119-4"},{"key":"16788_CR20","doi-asserted-by":"publisher","first-page":"86855","DOI":"10.1109\/ACCESS.2021.3072682","volume":"9","author":"J Zheng","year":"2021","unstructured":"Zheng J, Wang X, Wei D, Chen B, Shao Y (2021) A Novel Imbalanced Ensemble Learning in Software Defect Predication. IEEE Access 9:86855\u201386868. https:\/\/doi.org\/10.1109\/ACCESS.2021.3072682","journal-title":"IEEE Access"},{"key":"16788_CR21","doi-asserted-by":"crossref","unstructured":"Balogun AO, Lafenwa-Balogun FB, Mojeed HA, Adeyemo VE, Akande ON, Akintola AG, Bajeh AO, Usman-Hamza FE (2020) SMOTE-Based Homogeneous Ensemble Methods for Software Defect Prediction. Lecture Notes Comput Sci:615\u2013631","DOI":"10.1007\/978-3-030-58817-5_45"},{"key":"16788_CR22","doi-asserted-by":"crossref","unstructured":"Elahi E, Ayub A, Hussain I (2021) Two staged data preprocessing ensemble model for software fault prediction,\" 2021 International Bhurban Conference on Applied Sciences and Technologies (IBCAST)","DOI":"10.1109\/IBCAST51254.2021.9393182"},{"issue":"2","key":"16788_CR23","doi-asserted-by":"publisher","first-page":"130","DOI":"10.1080\/17445760.2019.1650039","volume":"36","author":"L Goel","year":"2019","unstructured":"Goel L, Sharma M, Khatri SK, Damodaran D (2019) Cross-project defect prediction using data sampling for class imbalance learning: an empirical study. Int J Parallel, Emergent Distrib Syst 36(2):130\u2013143","journal-title":"Int J Parallel, Emergent Distrib Syst"},{"issue":"1","key":"16788_CR24","doi-asserted-by":"publisher","first-page":"123","DOI":"10.23940\/ijpe.21.01.p12.123134","volume":"17","author":"X Guoqiang","year":"2021","unstructured":"Guoqiang X, Shiyi X, Xiaohong P, Zhao L (2021) Prediction of Number of Software Defects based on SMOTE. Int J Performabil Eng 17(1):123","journal-title":"Int J Performabil Eng"},{"key":"16788_CR25","doi-asserted-by":"crossref","unstructured":"Pandey SK, Tripathi AK (2021) Class Imbalance Issue in Software Defect Prediction Models by various Machine Learning Techniques: An Empirical Study, 2021 8th International Conference on Smart Computing and Communications (ICSCC)","DOI":"10.1109\/ICSCC51209.2021.9528170"},{"key":"16788_CR26","doi-asserted-by":"crossref","unstructured":"S. K. Pandey and A. K. Tripathi, \u201cAn empirical study toward dealing with noise and class imbalance issues in software defect prediction,\u201d Soft Comput, vol. 25, no. 21, pp. 13465\u201313492, Aug. 2021.","DOI":"10.1007\/s00500-021-06096-3"},{"key":"16788_CR27","doi-asserted-by":"publisher","unstructured":"M. Rostami, K. Berahmand, E. Nasiri, and S. Forouzandeh, \u201cReview of swarm intelligence-based feature selection methods,\u201d Eng Appl Artificial Intell, vol. 100, p. 104210, Apr. 2021, doi: https:\/\/doi.org\/10.1016\/j.engappai.2021.104210.","DOI":"10.1016\/j.engappai.2021.104210"},{"key":"16788_CR28","doi-asserted-by":"publisher","unstructured":"M. Rostami, S. Forouzandeh, K. Berahmand, M. Soltani, M. Shahsavari, and M. Oussalah, \u201cGene selection for microarray data classification via multi-objective graph theoretic-based method,\u201d Artificial Intell Med, vol. 123, p. 102228, Jan. 2022, doi: https:\/\/doi.org\/10.1016\/j.artmed.2021.102228.","DOI":"10.1016\/j.artmed.2021.102228"},{"key":"16788_CR29","doi-asserted-by":"crossref","unstructured":"Arora R, Kaur A (2022) Heterogeneous Fault Prediction Using Feature Selection and Supervised Learning Algorithms. Vietnam J Comput Sci:1\u201324","DOI":"10.1142\/S2196888822500142"},{"key":"16788_CR30","doi-asserted-by":"crossref","unstructured":"Kumar R, Chaturvedi A, Kailasam L (2022) An Unsupervised Software Fault Prediction Approach Using Threshold Derivation. IEEE Trans Reliabil:1\u201322","DOI":"10.1109\/TR.2022.3151125"},{"key":"16788_CR31","doi-asserted-by":"crossref","unstructured":"Sta\u0144czyk U (2014) \u201cFeature Evaluation by Filter, Wrapper, and Embedded Approaches.\u201d Feature Selection for Data and Pattern Recognition, Part of the Studies in Computational Intelligence book series (SCI,volume 584)","DOI":"10.1007\/978-3-662-45620-0_3"},{"key":"16788_CR32","doi-asserted-by":"publisher","first-page":"45","DOI":"10.1109\/IAdCC.2014.6779292","volume":"2014","author":"A Nagpal","year":"2014","unstructured":"Nagpal A, Gaur D, Gaur S (2014) Feature selection using mutual information for high- dimensional data sets. IEEE Int Adv Comput Conf (IACC) 2014:45\u201349. https:\/\/doi.org\/10.1109\/IAdCC.2014.6779292","journal-title":"IEEE Int Adv Comput Conf (IACC)"},{"key":"16788_CR33","doi-asserted-by":"crossref","unstructured":"Jovi\u0107 A, Brki\u0107 K, Bogunovi\u0107 N (2015) \u201cA review of feature selection methods with applications\u201d, 38th International Convention on Information and Communication Technology, Electronics and Microelectronics (MIPRO)","DOI":"10.1109\/MIPRO.2015.7160458"},{"key":"16788_CR34","doi-asserted-by":"crossref","unstructured":"Rathore SS, Chouhan SS, Jain DK, Vachhani AG (2022) \u201cGenerative Oversampling Methods for Handling Imbalanced Data in Software Fault Prediction,\u201d IEEE Transactions on Reliability, pp. 1\u201316","DOI":"10.1109\/TR.2022.3158949"},{"key":"16788_CR35","doi-asserted-by":"crossref","unstructured":"F. Charte, A. J. Rivera, M. J. del Jesus, and F. Herrera, \u201cAddressing imbalance in multilabel classification: Measures and random resampling algorithms,\u201d Neurocomputing, vol. 163, pp. 3\u201316, Sep. 2015.","DOI":"10.1016\/j.neucom.2014.08.091"},{"key":"16788_CR36","doi-asserted-by":"crossref","unstructured":"K. E. Bennin, J. W. Keung, and A. Monden, \u201cOn the relative value of data resampling approaches for software defect prediction,\u201d Empirical Software Eng, vol. 24, no. 2, pp. 602\u2013636, Jun. 2018.","DOI":"10.1007\/s10664-018-9633-6"},{"key":"16788_CR37","doi-asserted-by":"crossref","unstructured":"E. F. Swana, W. Doorsamy, and P. Bokoro, \u201cTomek Link and SMOTE Approaches for Machine Fault Classification with an Imbalanced Dataset,\u201d Sensors, vol. 22, no. 9, p. 3246, Apr. 2022.","DOI":"10.3390\/s22093246"},{"key":"16788_CR38","doi-asserted-by":"crossref","unstructured":"Jonathan B, Putra PH, Ruldeviyani Y (2020) \u201cObservation Imbalanced Data Text to Predict Users Selling Products on Female Daily with SMOTE, Tomek, and SMOTE-Tomek,\u201d 2020 IEEE International Conference on Industry 4.0, Artificial Intelligence, and Communications Technology (IAICT)","DOI":"10.1109\/IAICT50021.2020.9172033"},{"key":"16788_CR39","doi-asserted-by":"crossref","unstructured":"Huang Y and Li L (2011) \u201cNaive Bayes classification algorithm based on small sample set,\u201d 2011 IEEE International Conference on Cloud Computing and Intelligence Systems","DOI":"10.1109\/CCIS.2011.6045027"},{"key":"16788_CR40","doi-asserted-by":"crossref","unstructured":"M. Khanna, A. Toofani, S. Bansal, and M. Asif, \u201cPerformance Comparison of Various Algorithms During Software Fault Prediction,\u201d Int J Grid and High Perform Comput, vol. 13, no. 2, pp. 70\u201394, Apr. 2021.","DOI":"10.4018\/IJGHPC.2021040105"},{"key":"16788_CR41","doi-asserted-by":"crossref","unstructured":"S. Goyal, \u201cHandling Class-Imbalance with KNN (Neighbourhood) Under-Sampling for Software Defect Prediction,\u201d Artificial Intell Rev, vol. 55, no. 3, pp. 2023\u20132064, Aug. 2021.","DOI":"10.1007\/s10462-021-10044-w"},{"key":"16788_CR42","doi-asserted-by":"crossref","unstructured":"Palak and Gulia P (2022) \u201cDecision tree\u2013based improved software fault prediction: a computational intelligence approach,\u201d Computational Intelligence in Software Modeling, pp. 163\u2013176","DOI":"10.1515\/9783110709247-011"},{"key":"16788_CR43","doi-asserted-by":"crossref","unstructured":"Kramer O (2017) \u201cGenetic Algorithms\u201d, In: Genetic Algorithm Essentials, Part of the Studies in Computational Intelligence book series (SCI, volume 679)","DOI":"10.1007\/978-3-319-52156-5"},{"issue":"1","key":"16788_CR44","first-page":"124","volume":"4","author":"M Tabassum","year":"2014","unstructured":"Tabassum M, Mathew K (2014) A genetic algorithm analysis towards optimization solutions. Int J Digital Inform Wireless Commun (IJDIWC) 4(1):124\u2013142","journal-title":"Int J Digital Inform Wireless Commun (IJDIWC)"},{"key":"16788_CR45","doi-asserted-by":"crossref","unstructured":"K. M. Hamdia, X. Zhuang, and T. Rabczuk, \u201cAn efficient optimization approach for designing machine learning models based on genetic algorithm,\u201d Neural Comput Appl, vol. 33, no. 6, pp. 1923\u20131933, Jun. 2020.","DOI":"10.1007\/s00521-020-05035-x"},{"key":"16788_CR46","doi-asserted-by":"crossref","unstructured":"I. D. Raji, H. Bello-Salau, I. J. Umoh, A. J. Onumanyi, M. A. Adegboye, and A. T. Salawudeen, \u201cSimple Deterministic Selection-Based Genetic Algorithm for Hyperparameter Tuning of Machine Learning Models,\u201d Appl Sci, vol. 12, no. 3, p. 1186, Jan. 2022.","DOI":"10.3390\/app12031186"},{"key":"16788_CR47","unstructured":"Available at https:\/\/sklearn-genetic-opt.readthedocs.io\/"},{"key":"16788_CR48","doi-asserted-by":"crossref","unstructured":"Mangla M, Sharma N, Mohanty SN (2021) \u201cA sequential ensemble model for software fault prediction,\u201d Innov Syst Software Eng","DOI":"10.1007\/s11334-021-00390-x"},{"key":"16788_CR49","doi-asserted-by":"crossref","unstructured":"S. S. Rathore and S. Kumar, \u201cSoftware fault prediction based on the dynamic selection of learning technique: findings from the eclipse project study,\u201d Appl Intell, vol. 51, no. 12, pp. 8945\u20138960, Apr. 2021.","DOI":"10.1007\/s10489-021-02346-x"}],"container-title":["Multimedia Tools and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11042-023-16788-7.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11042-023-16788-7\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11042-023-16788-7.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,5,7]],"date-time":"2024-05-07T11:20:15Z","timestamp":1715080815000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11042-023-16788-7"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,10,27]]},"references-count":49,"journal-issue":{"issue":"16","published-online":{"date-parts":[[2024,5]]}},"alternative-id":["16788"],"URL":"https:\/\/doi.org\/10.1007\/s11042-023-16788-7","relation":{},"ISSN":["1573-7721"],"issn-type":[{"value":"1573-7721","type":"electronic"}],"subject":[],"published":{"date-parts":[[2023,10,27]]},"assertion":[{"value":"5 August 2022","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"29 June 2023","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"31 August 2023","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"27 October 2023","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare no conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflicts of interest"}}]}}