{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,25]],"date-time":"2026-01-25T02:11:10Z","timestamp":1769307070190,"version":"3.49.0"},"publisher-location":"New York, NY, USA","reference-count":55,"publisher":"ACM","license":[{"start":{"date-parts":[[2022,11,7]],"date-time":"2022-11-07T00:00:00Z","timestamp":1667779200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"NSERC Discovery Grant","award":["315227"],"award-info":[{"award-number":["315227"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2022,11,7]]},"DOI":"10.1145\/3558489.3559073","type":"proceedings-article","created":{"date-parts":[[2022,11,9]],"date-time":"2022-11-09T20:08:53Z","timestamp":1668024533000},"page":"72-81","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":7,"title":["On the effectiveness of data balancing techniques in the context of ML-based test case prioritization"],"prefix":"10.1145","author":[{"given":"Jediael","family":"Mendoza","sequence":"first","affiliation":[{"name":"Carleton University, Canada"}]},{"given":"Jason","family":"Mycroft","sequence":"additional","affiliation":[{"name":"Carleton University, Canada"}]},{"given":"Lyam","family":"Milbury","sequence":"additional","affiliation":[{"name":"Carleton University, Canada"}]},{"given":"Nafiseh","family":"Kahani","sequence":"additional","affiliation":[{"name":"Carleton University, Canada"}]},{"given":"Jason","family":"Jaskolka","sequence":"additional","affiliation":[{"name":"Carleton University, Canada"}]}],"member":"320","published-online":{"date-parts":[[2022,11,9]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11219-016-9339-1"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICSE.2015.134"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICSE.2015.134"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1002\/stvr.1486"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1109\/TSE.2021.3070549"},{"key":"e_1_3_2_1_6_1","volume-title":"Ana LC Bazzan, and Maria Carolina Monard","author":"Batista Gustavo EAPA","year":"2003","unstructured":"Gustavo EAPA Batista, Ana LC Bazzan, and Maria Carolina Monard. 2003. Balancing training data for automated annotation of keywords: a case study. In WOB. IEEE, Maca\u00e9, Rio de Janeiro, Brazil. 10\u201318."},{"key":"e_1_3_2_1_7_1","volume-title":"A study of the behavior of several methods for balancing machine learning training data. ACM SIGKDD explorations newsletter, 6, 1","author":"Batista Gustavo EAPA","year":"2004","unstructured":"Gustavo EAPA Batista, Ronaldo C Prati, and Maria Carolina Monard. 2004. A study of the behavior of several methods for balancing machine learning training data. ACM SIGKDD explorations newsletter, 6, 1 (2004), 20\u201329."},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/MSR.2017.62"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1145\/3377811.3380369"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1023\/A:1010933404324"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1145\/1273496.1273513"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1613\/jair.953"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1145\/3236024.3236053"},{"key":"e_1_3_2_1_14_1","unstructured":"Dang Van. 2020. RankLib. https:\/\/sourceforge.net\/p\/lemur\/wiki\/RankLib\/"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICSE.2001.919106"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1145\/2635868.2635910"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1145\/3460319.3464834"},{"key":"e_1_3_2_1_18_1","unstructured":"Martin Fowler and Matthew Foemmel. 2006. Continuous integration."},{"key":"e_1_3_2_1_19_1","first-page":"933","article-title":"An efficient boosting algorithm for combining preferences","author":"Freund Yoav","year":"2003","unstructured":"Yoav Freund, Raj Iyer, Robert E Schapire, and Yoram Singer. 2003. An efficient boosting algorithm for combining preferences. Journal of machine learning research, 4, Nov (2003), 933\u2013969.","journal-title":"Journal of machine learning research, 4"},{"key":"e_1_3_2_1_20_1","first-page":"113","article-title":"Pairwise multiple comparison procedures with unequal n\u2019s and\/or variances: a Monte Carlo study","volume":"1","author":"Games Paul A","year":"1976","unstructured":"Paul A Games and John F Howell. 1976. Pairwise multiple comparison procedures with unequal n\u2019s and\/or variances: a Monte Carlo study. Journal of Educational Statistics, 1, 2 (1976), 113\u2013125.","journal-title":"Journal of Educational Statistics"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1007\/11538059_91"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIT.1968.1054155"},{"key":"e_1_3_2_1_23_1","volume-title":"ADASYN: adaptive synthetic sampling approach for imbalanced learning. In 2008 IEEE international joint conference on neural networks","author":"He Haibo","unstructured":"Haibo He, Yang Bai, Edwardo A Garcia, and Shutao Li. 2008. ADASYN: adaptive synthetic sampling approach for imbalanced learning. In 2008 IEEE international joint conference on neural networks (IEEE world congress on computational intelligence). IEEE, Hong Kong. 1322\u20131328."},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1109\/TKDE.2008.239"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICSE.2015.66"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1145\/2970276.2970358"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1109\/ESEM.2007.80"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1002\/smr.1794"},{"key":"e_1_3_2_1_29_1","volume-title":"Analysis of variance (ANOVA) comparing means of more than two groups. Restorative dentistry & endodontics, 39, 1","author":"Kim Hae-Young","year":"2014","unstructured":"Hae-Young Kim. 2014. Analysis of variance (ANOVA) comparing means of more than two groups. Restorative dentistry & endodontics, 39, 1 (2014), 74\u201377."},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1145\/581339.581357"},{"key":"e_1_3_2_1_31_1","first-page":"179","article-title":"Addressing the curse of imbalanced training sets: one-sided selection","volume":"97","author":"Kubat Miroslav","year":"1997","unstructured":"Miroslav Kubat and Stan Matwin. 1997. Addressing the curse of imbalanced training sets: one-sided selection. Icml, 97, 1 (1997), 179.","journal-title":"Icml"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1145\/3106237.3106288"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1007\/3-540-48229-6_9"},{"key":"e_1_3_2_1_34_1","first-page":"1","article-title":"Imbalanced-learn: a Python toolbox to tackle the curse of imbalanced datasets in machine learning","volume":"18","author":"Lema\u00eetre Guillaume","year":"2017","unstructured":"Guillaume Lema\u00eetre, Fernando Nogueira, and Christos K. Aridas. 2017. Imbalanced-learn: a Python toolbox to tackle the curse of imbalanced datasets in machine learning. Journal of Machine Learning Research, 18, 17 (2017), 1\u20135. http:\/\/jmlr.org\/papers\/v18\/16-365","journal-title":"Journal of Machine Learning Research"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.infsof.2020.106268"},{"key":"e_1_3_2_1_36_1","volume-title":"Proceedings of workshop on learning from imbalanced datasets. 126","author":"Mani Inderjeet","year":"2003","unstructured":"Inderjeet Mani and I Zhang. 2003. kNN approach to unbalanced data distributions: a case study involving information extraction. In Proceedings of workshop on learning from imbalanced datasets. 126, ICML, Washington, DC. 1\u20137."},{"key":"e_1_3_2_1_37_1","volume-title":"A common language effect size statistic.. Psychological bulletin, 111, 2","author":"McGraw Kenneth O","year":"1992","unstructured":"Kenneth O McGraw and Seok P Wong. 1992. A common language effect size statistic.. Psychological bulletin, 111, 2 (1992), 361."},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1007\/s10791-006-9019-z"},{"key":"e_1_3_2_1_39_1","volume-title":"Proceedings: Fifth International Workshop on Computational Intelligence & Applications. IEEE SMC Hiroshima Chapter","author":"Nguyen Hien M","year":"2009","unstructured":"Hien M Nguyen, Eric W Cooper, and Katsuari Kamei. 2009. Borderline over-sampling for imbalanced data classification. In Proceedings: Fifth International Workshop on Computational Intelligence & Applications. IEEE SMC Hiroshima Chapter, Hiroshima, Japan. 24\u201329."},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1145\/3460319.3464840"},{"key":"e_1_3_2_1_41_1","first-page":"1","article-title":"Test case selection and prioritization using machine learning: a systematic literature review","volume":"27","author":"Pan Rongqi","year":"2021","unstructured":"Rongqi Pan, Mojtaba Bagherzadeh, Taher Ahmed Ghaleb, and Lionel Briand. 2021. Test case selection and prioritization using machine learning: a systematic literature review. Empirical Software Engineering, 27, 29 (2021), 1\u201334.","journal-title":"Empirical Software Engineering"},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1145\/3395363.3397383"},{"key":"e_1_3_2_1_43_1","volume-title":"Finding defective modules from highly unbalanced datasets. Actas de los Talleres de las Jornadas de Ingenier\u00eda del Software y Bases de Datos, 2, 1","author":"Riquelme JC","year":"2008","unstructured":"JC Riquelme, R Ruiz, D Rodr\u00edguez, and J Moreno. 2008. Finding defective modules from highly unbalanced datasets. Actas de los Talleres de las Jornadas de Ingenier\u00eda del Software y Bases de Datos, 2, 1 (2008), 67\u201374."},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICSM.1999.792604"},{"key":"e_1_3_2_1_45_1","volume-title":"MSR","author":"Shu Rui","unstructured":"Rui Shu, Tianpei Xia, Laurie Williams, and Tim Menzies. 2022. Dazzle: using optimized generative adversarial networks to address security data class imbalance issue. In MSR. ACM, Pittsburgh, PA, USA. 1\u201312."},{"key":"e_1_3_2_1_46_1","volume-title":"An instance level analysis of data complexity. Machine learning, 95, 2","author":"Smith Michael R","year":"2014","unstructured":"Michael R Smith, Tony Martinez, and Christophe Giraud-Carrier. 2014. An instance level analysis of data complexity. Machine learning, 95, 2 (2014), 225\u2013256."},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.1145\/3092703.3092709"},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"publisher","DOI":"10.1109\/TSE.2018.2876537"},{"key":"e_1_3_2_1_49_1","first-page":"448","article-title":"An experiment with the edited nearest-neighbor rule","volume":"6","author":"Tomek Ivan","year":"1976","unstructured":"Ivan Tomek. 1976. An experiment with the edited nearest-neighbor rule. IEEE Transactions on Systems, Man, and Cybernetics: Systems, 6, 6 (1976), 448\u2013452.","journal-title":"IEEE Transactions on Systems, Man, and Cybernetics: Systems"},{"key":"e_1_3_2_1_50_1","first-page":"769","article-title":"Two modifications of CNN","volume":"11","author":"Tomek Ivan","year":"1976","unstructured":"Ivan Tomek. 1976. Two modifications of CNN. IEEE Transactions on Systems, Man, and Cybernetics: Systems, SMC-6, 11 (1976), 769\u2013772.","journal-title":"IEEE Transactions on Systems, Man, and Cybernetics: Systems, SMC-6"},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"publisher","DOI":"10.2307\/2332510"},{"key":"e_1_3_2_1_52_1","doi-asserted-by":"publisher","DOI":"10.1109\/TSMC.1972.4309137"},{"key":"e_1_3_2_1_53_1","volume-title":"Adapting boosting for information retrieval measures. Information Retrieval, 13, 3","author":"Wu Qiang","year":"2010","unstructured":"Qiang Wu, Christopher J. C. Burges, Krysta M. Svore, and Jianfeng Gao. 2010. Adapting boosting for information retrieval measures. Information Retrieval, 13, 3 (2010), 01 Jun, 254\u2013270. issn:1573-7659"},{"key":"e_1_3_2_1_54_1","doi-asserted-by":"publisher","DOI":"10.1109\/TSE.2022.3184842"},{"key":"e_1_3_2_1_55_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICIFE.2010.5609385"}],"event":{"name":"PROMISE '22: 18th International Conference on Predictive Models and Data Analytics in Software Engineering","location":"Singapore Singapore","acronym":"PROMISE '22","sponsor":["SIGSOFT ACM Special Interest Group on Software Engineering","NUS NUS"]},"container-title":["Proceedings of the 18th International Conference on Predictive Models and Data Analytics in Software Engineering"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3558489.3559073","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3558489.3559073","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T16:47:08Z","timestamp":1750178828000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3558489.3559073"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,11,7]]},"references-count":55,"alternative-id":["10.1145\/3558489.3559073","10.1145\/3558489"],"URL":"https:\/\/doi.org\/10.1145\/3558489.3559073","relation":{},"subject":[],"published":{"date-parts":[[2022,11,7]]},"assertion":[{"value":"2022-11-09","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}