{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,3]],"date-time":"2026-05-03T01:38:47Z","timestamp":1777772327409,"version":"3.51.4"},"reference-count":113,"publisher":"Springer Science and Business Media LLC","issue":"5","license":[{"start":{"date-parts":[[2026,4,29]],"date-time":"2026-04-29T00:00:00Z","timestamp":1777420800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2026,4,29]],"date-time":"2026-04-29T00:00:00Z","timestamp":1777420800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Empir Software Eng"],"published-print":{"date-parts":[[2026,9]]},"DOI":"10.1007\/s10664-026-10829-z","type":"journal-article","created":{"date-parts":[[2026,4,29]],"date-time":"2026-04-29T12:47:46Z","timestamp":1777466866000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Predicting Post-release Defects with Knowledge Units (KUs) of Programming Languages: An Empirical Study"],"prefix":"10.1007","volume":"31","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-3709-2741","authenticated-orcid":false,"given":"Md","family":"Ahasanuzzaman","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Gustavo A.","family":"Oliva","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ahmed E.","family":"Hassan","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zhen Ming","family":"Jiang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2026,4,29]]},"reference":[{"key":"10829_CR1","doi-asserted-by":"crossref","unstructured":"Aggarwal KK, Singh Y, Kaur A, Malhotra R (2009a) Empirical analysis for investigating the effect of object-oriented metrics on fault proneness: a replicated case study. Softw Process: Improve Pract 14(1):39\u201362","DOI":"10.1002\/spip.389"},{"key":"10829_CR2","doi-asserted-by":"crossref","unstructured":"Aggarwal K, Singh Y, Kaur A, Malhotra R (2009b) Empirical analysis for investigating the effect of object-oriented metrics on fault proneness: a replicated case study. Softw Process: Improve Pract 14(1):39\u201362","DOI":"10.1002\/spip.389"},{"issue":"1","key":"10829_CR3","doi-asserted-by":"publisher","first-page":"33","DOI":"10.1007\/s10664-023-10421-9","volume":"29","author":"M Ahasanuzzaman","year":"2024","unstructured":"Ahasanuzzaman M, Oliva GA, Hassan AE (2024) Using knowledge units of programming languages to recommend reviewers for pull requests: an empirical study. Empir Softw Eng 29(1):33","journal-title":"Empir Softw Eng"},{"issue":"11","key":"10829_CR4","doi-asserted-by":"publisher","first-page":"1180","DOI":"10.3390\/math9111180","volume":"9","author":"EN Akimova","year":"2021","unstructured":"Akimova EN, Bersenev AY, Deikov AA, Kobylkin KS, Konygin AV, Mezentsev IP, Misilov VE (2021) A survey on software defect prediction using deep learning. Mathematics 9(11):1180","journal-title":"Mathematics"},{"issue":"5","key":"10829_CR5","doi-asserted-by":"publisher","first-page":"273","DOI":"10.1109\/TSE.2007.1005","volume":"33","author":"C Andersson","year":"2007","unstructured":"Andersson C, Runeson P (2007) A replicated quantitative analysis of fault distributions in complex software systems. IEEE Trans Software Eng 33(5):273\u2013286","journal-title":"IEEE Trans Software Eng"},{"issue":"5","key":"10829_CR6","doi-asserted-by":"publisher","first-page":"3981","DOI":"10.1007\/s00521-021-06659-3","volume":"34","author":"P Ardimento","year":"2022","unstructured":"Ardimento P, Aversano L, Bernardi ML, Cimitile M, Iammarino M (2022) Just-in-time software defect prediction using deep temporal convolutional networks. Neural Comput Appl 34(5):3981\u20134001","journal-title":"Neural Comput Appl"},{"key":"10829_CR7","doi-asserted-by":"crossref","unstructured":"Arisholm E, Briand LC (2006) Predicting fault-prone components in a java legacy system. In: Proceedings of the 2006 ACM\/IEEE international symposium on Empirical software engineering, pp 8\u201317","DOI":"10.1145\/1159733.1159738"},{"key":"10829_CR8","doi-asserted-by":"crossref","unstructured":"Arisholm E, Briand LC, Fuglerud M (2007) Data mining techniques for building fault-proneness models in telecom java software. In: Proceedings of the 18th IEEE international symposium on software reliability, pp 215\u2013224","DOI":"10.1109\/ISSRE.2007.22"},{"issue":"1","key":"10829_CR9","doi-asserted-by":"publisher","first-page":"2","DOI":"10.1016\/j.jss.2009.06.055","volume":"83","author":"E Arisholm","year":"2010","unstructured":"Arisholm E, Briand LC, Johannessen EB (2010) A systematic and comprehensive investigation of methods to build and evaluate fault prediction models. J Syst Softw 83(1):2\u201317","journal-title":"J Syst Softw"},{"issue":"10","key":"10829_CR10","doi-asserted-by":"publisher","first-page":"8675","DOI":"10.1016\/j.jksuci.2021.09.010","volume":"34","author":"US Bhutamapuram","year":"2022","unstructured":"Bhutamapuram US, Sadam R (2022) With-in-project defect prediction using bootstrap aggregation based diverse ensemble learning technique. J King Saud Univ Comput Inf Sci 34(10):8675\u20138691","journal-title":"J King Saud Univ Comput Inf Sci"},{"issue":"1","key":"10829_CR11","first-page":"1063","volume":"13","author":"G Biau","year":"2012","unstructured":"Biau G (2012) Analysis of a random forests model. J Mach Learn Res 13(1):1063\u20131095","journal-title":"J Mach Learn Res"},{"key":"10829_CR12","doi-asserted-by":"crossref","unstructured":"Briand LC, Daly J, Porter V, Wust J (1998) A comprehensive empirical validation of design measures for object-oriented systems. In: Proceedings Fifth International Software Metrics Symposium. Metrics (Cat. No. 98TB100262), pp 246\u2013257","DOI":"10.1109\/METRIC.1998.731251"},{"key":"10829_CR13","doi-asserted-by":"crossref","unstructured":"Briand LC, W\u00fcst J, Daly JW, Porter DV (2000) Exploring the relationship between design measures and software quality in object-oriented systems. J Syst Softw 51(3):245\u2013273","DOI":"10.1016\/S0164-1212(99)00102-8"},{"key":"10829_CR14","doi-asserted-by":"crossref","unstructured":"Burrows R, Ferrari FC, Lemos OA, Garcia A, Taiani F (2010) The impact of coupling on the fault-proneness of aspect-oriented programs: An empirical study. In: Proceedings of the 21st international symposium on software reliability engineering, pp 329\u2013338","DOI":"10.1109\/ISSRE.2010.33"},{"issue":"6","key":"10829_CR15","doi-asserted-by":"publisher","first-page":"152","DOI":"10.1007\/s10664-022-10186-7","volume":"27","author":"J \u00c7arka","year":"2022","unstructured":"\u00c7arka J, Esposito M, Falessi D (2022) On effort-aware metrics for defect prediction. Empir Softw Eng 27(6):152","journal-title":"Empir Softw Eng"},{"key":"10829_CR16","doi-asserted-by":"publisher","first-page":"321","DOI":"10.1613\/jair.953","volume":"16","author":"NV Chawla","year":"2002","unstructured":"Chawla NV, Bowyer KW, Hall LO, Kegelmeyer WP (2002) Smote: synthetic minority over-sampling technique. J Artif Intell Res 16:321\u2013357","journal-title":"J Artif Intell Res"},{"key":"10829_CR17","doi-asserted-by":"publisher","first-page":"184832","DOI":"10.1109\/ACCESS.2019.2961129","volume":"7","author":"D Chen","year":"2019","unstructured":"Chen D, Chen X, Li H, Xie J, Mu Y (2019) Deepcpdp: Deep learning based cross-project defect prediction. IEEE Access 7:184832\u2013184848","journal-title":"IEEE Access"},{"issue":"6","key":"10829_CR18","doi-asserted-by":"publisher","first-page":"323","DOI":"10.1016\/j.ygeno.2012.04.003","volume":"99","author":"X Chen","year":"2012","unstructured":"Chen X, Ishwaran H (2012) Random forests for genomic data analysis. Genomics 99(6):323\u2013329","journal-title":"Genomics"},{"issue":"6","key":"10829_CR19","doi-asserted-by":"publisher","first-page":"476","DOI":"10.1109\/32.295895","volume":"20","author":"SR Chidamber","year":"1994","unstructured":"Chidamber SR, Kemerer CF (1994) A Metrics Suite for Object Oriented Design. IEEE Trans Software Eng 20(6):476\u2013493","journal-title":"IEEE Trans Software Eng"},{"key":"10829_CR20","unstructured":"CRAN (2020) shapper: Wrapper of Python Library \u2019shap\u2019. https:\/\/cran.r-project.org\/web\/packages\/shapper\/index.html, (Last accessed: October 2025)"},{"key":"10829_CR21","doi-asserted-by":"crossref","unstructured":"Dai H, Xi J, Dai HL (2024) Improving effort-aware just-in-time defect prediction with weighted code churn and multi-objective slime mold algorithm. Heliyon 10(18)","DOI":"10.1016\/j.heliyon.2024.e37360"},{"issue":"6","key":"10829_CR22","doi-asserted-by":"publisher","first-page":"2086","DOI":"10.1109\/TSE.2021.3051492","volume":"48","author":"S Dalla Palma","year":"2021","unstructured":"Dalla Palma S, Di Nucci D, Palomba F, Tamburri DA (2021) Within-project defect prediction of infrastructure-as-code using product and process metrics. IEEE Trans Software Eng 48(6):2086\u20132104","journal-title":"IEEE Trans Software Eng"},{"key":"10829_CR23","doi-asserted-by":"crossref","unstructured":"Dam HK, Pham T, Ng SW, Tran T, Grundy J, Ghose A, Kim T, Kim CJ (2019) Lessons learned from using a deep tree-based model for software defect prediction in practice. In: 2019 IEEE\/ACM 16th international conference on mining software repositories (MSR), pp 46\u201357","DOI":"10.1109\/MSR.2019.00017"},{"key":"10829_CR24","unstructured":"Dem\u0161ar J (2006) Statistical comparisons of classifiers over multiple data sets. J Mach Learn Res 7(Jan):1\u201330"},{"key":"10829_CR25","doi-asserted-by":"publisher","first-page":"103381","DOI":"10.1016\/j.scico.2025.103381","volume":"248","author":"Y Ding","year":"2026","unstructured":"Ding Y, Han W, Li Z, Chen H, Chen L, Peng R, Jing XY (2026) Metric information mining with metric attention to boost software defect prediction performance. Sci Comput Program 248:103381","journal-title":"Sci Comput Program"},{"key":"10829_CR26","unstructured":"Eclipse (2020) Eclipse Java development tools (JDT). http:\/\/www.eclipse.org\/jdt\/, (Last accessed: October 2025)"},{"issue":"382","key":"10829_CR27","doi-asserted-by":"publisher","first-page":"316","DOI":"10.1080\/01621459.1983.10477973","volume":"78","author":"B Efron","year":"1983","unstructured":"Efron B (1983) Estimating the error rate of a prediction rule: improvement on cross-validation. J Am Stat Assoc 78(382):316\u2013331","journal-title":"J Am Stat Assoc"},{"key":"10829_CR28","doi-asserted-by":"publisher","first-page":"63","DOI":"10.1016\/S0164-1212(00)00086-8","volume":"56","author":"KE Emam","year":"2001","unstructured":"Emam KE, Melo W, Machado JC (2001) The prediction of faulty classes using object-oriented design metrics. J Syst Softw 56:63\u201375","journal-title":"J Syst Softw"},{"key":"10829_CR29","doi-asserted-by":"crossref","unstructured":"English M, Exton C, Rigon I, Cleary B (2009) Fault detection and prediction in an open-source software project. In: Proceedings of the 5th international conference on predictor models in software engineering, pp 1\u201311","DOI":"10.1145\/1540438.1540462"},{"issue":"3","key":"10829_CR30","doi-asserted-by":"publisher","first-page":"369","DOI":"10.1007\/s10515-020-00277-4","volume":"27","author":"G Esteves","year":"2020","unstructured":"Esteves G, Figueiredo E, Veloso A, Viggiato M, Ziviani N (2020) Understanding machine learning software defect predictions. Autom Softw Eng 27(3):369\u2013392","journal-title":"Autom Softw Eng"},{"issue":"6","key":"10829_CR31","doi-asserted-by":"publisher","first-page":"4805","DOI":"10.1007\/s10664-020-09868-x","volume":"25","author":"D Falessi","year":"2020","unstructured":"Falessi D, Huang J, Narayana L, Thai JF, Turhan B (2020) On the need of preserving order of data when validating within-project defect classifiers. Empir Softw Eng 25(6):4805\u20134830","journal-title":"Empir Softw Eng"},{"key":"10829_CR32","doi-asserted-by":"crossref","unstructured":"Feng Z, Guo D, Tang D, Duan N, Feng X, Gong M, Shou L, Qin B, Liu T, Jiang D, Zhou M (2020) CodeBERT: A pre-trained model for programming and natural languages. In: Cohn T, He Y, Liu Y (eds) Findings of the Association for Computational Linguistics: EMNLP 2020, pp 1536\u20131547","DOI":"10.18653\/v1\/2020.findings-emnlp.139"},{"issue":"8","key":"10829_CR33","doi-asserted-by":"publisher","first-page":"797","DOI":"10.1109\/32.879815","volume":"26","author":"NE Fenton","year":"2000","unstructured":"Fenton NE, Ohlsson N (2000) Quantitative analysis of faults and failures in a complex software system. IEEE Trans Software Eng 26(8):797\u2013814","journal-title":"IEEE Trans Software Eng"},{"key":"10829_CR34","doi-asserted-by":"crossref","unstructured":"Fu M, Tantithamthavorn C (2022) Linevul: a transformer-based line-level vulnerability prediction. In: Proceedings of the 19th international conference on mining software repositories, pp 608\u2013620","DOI":"10.1145\/3524842.3528452"},{"key":"10829_CR35","unstructured":"Garcia S, Herrera F (2008) An extension on \u201cstatistical comparisons of classifiers over multiple data sets\u201d for all pairwise comparisons. J Mach Learn Res 9(12)"},{"key":"10829_CR36","doi-asserted-by":"crossref","unstructured":"Ghotra B, McIntosh S, Hassan AE (2015) Revisiting the impact of classification techniques on the performance of defect prediction models. In: Proceedings of the 37th IEEE international conference on software engineering, pp 789\u2013800","DOI":"10.1109\/ICSE.2015.91"},{"key":"10829_CR37","doi-asserted-by":"crossref","unstructured":"Giger E, D\u2019Ambros M, Pinzger M, Gall HC (2012) Method-level bug prediction. In: Proceedings of the ACM-IEEE international symposium on empirical software engineering and measurement, pp 171\u2013180","DOI":"10.1145\/2372251.2372285"},{"issue":"4","key":"10829_CR38","doi-asserted-by":"publisher","first-page":"2440","DOI":"10.1109\/TSE.2022.3220740","volume":"49","author":"L Gong","year":"2022","unstructured":"Gong L, Zhang H, Zhang J, Wei M, Huang Z (2022) A comprehensive investigation of the impact of class overlap on software defect prediction. IEEE Trans Software Eng 49(4):2440\u20132458","journal-title":"IEEE Trans Software Eng"},{"key":"10829_CR39","doi-asserted-by":"crossref","unstructured":"Guo Y, Gao X, Zhang Z, Chan WK, Jiang B (2023) A study on the impact of pre-trained model on just-in-time defect prediction. In: 2023 IEEE 23rd international conference on software quality, reliability, and security (QRS), pp 105\u2013116","DOI":"10.1109\/QRS60937.2023.00020"},{"key":"10829_CR40","unstructured":"Guo D, Ren S, Lu S, Feng Z, Tang D, Liu S, Zhou L, Duan N, Svyatkovskiy A, Fu S et al (2020) Graphcodebert: pre-training code representations with data flow. arXiv preprint arXiv:2009.08366"},{"issue":"10","key":"10829_CR41","doi-asserted-by":"publisher","first-page":"897","DOI":"10.1109\/TSE.2005.112","volume":"31","author":"T Gyimothy","year":"2005","unstructured":"Gyimothy T, Ferenc R, Siket I (2005) Empirical validation of object-oriented metrics on open source software for fault prediction. IEEE Trans Software Eng 31(10):897\u2013910","journal-title":"IEEE Trans Software Eng"},{"key":"10829_CR42","doi-asserted-by":"crossref","unstructured":"Hoang T, Dam HK, Kamei Y, Lo D, Ubayashi N (2019) DeepJIT: An end-to-end deep learning framework for just-in-time defect prediction. In: Proceedings of the 16th international conference on mining software repositories, pp 34\u201345","DOI":"10.1109\/MSR.2019.00016"},{"key":"10829_CR43","unstructured":"Hussain RG, Yow KC, Gori M (2025) Leveraging an enhanced codebert-based model for multiclass software defect prediction via defect classification. IEEE Access"},{"issue":"2","key":"10829_CR44","doi-asserted-by":"publisher","first-page":"320","DOI":"10.1109\/TSE.2019.2891758","volume":"47","author":"J Jiarpakdee","year":"2019","unstructured":"Jiarpakdee J, Tantithamthavorn C, Hassan AE (2019) The impact of correlated metrics on the interpretation of defect models. IEEE Trans Software Eng 47(2):320\u2013331","journal-title":"IEEE Trans Software Eng"},{"key":"10829_CR45","doi-asserted-by":"crossref","unstructured":"Jiarpakdee J, Tantithamthavorn C, Ihara A, Matsumoto K (2016) A study of redundant metrics in defect prediction datasets. In: 2016 IEEE international symposium on software reliability engineering workshops (ISSREW), pp 51\u201352","DOI":"10.1109\/ISSREW.2016.30"},{"key":"10829_CR46","doi-asserted-by":"crossref","unstructured":"Jiarpakdee J, Tantithamthavorn C, Treude C (2018) Autospearman: automatically mitigating correlated software metrics for interpreting defect models. In: Proceedings of the 37th international conference on software maintenance and evolution, pp 92\u2013103","DOI":"10.1109\/ICSME.2018.00018"},{"issue":"5","key":"10829_CR47","doi-asserted-by":"publisher","first-page":"3590","DOI":"10.1007\/s10664-020-09848-1","volume":"25","author":"J Jiarpakdee","year":"2020","unstructured":"Jiarpakdee J, Tantithamthavorn C, Treude C (2020) The impact of automated feature selection techniques on the interpretation of defect models. Empir Softw Eng 25(5):3590\u20133638","journal-title":"Empir Softw Eng"},{"issue":"3","key":"10829_CR48","doi-asserted-by":"publisher","first-page":"413","DOI":"10.1177\/001316445901900314","volume":"19","author":"HF Kaiser","year":"1959","unstructured":"Kaiser HF (1959) Computer program for varimax rotation in factor analysis. Educ Psychol Measur 19(3):413\u2013420","journal-title":"Educ Psychol Measur"},{"issue":"5","key":"10829_CR49","doi-asserted-by":"publisher","first-page":"483","DOI":"10.1016\/j.infsof.2006.07.005","volume":"49","author":"S Kanmani","year":"2007","unstructured":"Kanmani S, Uthariaraj VR, Sankaranarayanan V, Thambidurai P (2007) Object-oriented software fault prediction using neural networks. Inf Softw Technol 49(5):483\u2013492","journal-title":"Inf Softw Technol"},{"issue":"2","key":"10829_CR50","doi-asserted-by":"publisher","first-page":"579","DOI":"10.1007\/s10664-016-9437-5","volume":"22","author":"B Kitchenham","year":"2017","unstructured":"Kitchenham B, Madeyski L, Budgen D, Keung J, Brereton P, Charters S, Gibbs S, Pohthong A (2017) Robust statistical methods for empirical software engineering. Empir Softw Eng 22(2):579\u2013630","journal-title":"Empir Softw Eng"},{"key":"10829_CR51","doi-asserted-by":"publisher","first-page":"473","DOI":"10.1007\/s10664-008-9080-x","volume":"13","author":"AG Koru","year":"2008","unstructured":"Koru AG, Emam KE, Zhang D, Liu H, Mathew D (2008) Theory of relative defect proneness: replicated studies on the functional form of the size-defect relationship. Empir Softw Eng 13:473\u2013498","journal-title":"Empir Softw Eng"},{"issue":"2","key":"10829_CR52","doi-asserted-by":"publisher","first-page":"293","DOI":"10.1109\/TSE.2008.90","volume":"35","author":"AG Koru","year":"2008","unstructured":"Koru AG, Zhang D, El Emam K, Liu H (2008) An investigation into the functional form of the size-defect relationship for software modules. IEEE Trans Software Eng 35(2):293\u2013304","journal-title":"IEEE Trans Software Eng"},{"key":"10829_CR53","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1016\/j.csi.2017.02.003","volume":"53","author":"L Kumar","year":"2017","unstructured":"Kumar L, Misra S, Rath SK (2017) An empirical analysis of the effectiveness of software metrics and fault prediction model for identifying faulty classes. Comput Stand Interfaces 53:1\u201332","journal-title":"Comput Stand Interfaces"},{"issue":"2","key":"10829_CR54","first-page":"255","volume":"22","author":"S Kwon","year":"2023","unstructured":"Kwon S, Lee S, Ryu D, Baik J (2023) Pre-trained model-based software defect prediction for edge-cloud systems. J Web Eng 22(2):255\u2013278","journal-title":"J Web Eng"},{"issue":"6","key":"10829_CR55","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1007\/s10664-021-09996-y","volume":"26","author":"C Laaber","year":"2021","unstructured":"Laaber C, Basmaci M, Salza P (2021) Predicting unstable software benchmarks using static source code features. Empir Softw Eng 26(6):1\u201353","journal-title":"Empir Softw Eng"},{"key":"10829_CR56","doi-asserted-by":"publisher","first-page":"83812","DOI":"10.1109\/ACCESS.2019.2925313","volume":"7","author":"H Liang","year":"2019","unstructured":"Liang H, Yu Y, Jiang L, Xie Z (2019) Seml: a semantic lstm model for software defect prediction. IEEE Access 7:83812\u201383824","journal-title":"IEEE Access"},{"key":"10829_CR57","doi-asserted-by":"crossref","unstructured":"Li J, He P, Zhu J, Lyu MR (2017) Software defect prediction via convolutional neural network. In: 2017 IEEE international conference on software quality, reliability and security (QRS), IEEE, pp 318\u2013328","DOI":"10.1109\/QRS.2017.42"},{"key":"10829_CR58","doi-asserted-by":"crossref","unstructured":"Li J, He P, Zhu J, Lyu MR (2017) Software defect prediction via convolutional neural network. In: Proceedings of the 20th IEEE international conference on software quality, reliability and security, pp 318\u2013328","DOI":"10.1109\/QRS.2017.42"},{"key":"10829_CR59","doi-asserted-by":"crossref","unstructured":"Li H, Li S, Sun J, Xing Z, Peng X, Liu M, Zhao X (2018) Improving api caveats accessibility by mining api caveats knowledge graph. In: 2018 IEEE international conference on software maintenance and evolution (ICSME), IEEE, pp 183\u2013193","DOI":"10.1109\/ICSME.2018.00028"},{"key":"10829_CR60","doi-asserted-by":"crossref","unstructured":"Liu Y, Liu M, Peng X, Treude C, Xing Z, Zhang X (2020a) Generating concept based api element comparison using a knowledge graph. In: Proceedings of the 35th IEEE\/ACM international conference on automated software engineering, pp 834\u2013845","DOI":"10.1145\/3324884.3416628"},{"key":"10829_CR61","doi-asserted-by":"crossref","unstructured":"Liu Q, Xiang J, Xu B, Zhao D, Hu W, Wang J (2020b) Aging-related bugs prediction via convolutional neural network. In: 2020 7th International conference on dependable systems and their applications (DSA), pp 90\u201398","DOI":"10.1109\/DSA51864.2020.00020"},{"issue":"12","key":"10829_CR62","doi-asserted-by":"publisher","first-page":"e2715","DOI":"10.1002\/smr.2715","volume":"36","author":"X Liu","year":"2024","unstructured":"Liu X, Zhou Y, Lu Z, Mei Y, Yang Y, Qian J, Zhou Y (2024) Unveiling the impact of unchanged modules across versions on the evaluation of within-project defect prediction models. J Softw Evol Process 36(12):e2715","journal-title":"J Softw Evol Process"},{"key":"10829_CR63","doi-asserted-by":"crossref","unstructured":"Liu J, Zhu X, Liu C, Cui X, Liu Q (2022) Cpgbert: An effective model for defect detection by learning program semantics via code property graph. In: 2022 IEEE international conference on trust. Security and Privacy in Computing and communications (TrustCom). IEEE, pp 274\u2013282","DOI":"10.1109\/TrustCom56396.2022.00046"},{"key":"10829_CR64","doi-asserted-by":"crossref","unstructured":"Long JD, Feng D, Cliff N (2003) Ordinal analysis of behavioral data. Handbook of psychology pp 635\u2013661","DOI":"10.1002\/0471264385.wei0225"},{"key":"10829_CR65","doi-asserted-by":"crossref","unstructured":"Ma\u0107kiewicz A, Ratajczak W (1993) Principal components analysis (PCA). Comput Geosci 19(3):303\u2013342","DOI":"10.1016\/0098-3004(93)90090-R"},{"issue":"3","key":"10829_CR66","doi-asserted-by":"publisher","first-page":"60","DOI":"10.1007\/s10664-021-10068-4","volume":"27","author":"S Majumder","year":"2022","unstructured":"Majumder S, Mody P, Menzies T (2022) Revisiting process versus product metrics: a large scale analysis. Empir Softw Eng 27(3):60","journal-title":"Empir Softw Eng"},{"key":"10829_CR67","doi-asserted-by":"crossref","unstructured":"Malhotra R, Singh P (2024) Codebert-bigru for software defect prediction. In: International conference on artificial intelligence and speech technology. Springer, pp 277\u2013289","DOI":"10.1007\/978-3-031-91340-2_22"},{"key":"10829_CR68","unstructured":"Menzies T (2020) Scott Knot with nonparametric effect size and significance test. https:\/\/gist.github.com\/timm\/41b3a8790c1adce26d63c5874fbea393, (Last accessed: October 2025)"},{"key":"10829_CR69","unstructured":"Mezouar ME, Zhang F, Zou Y (2016) Local versus global models for effort-aware defect prediction. In: Proceedings of the 26th annual international conference on computer science and software engineering, pp 178\u2013187"},{"issue":"4","key":"10829_CR70","doi-asserted-by":"publisher","first-page":"537","DOI":"10.1109\/TSE.2012.45","volume":"39","author":"N Mittas","year":"2013","unstructured":"Mittas N, Angelis L (2013) Ranking and clustering software cost estimation models through a multiple comparisons algorithm. IEEE Trans Software Eng 39(4):537\u2013551","journal-title":"IEEE Trans Software Eng"},{"key":"10829_CR71","unstructured":"Molnar C (2020) Interpretable machine learning"},{"key":"10829_CR72","doi-asserted-by":"crossref","unstructured":"Moser R, Pedrycz W, Succi G (2008) A comparative analysis of the efficiency of change metrics and static code attributes for defect prediction. In: Proceedings of the 30th international conference on software engineering, pp 181\u2013190","DOI":"10.1145\/1368088.1368114"},{"key":"10829_CR73","doi-asserted-by":"crossref","unstructured":"Moussa R, Sarro F (2022) On the use of evaluation measures for defect prediction studies. In: Proceedings of the 31st ACM SIGSOFT international symposium on software testing and analysis, pp 101\u2013113","DOI":"10.1145\/3533767.3534405"},{"key":"10829_CR74","doi-asserted-by":"crossref","unstructured":"Nagappan N, Ball T, Murphy B (2006a) Using historical in-process and product metrics for early estimation of software failures. In: Proceedings of the 17th international symposium on software reliability engineering, pp 62\u201374","DOI":"10.1109\/ISSRE.2006.50"},{"key":"10829_CR75","doi-asserted-by":"crossref","unstructured":"Nagappan N, Ball T, Zeller A (2006b) Mining metrics to predict component failures. In: Proceedings of the 28th international conference on software engineering, pp 452\u2013461","DOI":"10.1145\/1134285.1134349"},{"key":"10829_CR76","doi-asserted-by":"crossref","unstructured":"Nam D, Macvean A, Hellendoorn V, Vasilescu B, Myers B (2024) Using an llm to help with code understanding. In: Proceedings of the 46th international conference on software engineering, pp 1\u201313","DOI":"10.1145\/3597503.3639187"},{"issue":"7","key":"10829_CR77","doi-asserted-by":"publisher","first-page":"5723","DOI":"10.1007\/s11831-022-09787-8","volume":"29","author":"M Nevendra","year":"2022","unstructured":"Nevendra M, Singh P (2022) A survey of software defect prediction based on deep learning. Arch Comput Methods Eng 29(7):5723\u20135748","journal-title":"Arch Comput Methods Eng"},{"issue":"3","key":"10829_CR78","doi-asserted-by":"publisher","first-page":"786","DOI":"10.1109\/TSE.2020.3001739","volume":"48","author":"C Ni","year":"2020","unstructured":"Ni C, Xia X, Lo D, Chen X, Gu Q (2020) Revisiting supervised and unsupervised methods for effort-aware cross-project defect prediction. IEEE Trans Software Eng 48(3):786\u2013802","journal-title":"IEEE Trans Software Eng"},{"key":"10829_CR79","doi-asserted-by":"crossref","unstructured":"Nu\u00f1ez-Varela AS, P\u00e9rez-Gonzalez HG, Mart\u00ednez-Perez FE, Soubervielle-Montalvo C (2017) Source code metrics: a systematic mapping study. J Syst Softw 128:164\u2013197","DOI":"10.1016\/j.jss.2017.03.044"},{"issue":"6","key":"10829_CR80","doi-asserted-by":"publisher","first-page":"402","DOI":"10.1109\/TSE.2007.1015","volume":"33","author":"HM Olague","year":"2007","unstructured":"Olague HM, Etzkorn LH, Gholston S, Quattlebaum S (2007) Empirical validation of three software metrics suites to predict fault-proneness of object-oriented classes developed using highly iterative or agile software development processes. IEEE Trans Software Eng 33(6):402\u2013419","journal-title":"IEEE Trans Software Eng"},{"key":"10829_CR81","doi-asserted-by":"crossref","unstructured":"Omri S, Sinz C (2020) Deep learning for software defect prediction: a survey. In: Proceedings of the IEEE\/ACM 42nd international conference on software engineering workshops, pp 209\u2013214","DOI":"10.1145\/3387940.3391463"},{"key":"10829_CR82","unstructured":"Oracle (2022a) Oracle certified associate, Java SE 8 Programmer. https:\/\/education.oracle.com\/oracle-certified-associate-java-se-8-programmer\/trackp_333, (Last accessed: October 2025)"},{"key":"10829_CR83","unstructured":"Oracle (2022b) Oracle certified professional, Java EE 7 Application Developer. https:\/\/education.oracle.com\/oracle-certified-professional-java-ee-7-application-developer\/pexam_1Z0-900, (Last accessed: October 2025)"},{"key":"10829_CR84","unstructured":"Oracle (2022c) Oracle certified professional, Java SE 8 Programmer. https:\/\/education.oracle.com\/oracle-certified-professional-java-se-8-programmer\/trackp_357, (Last accessed: October 2025)"},{"issue":"4","key":"10829_CR85","doi-asserted-by":"publisher","first-page":"340","DOI":"10.1109\/TSE.2005.49","volume":"31","author":"TJ Ostrand","year":"2005","unstructured":"Ostrand TJ, Weyuker EJ, Bell RM (2005) Predicting the location and number of faults in large software systems. IEEE Trans Software Eng 31(4):340\u2013355","journal-title":"IEEE Trans Software Eng"},{"issue":"4","key":"10829_CR86","doi-asserted-by":"publisher","first-page":"e2548","DOI":"10.1002\/smr.2548","volume":"36","author":"A Ouellet","year":"2024","unstructured":"Ouellet A, Badri M (2024) Combining object-oriented metrics and centrality measures to predict faults in object-oriented software: an empirical validation. J Softw Evol Process 36(4):e2548","journal-title":"J Softw Evol Process"},{"issue":"11","key":"10829_CR87","doi-asserted-by":"publisher","first-page":"4793","DOI":"10.3390\/app11114793","volume":"11","author":"C Pan","year":"2021","unstructured":"Pan C, Lu M, Xu B (2021) An empirical study on software defect prediction using codebert model. Appl Sci 11(11):4793","journal-title":"Appl Sci"},{"issue":"1","key":"10829_CR88","doi-asserted-by":"publisher","first-page":"84","DOI":"10.1109\/TSE.2022.3144348","volume":"49","author":"C Pornprasit","year":"2022","unstructured":"Pornprasit C, Tantithamthavorn CK (2022) Deeplinedp: towards a deep learning approach for line-level defect prediction. IEEE Trans Software Eng 49(1):84\u201398","journal-title":"IEEE Trans Software Eng"},{"issue":"3","key":"10829_CR89","doi-asserted-by":"publisher","first-page":"e1301","DOI":"10.1002\/widm.1301","volume":"9","author":"P Probst","year":"2019","unstructured":"Probst P, Wright MN, Boulesteix AL (2019) Hyperparameters and tuning strategies for random forest. Wiley Interdisc Rev Data Mining Knowl Disc 9(3):e1301","journal-title":"Wiley Interdisc Rev Data Mining Knowl Disc"},{"key":"10829_CR90","doi-asserted-by":"crossref","unstructured":"Rahman F, Devanbu P (2013) How, and why, process metrics are better. In: Proceedings of the 35th international conference on software engineering, pp 432\u2013441","DOI":"10.1109\/ICSE.2013.6606589"},{"issue":"7","key":"10829_CR91","doi-asserted-by":"publisher","first-page":"2245","DOI":"10.1109\/TSE.2021.3056941","volume":"48","author":"GK Rajbahadur","year":"2022","unstructured":"Rajbahadur GK, Wang S, Oliva GA, Kamei Y, Hassan AE (2022) The impact of feature importance methods on the interpretation of defect classifiers. IEEE Trans Software Eng 48(7):2245\u20132261","journal-title":"IEEE Trans Software Eng"},{"key":"10829_CR92","doi-asserted-by":"crossref","unstructured":"Rebro DA, Chren S, Rossi B (2023) Source code metrics for software defects prediction. In: Proceedings of the 38th ACM\/SIGAPP symposium on applied computing, pp 1469\u20131472","DOI":"10.1145\/3555776.3577809"},{"key":"10829_CR93","unstructured":"Romano J, Kromrey J, Coraggio J, Skowronek J (2006) Appropriate statistics for ordinal level data: Should we really be using t-test and Cohen\u2019sd for evaluating group differences on the NSSE and other surveys? In: Annual meeting of the florida association of institutional research, pp 1\u20133"},{"key":"10829_CR94","doi-asserted-by":"publisher","first-page":"22582","DOI":"10.1109\/ACCESS.2024.3362896","volume":"12","author":"S Sahar","year":"2024","unstructured":"Sahar S, Younas M, Khan MM, Sarwar MU (2024) Dp-ccl: a supervised contrastive learning approach using codebert model in software defect prediction. IEEE Access 12:22582\u201322594","journal-title":"IEEE Access"},{"issue":"5","key":"10829_CR95","doi-asserted-by":"publisher","first-page":"116","DOI":"10.1007\/s10664-023-10329-4","volume":"28","author":"F Santos","year":"2023","unstructured":"Santos F, Vargovich J, Trinkenreich B, Santos I, Penney J, Britto R, Pimentel JF, Wiese I, Steinmacher I, Sarma A et al (2023) Tag that issue: applying API-domain labels in issue tracking systems. Empir Softw Eng 28(5):116","journal-title":"Empir Softw Eng"},{"key":"10829_CR96","unstructured":"SciTools (2000) Understand by SciTools. https:\/\/scitools.com, (Last accessed: October 2025)"},{"issue":"6","key":"10829_CR97","doi-asserted-by":"publisher","first-page":"603","DOI":"10.1109\/TSE.2014.2322358","volume":"40","author":"M Shepperd","year":"2014","unstructured":"Shepperd M, Bowes D, Hall T (2014) Researcher bias: the use of machine learning in software defect prediction. IEEE Trans Software Eng 40(6):603\u2013616","journal-title":"IEEE Trans Software Eng"},{"key":"10829_CR98","doi-asserted-by":"crossref","unstructured":"Shihab E, Jiang ZM, Ibrahim WM, Adams B, Hassan AE (2010) Understanding the impact of code and process metrics on post-release defects: a case study on the eclipse project. In: Proceedings of the 4th international symposium on empirical software engineering and measurement, pp 1\u201310","DOI":"10.1145\/1852786.1852792"},{"key":"10829_CR99","doi-asserted-by":"crossref","unstructured":"Tantithamthavorn C, Hassan AE (2018a) An experience report on defect modelling in practice: pitfalls and challenges. In: Proceedings of the 40th international conference on software engineering: software engineering in practice, pp 286\u2013295","DOI":"10.1145\/3183519.3183547"},{"key":"10829_CR100","doi-asserted-by":"crossref","unstructured":"Tantithamthavorn C, Hassan AE (2018b) An experience report on defect modelling in practice: Pitfalls and challenges. In: Proceedings of the 40th international conference on software engineering: software engineering in practice, pp 286\u2013295","DOI":"10.1145\/3183519.3183547"},{"issue":"1","key":"10829_CR101","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1109\/TSE.2016.2584050","volume":"43","author":"C Tantithamthavorn","year":"2016","unstructured":"Tantithamthavorn C, McIntosh S, Hassan AE, Matsumoto K (2016) An empirical comparison of model validation techniques for defect prediction models. IEEE Trans Software Eng 43(1):1\u201318","journal-title":"IEEE Trans Software Eng"},{"issue":"1","key":"10829_CR102","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1109\/TSE.2016.2584050","volume":"43","author":"C Tantithamthavorn","year":"2017","unstructured":"Tantithamthavorn C, McIntosh S, Hassan AE, Matsumoto K (2017) An empirical comparison of model validation techniques for defect prediction models. IEEE Trans Software Eng 43(1):1\u201318","journal-title":"IEEE Trans Software Eng"},{"issue":"7","key":"10829_CR103","doi-asserted-by":"publisher","first-page":"683","DOI":"10.1109\/TSE.2018.2794977","volume":"45","author":"C Tantithamthavorn","year":"2018","unstructured":"Tantithamthavorn C, McIntosh S, Hassan AE, Matsumoto K (2018) The impact of automated parameter optimization on defect prediction models. IEEE Trans Software Eng 45(7):683\u2013711","journal-title":"IEEE Trans Software Eng"},{"key":"10829_CR104","doi-asserted-by":"crossref","unstructured":"Tosun A, Turhan B, Bener A (2008) Ensemble of software defect predictors: a case study. In: Proceedings of the second ACM-IEEE international symposium on Empirical software engineering and measurement, pp 318\u2013320","DOI":"10.1145\/1414004.1414066"},{"key":"10829_CR105","doi-asserted-by":"crossref","unstructured":"Wang S, Liu T, Tan L (2016) Automatically learning semantic features for defect prediction. In: Proceedings of the 38th international conference on software engineering, pp 297\u2013308","DOI":"10.1145\/2884781.2884804"},{"issue":"5","key":"10829_CR106","doi-asserted-by":"publisher","first-page":"1480","DOI":"10.1109\/TSE.2020.3023177","volume":"48","author":"S Wattanakriengkrai","year":"2020","unstructured":"Wattanakriengkrai S, Thongtanunam P, Tantithamthavorn C, Hata H, Matsumoto K (2020) Predicting defective lines using a model-agnostic technique. IEEE Trans Software Eng 48(5):1480\u20131496","journal-title":"IEEE Trans Software Eng"},{"key":"10829_CR107","doi-asserted-by":"crossref","unstructured":"Yatish S, Jiarpakdee J, Thongtanunam P, Tantithamthavorn C (2019) Mining software defects: should we consider affected releases? In: Proceedings of the 41st international conference on software engineering, pp 654\u2013665","DOI":"10.1109\/ICSE.2019.00075"},{"issue":"1","key":"10829_CR108","doi-asserted-by":"publisher","first-page":"172","DOI":"10.1109\/TSE.2024.3503723","volume":"51","author":"S Yin","year":"2025","unstructured":"Yin S, Guo S, Li H, Li C, Chen R, Li X, Jiang H (2025) Line-level defect prediction by capturing code contexts with graph convolutional networks. IEEE Trans Software Eng 51(1):172\u2013191","journal-title":"IEEE Trans Software Eng"},{"key":"10829_CR109","doi-asserted-by":"crossref","unstructured":"Zain ZM, Sakri S, Ismail NHA (2023) Application of deep learning in software defect prediction: systematic literature review and meta-analysis. Inf Softw Technol 158","DOI":"10.1016\/j.infsof.2023.107175"},{"key":"10829_CR110","doi-asserted-by":"crossref","unstructured":"Zhang H (2009) An investigation of the relationships between lines of code and defects. In: Proceedings of the 25th international conference on software maintenance, pp 274\u2013283","DOI":"10.1109\/ICSM.2009.5306304"},{"key":"10829_CR111","doi-asserted-by":"crossref","unstructured":"Zhang F, Hassan AE, McIntosh S, Zou Y (2016a) The use of summation to aggregate software metrics hinders the performance of defect prediction models. IEEE Trans Softw Eng 43(5), 476\u2013491","DOI":"10.1109\/TSE.2016.2599161"},{"key":"10829_CR112","doi-asserted-by":"crossref","unstructured":"Zhang F, Zheng Q, Zou Y, Hassan AE (2016b) Cross-project defect prediction using a connectivity-based unsupervised classifier. In: Proceedings of the 38th international conference on software engineering, pp 309\u2013320","DOI":"10.1145\/2884781.2884839"},{"issue":"10","key":"10829_CR113","doi-asserted-by":"publisher","first-page":"771","DOI":"10.1109\/TSE.2006.102","volume":"32","author":"Y Zhou","year":"2006","unstructured":"Zhou Y, Leung H (2006) Empirical analysis of object-oriented design metrics for predicting high and low severity faults. IEEE Trans Software Eng 32(10):771\u2013789","journal-title":"IEEE Trans Software Eng"}],"container-title":["Empirical Software Engineering"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10664-026-10829-z.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10664-026-10829-z","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10664-026-10829-z.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,4,29]],"date-time":"2026-04-29T17:02:55Z","timestamp":1777482175000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10664-026-10829-z"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,4,29]]},"references-count":113,"journal-issue":{"issue":"5","published-print":{"date-parts":[[2026,9]]}},"alternative-id":["10829"],"URL":"https:\/\/doi.org\/10.1007\/s10664-026-10829-z","relation":{},"ISSN":["1382-3256","1573-7616"],"issn-type":[{"value":"1382-3256","type":"print"},{"value":"1573-7616","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026,4,29]]},"assertion":[{"value":"26 February 2025","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"15 February 2026","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"29 April 2026","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declared that they have no conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflicts of Interest"}}],"article-number":"128"}}