{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,18]],"date-time":"2026-03-18T13:19:29Z","timestamp":1773839969659,"version":"3.50.1"},"reference-count":46,"publisher":"Springer Science and Business Media LLC","issue":"10","license":[{"start":{"date-parts":[[2018,3,8]],"date-time":"2018-03-08T00:00:00Z","timestamp":1520467200000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61070013"],"award-info":[{"award-number":["61070013"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Soft Comput"],"published-print":{"date-parts":[[2018,5]]},"DOI":"10.1007\/s00500-018-3093-1","type":"journal-article","created":{"date-parts":[[2018,3,8]],"date-time":"2018-03-08T05:49:26Z","timestamp":1520488166000},"page":"3461-3472","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":47,"title":["Cross-company defect prediction via semi-supervised clustering-based data filtering and MSTrA-based transfer learning"],"prefix":"10.1007","volume":"22","author":[{"given":"Xiao","family":"Yu","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Man","family":"Wu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yiheng","family":"Jian","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Kwabena Ebo","family":"Bennin","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Mandi","family":"Fu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Chuanxiang","family":"Ma","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2018,3,8]]},"reference":[{"key":"3093_CR1","doi-asserted-by":"publisher","first-page":"263","DOI":"10.1016\/j.asoc.2015.04.045","volume":"33","author":"\u00d6mer Faruk Arar","year":"2015","unstructured":"Arar \u00d6mer Faruk, Ayan K\u00fcr\u015fat (2015) Software defect prediction using cost-sensitive neural network. Appl Soft Comput 33:263\u2013277","journal-title":"Appl Soft Comput"},{"key":"3093_CR2","doi-asserted-by":"publisher","unstructured":"Bennin KE, Keung J, Phannachitta P, et al (2017) MAHAKIL: diversity based oversampling approach to alleviate the class imbalance issue in software defect prediction. IEEE Trans Softw Eng. https:\/\/doi.org\/10.1109\/TSE.2017.2731766","DOI":"10.1109\/TSE.2017.2731766"},{"key":"3093_CR3","doi-asserted-by":"crossref","unstructured":"Bennin K, Keung J, Monden A, et al (2017) The significant effects of data sampling approaches on software defect prioritization and classification. In:11th International symposium on empirical software engineering and measurement, ESEM 2017","DOI":"10.1109\/ESEM.2017.50"},{"key":"3093_CR4","unstructured":"Boetticher G, Menzies T, Ostrand T (2007) PROMISE Repository of empirical software engineering data, West Virginia University, Department of Computer Science. http:\/\/promisedata.org\/repository"},{"issue":"1","key":"3093_CR5","doi-asserted-by":"publisher","first-page":"5","DOI":"10.1023\/A:1010933404324","volume":"45","author":"L Breiman","year":"2001","unstructured":"Breiman L (2001) Random forests. Mach Learn 45(1):5\u201332","journal-title":"Mach Learn"},{"issue":"7","key":"3093_CR6","doi-asserted-by":"publisher","first-page":"706","DOI":"10.1109\/TSE.2002.1019484","volume":"28","author":"LC Briand","year":"2002","unstructured":"Briand LC, Melo WL, Wust J (2002) Assessing the applicability of fault-proneness models across object-oriented software projects. IEEE Trans Softw Eng 28(7):706\u2013720","journal-title":"IEEE Trans Softw Eng"},{"key":"3093_CR7","doi-asserted-by":"crossref","first-page":"321","DOI":"10.1613\/jair.953","volume":"16","author":"NV Chawla","year":"2002","unstructured":"Chawla NV, Bowyer KW, Hall LO, Kegelmeyer WP (2002) SMOTE: synthetic minority over-sampling technique. J Artif Intell Res 16:321\u2013357","journal-title":"J Artif Intell Res"},{"key":"3093_CR8","doi-asserted-by":"publisher","first-page":"67","DOI":"10.1016\/j.infsof.2015.01.014","volume":"62","author":"L Chen","year":"2015","unstructured":"Chen L, Fang B, Shang Z et al (2015) Negative samples reduction in cross-company software defects prediction. Inf Softw Technol 62:67\u201377","journal-title":"Inf Softw Technol"},{"key":"3093_CR9","doi-asserted-by":"crossref","unstructured":"Dai W et al (2007) Boosting for transfer learning. In: 24th International conference on Machine learning, pp 193\u2013200","DOI":"10.1145\/1273496.1273521"},{"key":"3093_CR10","unstructured":"Dhanajayan RCG, Pillai SA (2016) SLMBC: spiral life cycle model-based Bayesian classification technique for efficient software fault prediction and classification, Soft Computing, 1-13"},{"issue":"5","key":"3093_CR11","doi-asserted-by":"publisher","first-page":"649","DOI":"10.1016\/j.jss.2007.07.040","volume":"81","author":"KO Elish","year":"2008","unstructured":"Elish KO, Elish MO (2008) Predicting defect-prone software modules using support vector machines. Softw J Syst Softw 81(5):649\u2013660","journal-title":"Softw J Syst Softw"},{"key":"3093_CR12","doi-asserted-by":"publisher","first-page":"1020","DOI":"10.1016\/j.asoc.2016.08.025","volume":"49","author":"Ezgi Erturk","year":"2016","unstructured":"Erturk Ezgi, Sezer Ebru Akcapinar (2016) Iterative software fault prediction with a hybrid approach. Appl Soft Comput 49:1020\u20131033","journal-title":"Appl Soft Comput"},{"key":"3093_CR13","unstructured":"Field AP (2001) Discovering statistics using SPSS for windows: advanced techniques for beginners, pp 551\u2013552"},{"issue":"7","key":"3093_CR14","doi-asserted-by":"publisher","first-page":"750","DOI":"10.1109\/T-C.1975.224297","volume":"100","author":"K Fukunaga","year":"1975","unstructured":"Fukunaga K, Narendra PM (1975) A branch and bound algorithm for computing k-nearest neighbors. IEEE Trans Comput 100(7):750\u2013753","journal-title":"IEEE Trans Comput"},{"key":"3093_CR15","doi-asserted-by":"crossref","unstructured":"Gray D, Bowes D, Davey N, et al (2009) Using the support vector machine as a classification method for software defect prediction with static code metrics. In: International conference on engineering applications of neural networks. Springer, Berlin, pp 223\u2013234","DOI":"10.1007\/978-3-642-03969-0_21"},{"issue":"6","key":"3093_CR16","doi-asserted-by":"publisher","first-page":"1276","DOI":"10.1109\/TSE.2011.103","volume":"38","author":"T Hall","year":"2012","unstructured":"Hall T, Beecham S, Bowes D, Gray D, Counsell S (2012) A systematic literature review on fault prediction performance in software engineering. IEEE Trans Softw Eng 38(6):1276\u20131304","journal-title":"IEEE Trans Softw Eng"},{"key":"3093_CR17","unstructured":"Hosmer DW, Lemeshow S (2000) Introduction to the logistic regression model. Appl Logist Regres 1\u201330"},{"issue":"8","key":"3093_CR18","doi-asserted-by":"publisher","first-page":"651","DOI":"10.1016\/j.patrec.2009.09.011","volume":"31","author":"K Jain","year":"2010","unstructured":"Jain K (2010) Data clustering: 50 years beyond K-means. Pattern Recognit Lett 31(8):651\u2013666","journal-title":"Pattern Recognit Lett"},{"key":"3093_CR19","doi-asserted-by":"crossref","unstructured":"Jing X et al (2015) Heterogeneous cross-company defect prediction by unified metric representation and CCA-based transfer learning. In Proceedings of the 10th joint meeting on foundations of software engineering, pp 496\u2013507","DOI":"10.1145\/2786805.2786813"},{"key":"3093_CR20","doi-asserted-by":"crossref","unstructured":"Jing XY, Ying S, Zhang ZW, Wu SS, Liu J (2014) Dictionary learning based software defect prediction. In: Proceedings of the 36th International Conference on Software Engineering, pp 414\u2013423","DOI":"10.1145\/2568225.2568320"},{"issue":"11","key":"3093_CR21","doi-asserted-by":"publisher","first-page":"1073","DOI":"10.1016\/j.infsof.2007.02.015","volume":"49","author":"V By Kampenes","year":"2007","unstructured":"Kampenes V By et al (2007) A systematic review of effect size in software engineering experiments. Inf Softw Technol 49(11):1073\u20131086","journal-title":"Inf Softw Technol"},{"key":"3093_CR22","doi-asserted-by":"crossref","unstructured":"Kawata K, Amasaki S, Yokogawa T (2016) Improving relevancy filter methods for cross-project defect prediction, applied computing & information technology, pp 1\u201312","DOI":"10.1007\/978-3-319-26396-0_1"},{"key":"3093_CR23","doi-asserted-by":"publisher","first-page":"388","DOI":"10.1016\/j.infsof.2014.07.005","volume":"58","author":"IH Laradji","year":"2015","unstructured":"Laradji IH, Alshayeb M, Ghouti L (2015) Software defect prediction using ensemble learning on selected features. Inf. Softw. Technol. 58:388\u2013402","journal-title":"Inf. Softw. Technol."},{"key":"3093_CR24","doi-asserted-by":"crossref","unstructured":"Lelis L, Sander J (2009) Semi-supervised density-based clustering. In: 9th IEEE international conference on data mining, pp 842\u2013847","DOI":"10.1109\/ICDM.2009.143"},{"key":"3093_CR25","doi-asserted-by":"crossref","unstructured":"Lewis DD (1998) Naive (Bayes) at forty the independence assumption in information retrieval. In: European conference on machine learning, pp 4\u201315","DOI":"10.1007\/BFb0026666"},{"issue":"3","key":"3093_CR26","doi-asserted-by":"publisher","first-page":"248","DOI":"10.1016\/j.infsof.2011.09.007","volume":"54","author":"Y Ma","year":"2012","unstructured":"Ma Y, Luo G, Zeng X, Chen A (2012) Transfer learning for cross-company software defect prediction. Inf Softw Technol 54(3):248\u2013256","journal-title":"Inf Softw Technol"},{"key":"3093_CR27","doi-asserted-by":"publisher","first-page":"504","DOI":"10.1016\/j.asoc.2014.11.023","volume":"27","author":"Ruchika Malhotra","year":"2015","unstructured":"Malhotra Ruchika (2015) A systematic review of machine learning techniques for software fault prediction. Appl Soft Comput 27:504\u2013518","journal-title":"Appl Soft Comput"},{"key":"3093_CR28","doi-asserted-by":"publisher","first-page":"1085","DOI":"10.1016\/j.asoc.2016.06.023","volume":"49","author":"PP Diego Mesquita","year":"2016","unstructured":"Mesquita PP Diego et al (2016) Classification with reject option for software defect prediction. Appl Soft Comput 49:1085\u20131093","journal-title":"Appl Soft Comput"},{"key":"3093_CR29","doi-asserted-by":"crossref","unstructured":"Nam J, Pan SJ, Kim S (2013) Transfer defect learning. In: Proceedings of the 2013 international conference on software engineering. IEEE Press, pp 382\u2013391","DOI":"10.1109\/ICSE.2013.6606584"},{"issue":"6","key":"3093_CR30","doi-asserted-by":"publisher","first-page":"809","DOI":"10.1016\/j.ins.2008.11.007","volume":"179","author":"L Peng","year":"2009","unstructured":"Peng L, Yang B, Chen Y, Abraham A (2009) Data gravitation based classification. Inf Sci 179(6):809\u2013819","journal-title":"Inf Sci"},{"key":"3093_CR31","doi-asserted-by":"crossref","unstructured":"Peters F, Menzies T, Marcus A (2013) Better cross company defect prediction. In: Proceedings of the 10th international workshop on mining software repositories, pp 409\u2013418","DOI":"10.1109\/MSR.2013.6624057"},{"issue":"5","key":"3093_CR32","doi-asserted-by":"publisher","first-page":"969","DOI":"10.1007\/s11390-015-1575-5","volume":"30","author":"D Ryu","year":"2015","unstructured":"Ryu D, Jang JI, Baik J (2015) A hybrid instance selection using nearest-neighbor for cross-project defect prediction. J Comput Sci Technol 30(5):969\u2013980","journal-title":"J Comput Sci Technol"},{"issue":"5","key":"3093_CR33","doi-asserted-by":"publisher","first-page":"448","DOI":"10.1002\/widm.38","volume":"1","author":"N Seliya","year":"2011","unstructured":"Seliya N, Khoshgoftaar TM (2011) The use of decision trees for cost- sensitive classification: an empirical study in software quality prediction. Wiley Interdiscip Rev Data Min Knowl Discov 1(5):448\u2013459","journal-title":"Wiley Interdiscip Rev Data Min Knowl Discov"},{"issue":"6","key":"3093_CR34","doi-asserted-by":"publisher","first-page":"603","DOI":"10.1109\/TSE.2014.2322358","volume":"40","author":"M Shepperd","year":"2014","unstructured":"Shepperd M, Bowes D, Hall T (2014) Researcher bias The use of machine learning in software defect prediction. IEEE Trans Softw Eng 40(6):603\u2013616","journal-title":"IEEE Trans Softw Eng"},{"key":"3093_CR35","doi-asserted-by":"crossref","unstructured":"Shukla S, Radhakrishnan T, Muthukumaran K, et al (2016) Multi-objective cross-version defect prediction, Soft Computing 1-22","DOI":"10.1007\/s00500-016-2456-8"},{"key":"3093_CR36","doi-asserted-by":"publisher","first-page":"62","DOI":"10.1016\/j.is.2015.02.006","volume":"51","author":"MJ Siers","year":"2015","unstructured":"Siers MJ, Islam MZ (2015) Software defect prediction using a cost sensitive decision forest and voting, and a potential solution to the class imbalance problem. Inf. Syst. 51:62\u201371","journal-title":"Inf. Syst."},{"issue":"3","key":"3093_CR37","doi-asserted-by":"publisher","first-page":"356","DOI":"10.1109\/TSE.2010.90","volume":"37","author":"Q Song","year":"2011","unstructured":"Song Q, Jia Z, Shepperd M et al (2011) A general software defect-proneness prediction framework. IEEE Trans Softw Eng 37(3):356\u2013370","journal-title":"IEEE Trans Softw Eng"},{"issue":"6","key":"3093_CR38","doi-asserted-by":"publisher","first-page":"1806","DOI":"10.1109\/TSMCC.2012.2226152","volume":"42","author":"Z Sun","year":"2012","unstructured":"Sun Z, Song Q, Zhu X (2012) Using coding-based ensemble learning to improve software defect prediction. IEEE Trans Syst Man Cybern Part C (Appl Rev) 42(6):1806\u20131817","journal-title":"IEEE Trans Syst Man Cybern Part C (Appl Rev)"},{"issue":"5","key":"3093_CR39","doi-asserted-by":"publisher","first-page":"540","DOI":"10.1007\/s10664-008-9103-7","volume":"14","author":"B Turhan","year":"2009","unstructured":"Turhan B, Menzies T, Bener AB, Di Stefano J (2009) On the relative value of cross-company and within-company data for defect prediction. Empir Softw Eng 14(5):540\u2013578","journal-title":"Empir Softw Eng"},{"issue":"6","key":"3093_CR40","doi-asserted-by":"publisher","first-page":"1101","DOI":"10.1016\/j.infsof.2012.10.003","volume":"55","author":"B Turhan","year":"2013","unstructured":"Turhan B, Tosun M\u0131s\u0131rl\u0131 A, Bener A (2013) Empirical evaluation of the effects of mixed project data on learning defect predictors. Inf Softw Technol 55(6):1101\u20131118","journal-title":"Inf Softw Technol"},{"issue":"8","key":"3093_CR41","doi-asserted-by":"publisher","first-page":"384","DOI":"10.4236\/jsea.2015.88038","volume":"8","author":"V Vashisht","year":"2015","unstructured":"Vashisht V, Lal M, Sureshchandar GS et al (2015) A framework for software defect prediction using neural networks. J Softw Eng Appl 8(8):384","journal-title":"J Softw Eng Appl"},{"key":"3093_CR42","unstructured":"Wang J, Shen B, Chen Y (2012) Compressed C4. 5 models for software defect prediction. In: 12th international conference on quality software, pp 13\u201316"},{"issue":"6","key":"3093_CR43","doi-asserted-by":"publisher","first-page":"80","DOI":"10.2307\/3001968","volume":"1","author":"Frank Wilcoxon","year":"1945","unstructured":"Wilcoxon Frank (1945) Individual comparisons by ranking methods. Biom Bull 1(6):80\u201383","journal-title":"Biom Bull"},{"key":"3093_CR44","doi-asserted-by":"crossref","unstructured":"Yan Z, Chen X, Guo P (2010) Software defect prediction using fuzzy support vector regression. In: International Symposium on Neural Networks. Springer, Berlin Heidelberg, pp 17\u201324","DOI":"10.1007\/978-3-642-13318-3_3"},{"key":"3093_CR45","doi-asserted-by":"crossref","unstructured":"Yao Y, Doretto G (2010) Boosting for transfer learning with multiple sources. In: IEEE conference on computer vision and pattern recognition, pp 1855\u20131862","DOI":"10.1109\/CVPR.2010.5539857"},{"key":"3093_CR46","doi-asserted-by":"crossref","unstructured":"Zimmermann T, Nagappan N, Gall H, Giger E, Murphy B (2009) Cross-project defect prediction: a large scale experiment on data vs. domain vs. process. In: Proceedings of the the 7th joint meeting of the European software engineering conference and the ACM SIGSOFT symposium on The foundations of software engineering, pp 91\u2013100","DOI":"10.1145\/1595696.1595713"}],"container-title":["Soft Computing"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s00500-018-3093-1\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s00500-018-3093-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s00500-018-3093-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,10,12]],"date-time":"2019-10-12T09:40:02Z","timestamp":1570873202000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s00500-018-3093-1"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018,3,8]]},"references-count":46,"journal-issue":{"issue":"10","published-print":{"date-parts":[[2018,5]]}},"alternative-id":["3093"],"URL":"https:\/\/doi.org\/10.1007\/s00500-018-3093-1","relation":{},"ISSN":["1432-7643","1433-7479"],"issn-type":[{"value":"1432-7643","type":"print"},{"value":"1433-7479","type":"electronic"}],"subject":[],"published":{"date-parts":[[2018,3,8]]},"assertion":[{"value":"8 March 2018","order":1,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Compliance with ethical standards"}},{"value":"The authors declare that they have no conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflicts of interest"}},{"value":"This article does not contain any studies with human participants or animals performed by any of the authors.","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethical approval"}},{"value":"This article does not contain any studies with human participants.","order":4,"name":"Ethics","group":{"name":"EthicsHeading","label":"Informed consent"}}]}}