{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,6]],"date-time":"2026-03-06T03:25:32Z","timestamp":1772767532666,"version":"3.50.1"},"reference-count":57,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Empir Software Eng"],"published-print":{"date-parts":[[2021,1]]},"DOI":"10.1007\/s10664-020-09916-6","type":"journal-article","created":{"date-parts":[[2021,1,11]],"date-time":"2021-01-11T21:35:11Z","timestamp":1610400911000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":16,"title":["What makes a popular academic AI repository?"],"prefix":"10.1007","volume":"26","author":[{"given":"Yuanrui","family":"Fan","sequence":"first","affiliation":[]},{"given":"Xin","family":"Xia","sequence":"additional","affiliation":[]},{"given":"David","family":"Lo","sequence":"additional","affiliation":[]},{"given":"Ahmed E.","family":"Hassan","sequence":"additional","affiliation":[]},{"given":"Shanping","family":"Li","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2021,1,5]]},"reference":[{"key":"9916_CR1","doi-asserted-by":"crossref","unstructured":"Aggarwal K, Hindle A, Stroulia E (2014) Co-evolution of project documentation and popularity within github. In: Proceedings of the 11th working conference on mining software repositories. 
ACM, pp 360\u2013363","DOI":"10.1145\/2597073.2597120"},{"key":"9916_CR2","doi-asserted-by":"crossref","unstructured":"Alves TL, Ypma C, Visser J (2010) Deriving metric thresholds from benchmark data. In: IEEE international conference on software maintenance. IEEE, pp 1\u201310","DOI":"10.1109\/ICSM.2010.5609747"},{"key":"9916_CR3","unstructured":"Balcan MF, Dick T, Sandholm T, Vitercik E (2018) Learning to branch. In: International conference on machine learning, pp 344\u2013353"},{"key":"9916_CR4","doi-asserted-by":"crossref","unstructured":"Bissyand\u00e9 TF, Thung F, Lo D, Jiang L, R\u00e9veillere L (2013) Popularity, interoperability, and impact of programming languages in 100,000 open source projects. In: 2013 IEEE 37th annual computer software and applications conference. IEEE, pp 303\u2013312","DOI":"10.1109\/COMPSAC.2013.55"},{"key":"9916_CR5","first-page":"993","volume":"3","author":"DM Blei","year":"2003","unstructured":"Blei DM, Ng AY, Jordan MI (2003) Latent dirichlet allocation. J Mach Learn Res 3:993\u20131022","journal-title":"J Mach Learn Res"},{"issue":"1","key":"9916_CR6","doi-asserted-by":"publisher","first-page":"71","DOI":"10.1145\/2723872.2723882","volume":"49","author":"C Boettiger","year":"2015","unstructured":"Boettiger C (2015) An introduction to docker for reproducible research. ACM SIGOPS Oper Syst Rev 49(1):71\u201379","journal-title":"ACM SIGOPS Oper Syst Rev"},{"key":"9916_CR7","doi-asserted-by":"crossref","unstructured":"Borges H, Hora A, Valente MT (2016a) Predicting the popularity of github repositories. In: Proceedings of the the 12th international conference on predictive models and data analytics in software engineering. ACM, p 9","DOI":"10.1145\/2972958.2972966"},{"key":"9916_CR8","doi-asserted-by":"crossref","unstructured":"Borges H, Hora A, Valente MT (2016b) Understanding the factors that impact the popularity of github repositories. In: 2016 IEEE international conference on software maintenance and evolution (ICSME). 
IEEE, pp 334\u2013344","DOI":"10.1109\/ICSME.2016.31"},{"issue":"1","key":"9916_CR9","doi-asserted-by":"publisher","first-page":"5","DOI":"10.1023\/A:1010933404324","volume":"45","author":"L Breiman","year":"2001","unstructured":"Breiman L (2001) Random forests. Mach Learn 45(1):5\u201332","journal-title":"Mach Learn"},{"key":"9916_CR10","doi-asserted-by":"publisher","DOI":"10.4324\/9781315806730","volume-title":"Ordinal methods for behavioral data analysis","author":"N Cliff","year":"2014","unstructured":"Cliff N (2014) Ordinal methods for behavioral data analysis. Psychology Press, New York, NY"},{"issue":"3","key":"9916_CR11","doi-asserted-by":"publisher","first-page":"62","DOI":"10.1145\/2812803","volume":"59","author":"C Collberg","year":"2016","unstructured":"Collberg C, Proebsting TA (2016) Repeatability in computer systems research. Commun ACM 59(3):62\u201369","journal-title":"Commun ACM"},{"key":"9916_CR12","unstructured":"Collobert R, Bengio S, Mari\u00e9thoz J (2002) Torch: a modular machine learning software library. Tech. rep., Idiap"},{"key":"9916_CR13","doi-asserted-by":"crossref","unstructured":"Cutler A, Cutler DR, Stevens JR (2012) Random forests. In: Ensemble machine learning. Springer, pp 157\u2013175","DOI":"10.1007\/978-1-4419-9326-7_5"},{"key":"9916_CR14","unstructured":"Fan Y, Xia X, Lo D, Hassan AE (2018a) Chaff from the wheat: characterizing and determining valid bug reports. In: IEEE transactions on software engineering"},{"issue":"6","key":"9916_CR15","doi-asserted-by":"publisher","first-page":"3346","DOI":"10.1007\/s10664-018-9602-0","volume":"23","author":"Y Fan","year":"2018","unstructured":"Fan Y, Xia X, Lo D, Li S (2018b) Early prediction of merged code changes to prioritize reviewing tasks. 
Empir Softw Eng 23(6):3346\u20133393","journal-title":"Empir Softw Eng"},{"key":"9916_CR16","doi-asserted-by":"crossref","unstructured":"Fan Y, Xia X, da Costa DA, Lo D, Hassan AE, Li S (2019) The impact of changes mislabeled by szz on just-in-time defect prediction. In: IEEE transactions on software engineering","DOI":"10.1109\/TSE.2019.2929761"},{"issue":"5","key":"9916_CR17","doi-asserted-by":"publisher","first-page":"378","DOI":"10.1037\/h0031619","volume":"76","author":"JL Fleiss","year":"1971","unstructured":"Fleiss JL (1971) Measuring nominal scale agreement among many raters. Psychol Bull 76(5):378","journal-title":"Psychol Bull"},{"key":"9916_CR18","unstructured":"Fogel K (2005) Producing open source software: how to run a successful free software project. O\u2019Reilly Media, Inc"},{"key":"9916_CR19","doi-asserted-by":"crossref","unstructured":"Ghotra B, McIntosh S, Hassan AE (2015) Revisiting the impact of classification techniques on the performance of defect prediction models. In: 2015 IEEE\/ACM 37th IEEE international conference on software engineering, vol 1. IEEE, pp 789\u2013800","DOI":"10.1109\/ICSE.2015.91"},{"key":"9916_CR20","doi-asserted-by":"crossref","unstructured":"Gousios G, Pinzger M, Deursen AV (2014) An exploratory study of the pull-based software development model. In: Proceedings of the 36th international conference on software engineering. ACM, pp 345\u2013355","DOI":"10.1145\/2568225.2568260"},{"issue":"3","key":"9916_CR21","doi-asserted-by":"publisher","first-page":"56","DOI":"10.1609\/aimag.v39i3.2816","volume":"39","author":"OE Gundersen","year":"2017","unstructured":"Gundersen OE, Gil Y, Aha DW (2017) On reproducible ai: towards reproducible research, open science, and digital scholarship in ai publications. AI Mag 39(3):56\u201368","journal-title":"AI Mag"},{"key":"9916_CR22","doi-asserted-by":"crossref","unstructured":"Han J, Deng S, Xia X, Wang D, Yin J (2019) Characterization and prediction of popular projects on github. 
In: 2019 IEEE 43rd annual computer software and applications conference (COMPSAC), vol 1. IEEE, pp 21\u201326","DOI":"10.1109\/COMPSAC.2019.00013"},{"key":"9916_CR23","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-19425-7","volume-title":"Regression modeling strategies: with applications to linear models, logistic and ordinal regression, and survival analysis","author":"FEJr Harrell","year":"2015","unstructured":"Harrell FE Jr (2015) Regression modeling strategies: with applications to linear models, logistic and ordinal regression, and survival analysis. Springer, Berlin"},{"key":"9916_CR24","doi-asserted-by":"publisher","DOI":"10.1002\/9781118548387","volume-title":"Applied logistic regression, vol 398","author":"DWJr Hosmer","year":"2013","unstructured":"Hosmer DW Jr, Lemeshow S, Sturdivant RX (2013) Applied logistic regression, vol 398. Wiley, Hoboken"},{"issue":"1","key":"9916_CR25","doi-asserted-by":"publisher","first-page":"1268","DOI":"10.1186\/s40064-016-2897-7","volume":"5","author":"Y Hu","year":"2016","unstructured":"Hu Y, Zhang J, Bai X, Yu S, Yang Z (2016) Influence analysis of github repositories. SpringerPlus 5(1):1268","journal-title":"SpringerPlus"},{"issue":"3","key":"9916_CR26","doi-asserted-by":"publisher","first-page":"299","DOI":"10.1109\/TKDE.2005.50","volume":"17","author":"J Huang","year":"2005","unstructured":"Huang J, Ling CX (2005) Using auc and accuracy in evaluating learning algorithms. IEEE Trans Knowl Data Eng 17(3):299\u2013310","journal-title":"IEEE Trans Knowl Data Eng"},{"issue":"1","key":"9916_CR27","doi-asserted-by":"publisher","first-page":"547","DOI":"10.1007\/s10664-016-9436-6","volume":"22","author":"J Jiang","year":"2017","unstructured":"Jiang J, Lo D, He J, Xia X, Kochhar PS, Zhang L (2017) Why and how developers fork what from whom in github. 
Empir Softw Eng 22(1):547\u2013578","journal-title":"Empir Softw Eng"},{"key":"9916_CR28","unstructured":"Kim M, Bergman L, Lau T, Notkin D (2004) An ethnographic study of copy and paste programming practices in oopl. In: Proceedings. 2004 International symposium on empirical software engineering, ISESE\u201904. IEEE, pp 83\u201392"},{"key":"9916_CR29","first-page":"1","volume":"9","author":"J Kimble","year":"1992","unstructured":"Kimble J (1992) Plain english: a charter for clear writing. TM Cooley L Rev 9:1","journal-title":"TM Cooley L Rev"},{"issue":"3","key":"9916_CR30","doi-asserted-by":"publisher","first-page":"176","DOI":"10.1109\/TSE.2006.28","volume":"32","author":"Z Li","year":"2006","unstructured":"Li Z, Lu S, Myagmar S, Zhou Y (2006) Cp-miner: finding copy-paste and related bugs in large-scale software code. IEEE Trans Softw Eng 32 (3):176\u2013192","journal-title":"IEEE Trans Softw Eng"},{"key":"9916_CR31","unstructured":"Newman D, Lau JH, Grieser K, Baldwin T (2010) Automatic evaluation of topic coherence. In: Human language technologies: the 2010 annual conference of the North American chapter of the association for computational linguistics, pp 100\u2013108"},{"issue":"6242","key":"9916_CR32","doi-asserted-by":"publisher","first-page":"1422","DOI":"10.1126\/science.aab2374","volume":"348","author":"BA Nosek","year":"2015","unstructured":"Nosek BA, Alter G, Banks GC, Borsboom D, Bowman SD, Breckler SJ, Buck S, Chambers CD, Chin G, Christensen G, et al. (2015) Promoting an open research culture. Science 348(6242):1422\u20131425","journal-title":"Science"},{"key":"9916_CR33","unstructured":"Paszke A, Gross S, Massa F, Lerer A, Bradbury J, Chanan G, Killeen T, Lin Z, Gimelshein N, Antiga L, et al. (2019) Pytorch: an imperative style high-performance deep learning library. 
In: Advances in neural information processing systems, pp 8024\u20138035"},{"issue":"1","key":"9916_CR34","doi-asserted-by":"publisher","first-page":"50","DOI":"10.1145\/1007730.1007738","volume":"6","author":"C Phua","year":"2004","unstructured":"Phua C, Alahakoon D, Lee V (2004) Minority report in fraud detection: classification of skewed data. ACM SIGKDD Explor Newsl 6(1):50\u201359","journal-title":"ACM SIGKDD Explor Newsl"},{"key":"9916_CR35","unstructured":"Portugal RLQ, do Prado Leite JCS (2016) Extracting requirements patterns from software repositories. In: 2016 IEEE 24th international requirements engineering conference workshops (REW). IEEE, pp 304\u2013307"},{"issue":"3","key":"9916_CR36","doi-asserted-by":"publisher","first-page":"1296","DOI":"10.1007\/s10664-018-9660-3","volume":"24","author":"GAA Prana","year":"2019","unstructured":"Prana GAA, Treude C, Thung F, Atapattu T, Lo D (2019) Categorizing the content of GitHub README files. Empir Softw Eng 24(3):1296\u20131327","journal-title":"Empir Softw Eng"},{"issue":"5","key":"9916_CR37","doi-asserted-by":"publisher","first-page":"1763","DOI":"10.1213\/ANE.0000000000002864","volume":"126","author":"P Schober","year":"2018","unstructured":"Schober P, Boer C, Schwarte LA (2018) Correlation coefficients: appropriate use and interpretation. Anesth Analg 126(5):1763\u20131768","journal-title":"Anesth Analg"},{"issue":"3","key":"9916_CR38","doi-asserted-by":"publisher","first-page":"507","DOI":"10.2307\/2529204","volume":"30","author":"AJ Scott","year":"1974","unstructured":"Scott AJ, Knott M (1974) A cluster analysis method for grouping means in the analysis of variance. Biometrics 30(3):507\u2013512","journal-title":"Biometrics"},{"key":"9916_CR39","first-page":"2443","volume":"8","author":"S Sonnenburg","year":"2007","unstructured":"Sonnenburg S, Braun ML, Ong CS, Bengio S, Bottou L, Holmes G, LeCun Y, M\u00fcller KR, Pereira F, Rasmussen CE, et al. 
(2007) The need for open source software in machine learning. J Mach Learn Res 8:2443\u20132466","journal-title":"J Mach Learn Res"},{"key":"9916_CR40","unstructured":"Sutton RS, Barto AG (2018) Reinforcement learning: an introduction. MIT Press"},{"key":"9916_CR41","doi-asserted-by":"crossref","unstructured":"Tantithamthavorn C, Hassan AE (2018) An experience report on defect modelling in practice: pitfalls and challenges. In: Proceedings of the 40th international conference on software engineering: software engineering in practice. ACM, pp 286\u2013295","DOI":"10.1145\/3183519.3183547"},{"issue":"1","key":"9916_CR42","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1109\/TSE.2016.2584050","volume":"43","author":"C Tantithamthavorn","year":"2017","unstructured":"Tantithamthavorn C, McIntosh S, Hassan AE, Matsumoto K (2017) An empirical comparison of model validation techniques for defect prediction models. IEEE Trans Softw Eng 43(1):1\u201318","journal-title":"IEEE Trans Softw Eng"},{"issue":"7","key":"9916_CR43","doi-asserted-by":"publisher","first-page":"683","DOI":"10.1109\/TSE.2018.2794977","volume":"45","author":"C Tantithamthavorn","year":"2018","unstructured":"Tantithamthavorn C, McIntosh S, Hassan AE, Matsumoto K (2018) The impact of automated parameter optimization on defect prediction models. IEEE Trans Softw Eng 45(7):683\u2013711","journal-title":"IEEE Trans Softw Eng"},{"key":"9916_CR44","doi-asserted-by":"crossref","unstructured":"Tian Y, Nagappan M, Lo D, Hassan AE (2015) What are the characteristics of high-rated apps? A case study on free android applications. In: 2015 IEEE international conference on software maintenance and evolution (ICSME). IEEE, pp 301\u2013310","DOI":"10.1109\/ICSM.2015.7332476"},{"issue":"3","key":"9916_CR45","doi-asserted-by":"publisher","first-page":"395","DOI":"10.2307\/2982890","volume":"155","author":"GJ Upton","year":"1992","unstructured":"Upton GJ (1992) Fisher\u2019s exact test. 
J R Stat Soc: Ser A (Stat Soc) 155(3):395\u2013402","journal-title":"J R Stat Soc: Ser A (Stat Soc)"},{"key":"9916_CR46","doi-asserted-by":"crossref","unstructured":"Wan Z, Lo D, Xia X, Cai L, Li S (2017) Mining sandboxes for linux containers. In: IEEE international conference on software testing, verification and validation (ICST). IEEE, pp 92\u2013102","DOI":"10.1109\/ICST.2017.16"},{"issue":"11","key":"9916_CR47","doi-asserted-by":"publisher","first-page":"1241","DOI":"10.1109\/TSE.2018.2877678","volume":"46","author":"Z Wan","year":"2018","unstructured":"Wan Z, Xia X, Hassan AE, Lo D, Yin J, Yang X (2018) Perceptions, expectations, and challenges in defect prediction. IEEE Trans Softw Eng 46(11):1241\u20131266","journal-title":"IEEE Trans Softw Eng"},{"key":"9916_CR48","unstructured":"Wang TC, Liu MY, Zhu JY, Liu G, Tao A, Kautz J, Catanzaro B (2018) Video-to-video synthesis. In: Advances in neural information processing systems, vol 31, pp 1144\u20131156"},{"key":"9916_CR49","doi-asserted-by":"crossref","unstructured":"Weber S, Luo J (2014) What makes an open source code popular on git hub?. In: IEEE international conference on data mining workshop. IEEE, pp 851\u2013855","DOI":"10.1109\/ICDMW.2014.55"},{"issue":"6","key":"9916_CR50","doi-asserted-by":"publisher","first-page":"80","DOI":"10.2307\/3001968","volume":"1","author":"F Wilcoxon","year":"1945","unstructured":"Wilcoxon F (1945) Individual comparisons by ranking methods. Biom Bull 1(6):80\u201383","journal-title":"Biom Bull"},{"key":"9916_CR51","unstructured":"Woodfield SN, Dunsmore HE, Shen VY (1981) The effect of modularization and comments on program comprehension. In: Proceedings of the 5th international conference on Software engineering. IEEE Press, pp 215\u2013223"},{"key":"9916_CR52","doi-asserted-by":"crossref","unstructured":"Xia X, Wan Z, Kochhar PS, Lo D (2019) How practitioners perceive coding proficiency. In: 2019 IEEE\/ACM 41st international conference on software engineering (ICSE). 
IEEE, pp 924\u2013935","DOI":"10.1109\/ICSE.2019.00098"},{"key":"9916_CR53","doi-asserted-by":"crossref","unstructured":"Yan M, Xia X, Zhang X, Yang D, Xu L (2017) Automating aggregation for software quality modeling. In: IEEE international conference on software maintenance and evolution (ICSME). IEEE, pp 529\u2013533","DOI":"10.1109\/ICSME.2017.30"},{"issue":"12","key":"9916_CR54","doi-asserted-by":"publisher","first-page":"1211","DOI":"10.1109\/TSE.2018.2831232","volume":"45","author":"M Yan","year":"2018","unstructured":"Yan M, Xia X, Shihab E, Lo D, Yin J, Yang X (2018) Automating change-level self-admitted technical debt determination. IEEE Trans Softw Eng 45(12):1211\u20131229","journal-title":"IEEE Trans Softw Eng"},{"key":"9916_CR55","doi-asserted-by":"crossref","unstructured":"Yang J, Lu J, Lee S, Batra D, Parikh D (2018) Graph r-cnn for scene graph generation. In: Proceedings of the European conference on computer vision (ECCV), pp 670\u2013685","DOI":"10.1007\/978-3-030-01246-5_41"},{"key":"9916_CR56","doi-asserted-by":"crossref","unstructured":"Zar JH (2005) Spearman rank correlation. Encyclopedia of Biostatistics 7","DOI":"10.1002\/0470011815.b2a15150"},{"key":"9916_CR57","doi-asserted-by":"crossref","unstructured":"Zhu J, Zhou M, Mockus A (2014) Patterns of folder use and project popularity: a case study of github repositories. In: Proceedings of the 8th ACM\/IEEE international symposium on empirical software engineering and measurement. 
ACM, p 30","DOI":"10.1145\/2652524.2652564"}],"container-title":["Empirical Software Engineering"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10664-020-09916-6.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s10664-020-09916-6\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10664-020-09916-6.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2021,2,19]],"date-time":"2021-02-19T22:54:23Z","timestamp":1613775263000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s10664-020-09916-6"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,1]]},"references-count":57,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2021,1]]}},"alternative-id":["9916"],"URL":"https:\/\/doi.org\/10.1007\/s10664-020-09916-6","relation":{},"ISSN":["1382-3256","1573-7616"],"issn-type":[{"value":"1382-3256","type":"print"},{"value":"1573-7616","type":"electronic"}],"subject":[],"published":{"date-parts":[[2021,1]]},"assertion":[{"value":"14 October 2020","order":1,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"5 January 2021","order":2,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}],"article-number":"2"}}