{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,24]],"date-time":"2025-12-24T12:37:24Z","timestamp":1766579844462,"version":"3.37.3"},"reference-count":97,"publisher":"Springer Science and Business Media LLC","issue":"4","license":[{"start":{"date-parts":[[2016,11,22]],"date-time":"2016-11-22T00:00:00Z","timestamp":1479772800000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"funder":[{"DOI":"10.13039\/100006952","name":"Louisiana Board of Regents","doi-asserted-by":"publisher","award":["LEQSF(2015- 18)-RD-A-07"],"award-info":[{"award-number":["LEQSF(2015- 18)-RD-A-07"]}],"id":[{"id":"10.13039\/100006952","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Empir Software Eng"],"published-print":{"date-parts":[[2017,8]]},"DOI":"10.1007\/s10664-016-9473-1","type":"journal-article","created":{"date-parts":[[2016,11,21]],"date-time":"2016-11-21T21:44:20Z","timestamp":1479764660000},"page":"1965-2000","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":15,"title":["Semantic topic models for source code analysis"],"prefix":"10.1007","volume":"22","author":[{"given":"Anas","family":"Mahmoud","sequence":"first","affiliation":[]},{"given":"Gary","family":"Bradshaw","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2016,11,22]]},"reference":[{"key":"9473_CR1","doi-asserted-by":"crossref","unstructured":"Abadi A, Nisenson M, Simionovici Y (2008) A traceability technique for specifications. In: International Conference on Program Comprehension, pp. 103\u2013112","DOI":"10.1109\/ICPC.2008.30"},{"key":"9473_CR2","doi-asserted-by":"crossref","unstructured":"Aggarwal C, Zhai C (2012) A survey of text clustering algorithms. In: Mining Text Data, pp. 77\u2013128. Springer","DOI":"10.1007\/978-1-4614-3223-4_4"},{"key":"9473_CR3","doi-asserted-by":"crossref","unstructured":"Andrzejewski D, Mulhern A, Liblit B, Zhu X (2007) Statistical debugging using latent topic models. In: European conference on Machine Learning, pp. 6\u201317","DOI":"10.1007\/978-3-540-74958-5_5"},{"key":"9473_CR4","doi-asserted-by":"crossref","unstructured":"Anquetil N, Fourrier C, Lethbridge T (1999) Experiments with clustering as a software remodularization method. In: Working Conference on Reverse Engineering, pp. 235\u2013255","DOI":"10.1109\/WCRE.1999.806964"},{"key":"9473_CR5","unstructured":"Anquetil N, Lethbridge T (1998) Assessing the relevance of identifier names in a legacy software system. In: Conference of the Centre for Advanced Studies on Collaborative Research, pp. 4\u201314"},{"key":"9473_CR6","doi-asserted-by":"crossref","unstructured":"Asuncion H, Asuncion A, Taylor R (2010) Software traceability with topic modeling. In: International Conference on Software Engineering, pp. 95\u2013104","DOI":"10.1145\/1806799.1806817"},{"issue":"10","key":"9473_CR7","doi-asserted-by":"crossref","first-page":"543","DOI":"10.1145\/1449955.1449807","volume":"43","author":"P Baldi","year":"2008","unstructured":"Baldi P, Lopes C, Linstead E, Bajracharya S (2008) A theory of aspects as latent topics. ACM SIGPLAN Not 43(10):543\u2013562","journal-title":"ACM SIGPLAN Not"},{"issue":"3","key":"9473_CR8","doi-asserted-by":"crossref","first-page":"619","DOI":"10.1007\/s10664-012-9231-y","volume":"19","author":"A Barua","year":"2014","unstructured":"Barua A, Thomas S, Hassan A (2014) What are developers talking about? An analysis of topics and trends in stack overflow. Empir Softw Eng 19(3):619\u2013654","journal-title":"Empir Softw Eng"},{"issue":"1","key":"9473_CR9","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1145\/2559935","volume":"23","author":"G Bavota","year":"2014","unstructured":"Bavota G, Gethers M, Oliveto R, Poshyvanyk D, De Lucia A (2014) Improving software modularization via automated analysis of latent topics and dependencies. ACM Trans Softw Eng Methodol 23(1):1\u201333","journal-title":"ACM Trans Softw Eng Methodol"},{"issue":"7","key":"9473_CR10","doi-asserted-by":"crossref","first-page":"671","DOI":"10.1109\/TSE.2013.60","volume":"40","author":"G Bavota","year":"2014","unstructured":"Bavota G, Oliveto R, Gethers M, Poshyvanyk D, De Lucia A (2014) Methodbook: Recommending move method refactorings via Relational Topic Models. IEEE Trans Softw Eng 40(7):671\u2013694","journal-title":"IEEE Trans Softw Eng"},{"issue":"SI","key":"9473_CR11","doi-asserted-by":"crossref","first-page":"259","DOI":"10.1145\/223427.211856","volume":"20","author":"J Bieman","year":"1995","unstructured":"Bieman J, Kang B (1995) Cohesion and reuse in an object-oriented system. SIGSOFT Software Engineering Notes 20(SI):259\u2013262","journal-title":"SIGSOFT Software Engineering Notes"},{"issue":"3","key":"9473_CR12","doi-asserted-by":"crossref","first-page":"465","DOI":"10.1007\/s10664-012-9224-x","volume":"19","author":"L Biggers","year":"2014","unstructured":"Biggers L, Bocovich C, Capshaw R, Eddy B, Etzkorn L, Kraft N (2014) Configuring Latent Dirichlet Allocation based feature location. Empir Softw Eng 19(3):465\u2013500","journal-title":"Empir Softw Eng"},{"key":"9473_CR13","doi-asserted-by":"crossref","unstructured":"Binkley D, Heinz D, Lawrie D, Overfelt J (2014) Understanding LDA in source code analysis. In: International Conference on Program Comprehension, pp. 26\u201336","DOI":"10.1145\/2597008.2597150"},{"key":"9473_CR14","unstructured":"Blei D, Griffiths T, Jordan M, Tenenbaum J (2003) Hierarchical topic models and the nested Chinese restaurant process. In: Advances in Neural Information Processing Systems"},{"key":"9473_CR15","first-page":"993","volume":"3","author":"D Blei","year":"2003","unstructured":"Blei D, Ng A, Jordan M (2003) Latent Dirichlet Allocation. J Mach Learn Res 3:993\u20131022","journal-title":"J Mach Learn Res"},{"key":"9473_CR16","unstructured":"Budiu R, Royer C, Pirolli P (2007) Modeling information scent: A comparison of LSA, PMI and GLSA similarity measures on common tests and corpora. In: Large Scale Semantic Access to Content (Text, Image, Video, and Sound), pp. 314\u2013332"},{"issue":"3","key":"9473_CR17","doi-asserted-by":"crossref","first-page":"510","DOI":"10.3758\/BF03193020","volume":"39","author":"J Bullinaria","year":"2007","unstructured":"Bullinaria J, Levy J (2007) Extracting semantic representations from word co-occurrence statistics: A computational study. Behav Res Methods 39(3):510\u2013526","journal-title":"Behav Res Methods"},{"key":"9473_CR18","unstructured":"van Rijsbergen CJ (1979) Information Retrieval. Butterworths"},{"key":"9473_CR19","doi-asserted-by":"crossref","unstructured":"Caprile B, Tonella P (2000) Restructuring program identifier names. In: International Conference on Software Maintenance, pp. 97\u2013107","DOI":"10.1109\/ICSM.2000.883022"},{"key":"9473_CR20","unstructured":"Chang J (2010) Not-so-latent Dirichlet allocation: Collapsed Gibbs sampling using human judgments. In: NAACL HLT 2010 Workshop on Creating Speech and Language Data with Amazon\u2019s Mechanical Turk, pp. 131\u2013138"},{"key":"9473_CR21","unstructured":"Chang J, Boyd-Graber J, Gerrish S, Wang C, Blei D (2009) Reading tea leaves: How humans interpret topic models. Curran Associates, pp 288\u2013296"},{"key":"9473_CR22","unstructured":"Chen T, Thomas S, Nagappan M, Hassan A (2012) Explaining software defects using topic models. In: Working Conference on Mining Software Repositories, pp. 189\u2013198"},{"issue":"6","key":"9473_CR23","doi-asserted-by":"crossref","first-page":"476","DOI":"10.1109\/32.295895","volume":"20","author":"S Chidamber","year":"1994","unstructured":"Chidamber S, Kemerer C (1994) A metrics suite for object oriented design. IEEE Trans Softw Eng 20(6):476\u2013493","journal-title":"IEEE Trans Softw Eng"},{"issue":"1","key":"9473_CR24","first-page":"22","volume":"16","author":"K Church","year":"1990","unstructured":"Church K, Hanks P (1990) Word association norms, mutual information, and lexicography. Comput Linguist 16(1):22\u201329","journal-title":"Comput Linguist"},{"issue":"3","key":"9473_CR25","doi-asserted-by":"crossref","first-page":"370","DOI":"10.1109\/TKDE.2007.48","volume":"19","author":"R Cilibrasi","year":"2007","unstructured":"Cilibrasi R, Vitanyi P (2007) The google similarity distance. IEEE Trans Knowl Data Eng 19(3):370\u2013383","journal-title":"IEEE Trans Knowl Data Eng"},{"key":"9473_CR26","doi-asserted-by":"crossref","unstructured":"De Lucia A, Di Penta M, Oliveto R, Panichella A, Panichelle S (2012) Using IR methods for labeling source code artifacts: Is it worthwhile?. In: International Conference on Program Comprehension, pp. 193\u2013202","DOI":"10.1109\/ICPC.2012.6240488"},{"key":"9473_CR27","doi-asserted-by":"crossref","unstructured":"Dean A, Voss D (1999) Design and Analysis of Experiments. Springer","DOI":"10.1007\/b97673"},{"issue":"6","key":"9473_CR28","doi-asserted-by":"crossref","first-page":"391","DOI":"10.1002\/(SICI)1097-4571(199009)41:6<391::AID-ASI1>3.0.CO;2-9","volume":"41","author":"S Deerwester","year":"1990","unstructured":"Deerwester S, Dumais S, Furnas G, Landauer T, Harshman R (1990) Indexing by latent semantic analysis. J Am Soc Inf Sci 41(6):391\u2013407","journal-title":"J Am Soc Inf Sci"},{"key":"9473_CR29","doi-asserted-by":"crossref","unstructured":"Dei\u00dfenb\u00f6ck F, Pizka M (2005) Concise and consistent naming. In: International Workshop on Program Comprehension, pp. 97\u2013106","DOI":"10.1109\/WPC.2005.14"},{"issue":"5","key":"9473_CR30","doi-asserted-by":"crossref","first-page":"87","DOI":"10.1137\/0911052","volume":"11","author":"J Demmel","year":"1990","unstructured":"Demmel J, Kahan W (1990) Accurate singular values of bidiagonal matrices. J Sci Stat Comput 11(5):87\u2013 912","journal-title":"J Sci Stat Comput"},{"key":"9473_CR31","doi-asserted-by":"crossref","unstructured":"Gabel M, Zhendong S (2010) A study of the uniqueness of source code. In: ACM SIGSOFT International Symposium on Foundations of Software Engineering, pp. 147\u2013156","DOI":"10.1145\/1882291.1882315"},{"key":"9473_CR32","doi-asserted-by":"crossref","unstructured":"Gethers M, Poshyvanyk D (2010) Using relational topic models to capture coupling among classes in object-oriented software systems. In: International Conference on Software Maintenance, pp. 1\u201310","DOI":"10.1109\/ICSM.2010.5609687"},{"key":"9473_CR33","doi-asserted-by":"crossref","unstructured":"Gethers M, Savage T, Di Penta M, Oliveto R, Poshyvanyk D, De Lucia A (2011) Codetopics: which topic am I coding now?. In: International Conference on Software Engineering, pp. 1034\u20131036","DOI":"10.1145\/1985793.1985988"},{"key":"9473_CR34","doi-asserted-by":"crossref","unstructured":"Girolami M, Kab\u00e1n A (2003) On an equivalence between PLSI and LDA. In: International ACM SIGIR Conference on Research and Development in Informaion Retrieval, pp. 433\u2013434","DOI":"10.1145\/860435.860537"},{"key":"9473_CR35","doi-asserted-by":"crossref","unstructured":"Gracia J, Trillo R, Espinoza M, Mena E (2006) Querying the web: A multiontology disambiguation method. In: International Conference on Web Engineering, pp. 241\u2013248","DOI":"10.1145\/1145581.1145630"},{"key":"9473_CR36","doi-asserted-by":"crossref","unstructured":"Grant S, Cordy J (2010) Estimating the optimal number of latent concepts in source code analysis. In: International Working Conference on Source Code Analysis and Manipulation, pp. 65\u201374","DOI":"10.1109\/SCAM.2010.22"},{"key":"9473_CR37","doi-asserted-by":"crossref","unstructured":"Griffiths T, Steyvers M (2004) Finding scientific topics. In: The National Academy of Sciences, pp. 5228\u20135235","DOI":"10.1073\/pnas.0307752101"},{"key":"9473_CR38","doi-asserted-by":"crossref","unstructured":"Haiduc S, Marcus A (2008) On the use of domain terms in source code. In: IEEE International Conference on Program Comprehension, pp. 113\u2013122","DOI":"10.1109\/ICPC.2008.29"},{"key":"9473_CR39","doi-asserted-by":"crossref","unstructured":"Hearst M, Pedersen J (1996) Reexamining the cluster hypothesis: scatter\/gather on retrieval results. In: international ACM SIGIR conference on Research and development in information retrieval, pp. 76\u201384","DOI":"10.1145\/243199.243216"},{"key":"9473_CR40","doi-asserted-by":"crossref","unstructured":"Hindle A, Barr E, Su Z, Gabel M, Devanbu P (2012) On the naturalness of software. In: International Conference on Software Maintenance, pp. 837\u2013847","DOI":"10.1109\/ICSE.2012.6227135"},{"issue":"2","key":"9473_CR41","doi-asserted-by":"crossref","first-page":"479","DOI":"10.1007\/s10664-014-9312-1","volume":"20","author":"A Hindle","year":"2015","unstructured":"Hindle A, Bird C, Zimmermann T, Nagappan N (2015) Do topics make sense to managers and developers Empir Softw Eng 20(2):479\u2013515","journal-title":"Empir Softw Eng"},{"key":"9473_CR42","doi-asserted-by":"crossref","unstructured":"Hofmann T (1999) Probabilistic latent semantic indexing. In: International ACM SIGIR Conference on Research and Development in Information Retrieval, pp. 50\u201357","DOI":"10.1145\/312624.312649"},{"key":"9473_CR43","doi-asserted-by":"crossref","unstructured":"Howard M, Gupta S, Pollock L, Vijay-Shanker K (2013) Automatically mining software-based, semantically-similar words from comment-code mappings. In: Working Conference on Mining Software Repositories, pp. 377\u2013386","DOI":"10.1109\/MSR.2013.6624052"},{"key":"9473_CR44","doi-asserted-by":"crossref","unstructured":"Khatiwada S, Kelly M, Mahmoud A (2016) Stac: A tool for static textual analysis of code. In: International Conference on Program Comprehension, pp. 1\u20133","DOI":"10.1109\/ICPC.2016.7503746"},{"key":"9473_CR45","doi-asserted-by":"crossref","unstructured":"Koltcov S, Koltsova O, Nikolenko S (2014) Latent Dirichlet Allocation: Stability and applications to studies of user-generated content. In: ACM Conference on Web Science, pp. 161\u2013165","DOI":"10.1145\/2615569.2615680"},{"issue":"3","key":"9473_CR46","doi-asserted-by":"crossref","first-page":"230","DOI":"10.1016\/j.infsof.2006.10.017","volume":"49","author":"A Kuhn","year":"2007","unstructured":"Kuhn A, Ducasse S, G\u00edrba T (2007) Semantic clustering: Identifying topics in source code. Inf Softw Technol 49(3):230\u2013243","journal-title":"Inf Softw Technol"},{"key":"9473_CR47","unstructured":"Lau J, Newman D, Karimi S, Baldwin T (2010) Best topic word selection for topic labelling. In: International Conference on Computational Linguistics, pp. 605\u2013613"},{"issue":"12","key":"9473_CR48","doi-asserted-by":"crossref","first-page":"3250","DOI":"10.1109\/TIT.2004.838101","volume":"50","author":"M Li","year":"2004","unstructured":"Li M, Chen X, Li X, Ma B (2004) Vitanyi: The similarity metric. IEEE Trans Inf Theory 50(12):3250\u20133264","journal-title":"IEEE Trans Inf Theory"},{"key":"9473_CR49","doi-asserted-by":"crossref","unstructured":"Linstead E, Lopes C, Baldi P (2008) An application of Latent Dirichlet Allocation to analyzing software evolution. In: International Conference on Machine Learning and Applications, pp. 813\u2013818","DOI":"10.1109\/ICMLA.2008.47"},{"key":"9473_CR50","doi-asserted-by":"crossref","unstructured":"Linstead E, Rigor P, Bajracharya S, Lopes C, Baldi P (2007) Mining concepts from code with probabilistic topic models. In: International Conference on Automated Software Engineering, pp. 461\u2013464","DOI":"10.1145\/1321631.1321709"},{"key":"9473_CR51","doi-asserted-by":"crossref","unstructured":"Liu Y, Poshyvanyk D, Ferenc R, Gyimothy T, Chrisochoides N (2009) Modeling class cohesion as mixtures of latent topics. In: International Conference on Software Maintenance, pp. 233\u2013242","DOI":"10.1109\/ICSM.2009.5306318"},{"key":"9473_CR52","doi-asserted-by":"crossref","unstructured":"Lo D, Nagappan N, Zimmermann T (2015) How practitioners perceive the relevance of software engineering research. In: Joint Meeting on Foundations of Software Engineering, pp. 415\u2013425","DOI":"10.1145\/2786805.2786809"},{"key":"9473_CR53","doi-asserted-by":"crossref","unstructured":"Lohar S, Amornborvornwong S, Zisman A, Cleland-Huang J (2013) Improving trace accuracy through data-driven configuration and composition of tracing features. In: Joint Meeting on Foundations of Software Engineering, pp. 378\u2013388","DOI":"10.1145\/2491411.2491432"},{"key":"9473_CR54","doi-asserted-by":"crossref","unstructured":"Lukins S, Kraft N, Etzkorn L (2008) Source code retrieval for bug localization using Latent Dirichlet Allocation. In: Working Conference on Reverse Engineering, pp. 155\u2013164","DOI":"10.1109\/WCRE.2008.33"},{"issue":"9","key":"9473_CR55","doi-asserted-by":"crossref","first-page":"972","DOI":"10.1016\/j.infsof.2010.04.002","volume":"52","author":"S Lukins","year":"2010","unstructured":"Lukins S, Kraft N, Etzkorn L (2010) Bug localization using Latent Dirichlet Allocation. Inf Softw Technol 52(9):972\u2013990","journal-title":"Inf Softw Technol"},{"issue":"1","key":"9473_CR56","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1145\/2824251","volume":"25","author":"A Mahmoud","year":"2015","unstructured":"Mahmoud A, Bradshaw G (2015) Estimating semantic relatedness in source code. ACM Trans Softw Eng Methodol 25(1):1\u201335","journal-title":"ACM Trans Softw Eng Methodol"},{"issue":"3","key":"9473_CR57","doi-asserted-by":"crossref","first-page":"281","DOI":"10.1007\/s00766-013-0199-y","volume":"20","author":"A Mahmoud","year":"2015","unstructured":"Mahmoud A, Niu N (2015) On the role of semantics in automated requirements tracing. Requir Eng 20(3):281\u2013300","journal-title":"Requir Eng"},{"key":"9473_CR58","doi-asserted-by":"crossref","unstructured":"Mancoridis S, Mitchell B, Chen Y, Gansner E (1999) Bunch: A clustering tool for the recovery and maintenance of software system structures. In: International Conference on Software Maintenance, pp. 50\u201359","DOI":"10.1109\/ICSM.1999.792498"},{"key":"9473_CR59","doi-asserted-by":"crossref","unstructured":"Maskeri G, Sarkar S, Heafield K (2008) Mining business topics in source code using Latent Dirichlet Allocation. In: India software engineering conference, pp. 113\u2013120","DOI":"10.1145\/1342211.1342234"},{"key":"9473_CR60","unstructured":"Meghan K, Revelle, Poshyvanyk D (2009) Using Latent Dirichlet Allocation for automatic categorization of software. In: International Working Conference on Mining Software Repositories, pp. 163\u2013166"},{"key":"9473_CR61","doi-asserted-by":"crossref","unstructured":"Mei Q, Shen X, Zhai C (2007) Automatic labeling of multinomial topic models. In: International Conference on Knowledge Discovery and Data Mining, pp. 490\u2013499","DOI":"10.1145\/1281192.1281246"},{"key":"9473_CR62","unstructured":"Mihalcea R, Corley C, Strapparava C (2006) Corpus-based and knowledge-based measures of text semantic similarity. In: National Conference on Artificial Intelligence, pp. 775\u2013780"},{"key":"9473_CR63","unstructured":"Mimno D, Wallach H, Talley E, Leenders M, McCallum A (2011) Optimizing semantic coherence in topic models. In: The Conference on Empirical Methods in Natural Language Processing, pp. 262\u2013272"},{"issue":"3","key":"9473_CR64","doi-asserted-by":"crossref","first-page":"193","DOI":"10.1109\/TSE.2006.31","volume":"32","author":"B Mitchell","year":"2006","unstructured":"Mitchell B, Mancoridis S (2006) On the automatic modularization of software systems using the Bunch tool. IEEE Trans Softw Eng 32(3):193\u2013208","journal-title":"IEEE Trans Softw Eng"},{"key":"9473_CR65","doi-asserted-by":"crossref","unstructured":"Neuhaus S, Zimmermann T (2010) Security trend analysis with CVE topic models. In: International Symposium on Software Reliability Engineering, pp. 111\u2013120","DOI":"10.1109\/ISSRE.2010.53"},{"key":"9473_CR66","unstructured":"Newman D, Bonilla E, Buntine W (2011) Improving topic coherence with regularized topic models. In: Neural Information Processing Systems, pp. 496\u2013504"},{"key":"9473_CR67","unstructured":"Newman D, Han Lau J, Grieser K, Baldwin T (2010) Automatic evaluation of topic coherence. In: Annual Conference of the North American Chapter of the Association for Computational Linguistics, pp. 100\u2013108"},{"key":"9473_CR68","doi-asserted-by":"crossref","unstructured":"Newman D, Noh Y, Talley E, Karimi S, Baldwin T (2010) Evaluating topic models for digital libraries. In: Annual Joint Conference on Digital Libraries, pp. 215\u2013224","DOI":"10.1145\/1816123.1816156"},{"key":"9473_CR69","doi-asserted-by":"crossref","unstructured":"Nguyen A, Nguyen T, Al-Kofahi J, Nguyen H, Nguyen T (2011) A topic-based approach for narrowing the search space of buggy files from a bug report. In: Automated Software Engineering, pp. 263\u2013 272","DOI":"10.1109\/ASE.2011.6100062"},{"key":"9473_CR70","doi-asserted-by":"crossref","unstructured":"Niu N, Mahmoud A (2012) Enhancing candidate link generation for requirements tracing: The cluster hypothesis revisited. In: IEEE International Requirements Engineering Conference, pp. 81\u201390","DOI":"10.1109\/RE.2012.6345842"},{"key":"9473_CR71","doi-asserted-by":"crossref","unstructured":"Oliveto R, Gethers M, Poshyvanyk D, De Lucia A (2010) On the equivalence of information retrieval methods for automated traceability link recovery. In: International Conference on Program Comprehension, pp. 68\u201371","DOI":"10.1109\/ICPC.2010.20"},{"key":"9473_CR72","doi-asserted-by":"crossref","unstructured":"Panichella A, Dit B, Oliveto R, Di Penta M, Poshyvanyk D, De Lucia A (2013) How to effectively use topic models for software engineering tasks? An approach based on genetic algorithms. In: International Conference on Software Engineering, pp. 522\u2013531","DOI":"10.1109\/ICSE.2013.6606598"},{"key":"9473_CR73","doi-asserted-by":"crossref","unstructured":"Panichella1 A, Dit B, Oliveto R, Di Penta M, Poshyvanyk D, De Lucia A (2016) Parameterizing and assembling IR-based solutions for SE tasks using genetic algorithms. In: International Conference on Software Analysis, Evolution, and Reengineering, pp. 522\u2013531","DOI":"10.1109\/SANER.2016.97"},{"key":"9473_CR74","doi-asserted-by":"crossref","unstructured":"Porteous I, Newman D, Ihler A, Asuncion A, Smyth P, Welling M (2008) Fast collapsed gibbs sampling for Latent Dirichlet Allocation. In: ACM SIGKDD International Conference on Knowledge Discovery and Data Mining, pp. 569\u2013577","DOI":"10.1145\/1401890.1401960"},{"key":"9473_CR75","unstructured":"Porter F (1997) An algorithm for suffix stripping. Morgan Kaufmann Publishers Inc, pp 313\u2013316"},{"key":"9473_CR76","doi-asserted-by":"crossref","unstructured":"Potapenko A, Vorontsov K (2013) Robust PLSA Performs Better Than LDA. The MIT Press, pp 784\u2013787","DOI":"10.1007\/978-3-642-36973-5_84"},{"issue":"3","key":"9473_CR77","doi-asserted-by":"crossref","first-page":"647","DOI":"10.3758\/BRM.41.3.647","volume":"41","author":"G Recchia","year":"2009","unstructured":"Recchia G, Jones M (2009) More data trumps smarter algorithms: Comparing Pointwise Mutual Information with Latent Semantic Analysis. Behav Res Methods 41 (3):647\u2013656","journal-title":"Behav Res Methods"},{"issue":"11","key":"9473_CR78","doi-asserted-by":"crossref","first-page":"613","DOI":"10.1145\/361219.361220","volume":"18","author":"G Salton","year":"1975","unstructured":"Salton G, Wong A, Yang C (1975) A vector space model for automatic indexing. Commun ACM 18(11):613\u2013620","journal-title":"Commun ACM"},{"key":"9473_CR79","unstructured":"Savage T, Dit B, Gethers M, Poshyvanyk D (2010) TopicXP: Exploring topics in source code using Latent Dirichlet Allocation. In: IEEE International Conference on Software Maintenance, pp. 1\u20136"},{"issue":"1","key":"9473_CR80","doi-asserted-by":"crossref","first-page":"27","DOI":"10.1016\/j.cosrev.2007.05.001","volume":"1","author":"S Schaeffer","year":"2007","unstructured":"Schaeffer S (2007) Graph clustering. Computer Science Review 1(1):27\u201364","journal-title":"Computer Science Review"},{"key":"9473_CR81","doi-asserted-by":"crossref","unstructured":"Slonim N, Tishby N (2000) Document clustering using word clusters via the information bottleneck method. In: International ACM SIGIR Conference on Research and Development in Information Retrieval, pp. 208\u2013215","DOI":"10.1145\/345508.345578"},{"key":"9473_CR82","unstructured":"Sontag D, Roy D (2011) Complexity of inference in latent dirichlet allocation. In: Shawe-Taylor J., Zemel R.S., Bartlett P.L., Pereira F., Weinberger K.Q. (eds) Advances in Neural Information Processing Systems 24, pp. 1008\u20131016. Curran Associates, Inc"},{"key":"9473_CR83","doi-asserted-by":"crossref","unstructured":"Sridhara G, Hill E, Pollock L, Vijay-Shanker K (2008) Identifying word relations in software: A comparative study of semantic similarity tools. In: International Conference on Program Comprehension, pp. 123\u2013132","DOI":"10.1109\/ICPC.2008.18"},{"key":"9473_CR84","unstructured":"Stevens K, Kegelmeyer P, Andrzejewski D, Buttler D (2012) Exploring topic coherence over many models and many topics. In: Joint Conference on Empirical Methods in Natural Language Processing and Computational Natural Language Learning, pp. 952\u2013961"},{"key":"9473_CR85","unstructured":"Steyvers M, Griffiths T (2007) Probabilistic topic models. Psychology Press, pp 427\u2013448"},{"key":"9473_CR86","doi-asserted-by":"crossref","unstructured":"Teh Y, Newman D, Welling M (2007) A collapsed variational bayesian inference algorithm for Latent Dirichlet Allocation. In: Advances in Neural Information Processing Systems 19","DOI":"10.21236\/ADA629956"},{"key":"9473_CR87","doi-asserted-by":"crossref","unstructured":"Than K, Ho TB (2012) Fully sparse topic models. In: European Conference on Machine Learning and Knowledge Discovery in Databases, pp. 490\u2013505","DOI":"10.1007\/978-3-642-33460-3_37"},{"key":"9473_CR88","doi-asserted-by":"crossref","unstructured":"Thomas S (2011) Mining software repositories using topic models. In: International Conference on Software Engineering, pp. 1138\u20131139","DOI":"10.1145\/1985793.1986020"},{"key":"9473_CR89","doi-asserted-by":"crossref","unstructured":"Thomas S, Adams B, Hassan A, Blostein D (2010) Validating the use of topic models for software evolution. In: IEEE Working Conference on Source Code Analysis and Manipulation, pp. 55\u201364","DOI":"10.1109\/SCAM.2010.13"},{"issue":"1","key":"9473_CR90","doi-asserted-by":"crossref","first-page":"182","DOI":"10.1007\/s10664-012-9219-7","volume":"19","author":"S Thomas","year":"2014","unstructured":"Thomas S, Hemmati H, Hassan A, Blostein D (2014) Static test case prioritization using topic models. Empir Softw Eng 19(1):182\u2013212","journal-title":"Empir Softw Eng"},{"key":"9473_CR91","doi-asserted-by":"crossref","unstructured":"Tian Y, Lo D, Lawall J (2014) Automated construction of a software-specific word similarity database. In: IEEE Conference on Software Maintenance, Reengineering and Reverse Engineering, pp. 44\u201353","DOI":"10.1109\/CSMR-WCRE.2014.6747213"},{"key":"9473_CR92","doi-asserted-by":"crossref","unstructured":"Turney P (2001) Mining the web for synonyms: PMI-IR versus LSA on TOEFL. In: European Conference on Machine Learning, pp. 491\u2013502","DOI":"10.1007\/3-540-44795-4_42"},{"key":"9473_CR93","doi-asserted-by":"crossref","unstructured":"Tzerpos V, Holt R (2000) ACDC: An algorithm for comprehension-driven clustering. In: Working Conference on Reverse Engineering, pp. 258\u2013267","DOI":"10.1109\/WCRE.2000.891477"},{"key":"9473_CR94","unstructured":"Wallach H, Mimno D, McCallum A (2009) Rethinking LDA: Why priors matter. In: Bengio Y., Schuurmans D., Lafferty J., Williams C., Culotta A. (eds) Advances in Neural Information Processing Systems 22, pp. 1973\u20131981. Curran Associates, Inc"},{"key":"9473_CR95","doi-asserted-by":"crossref","unstructured":"Wallach H, Murray I, Salakhutdinov R, Mimno D (2009) Evaluation methods for topic models. In: International Conference on Machine Learning, pp. 1105\u20131112","DOI":"10.1145\/1553374.1553515"},{"issue":"6","key":"9473_CR96","doi-asserted-by":"crossref","first-page":"1856","DOI":"10.1007\/s10664-013-9264-x","volume":"19","author":"J Yang","year":"2014","unstructured":"Yang J, Tan L (2014) Swordnet: Inferring semantically related words from software context. Empirical Software Engingeering 19(6):1856\u20131886","journal-title":"Empirical Software Engingeering"},{"key":"9473_CR97","doi-asserted-by":"crossref","unstructured":"Zhao Y, Karypis G (2002) Evaluation of hierarchical clustering algorithms for document datasets. In: International Conference on Information and Knowledge Management, pp. 515\u2013524","DOI":"10.1145\/584792.584877"}],"container-title":["Empirical Software Engineering"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s10664-016-9473-1\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10664-016-9473-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10664-016-9473-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,9,15]],"date-time":"2019-09-15T16:51:55Z","timestamp":1568566315000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s10664-016-9473-1"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2016,11,22]]},"references-count":97,"journal-issue":{"issue":"4","published-print":{"date-parts":[[2017,8]]}},"alternative-id":["9473"],"URL":"https:\/\/doi.org\/10.1007\/s10664-016-9473-1","relation":{},"ISSN":["1382-3256","1573-7616"],"issn-type":[{"type":"print","value":"1382-3256"},{"type":"electronic","value":"1573-7616"}],"subject":[],"published":{"date-parts":[[2016,11,22]]}}}