{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,28]],"date-time":"2026-01-28T19:52:20Z","timestamp":1769629940229,"version":"3.49.0"},"reference-count":105,"publisher":"Springer Science and Business Media LLC","issue":"4","license":[{"start":{"date-parts":[[2019,3,5]],"date-time":"2019-03-05T00:00:00Z","timestamp":1551744000000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Empir Software Eng"],"published-print":{"date-parts":[[2019,8]]},"DOI":"10.1007\/s10664-019-09697-7","type":"journal-article","created":{"date-parts":[[2019,3,5]],"date-time":"2019-03-05T10:54:45Z","timestamp":1551783285000},"page":"2236-2284","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":56,"title":["Siamese: scalable and incremental code clone search via multiple code representations"],"prefix":"10.1007","volume":"24","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-6502-1107","authenticated-orcid":false,"given":"Chaiyong","family":"Ragkhitwetsagul","sequence":"first","affiliation":[]},{"given":"Jens","family":"Krinke","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2019,3,5]]},"reference":[{"key":"9697_CR1","doi-asserted-by":"publisher","first-page":"148","DOI":"10.1016\/j.infsof.2017.04.005","volume":"88","author":"R Abdalkareem","year":"2017","unstructured":"Abdalkareem R, Shihab E, Rilling J (2017) On code reuse from StackOverflow: an exploratory study on android apps. Inf Softw Technol 88:148\u2013158","journal-title":"Inf Softw Technol"},{"key":"9697_CR2","doi-asserted-by":"crossref","unstructured":"Acar Y, Backes M, Fahl S, Kim D, Mazurek ML, Stransky C (2016) You get where you\u2019re looking for: the impact of information sources on code security. In: SP \u201916, pp 289\u2013305","DOI":"10.1109\/SP.2016.25"},{"key":"9697_CR3","doi-asserted-by":"crossref","unstructured":"An L, Mlouki O, Khomh F, Antoniol G (2017) Stack Overflow: a code laundering platform?. In: SANER \u201917, pp 283\u2013293","DOI":"10.1109\/SANER.2017.7884629"},{"key":"9697_CR4","unstructured":"Aragon Consulting Group Inc (2018) Krugle. http:\/\/krugle.com , Online; Access 23-April-2018"},{"key":"9697_CR5","doi-asserted-by":"crossref","unstructured":"Aversano L, Cerulo L, Di Penta M (2007) How clones are maintained: an empirical study. In: Proceedings of the 11th European conference on software maintenance and reengineering (CSMR \u201907), IEEE, Los Alamitos, California, USA, pp 81\u201390","DOI":"10.1109\/CSMR.2007.26"},{"key":"9697_CR6","doi-asserted-by":"crossref","unstructured":"Bajracharya SK, Ossher J, Lopes CV (2010) Leveraging usage similarity for effective retrieval of examples in code repositories. In: Proceedings of the 18th ACM SIGSOFT international symposium on foundations of software engineering (FSE \u201910), p 157","DOI":"10.1145\/1882291.1882316"},{"key":"9697_CR7","doi-asserted-by":"crossref","unstructured":"Balasubramanian N, Kumaran G, Carvalho VR (2010) Exploring reductions for long web queries. In: SIGIR \u201910, p 571","DOI":"10.1145\/1835449.1835545"},{"key":"9697_CR8","unstructured":"Baltes S, Diehl S (2018) Usage and attribution of Stack Overflow code snippets in GitHub projects. Empir Softw Eng:1\u201337"},{"key":"9697_CR9","doi-asserted-by":"crossref","unstructured":"Bauer V, Volke T, Eder S (2016) Combining clone detection and latent semantic indexing to detect re-implementations. In: Proceedings of the IEEE 23rd international conference on software analysis, evolution, and reengineering (SANER \u201916), pp 23\u201329","DOI":"10.1109\/SANER.2016.26"},{"key":"9697_CR10","doi-asserted-by":"crossref","unstructured":"Baxter I, Yahin A, Moura L, Sant\u2019Anna M, Bier L (1998) Clone detection using abstract syntax trees. In: ICSM \u201998, vol 98, pp 368\u2013377","DOI":"10.1109\/ICSM.1998.738528"},{"key":"9697_CR11","doi-asserted-by":"crossref","unstructured":"Beckman NE, Kim D, Aldrich J (2011) An empirical study of object protocols in the wild. In: ECOOP \u201911, pp 2\u201326","DOI":"10.1007\/978-3-642-22655-7_2"},{"issue":"9","key":"9697_CR12","first-page":"577","volume":"33","author":"S Bellon","year":"2007","unstructured":"Bellon S, Koschke R, Antoniol G, Krinke J, Merlo E (2007) Comparison and evaluation of clone detection tools. TSE 33(9):577\u2013591","journal-title":"TSE"},{"key":"9697_CR13","doi-asserted-by":"crossref","unstructured":"Bendersky M, Croft WB (2008) Discovering key concepts in verbose queries. In: SIGIR\u201908, p 491","DOI":"10.1145\/1390334.1390419"},{"key":"9697_CR14","unstructured":"BlackDuck (2016) OpenHub. http:\/\/code.openhub.net , online; access 18-May-2016"},{"key":"9697_CR15","unstructured":"Boyter B (2018) Searchcode. https:\/\/searchcode.com, online; access 23-April-2018"},{"issue":"2","key":"9697_CR16","first-page":"151","volume":"37","author":"S Burrows","year":"2007","unstructured":"Burrows S, Tahaghoghi SMM, Zobel J (2007) Efficient plagiarism detection for large code repositories. Software: Practice and Experience 37(2):151\u2013175","journal-title":"Software: Practice and Experience"},{"issue":"4","key":"9697_CR17","doi-asserted-by":"publisher","first-page":"1476","DOI":"10.1007\/s10664-015-9394-4","volume":"21","author":"D Chatterji","year":"2016","unstructured":"Chatterji D, Carver JC, Kraft NA (2016) Code clones and developer behavior: results of two surveys of the clone research community. Empir Softw Eng 21(4):1476\u20131508","journal-title":"Empir Softw Eng"},{"key":"9697_CR18","volume-title":"Encyclopedia of Database Systems","author":"N Craswell","year":"2009","unstructured":"Craswell N (2009) Encyclopedia of Database Systems. Springer, Berlin"},{"key":"9697_CR19","first-page":"3","volume":"1","author":"N Davey","year":"1995","unstructured":"Davey N, Barson P, Field S, Frank R, Tansley D (1995) The development of a software clone detector. Int J Appl Softw Technol 1:3\u20134","journal-title":"Int J Appl Softw Technol"},{"key":"9697_CR20","unstructured":"Elasticsearch BV (2012) Lucene\u2019s practical scoring function. https:\/\/www.elastic.co\/guide\/en\/elasticsearch\/guide\/current\/practical-scoring-function.html , online; access 20-March-2017"},{"key":"9697_CR21","unstructured":"Elasticsearch BV (2016) Elasticsearch. https:\/\/www.elastic.co\/products\/elasticsearch , online; access 25-Jun-2016"},{"key":"9697_CR22","unstructured":"Flores E, Rosso P, Moreno L, Villatoro-Tello E (2014) Detection of source code re-use. http:\/\/users.dsic.upv.es\/grupos\/nle\/soco\/ , accessed: 2016-02-14"},{"key":"9697_CR23","volume-title":"Refactoring: improving the design of existing code","author":"M Fowler","year":"1999","unstructured":"Fowler M (1999) Refactoring: improving the design of existing code. Addison-Wesley, Boston"},{"key":"9697_CR24","doi-asserted-by":"crossref","unstructured":"Gallardo-Valencia RE, Sim SE (2009) Internet-scale code search. SUITE \u201909, pp 49\u201352","DOI":"10.1109\/SUITE.2009.5070022"},{"key":"9697_CR25","doi-asserted-by":"crossref","unstructured":"German DM, Manabe Y, Inoue K (2010) A sentence-matching method for automatic license identification of source code files. In: ASE \u201910, p 437","DOI":"10.1145\/1858996.1859088"},{"key":"9697_CR26","doi-asserted-by":"crossref","unstructured":"G\u00f6de N, Koschke R (2009) Incremental clone detection. In: CSMR\u201909, pp 219\u2013228","DOI":"10.1109\/CSMR.2009.20"},{"key":"9697_CR27","doi-asserted-by":"crossref","unstructured":"Grechanik M, Fu C, Xie Q, McMillan C, Poshyvanyk D, Cumby C (2010) A search engine for finding highly relevant applications. In: ICSE \u201910, pp 475\u2013484","DOI":"10.1145\/1806799.1806868"},{"key":"9697_CR28","doi-asserted-by":"crossref","unstructured":"Gu X, Zhang H, Kim S (2018) Deep code search. In: Proceedings of the 40th international conference on software engineering (ICSE \u201918), pp 933\u2013944","DOI":"10.1145\/3180155.3180167"},{"key":"9697_CR29","unstructured":"Harris S (2015) Simian \u2013 similarity analyser, version 2.4. http:\/\/www.harukizaemon.com\/simian\/ , accessed: 2016-02-14"},{"key":"9697_CR30","doi-asserted-by":"crossref","unstructured":"Hummel B, Juergens E, Heinemann L, Conradt M (2010) Index-based code clone detection: incremental, distributed, scalable. In: ICSM\u201910, pp 1\u20139","DOI":"10.1109\/ICSM.2010.5609665"},{"key":"9697_CR31","doi-asserted-by":"crossref","unstructured":"Inoue K, Sasaki Y, Xia P, Manabe Y (2012) Where does this code come from and where does it go? \u2014 integrated code history tracker for open source systems. In: ICSE \u201912, pp 331\u2013341","DOI":"10.1109\/ICSE.2012.6227181"},{"key":"9697_CR32","doi-asserted-by":"crossref","unstructured":"Ishio T, Sakaguchi Y, Ito K, Inoue K (2017) Source file set search for clone-and-own reuse analysis. In: Proceedings of the IEEE\/ACM 14th international conference on mining software repositories (MSR \u201917), pp 257\u2013268","DOI":"10.1109\/MSR.2017.19"},{"key":"9697_CR33","doi-asserted-by":"crossref","unstructured":"Jiang L, Misherghi G, Su Z, Glondu S (2007) DECKARD: scalable And accurate tree-based detection of code clones. In: ICSE\u201907. IEEE, Minneapolis, pp 96\u2013105","DOI":"10.1109\/ICSE.2007.30"},{"key":"9697_CR34","unstructured":"Juergens E, Deissenboeck F, Hummel B (2011) Code similarities beyond copy & paste. In: Proceedings of the 15th European conference on software maintenance and reengineering (CSMR \u201911), IEEE, pp 78\u2013 87"},{"issue":"7","key":"9697_CR35","first-page":"654","volume":"28","author":"T Kamiya","year":"2002","unstructured":"Kamiya T, Kusumoto S, Inoue K (2002) CCFinder: a multilinguistic token-based code clone detection system for large scale source code. TSE 28(7):654\u2013670","journal-title":"TSE"},{"key":"9697_CR36","doi-asserted-by":"crossref","unstructured":"Kapser C, Godfrey MW (2006) Cloning considered harmful considered harmful. In: Proceedings of the 13th Working Conference on Reverse Engineering (WCRE \u201906), Benevento, taly, pp 19\u201328","DOI":"10.1109\/WCRE.2006.1"},{"key":"9697_CR37","doi-asserted-by":"crossref","unstructured":"Kawaguchi S, Yamashina T, Uwano H, Fushida K, Kamei Y, Nagura M, Iida H (2009) SHINOBI: a tool for automatic code clone detection in the IDE. In: WCRE \u201909, pp 313\u2013314","DOI":"10.1109\/WCRE.2009.36"},{"key":"9697_CR38","doi-asserted-by":"crossref","unstructured":"Ke Y, Stolee KT, Goues CL, Brun Y (2015) Repairing programs with semantic code search. In: ASE\u201915, pp 295\u2013306","DOI":"10.1109\/ASE.2015.60"},{"key":"9697_CR39","doi-asserted-by":"crossref","unstructured":"Keivanloo I, Rilling J, Charland P (2011a) Internet-scale real-time code clone search via multi-level indexing. In: WCRE \u201911, pp 23\u201327","DOI":"10.1109\/WCRE.2011.13"},{"key":"9697_CR40","doi-asserted-by":"crossref","unstructured":"Keivanloo I, Rilling J, Charland P (2011b) SeClone \u2013 a hybrid approach to internet-scale real-time code clone search. In: ICPC \u201911, pp 223\u2013224","DOI":"10.1109\/ICPC.2011.29"},{"key":"9697_CR41","doi-asserted-by":"crossref","unstructured":"Keivanloo I, Forbes C, Rilling J (2012) Similarity search plug-in: Clone detection meets internet-scale code search. In: SUITE \u201912, pp 21\u201322","DOI":"10.1109\/SUITE.2012.6225474"},{"key":"9697_CR42","doi-asserted-by":"crossref","unstructured":"Keivanloo I, Rilling J, Zou Y (2014) Spotting working code examples. In: ICSE \u201914, pp 664\u2013675","DOI":"10.1145\/2568225.2568292"},{"key":"9697_CR43","unstructured":"Kim K, Kim D, Bissyand\u00e9 TF, Choi E, Li L, Klein J, Traon YL (2018) FaCoY \u2013 a code-to-code search engine. In: ICSE\u201918"},{"issue":"2","key":"9697_CR44","first-page":"105","volume":"1","author":"DE Knuth","year":"1971","unstructured":"Knuth DE (1971) An empirical study of fortran programs. Software: Practice and Experience 1(2):105\u2013 133","journal-title":"Software: Practice and Experience"},{"issue":"8","key":"9697_CR45","first-page":"747","volume":"26","author":"R Koschke","year":"2014","unstructured":"Koschke R (2014) Large-scale inter-system clone detection using suffix trees and hashing. Journal of Software: Evolution and Process 26(8):747\u2013769","journal-title":"Journal of Software: Evolution and Process"},{"key":"9697_CR46","doi-asserted-by":"crossref","unstructured":"Koschke R, Falke R, Frenzel P (2006) Clone detection using abstract syntax suffix trees. In: WCRE \u201906, pp 253\u2013262","DOI":"10.1109\/WCRE.2006.18"},{"key":"9697_CR47","doi-asserted-by":"crossref","unstructured":"Krinke J (2001) Identifying similar code with program dependence graphs. In: WCRE \u201901","DOI":"10.1109\/WCRE.2001.957835"},{"key":"9697_CR48","unstructured":"Kumaran G, Allan J (2007) A case for shorter queries, and helping users create them. In: NAACL-HLT \u201907, pp 220\u2013227"},{"key":"9697_CR49","doi-asserted-by":"crossref","unstructured":"Kumaran G, Carvalho VR (2009) Reducing long queries using query quality predictors. In: SIGIR\u201909 , p 564","DOI":"10.1145\/1571941.1572038"},{"key":"9697_CR50","doi-asserted-by":"crossref","unstructured":"Lavoie T, Eilers-Smith M, Merlo E (2010) Challenging cloning related problems with gpu-based algorithms. In: Proceedings of the 4th international workshop on software clones (IWSC \u201910), ACM, Cape Town, South Africa, pp 25\u201332","DOI":"10.1145\/1808901.1808905"},{"key":"9697_CR51","doi-asserted-by":"crossref","unstructured":"Lee MW, Roh JW, Hwang SW, Kim S (2010) Instant code clone search. In: FSE \u201910, p 167","DOI":"10.1145\/1882291.1882317"},{"key":"9697_CR52","doi-asserted-by":"crossref","unstructured":"Li L, Feng H, Zhuang W, Meng N, Ryder B (2017) CCLearner: a deep learning-based clone detection approach. In: ICSME\u201917, pp 249\u2013260","DOI":"10.1109\/ICSME.2017.46"},{"key":"9697_CR53","doi-asserted-by":"crossref","unstructured":"Linstead E, Bajracharya S, Ngo T, Rigor P, Lopes C, Baldi P (2009) Sourcerer: mining and searching internet-scale software repositories, vol 18, pp 300\u2013336","DOI":"10.1007\/s10618-008-0118-x"},{"key":"9697_CR54","unstructured":"Livieri S, German DM, Inoue K (2010) A needle in the stack: Efficient clone detection for huge collections of source code. Tech. rep., OSaka University"},{"key":"9697_CR55","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3133908","volume":"1","author":"CV Lopes","year":"2017","unstructured":"Lopes CV, Maj P, Martins P, Saini V, Yang D, Zitny J, Sajnani H, Vitek J (2017) D\u00e9j\u00e0Vu: a map of code duplicates on GitHub. Proceedings of the ACM on Programming Languages (OOPSLA) 1:1\u201328","journal-title":"Proceedings of the ACM on Programming Languages (OOPSLA)"},{"key":"9697_CR56","volume-title":"An introduction to information retrieval, vol 21","author":"CD Manning","year":"2009","unstructured":"Manning CD, Raghavan P, Schutze H (2009) An introduction to information retrieval, vol 21. Cambridge University Press, Cambridge"},{"key":"9697_CR57","doi-asserted-by":"crossref","unstructured":"Martie L, Hoek AVD, Kwak T (2017) Understanding the impact of support for iteration on code search. In: ESEC\/FSE \u201917, pp 774\u2013785","DOI":"10.1145\/3106237.3106293"},{"key":"9697_CR58","doi-asserted-by":"crossref","unstructured":"McMillan C, Grechanik M, Poshyvanyk D, Xie Q, Fu C (2011) Portfolio: finding relevant functions and their usages. In: ICSE \u201911, p 111","DOI":"10.1145\/1985793.1985809"},{"issue":"2","key":"9697_CR59","doi-asserted-by":"publisher","first-page":"81","DOI":"10.1037\/h0043158","volume":"63","author":"GA Miller","year":"1956","unstructured":"Miller GA (1956) The magical number seven, plus or minus two: Some limits on our capacity for processing information. Psychol Rev 63(2):81\u201397","journal-title":"Psychol Rev"},{"key":"9697_CR60","doi-asserted-by":"crossref","unstructured":"Myles G, Collberg C (2005) K-gram based software birthmarks. In: SAC \u201905, p 314","DOI":"10.1145\/1066677.1066753"},{"key":"9697_CR61","doi-asserted-by":"crossref","unstructured":"Nasehi SM, Sillito J, Maurer F, Burns C (2012) What makes a good code example?: a study of programming Q&A in StackOverflow. In: ICSM\u201912, pp 25\u201334","DOI":"10.1109\/ICSM.2012.6405249"},{"key":"9697_CR62","doi-asserted-by":"crossref","unstructured":"Nguyen TT, Nguyen HA, Al-Kofahi JM, Pham NH, Nguyen TN (2009) Scalable and incremental clone detection for evolving software. In: ICSM \u201909, pp 491\u2013494","DOI":"10.1109\/ICSM.2009.5306283"},{"key":"9697_CR63","doi-asserted-by":"publisher","first-page":"130","DOI":"10.1016\/j.jss.2017.11.039","volume":"137","author":"MA Nishi","year":"2018","unstructured":"Nishi MA, Damevski K (2018) Scalable code clone detection and search based on adaptive prefix filtering. J Syst Softw 137:130\u2013142","journal-title":"J Syst Softw"},{"issue":"1","key":"9697_CR64","doi-asserted-by":"publisher","first-page":"259","DOI":"10.1007\/s10664-015-9421-5","volume":"22","author":"H Niu","year":"2017","unstructured":"Niu H, Keivanloo I, Zou Y (2017) Learning to rank code examples for code search engines. Empir Softw Eng 22(1):259\u2013291","journal-title":"Empir Softw Eng"},{"issue":"2","key":"9697_CR65","doi-asserted-by":"publisher","first-page":"445","DOI":"10.1007\/s10115-014-0742-2","volume":"43","author":"T Ohmann","year":"2014","unstructured":"Ohmann T, Rahal I (2014) Efficient clustering-based source code plagiarism detection using PIY. Knowl Inf Syst 43(2):445\u2013472","journal-title":"Knowl Inf Syst"},{"key":"9697_CR66","doi-asserted-by":"crossref","unstructured":"Omar C, Yoon YS, LaToza TD, Myers BA (2012) Active code completion. In: ICSE \u201912, pp 859\u2013869","DOI":"10.1109\/ICSE.2012.6227133"},{"issue":"3","key":"9697_CR67","doi-asserted-by":"publisher","first-page":"727","DOI":"10.1007\/s10115-013-0677-z","volume":"41","author":"JW Park","year":"2014","unstructured":"Park JW, Lee MW, Roh JW, Hwang SW, Kim S (2014) Surfacing code in the dark: an instant clone search approach. Knowl Inf Syst 41(3):727\u2013759","journal-title":"Knowl Inf Syst"},{"key":"9697_CR68","unstructured":"Parr T, Harwell S, Kochurkin I (2017) Grammars written for ANTLR v4. https:\/\/github.com\/antlr\/grammars-v4 , accessed: 2017-11-21"},{"key":"9697_CR69","doi-asserted-by":"crossref","unstructured":"Ponzanelli L, Bacchelli A, Lanza M (2013) Seahawk: Stack Overflow in the IDE. In: ICSE \u201913, pp 1295\u2013 1298","DOI":"10.1109\/ICSE.2013.6606701"},{"key":"9697_CR70","doi-asserted-by":"crossref","unstructured":"Ponzanelli L, Bavota G, Di Penta M, Oliveto R, Lanza M (2014) Mining StackOverflow to turn the IDE into a self-confident programming prompter. In: MSR \u201914, pp 102\u2013111","DOI":"10.1145\/2597073.2597077"},{"issue":"11","key":"9697_CR71","first-page":"1016","volume":"8","author":"L Prechelt","year":"2002","unstructured":"Prechelt L, Malpohl G, Philippsen M (2002) Finding plagiarisms among a set of programs with JPlag. J Univ Comput Sci 8(11):1016\u20131038","journal-title":"J Univ Comput Sci"},{"issue":"4","key":"9697_CR72","doi-asserted-by":"publisher","first-page":"2464","DOI":"10.1007\/s10664-017-9564-7","volume":"23","author":"C Ragkhitwetsagul","year":"2018","unstructured":"Ragkhitwetsagul C, Krinke J, Clark D (2018) A comparison of code similarity analysers. Empir Softw Eng 23(4):2464\u20132519","journal-title":"Empir Softw Eng"},{"key":"9697_CR73","doi-asserted-by":"crossref","unstructured":"Ragkhitwetsagul C, Krinke J, Paixao M, Bianco G, Oliveto R (2019) Toxic code snippets on Stack Overflow. Transactions on Software Engineering (Early Access)","DOI":"10.1109\/TSE.2019.2900307"},{"key":"9697_CR74","doi-asserted-by":"publisher","DOI":"10.1017\/CBO9781139058452","volume-title":"Mining of massive datasets, vol 67","author":"A Rajaraman","year":"2011","unstructured":"Rajaraman A, Ullman JD (2011) Mining of massive datasets, vol 67. Cambridge University Press, Cambridge"},{"key":"9697_CR75","unstructured":"Rilling J, Keivanloo I, Forbes C, Erfani M (2018) IJaDataset 2.0. https:\/\/sites.google.com\/site\/asegsecold\/projects\/seclone , online; access 13-March-2018"},{"issue":"4","key":"9697_CR76","doi-asserted-by":"publisher","first-page":"359","DOI":"10.1108\/eb026866","volume":"46","author":"S Robertson","year":"1990","unstructured":"Robertson S (1990) On term selection for query expansion. J Doc 46(4):359\u2013364","journal-title":"J Doc"},{"key":"9697_CR77","unstructured":"Roy CK, Cordy JR (2008) NICAD: accurate detection of near-miss intentional clones using flexible pretty-printing and code normalization. In: ICPC \u201908, pp 172\u2013181"},{"issue":"12","key":"9697_CR78","first-page":"165","volume":"26","author":"CK Roy","year":"2009","unstructured":"Roy CK, Cordy JR (2009) Near-miss function clones in open source software: an empirical study. J Softw Maint Evol Res Pract 26(12):165\u2013189","journal-title":"J Softw Maint Evol Res Pract"},{"issue":"7","key":"9697_CR79","doi-asserted-by":"publisher","first-page":"470","DOI":"10.1016\/j.scico.2009.02.007","volume":"74","author":"CK Roy","year":"2009","unstructured":"Roy CK, Cordy JR, Koschke R (2009) Comparison and evaluation of code clone detection techniques and tools: a qualitative approach. Sci Comput Program 74 (7):470\u2013495","journal-title":"Sci Comput Program"},{"key":"9697_CR80","doi-asserted-by":"crossref","unstructured":"Sadowski C, Stolee KT, Elbaum S (2015) How developers search for code: a case study. In: ESEC\/FSE \u201915, pp 191\u2013201","DOI":"10.1145\/2786805.2786855"},{"key":"9697_CR81","doi-asserted-by":"crossref","unstructured":"Saini V, Farmahinifarahani F, Lu Y, Baldi P, Lopes C (2018) Oreo: detection of clones in the twilight zone. In: The 26th ACM joint European software engineering conference and symposium on the foundations of software engineering (ESEC\/FSE \u201918)","DOI":"10.1145\/3236024.3236026"},{"key":"9697_CR82","doi-asserted-by":"crossref","unstructured":"Sajnani H, Saini V, Svajlenko J, Roy CK, Lopes CV (2016) SourcererCC: scaling code clone detection to big-code. In: ICSE\u201916, pp 1157\u20131168","DOI":"10.1145\/2884781.2884877"},{"issue":"11","key":"9697_CR83","doi-asserted-by":"publisher","first-page":"613","DOI":"10.1145\/361219.361220","volume":"18","author":"G Salton","year":"1975","unstructured":"Salton G, Wong A, Yang CS (1975) A vector space model for automatic indexing. Commun ACM 18(11):613\u2013620","journal-title":"Commun ACM"},{"key":"9697_CR84","doi-asserted-by":"crossref","unstructured":"Schleimer S, Wilkerson DS, Aiken A (2003) Winnowing: local algorithms for document fingerprinting. In: SIGMOD \u201903, ACM, p 76","DOI":"10.1145\/872757.872770"},{"key":"9697_CR85","doi-asserted-by":"crossref","unstructured":"Sim SE, Gallardo-Valencia RE (2013) Finding source code on the web for remix and reuse. Springer, Berlin","DOI":"10.1007\/978-1-4614-6596-6"},{"issue":"1","key":"9697_CR86","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/2063239.2063243","volume":"21","author":"SE Sim","year":"2011","unstructured":"Sim SE, Umarji M, Ratanotayanon S, Lopes CV (2011) How well do search engines support code retrieval on the web? ACM Trans Softw Eng Methodol 21(1):1\u201325","journal-title":"ACM Trans Softw Eng Methodol"},{"issue":"2","key":"9697_CR87","doi-asserted-by":"publisher","first-page":"128","DOI":"10.1109\/MSP.2007.914237","volume":"25","author":"M Slaney","year":"2008","unstructured":"Slaney M, Casey M (2008) Locality-sensitive hashing for finding nearest neighbors. IEEE Signal Proc Mag 25(2):128\u2013131","journal-title":"IEEE Signal Proc Mag"},{"key":"9697_CR88","doi-asserted-by":"crossref","unstructured":"Smucker MD, Allan J, Carterette B (2007) A comparison of statistical significance tests for information retrieval evaluation. In: CIKM \u201907, p 623","DOI":"10.1145\/1321440.1321528"},{"key":"9697_CR89","doi-asserted-by":"crossref","unstructured":"Svajlenko J, Roy CK (2014) Evaluating modern clone detection tools. In: Proceedings of the 30th international conference on software maintenance and evolution (ICSME \u201914), IEEE, pp 321\u2013 330","DOI":"10.1109\/ICSME.2014.54"},{"key":"9697_CR90","doi-asserted-by":"crossref","unstructured":"Svajlenko J, Roy CK (2015) Evaluating clone detection tools with BigCloneBench. In: ICSME\u201915, pp 131\u2013140","DOI":"10.1109\/ICSM.2015.7332459"},{"key":"9697_CR91","doi-asserted-by":"crossref","unstructured":"Svajlenko J, Roy CK (2016) BigCloneEval: a clone detection tool evaluation framework with BigCloneBench. In: Proceedings of the international conference on software maintenance and evolution (ICSME \u201916), vol 1, pp 596\u2013600","DOI":"10.1109\/ICSME.2016.62"},{"key":"9697_CR92","doi-asserted-by":"crossref","unstructured":"Svajlenko J, Roy CK (2017) Fast and flexible large-scale clone detection with CloneWorks. In: Proceedings of the IEEE\/ACM 39th international conference on software engineering companion (ICSE-C \u201917), pp 27\u201330","DOI":"10.1109\/ICSE-C.2017.3"},{"key":"9697_CR93","doi-asserted-by":"crossref","unstructured":"Svajlenko J, Islam JF, Keivanloo I, Roy CK, Mia MM (2014) Towards a big data curated benchmark of inter-project code clones. In: ICSME\u201914, pp 476\u2013480","DOI":"10.1109\/ICSME.2014.77"},{"key":"9697_CR94","doi-asserted-by":"crossref","unstructured":"Tamersoy A, Roundy K, Chau DH (2014) Guilt by association: Large scale malware detection by mining. In: Proceedings of the 20th ACM SIGKDD international conference on knowledge discovery and data mining (KDD \u201914). ACM, New York, pp 1524\u20131533","DOI":"10.1145\/2623330.2623342"},{"key":"9697_CR95","doi-asserted-by":"crossref","unstructured":"Taube-Schock C, Walker RJ, Witten IH (2011) Can we avoid high coupling?. In: ECOOP \u201911, pp 204\u2013 228","DOI":"10.1007\/978-3-642-22655-7_10"},{"key":"9697_CR96","doi-asserted-by":"crossref","unstructured":"Tempero E, Anslow C, Dietrich J, Han T, Li J, Lumpe M, Melton H, Noble J (2010) Qualitas corpus: a curated collection of Java code for empirical studies. In: APSEC \u201910, pp 336\u2013345","DOI":"10.1109\/APSEC.2010.46"},{"key":"9697_CR97","unstructured":"van Bruggen D (2017) JavaParser \u2013 process Java code programmatically. http:\/\/javaparser.org , accessed: 2017-11-21"},{"issue":"2 (Summer, 2000","key":"9697_CR98","first-page":"101","volume":"25","author":"A Vargha","year":"2000","unstructured":"Vargha A, Delaney HD (2000) A critique and improvement of the CL common language effect size statistics of McGraw and Wong. J Educ Behav Stat 25(2 (Summer, 2000)):101\u2013132","journal-title":"J Educ Behav Stat"},{"key":"9697_CR99","doi-asserted-by":"crossref","unstructured":"Vasilescu B, Serebrenik A, van den Brand M (2011) You can\u2019t control the unfamiliar: a study on the relations between aggregation techniques for software metrics. In: ICSM \u201911, pp 313\u2013322","DOI":"10.1109\/ICSM.2011.6080798"},{"key":"9697_CR100","doi-asserted-by":"crossref","unstructured":"Wang T, Harman M, Jia Y, Krinke J (2013) Searching for better configurations: a rigorous approach to clone evaluation. In: ESEC\/FSE \u201913, pp 455\u2013465","DOI":"10.1145\/2491411.2491420"},{"key":"9697_CR101","doi-asserted-by":"crossref","unstructured":"White M, Tufano M, Vendome C, Poshyvanyk D (2016) Deep learning code fragments for code clone detection. In: ASE \u201916, pp 87\u201398","DOI":"10.1145\/2970276.2970326"},{"key":"9697_CR102","doi-asserted-by":"crossref","unstructured":"Yang D, Martins P, Saini V, Lopes C (2017) Stack Overflow in Github: any snippets there?. In: MSR \u201917","DOI":"10.1109\/MSR.2017.13"},{"key":"9697_CR103","doi-asserted-by":"publisher","unstructured":"Zhang F, Niu H, Keivanloo I, Zou Y (2017) Expanding queries for code search using semantically related API class-names. TSE https:\/\/doi.org\/10.1109\/TSE.2017.2750682","DOI":"10.1109\/TSE.2017.2750682"},{"key":"9697_CR104","doi-asserted-by":"crossref","unstructured":"Zhang H (2008) Exploring regularity in source code: software science and Zipf\u2019s law. In: WCRE\u201908, pp 101\u2013110","DOI":"10.1109\/WCRE.2008.37"},{"key":"9697_CR105","doi-asserted-by":"publisher","DOI":"10.4159\/harvard.9780674434929","volume-title":"Selective studies and the principle of relative frequency in language","author":"GK Zipf","year":"1932","unstructured":"Zipf GK (1932) Selective studies and the principle of relative frequency in language. Harvard University Press, Cambridge"}],"container-title":["Empirical Software Engineering"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10664-019-09697-7.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s10664-019-09697-7\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10664-019-09697-7.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,9,13]],"date-time":"2022-09-13T00:42:04Z","timestamp":1663029724000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s10664-019-09697-7"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019,3,5]]},"references-count":105,"journal-issue":{"issue":"4","published-print":{"date-parts":[[2019,8]]}},"alternative-id":["9697"],"URL":"https:\/\/doi.org\/10.1007\/s10664-019-09697-7","relation":{},"ISSN":["1382-3256","1573-7616"],"issn-type":[{"value":"1382-3256","type":"print"},{"value":"1573-7616","type":"electronic"}],"subject":[],"published":{"date-parts":[[2019,3,5]]},"assertion":[{"value":"5 March 2019","order":1,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}