{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,5]],"date-time":"2026-02-05T06:37:38Z","timestamp":1770273458271,"version":"3.49.0"},"reference-count":73,"publisher":"Springer Science and Business Media LLC","issue":"2","license":[{"start":{"date-parts":[[2021,2,25]],"date-time":"2021-02-25T00:00:00Z","timestamp":1614211200000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2021,2,25]],"date-time":"2021-02-25T00:00:00Z","timestamp":1614211200000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"funder":[{"DOI":"10.13039\/100000001","name":"National Science Foundation","doi-asserted-by":"publisher","award":["1633437"],"award-info":[{"award-number":["1633437"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Empir Software Eng"],"published-print":{"date-parts":[[2021,3]]},"DOI":"10.1007\/s10664-020-09905-9","type":"journal-article","created":{"date-parts":[[2021,2,25]],"date-time":"2021-02-25T17:03:47Z","timestamp":1614272627000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":37,"title":["World of code: enabling a research workflow for mining and analyzing the universe of open source VCS data"],"prefix":"10.1007","volume":"26","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-3642-8012","authenticated-orcid":false,"given":"Yuxing","family":"Ma","sequence":"first","affiliation":[]},{"given":"Tapajit","family":"Dey","sequence":"additional","affiliation":[]},{"given":"Chris","family":"Bogart","sequence":"additional","affiliation":[]},{"given":"Sadika","family":"Amreen","sequence":"additional","affiliation":[]},{"given":"Marat","family":"Valiev","sequence":"additional","affiliation":[]},{"given":"Adam","family":"Tutko","sequence":"additional","affiliation":[]},{"given":"David","family":"Kennard","sequence":"additional","affiliation":[]},{"given":"Russell","family":"Zaretzki","sequence":"additional","affiliation":[]},{"given":"Audris","family":"Mockus","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2021,2,25]]},"reference":[{"issue":"1","key":"9905_CR1","first-page":"3","volume":"32","author":"DJ Abadi","year":"2009","unstructured":"Abadi DJ (2009) Data management in the cloud: limitations and opportunities. IEEE Data Eng Bull 32(1):3\u201312","journal-title":"IEEE Data Eng Bull"},{"key":"9905_CR2","doi-asserted-by":"crossref","unstructured":"Agrawal S, Narasayya V, Yang B (2004) Integrating vertical and horizontal partitioning into automated physical database design. In: Proceedings of the 2004 ACM SIGMOD international conference on management of data. ACM, pp 359\u2013370","DOI":"10.1145\/1007568.1007609"},{"key":"9905_CR3","unstructured":"Amreen S, Mockus A, Bogart C, Zhang Y, Zaretzki R (2019) Alfaa: active learning fingerprint based anti-aliasing for correcting developer identity errors in version control data. arXiv:1901.03363"},{"issue":"2","key":"9905_CR4","doi-asserted-by":"publisher","first-page":"1136","DOI":"10.1007\/s10664-019-09786-7","volume":"25","author":"S Amreen","year":"2020","unstructured":"Amreen S, Mockus A, Zaretzki R, Bogart C, Zhang Y (2020) ALFAA: Active Learning Fingerprint based Anti-Aliasing for correcting developer identity errors in version control systems. Empir Softw Eng 25(2):1136\u20131167","journal-title":"Empir Softw Eng"},{"key":"9905_CR5","doi-asserted-by":"publisher","first-page":"241","DOI":"10.1016\/j.scico.2012.04.008","volume":"79","author":"S Bajracharya","year":"2014","unstructured":"Bajracharya S, Ossher J, Lopes C (2014) Sourcerer: an infrastructure for large-scale collection and analysis of open-source code. Sci Comput Program 79:241\u2013259","journal-title":"Sci Comput Program"},{"issue":"5","key":"9905_CR6","doi-asserted-by":"publisher","first-page":"177","DOI":"10.1145\/1095430.1081736","volume":"30","author":"J Bevan","year":"2005","unstructured":"Bevan J, Whitehead EJ Jr, Kim S, Godfrey M (2005) Facilitating software evolution research with kenyon. ACM SIGSOFT Softw Eng Notes 30(5):177\u2013186","journal-title":"ACM SIGSOFT Softw Eng Notes"},{"key":"9905_CR7","doi-asserted-by":"publisher","unstructured":"Bird C, Gourley A, Devanbu P, Gertz M, Swaminathan A (2006) Mining email social networks. In: Proceedings of the 2006 international workshop on mining software repositories, MSR \u201906. https:\/\/doi.org\/10.1145\/1137983.1138016. ACM, New York, pp 137\u2013143","DOI":"10.1145\/1137983.1138016"},{"key":"9905_CR8","doi-asserted-by":"crossref","unstructured":"Bird C, Rigby PC, Barr ET, Hamilton DJ, German DM, Devanbu P (2009) The promises and perils of mining git. In: 6th IEEE international working conference on mining software repositories, 2009. MSR\u201909. IEEE, pp 1\u201310","DOI":"10.1109\/MSR.2009.5069475"},{"key":"9905_CR9","doi-asserted-by":"crossref","unstructured":"Budde R, Kautz K, Kuhlenkamp K, Z\u00fcllighoven H (1992) Prototyping. In: Prototyping. Springer, pp 33\u201346","DOI":"10.1007\/978-3-642-76820-0_5"},{"key":"9905_CR10","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4842-0076-6","volume-title":"Pro git","author":"S Chacon","year":"2014","unstructured":"Chacon S, Straub B (2014) Pro git. Springer Nature, Berlin"},{"key":"9905_CR11","doi-asserted-by":"crossref","unstructured":"Coelho J, Valente MT, Silva LL, Shihab E (2018) Identifying unmaintained projects in github. In: Proceedings of the 12th ACM\/IEEE international symposium on empirical software engineering and measurement. ACM","DOI":"10.1145\/3239235.3240501"},{"issue":"4","key":"9905_CR12","doi-asserted-by":"publisher","first-page":"64","DOI":"10.1109\/MS.2013.68","volume":"30","author":"J Czerwonka","year":"2013","unstructured":"Czerwonka J, Nagappan N, Schulte W, Murphy B (2013) Codemine: building a software development data analytics platform at microsoft. IEEE Softw 30(4):64\u201371","journal-title":"IEEE Softw"},{"key":"9905_CR13","doi-asserted-by":"crossref","unstructured":"Dey T, Mockus A (2018a) Are software dependency supply chain metrics useful in predicting change of popularity of npm packages?. In: Proceedings of the 14th international conference on predictive models and data analytics in software engineering. ACM, pp 66\u201369","DOI":"10.1145\/3273934.3273942"},{"key":"9905_CR14","doi-asserted-by":"crossref","unstructured":"Dey T, Mockus A (2018b) Modeling relationship between post-release faults and usage in mobilesoftware. In: Proceedings of the 14th international conference on predictive models and data analytics in software engineering, pp 56\u201365","DOI":"10.1145\/3273934.3273941"},{"key":"9905_CR15","doi-asserted-by":"publisher","unstructured":"Dey T, Mockus A (2020a) A dataset of pull requests and a trained random forest model forpredicting pull request acceptance. https:\/\/doi.org\/10.5281\/zenodo.3858046","DOI":"10.5281\/zenodo.3858046"},{"issue":"2","key":"9905_CR16","doi-asserted-by":"publisher","first-page":"1596","DOI":"10.1007\/s10664-019-09791-w","volume":"25","author":"T Dey","year":"2020","unstructured":"Dey T, Mockus A (2020b) Deriving a usage-independent software quality metric. Empir Softw Eng 25(2):1596\u20131641","journal-title":"Empir Softw Eng"},{"key":"9905_CR17","doi-asserted-by":"crossref","unstructured":"Dey T, Mockus A (2020c) Effect of technical and social factors on pull request quality for thenpm ecosystem. In: Proceedings of the 14th ACM\/IEEE international symposium on empirical software engineering and measurement (ESEM), pp 1\u201311","DOI":"10.1145\/3382494.3410685"},{"key":"9905_CR18","doi-asserted-by":"crossref","unstructured":"Dey T, Ma Y, Mockus A (2019a) Patterns of effort contribution and demand and user classification based on participation patterns in npm ecosystem. In: Proceedings of the fifteenth international conference on predictive models and data analytics in software engineering, pp 36\u201345","DOI":"10.1145\/3345629.3345634"},{"key":"9905_CR19","doi-asserted-by":"crossref","unstructured":"Dey T, Ma Y, Mockus A (2019b) Patterns of effort contribution and demand and user classification based on participation patterns in npm ecosystem. arXiv:1907.06538","DOI":"10.1145\/3345629.3345634"},{"key":"9905_CR20","doi-asserted-by":"publisher","unstructured":"Dey T, Mousavi S, Ponce E, Fry T, Vasilescu B, Filippova A, Mockus A (2020a) A dataset of bot commits. https:\/\/doi.org\/10.5281\/zenodo.3610205","DOI":"10.5281\/zenodo.3610205"},{"key":"9905_CR21","doi-asserted-by":"crossref","unstructured":"Dey T, Mousavi S, Ponce E, Fry T, Vasilescu B, Filippova A, Mockus A (2020b) Detecting and characterizing bots that commit code. In: Proceedings of the 17th international conference on mining software repositories, pp 209\u2013219","DOI":"10.1145\/3379597.3387478"},{"key":"9905_CR22","doi-asserted-by":"crossref","unstructured":"Dey T, Karnauch A, Mockus A (2020c) Representation of developer expertise in open source software. arXiv:2005.10176","DOI":"10.1109\/ICSE43902.2021.00094"},{"key":"9905_CR23","doi-asserted-by":"crossref","unstructured":"Dey T, Vasilescu B, Mockus A (2020d) An exploratory study of bot commits. In: Proceedings of the IEEE\/ACM 42nd international conference on software engineering workshops, pp 61\u201365","DOI":"10.1145\/3387940.3391502"},{"key":"9905_CR24","doi-asserted-by":"publisher","unstructured":"Dey T, Vasilescu B, Mockus A (2020e) A mapping between Bot Commit, Projects, Files, and Blobs [Data set]. Zenodo. https:\/\/doi.org\/10.5281\/zenodo.3699665","DOI":"10.5281\/zenodo.3699665"},{"key":"9905_CR25","unstructured":"Di Cosmo R, Zacchiroli S (2017) Software heritage: why and how to preserve software source code. In: iPRES 2017: 14th international conference on digital preservation. Kyoto, Japan. https:\/\/www.softwareheritage.org\/wp-content\/uploads\/2020\/01\/ipres-2017-swh.pdfhttps:\/\/hal.archives-ouvertes.fr\/hal-01590958"},{"key":"9905_CR26","doi-asserted-by":"crossref","unstructured":"Ducasse S, G\u00eerba T, Nierstrasz O (2005) Moose: an agile reengineering environment. In: ACM SIGSOFT software engineering notes, vol 30. ACM, pp 99\u2013102","DOI":"10.1145\/1095430.1081723"},{"key":"9905_CR27","doi-asserted-by":"crossref","unstructured":"Dyer R (2013) Task fusion: improving utilization of multi-user clusters. In: Proceedings of the 2013 companion publication for conference on Systems, programming, & applications: software for humanity, SPLASH SRC, pp 117\u2013118","DOI":"10.1145\/2508075.2514878"},{"key":"9905_CR28","doi-asserted-by":"crossref","unstructured":"Dyer R, Nguyen HA, Rajan H, Nguyen TN (2013) Boa: a language and infrastructure for analyzing ultra-large-scale software repositories. In: Proceedings of the 35th international conference on software engineering, ICSE\u201913, pp 422\u2013431","DOI":"10.1109\/ICSE.2013.6606588"},{"key":"9905_CR29","doi-asserted-by":"crossref","unstructured":"Dyer R, Nguyen HA, Rajan H, Nguyen TN (2015a) Boa: An enabling language and infrastructure for ultra-large-scale msr studies. In: The art and science of analyzing software data. Morgan Kaufmann, pp 593\u2013621","DOI":"10.1016\/B978-0-12-411519-4.00020-3"},{"issue":"1","key":"9905_CR30","doi-asserted-by":"publisher","first-page":"7:1","DOI":"10.1145\/2803171","volume":"25","author":"R Dyer","year":"2015","unstructured":"Dyer R, Nguyen HA, Rajan H, Nguyen TN (2015b) Boa: ultra-large-scale software repository and source-code mining. ACM Trans Softw Eng Methodol 25(1):7:1\u20137:34","journal-title":"ACM Trans Softw Eng Methodol"},{"key":"9905_CR31","doi-asserted-by":"crossref","unstructured":"Dyer R, Rajan H, Nguyen TN (2013) Declarative visitors to ease fine-grained source code mining with full history on billions of AST nodes. In: Proceedings of the 12th international conference on generative programming: concepts & experiences, GPCE, pp 23\u201332","DOI":"10.1145\/2517208.2517226"},{"key":"9905_CR32","doi-asserted-by":"crossref","unstructured":"Eastlake 3rd D, Jones P (2001) Us secure hash algorithm 1 (sha1). Tech. rep","DOI":"10.17487\/rfc3174"},{"key":"9905_CR33","doi-asserted-by":"crossref","unstructured":"Fry T, Dey T, Karnauch A, Mockus A (2020) A dataset and an approach for identity resolution of 38 million author ids extracted from 2b git commits. In: Proceedings of the 17th international conference on mining software repositories, pp 518\u2013522","DOI":"10.1145\/3379597.3387500"},{"key":"9905_CR34","unstructured":"German D, Mockus A (2003) Automating the measurement of open source projects. In: Proceedings of the 3rd workshop on open source software engineering. University College Cork Cork Ireland, pp 63\u201367"},{"issue":"2","key":"9905_CR35","doi-asserted-by":"publisher","first-page":"32","DOI":"10.1109\/MS.2016.47","volume":"33","author":"I Gorton","year":"2016","unstructured":"Gorton I, Bener AB, Mockus A (2016) Software engineering for big data systems. IEEE Softw 33(2):32\u201335","journal-title":"IEEE Softw"},{"key":"9905_CR36","unstructured":"Gousios G (2013) The ghtorrent dataset and tool suite. In: Proceedings of the 10th working conference on mining software repositories, MSR \u201913. http:\/\/dl.acm.org\/citation.cfm?id=2487085.2487132. IEEE Press, Piscataway, pp 233\u2013236"},{"key":"9905_CR37","doi-asserted-by":"crossref","unstructured":"Gousios G, Pinzger M, Deursen AV (2014) An exploratory study of the pull-based software development model. In: Proceedings of the 36th international conference on software engineering. ACM, pp 345\u2013355","DOI":"10.1145\/2568225.2568260"},{"key":"9905_CR38","doi-asserted-by":"crossref","unstructured":"Gousios G, Spinellis D (2009) Alitheia core: an extensible software quality monitoring platform. In: IEEE 31st international conference on software engineering, 2009. ICSE 2009. IEEE, pp 579\u2013582","DOI":"10.1109\/ICSE.2009.5070560"},{"key":"9905_CR39","doi-asserted-by":"crossref","unstructured":"Gousios G, Spinellis D (2012) Ghtorrent: Github\u2019s data from a firehose. In: 2012 9th ieee working conference on mining software repositories (msr). IEEE, pp 12\u201321","DOI":"10.1109\/MSR.2012.6224294"},{"key":"9905_CR40","doi-asserted-by":"crossref","unstructured":"Gousios G, Vasilescu B, Serebrenik A, Zaidman A (2014) Lean ghtorrent: Github data on demand. In: Proceedings of the 11th working conference on mining software repositories. ACM, pp 384\u2013387","DOI":"10.1145\/2597073.2597126"},{"key":"9905_CR41","doi-asserted-by":"crossref","unstructured":"Gousios G, Zaidman A (2014) A dataset for pull-based development research. In: Proceedings of the 11th working conference on mining software repositories. ACM, pp 368\u2013371","DOI":"10.1145\/2597073.2597122"},{"issue":"3","key":"9905_CR42","doi-asserted-by":"publisher","first-page":"219","DOI":"10.1007\/s10664-009-9120-1","volume":"10","author":"R Hackbarth","year":"2010","unstructured":"Hackbarth R, Mockus A, Palframan J, Weiss D (2010) Assessing the state of software in a large enterprise. J Empir Softw Eng 10(3):219\u2013249","journal-title":"J Empir Softw Eng"},{"issue":"3","key":"9905_CR43","doi-asserted-by":"publisher","first-page":"17","DOI":"10.4018\/jitwe.2006070102","volume":"1","author":"J Howison","year":"2006","unstructured":"Howison J, Conklin M, Crowston K (2006) Flossmole: a collaborative repository for floss research data and analyses. Int J Info Technol Web Eng (IJITWE) 1(3):17\u201326","journal-title":"Int J Info Technol Web Eng (IJITWE)"},{"key":"9905_CR44","unstructured":"Hung CS, Dyer R (2020) Boa views: easy modularization and sharing of msr analyses. In: Proceedings of the 17th international conference on mining software repositories, pp 147\u2013157"},{"key":"9905_CR45","doi-asserted-by":"publisher","unstructured":"Kalliamvakou E, Gousios G, Blincoe K, Singer L, German DM, Damian D (2014) The promises and perils of mining github. In: Proceedings of the 11th working conference on mining software repositories, MSR 2014. https:\/\/doi.org\/10.1145\/2597073.2597074. Association for Computing Machinery, New York, pp 92\u2013101","DOI":"10.1145\/2597073.2597074"},{"key":"9905_CR46","doi-asserted-by":"crossref","unstructured":"Kim S, Zimmermann T, Kim M, Hassan AE, Mockus A, G\u00eerba T, Pinzger M, Jr. EJW, Zeller A (2006) Ta-re: an exchange language for mining software repositories. In: ICSE\u201906 workshop on mining software repositories. http:\/\/dl.acm.org\/authorize?804411. Shanghai, China, pp 22\u201325","DOI":"10.1145\/1137983.1137990"},{"key":"9905_CR47","unstructured":"Le Q, Mikolov T (2014) Distributed representation of sentences and documents. In: Proceedings of the 31st international conference on machine learning. https:\/\/cs.stanford.edu\/quocle\/paragraph_vector.pdf, vol 32. JMLR, Beijing"},{"issue":"2","key":"9905_CR48","doi-asserted-by":"publisher","first-page":"12","DOI":"10.1109\/MC.2010.58","volume":"43","author":"N Leavitt","year":"2010","unstructured":"Leavitt N (2010) Will NoSQL databases live up to their promise? Computer 43(2):12\u201314. https:\/\/doi.org\/10.1109\/MC.2010.58","journal-title":"Computer"},{"issue":"11","key":"9905_CR49","doi-asserted-by":"publisher","first-page":"825","DOI":"10.1109\/32.368126","volume":"20","author":"H Lichter","year":"1994","unstructured":"Lichter H, Schneider-Hufschmidt M, Zullighoven H (1994) Prototyping in industrial software projects-bridging the gap between theory and practice. IEEE Trans Softw Eng 20(11):825\u2013832","journal-title":"IEEE Trans Softw Eng"},{"key":"9905_CR50","doi-asserted-by":"publisher","first-page":"e2551v1","DOI":"10.7287\/peerj.preprints.2551v1","volume":"4","author":"Y Ma","year":"2016","unstructured":"Ma Y, Dey T, Smith JM, Wilder N, Mockus A (2016) Crowdsourcing the discovery of software repositories in an educational environment. PeerJ Preprints 4:e2551v1. https:\/\/doi.org\/10.7287\/peerj.preprints.2551v1","journal-title":"PeerJ Preprints"},{"key":"9905_CR51","doi-asserted-by":"publisher","unstructured":"Ma Y, Mockus A, Zaretzki R, Bichescu R, Bradley B (2020) A methodology for analyzing uptake of software technologies among developers. IEEE Transactions on Software Engineering. https:\/\/doi.org\/10.1109\/TSE.2020.2993758","DOI":"10.1109\/TSE.2020.2993758"},{"key":"9905_CR52","doi-asserted-by":"crossref","unstructured":"Mockus A (2007) Large-scale code reuse in open source software. In: ICSE\u201907 intl. workshop on emerging trends in FLOSS research and development. Minneapolis, Minnesota. papers\/ossreuse.pdf","DOI":"10.1109\/FLOSS.2007.10"},{"key":"9905_CR53","doi-asserted-by":"crossref","unstructured":"Mockus A (2009) Amassing and indexing a large sample of version control systems: towards the census of public source code history. In: 6th IEEE working conference on mining software repositories. papers\/amassing.pdf","DOI":"10.1109\/MSR.2009.5069476"},{"key":"9905_CR54","doi-asserted-by":"crossref","unstructured":"Mockus A (2014) Engineering big data solutions. In: ICSE\u201914 FOSE. papers\/BigData.pdf","DOI":"10.1145\/2593882.2593889"},{"key":"9905_CR55","unstructured":"Moniruzzaman A, Hossain SA (2013) Nosql database: new era of databases for big data analytics-classification, characteristics and comparison. arXiv:1307.0191"},{"issue":"6","key":"9905_CR56","doi-asserted-by":"publisher","first-page":"3219","DOI":"10.1007\/s10664-017-9512-6","volume":"22","author":"N Munaiah","year":"2017","unstructured":"Munaiah N, Kroh S, Cabrey C, Nagappan M (2017) Curating github for engineered software projects. Empir Softw Eng 22(6):3219\u20133253","journal-title":"Empir Softw Eng"},{"key":"9905_CR57","unstructured":"Nexus (2019) Repository. https:\/\/www.sonatype.com\/nexus-repository-oss. Accessed 02 Jan 2019"},{"key":"9905_CR58","doi-asserted-by":"crossref","unstructured":"Ossher J, Bajracharya S, Linstead E, Baldi P, Lopes C (2009) Sourcererdb: an aggregated repository of statically analyzed and cross-linked open source java projects. In: 6th IEEE international working conference on mining software repositories, 2009. MSR\u201909. IEEE, pp 183\u2013186","DOI":"10.1109\/MSR.2009.5069501"},{"key":"9905_CR59","doi-asserted-by":"crossref","unstructured":"Pietri A, Spinellis D, Zacchiroli S (2019) The software heritage graph dataset: public software development under one roof. In: 2019 IEEE\/ACM 16th international conference on mining software repositories (MSR). IEEE, pp 138\u2013142","DOI":"10.1109\/MSR.2019.00030"},{"key":"9905_CR60","unstructured":"Qi Z (2007) Fast sha1 implementation. US Patent 7,299,355"},{"key":"9905_CR61","unstructured":"Rajan H, Nguyen TN, Dyer R, Nguyen HA (2015) Boa website. http:\/\/boa.cs.iastate.edu\/"},{"key":"9905_CR62","unstructured":"Rosch E (2002) Principles of categorization. In: Levitin DJ (ed) Foundations of cognitive psychology: core readings. MIT Press, pp 251\u2013270"},{"key":"9905_CR63","doi-asserted-by":"crossref","unstructured":"Rozenberg D, Beschastnikh I, Kosmale F, Poser V, Becker H, Palyart M, Murphy GC (2016) Comparing repositories visually with repograms. In: Proceedings of the 13th international conference on mining software repositories. ACM, pp 109\u2013120","DOI":"10.1145\/2901739.2901768"},{"issue":"4","key":"9905_CR64","first-page":"1","volume":"19","author":"P Russom","year":"2011","unstructured":"Russom P, et al. (2011) Big data analytics. TDWI best practices report, fourth quarter 19(4):1\u201334","journal-title":"TDWI best practices report, fourth quarter"},{"key":"9905_CR65","unstructured":"Sayyad Shirabad J, Menzies T (2005) The PROMISE repository of software engineering databases. School of Information Technology and Engineering, University of Ottawa, Canada. http:\/\/promise.site.uottawa.ca\/SERepository"},{"key":"9905_CR66","doi-asserted-by":"crossref","unstructured":"Spinellis D, Kotti Z, Mockus A (2020) A dataset for github repository deduplication. In: Proceedings of the 17th international conference on mining software repositories, pp 523\u2013527","DOI":"10.1145\/3379597.3387496"},{"key":"9905_CR67","doi-asserted-by":"crossref","unstructured":"Tiwari NM, Upadhyaya G, Nguyen HA, Rajan H (2017) Candoia: a platform for building and sharing mining software repositories tools as apps. In: MSR\u201917: 14th international conference on mining software repositories","DOI":"10.1109\/MSR.2017.56"},{"key":"9905_CR68","doi-asserted-by":"crossref","unstructured":"Tiwari NM, Upadhyaya G, Rajan H (2016) Candoia: a platform and ecosystem for mining software repositories tools. In: Proceedings of the 38th international conference on software engineering companion. ACM, pp 759\u2013764","DOI":"10.1145\/2889160.2892662"},{"key":"9905_CR69","doi-asserted-by":"crossref","unstructured":"Upadhyaya G, Rajan H (2017) On accelerating ultra-large-scale mining. In: Proceedings of the 39th international conference on software engineering: new ideas and emerging results track. IEEE Press, pp 39\u201342","DOI":"10.1109\/ICSE-NIER.2017.11"},{"key":"9905_CR70","doi-asserted-by":"crossref","unstructured":"Upadhyaya G, Rajan H (2018) On accelerating source code analysis at massive scale. IEEE Trans Softw Eng","DOI":"10.1109\/TSE.2018.2828848"},{"key":"9905_CR71","doi-asserted-by":"crossref","unstructured":"Wang X, Yin YL, Yu H (2005) Collision search attacks on sha1","DOI":"10.1007\/11535218_1"},{"key":"9905_CR72","unstructured":"Winkler W (1990) String comparator metrics and enhanced decision rules in the fellegi-sunter model of record linkage"},{"key":"9905_CR73","doi-asserted-by":"crossref","unstructured":"Winkler WE (2006) Overview of record linkage and current research directions. Tech. rep. BUREAU OF THE CENSUS","DOI":"10.1002\/9780470057339.var022"}],"container-title":["Empirical Software Engineering"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10664-020-09905-9.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s10664-020-09905-9\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10664-020-09905-9.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,12,19]],"date-time":"2022-12-19T00:35:15Z","timestamp":1671410115000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s10664-020-09905-9"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,2,25]]},"references-count":73,"journal-issue":{"issue":"2","published-print":{"date-parts":[[2021,3]]}},"alternative-id":["9905"],"URL":"https:\/\/doi.org\/10.1007\/s10664-020-09905-9","relation":{},"ISSN":["1382-3256","1573-7616"],"issn-type":[{"value":"1382-3256","type":"print"},{"value":"1573-7616","type":"electronic"}],"subject":[],"published":{"date-parts":[[2021,2,25]]},"assertion":[{"value":"30 October 2020","order":1,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"25 February 2021","order":2,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}],"article-number":"22"}}