{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T04:23:48Z","timestamp":1750220628954,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":32,"publisher":"ACM","license":[{"start":{"date-parts":[[2020,12,21]],"date-time":"2020-12-21T00:00:00Z","timestamp":1608508800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2020,12,21]]},"DOI":"10.1145\/3324884.3415291","type":"proceedings-article","created":{"date-parts":[[2021,1,27]],"date-time":"2021-01-27T23:38:56Z","timestamp":1611790736000},"page":"1316-1320","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":2,"title":["Sosed"],"prefix":"10.1145","author":[{"given":"Egor","family":"Bogomolov","sequence":"first","affiliation":[{"name":"JetBrains Research, Saint Petersburg, Russia"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yaroslav","family":"Golubev","sequence":"additional","affiliation":[{"name":"JetBrains Research, Saint Petersburg, Russia"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Artyom","family":"Lobanov","sequence":"additional","affiliation":[{"name":"JetBrains Research, Saint Petersburg, Russia"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Vladimir","family":"Kovalenko","sequence":"additional","affiliation":[{"name":"JetBrains Research, JetBrains N.V., Amsterdam, The Netherlands"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Timofey","family":"Bryksin","sequence":"additional","affiliation":[{"name":"Saint Petersburg State University, Saint Petersburg, Russia"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2021,1,27]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"2019. The State of the Octoverse. https:\/\/octoverse.github.com\/"},{"key":"e_1_3_2_1_2_1","unstructured":"2020. go-enry GitHub: enry. https:\/\/github.com\/go-enry\/enry"},{"key":"e_1_3_2_1_3_1","unstructured":"2020. JetBrains Research GitHub: Buckwheat. https:\/\/github.com\/JetBrains-Research\/buckwheat"},{"key":"e_1_3_2_1_4_1","unstructured":"2020. JetBrains Research GitHub: Sosed. https:\/\/github.com\/JetBrains-Research\/sosed"},{"key":"e_1_3_2_1_5_1","unstructured":"2020. Pygments: Python syntax highlighter. https:\/\/pygments.org\/"},{"volume-title":"The most popular languages of GitHub's pull requests, 1 quarter","year":"2020","key":"e_1_3_2_1_6_1","unstructured":"2020. The most popular languages of GitHub's pull requests, 1 quarter, 2020. https:\/\/madnight.github.io\/githut\/#\/pull_requests\/2020\/1"},{"key":"e_1_3_2_1_7_1","unstructured":"2020. tree-sitter GitHub: tree-sitter. https:\/\/github.com\/tree-sitter\/tree-sitter"},{"key":"e_1_3_2_1_8_1","volume-title":"code2seq: Generating Sequences from Structured Representations of Code. CoRR abs\/1808.01400","author":"Alon Uri","year":"2018","unstructured":"Uri Alon, Omer Levy, and Eran Yahav. 2018. code2seq: Generating Sequences from Structured Representations of Code. CoRR abs\/1808.01400 (2018). arXiv:1808.01400 http:\/\/arxiv.org\/abs\/1808.01400"},{"key":"e_1_3_2_1_9_1","article-title":"Latent Dirichlet Allocation","author":"Blei David M.","year":"2003","unstructured":"David M. Blei, Andrew Y. Ng, and Michael I. Jordan. 2003. Latent Dirichlet Allocation. J. Mach. Learn. Res. 3, null (March 2003), 993--1022.","journal-title":"J. Mach. Learn. Res. 3, null"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1162\/tacl_a_00051"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1145\/2684822.2685305"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1002\/(SICI)1097-4571(199009)41:6<391::AID-ASI1>3.0.CO;2-9"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.5555\/3001460.3001507"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-23829-6_30"},{"key":"e_1_3_2_1_15_1","volume-title":"Global Relational Models of Source Code. In International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=B1lnbRNtwr","author":"Hellendoorn Vincent J.","year":"2020","unstructured":"Vincent J. Hellendoorn, Charles Sutton, Rishabh Singh, Petros Maniatis, and David Bieber. 2020. Global Relational Models of Source Code. In International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=B1lnbRNtwr"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.18637\/jss.v050.i10"},{"key":"e_1_3_2_1_17_1","volume-title":"Billion-scale similarity search with GPUs. arXiv preprint arXiv:1702.08734","author":"Johnson Jeff","year":"2017","unstructured":"Jeff Johnson, Matthijs Douze, and Herv\u00e9 J\u00e9gou. 2017. Billion-scale similarity search with GPUs. arXiv preprint arXiv:1702.08734 (2017)."},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","unstructured":"S. Kullback and R. A. Leibler. 1951. On Information and Sufficiency. Ann. Math. Statist. 22 1 (03 1951) 79--86. 10.1214\/aoms\/1177729694","DOI":"10.1214\/aoms\/1177729694"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"crossref","unstructured":"L. Li T. F. Bissyand\u00e9 and J. Klein. 2017. SimiDroid: Identifying and Explaining Similarities in Android Apps. In 2017 IEEE Trustcom\/BigDataSE\/ICESS. 136--143.","DOI":"10.1109\/Trustcom\/BigDataSE\/ICESS.2017.230"},{"volume-title":"2016 IEEE 24th International Conference on Program Comprehension (ICPC). 1--10","author":"Linares-V\u00e1squez M.","key":"e_1_3_2_1_20_1","unstructured":"M. Linares-V\u00e1squez, A. Holtzhauer, and D. Poshyvanyk. 2016. On automatically detecting similar Android apps. In 2016 IEEE 24th International Conference on Program Comprehension (ICPC). 1--10."},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1109\/tit.1982.1056489"},{"key":"e_1_3_2_1_22_1","unstructured":"Vadim Markovtsev. 2017. GitHub word2vec 120k. https:\/\/data.world\/vmarkovtsev\/github-word-2-vec-120-k."},{"key":"e_1_3_2_1_23_1","volume-title":"Topic modeling of public repositories at scale using names in source code. arXiv preprint arXiv:1704.00135","author":"Markovtsev Vadim","year":"2017","unstructured":"Vadim Markovtsev and Eiso Kant. 2017. Topic modeling of public repositories at scale using names in source code. arXiv preprint arXiv:1704.00135 (2017)."},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.5555\/2337223.2337267"},{"volume-title":"Evolving Software Systems","author":"Mens Tom","key":"e_1_3_2_1_25_1","unstructured":"Tom Mens, Alexander Serebrenik, and Anthony Cleve. 2014. Evolving Software Systems. Springer Publishing Company, Incorporated."},{"key":"e_1_3_2_1_26_1","volume-title":"Snowball: A language for stemming algorithms.","author":"Porter Martin F","year":"2001","unstructured":"Martin F Porter. 2001. Snowball: A language for stemming algorithms."},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D15-1036"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","unstructured":"Xiaobing Sun Xiangyue Liu Li Bin Yucong Duan Hui Yang and Jiajun Hu. 2016. Exploring topic models in software engineering data analysis: A survey. 357--362. 10.1109\/SNPD.2016.7515925","DOI":"10.1109\/SNPD.2016.7515925"},{"volume-title":"2012 28th IEEE International Conference on Software Maintenance (ICSM). 600--603","author":"Thung F.","key":"e_1_3_2_1_29_1","unstructured":"F. Thung, D. Lo, and L. Jiang. 2012. Detecting similar applications with collaborative tagging. In 2012 28th IEEE International Conference on Software Maintenance (ICSM). 600--603."},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1111\/1467-9868.00293"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1007\/s10994-014-5476-6"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1109\/SANER.2017.7884605"}],"event":{"name":"ASE '20: 35th IEEE\/ACM International Conference on Automated Software Engineering","sponsor":["SIGAI ACM Special Interest Group on Artificial Intelligence","SIGSOFT ACM Special Interest Group on Software Engineering","IEEE CS"],"location":"Virtual Event Australia","acronym":"ASE '20"},"container-title":["Proceedings of the 35th IEEE\/ACM International Conference on Automated Software Engineering"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3324884.3415291","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3324884.3415291","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T22:01:37Z","timestamp":1750197697000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3324884.3415291"}},"subtitle":["a tool for finding similar software projects"],"short-title":[],"issued":{"date-parts":[[2020,12,21]]},"references-count":32,"alternative-id":["10.1145\/3324884.3415291","10.1145\/3324884"],"URL":"https:\/\/doi.org\/10.1145\/3324884.3415291","relation":{},"subject":[],"published":{"date-parts":[[2020,12,21]]},"assertion":[{"value":"2021-01-27","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}