{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T05:02:24Z","timestamp":1750309344363,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":43,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,4,16]],"date-time":"2024-04-16T00:00:00Z","timestamp":1713225600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,4,16]]},"DOI":"10.1145\/3643665.3648047","type":"proceedings-article","created":{"date-parts":[[2024,8,9]],"date-time":"2024-08-09T11:49:26Z","timestamp":1723204166000},"page":"23-30","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Topical: Automatic Repository Tagging using Attention on Hybrid Code Embeddings"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0007-5091-7668","authenticated-orcid":false,"given":"Agathe","family":"Lherondelle","sequence":"first","affiliation":[{"name":"JP Morgan Chase, London, England, United Kingdom"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3234-1358","authenticated-orcid":false,"given":"Varun","family":"Babbar","sequence":"additional","affiliation":[{"name":"JP Morgan Chase, London, England, United Kingdom"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6726-4065","authenticated-orcid":false,"given":"Yash","family":"Satsangi","sequence":"additional","affiliation":[{"name":"JP Morgan Chase, London, England, United Kingdom"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0120-3531","authenticated-orcid":false,"given":"Fran","family":"Silavong","sequence":"additional","affiliation":[{"name":"JP Morgan Chase, London, England, United Kingdom"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5927-6349","authenticated-orcid":false,"given":"Shaltiel","family":"Eloul","sequence":"additional","affiliation":[{"name":"JP Morgan Chase, London, England, United Kingdom"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5377-1608","authenticated-orcid":false,"given":"Sean","family":"Moran","sequence":"additional","affiliation":[{"name":"JP Morgan Chase, London, England, United Kingdom"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2024,8,9]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1145\/3212695"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1145\/1806799.1806817"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1145\/3303772.3303813"},{"key":"e_1_3_2_1_4_1","article-title":"Latent Dirichlet Allocation","author":"Blei David M.","year":"2003","unstructured":"David M. Blei, Andrew Y. Ng, and Michael I. Jordan. 2003. Latent Dirichlet Allocation. J. Mach. Learn. Res. 3, null (March 2003), 993--1022.","journal-title":"J. Mach. Learn. Res. 3, null"},{"key":"e_1_3_2_1_5_1","volume-title":"Exploring Software Naturalness through Neural Language Models. CoRR abs\/2006.12641","author":"Buratti Luca","year":"2020","unstructured":"Luca Buratti, Saurabh Pujar, Mihaela A. Bornea, J. Scott McCarley, Yunhui Zheng, Gaetano Rossiello, Alessandro Morari, Jim Laredo, Veronika Thost, Yufan Zhuang, and Giacomo Domeniconi. 2020. Exploring Software Naturalness through Neural Language Models. CoRR abs\/2006.12641 (2020). arXiv:2006.12641 https:\/\/arxiv.org\/abs\/2006.12641"},{"key":"e_1_3_2_1_6_1","volume-title":"Empirical evaluation of gated recurrent neural networks on sequence modeling. arXiv preprint arXiv:1412.3555","author":"Chung Junyoung","year":"2014","unstructured":"Junyoung Chung, Caglar Gulcehre, KyungHyun Cho, and Yoshua Bengio. 2014. Empirical evaluation of gated recurrent neural networks on sequence modeling. arXiv preprint arXiv:1412.3555 (2014)."},{"key":"e_1_3_2_1_7_1","unstructured":"Hal Daum\u00e9. 2017. A course in machine learning. Hal Daum\u00e9 III."},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1007\/s10664-013-9285-5"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/N19-1423"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"crossref","unstructured":"Zhangyin Feng Daya Guo Duyu Tang Nan Duan Xiaocheng Feng Ming Gong Linjun Shou Bing Qin Ting Liu Daxin Jiang and Ming Zhou. 2020. CodeBERT: A Pre-Trained Model for Programming and Natural Languages. arXiv:2002.08155 [cs.CL]","DOI":"10.18653\/v1\/2020.findings-emnlp.139"},{"key":"e_1_3_2_1_11_1","volume-title":"WASTK: a weighted abstract syntax tree kernel method for source code plagiarism detection. Scientific Programming 2017","author":"Fu Deqiang","year":"2017","unstructured":"Deqiang Fu, Yanyan Xu, Haoran Yu, and Boyang Yang. 2017. WASTK: a weighted abstract syntax tree kernel method for source code plagiarism detection. Scientific Programming 2017 (2017)."},{"key":"e_1_3_2_1_12_1","volume-title":"Colin Clement, Dawn Drain, Neel Sundaresan, Jian Yin, Daxin Jiang, and Ming Zhou.","author":"Guo Daya","year":"2021","unstructured":"Daya Guo, Shuo Ren, Shuai Lu, Zhangyin Feng, Duyu Tang, Shujie Liu, Long Zhou, Nan Duan, Alexey Svyatkovskiy, Shengyu Fu, Michele Tufano, Shao Kun Deng, Colin Clement, Dawn Drain, Neel Sundaresan, Jian Yin, Daxin Jiang, and Ming Zhou. 2021. GraphCodeBERT: Pre-training Code Representations with Data Flow. arXiv:2009.08366 [cs.SE]"},{"key":"e_1_3_2_1_13_1","volume-title":"2009 IEEE international conference on software maintenance. IEEE, 339--348","author":"Hindle Abram","year":"2009","unstructured":"Abram Hindle, Michael W Godfrey, and Richard C Holt. 2009. What's hot and what's not: Windowed developer topic analysis. In 2009 IEEE international conference on software maintenance. IEEE, 339--348."},{"key":"e_1_3_2_1_14_1","unstructured":"Hamel Husain Ho-Hsiang Wu Tiferet Gazit Miltiadis Allamanis and Marc Brockschmidt. 2020. CodeSearchNet Challenge: Evaluating the State of Semantic Code Search. arXiv:1909.09436 [cs.LG]"},{"volume-title":"SoCeR: A New Source Code Recommendation Technique for Code Reuse. In 2020 IEEE 44th Annual Computers, Software, and Applications Conference (COMPSAC)","author":"Islam Md Mazharul","key":"e_1_3_2_1_15_1","unstructured":"Md Mazharul Islam and Razib Iqbal. 2020. SoCeR: A New Source Code Recommendation Technique for Code Reuse. In 2020 IEEE 44th Annual Computers, Software, and Applications Conference (COMPSAC). IEEE, 1552--1557."},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"crossref","unstructured":"Maliheh Izadi Abbas Heydarnoori and Georgios Gousios. 2021. Topic Recommendation for Software Repositories using Multi-label Classification Algorithms. arXiv:2010.09116 [cs.SE]","DOI":"10.1007\/s10664-021-09976-2"},{"key":"e_1_3_2_1_17_1","volume-title":"A statistical interpretation of term specificity and its application in retrieval. Journal of documentation","author":"Jones Karen Sparck","year":"1972","unstructured":"Karen Sparck Jones. 1972. A statistical interpretation of term specificity and its application in retrieval. Journal of documentation (1972)."},{"key":"e_1_3_2_1_18_1","unstructured":"Aditya Kanade Petros Maniatis Gogul Balakrishnan and Kensen Shi. 2020. Learning and Evaluating Contextual Embedding of Source Code. arXiv:2001.00059 [cs.SE]"},{"key":"e_1_3_2_1_19_1","unstructured":"Rafael Michael Karampatsis and Charles Sutton. 2020. SCELMo: Source Code Embeddings from Language Models. arXiv:2004.13214 [cs.SE]"},{"key":"e_1_3_2_1_20_1","volume-title":"Adam: A method for stochastic optimization. arXiv preprint arXiv:1412.6980","author":"Kingma Diederik P","year":"2014","unstructured":"Diederik P Kingma and Jimmy Ba. 2014. Adam: A method for stochastic optimization. arXiv preprint arXiv:1412.6980 (2014)."},{"key":"e_1_3_2_1_21_1","volume-title":"An introduction to latent semantic analysis. Discourse processes 25, 2--3","author":"Landauer Thomas K","year":"1998","unstructured":"Thomas K Landauer, Peter W Foltz, and Darrell Laham. 1998. An introduction to latent semantic analysis. Discourse processes 25, 2--3 (1998), 259--284."},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1145\/3383458"},{"key":"e_1_3_2_1_23_1","volume-title":"Decoupled weight decay regularization. arXiv preprint arXiv:1711.05101","author":"Loshchilov Ilya","year":"2017","unstructured":"Ilya Loshchilov and Frank Hutter. 2017. Decoupled weight decay regularization. arXiv preprint arXiv:1711.05101 (2017)."},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICSE.2013.6606598"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/N18-1202"},{"key":"e_1_3_2_1_26_1","volume-title":"Project CodeNet: A Large-Scale AI for Code Dataset for Learning a Diversity of Coding Tasks. CoRR abs\/2105.12655","author":"Puri Ruchir","year":"2021","unstructured":"Ruchir Puri, David S. Kung, Geert Janssen, Wei Zhang, Giacomo Domeniconi, Vladimir Zolotov, Julian Dolby, Jie Chen, Mihir R. Choudhury, Lindsey Decker, Veronika Thost, Luca Buratti, Saurabh Pujar, and Ulrich Finkler. 2021. Project CodeNet: A Large-Scale AI for Code Dataset for Learning a Diversity of Coding Tasks. CoRR abs\/2105.12655 (2021). arXiv:2105.12655 https:\/\/arxiv.org\/abs\/2105.12655"},{"key":"e_1_3_2_1_27_1","unstructured":"Alec Radford and Karthik Narasimhan. 2018. Improving Language Understanding by Generative Pre-Training."},{"key":"e_1_3_2_1_28_1","volume-title":"Liu","author":"Raffel Colin","year":"2020","unstructured":"Colin Raffel, Noam Shazeer, Adam Roberts, Katherine Lee, Sharan Narang, Michael Matena, Yanqi Zhou, Wei Li, and Peter J. Liu. 2020. Exploring the Limits of Transfer Learning with a Unified Text-to-Text Transformer. arXiv:1910.10683 [cs.LG]"},{"key":"e_1_3_2_1_29_1","volume-title":"2021 IEEE International Conference on Software Maintenance and Evolution (ICSME). IEEE, 355--365","author":"Faruk Rokon Md Omar","year":"2021","unstructured":"Md Omar Faruk Rokon, Pei Yan, Risul Islam, and Michalis Faloutsos. 2021. Repo2vec: A comprehensive embedding approach for determining repository similarity. In 2021 IEEE International Conference on Software Maintenance and Evolution (ICSME). IEEE, 355--365."},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"crossref","unstructured":"Vitalis Salis Thodoris Sotiropoulos Panos Louridas Diomidis Spinellis and Dimitris Mitropoulos. 2021. PyCG: Practical Call Graph Generation in Python. arXiv:2103.00587 [cs.PL]","DOI":"10.1109\/ICSE43902.2021.00146"},{"key":"e_1_3_2_1_31_1","unstructured":"Victor Sanh Lysandre Debut Julien Chaumond and Thomas Wolf. 2020. DistilBERT a distilled version of BERT: smaller faster cheaper and lighter. arXiv:1910.01108 [cs.CL]"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1145\/3084226.3084287"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1145\/3379597.3387496"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1145\/3368089.3417058"},{"key":"e_1_3_2_1_35_1","volume-title":"2019 IEEE\/ACM 16th International Conference on Mining Software Repositories (MSR). IEEE, 18--28","author":"Theeten Bart","year":"2019","unstructured":"Bart Theeten, Frederik Vandeputte, and Tom Van Cutsem. 2019. Import2vec: Learning embeddings for software libraries. In 2019 IEEE\/ACM 16th International Conference on Mining Software Repositories (MSR). IEEE, 18--28."},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.scico.2012.08.003"},{"key":"e_1_3_2_1_37_1","volume-title":"2012 28th IEEE International Conference on Software Maintenance (ICSM). IEEE, 600--603","author":"Thung Ferdian","year":"2012","unstructured":"Ferdian Thung, David Lo, and Lingxiao Jiang. 2012. Detecting similar applications with collaborative tagging. In 2012 28th IEEE International Conference on Software Maintenance (ICSM). IEEE, 600--603."},{"key":"e_1_3_2_1_38_1","volume-title":"2013 20th Working conference on reverse engineering (WCRE). IEEE, 182--191","author":"Thung Ferdian","year":"2013","unstructured":"Ferdian Thung, David Lo, and Julia Lawall. 2013. Automated library recommendation. In 2013 20th Working conference on reverse engineering (WCRE). IEEE, 182--191."},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.5555\/3295222.3295349"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1109\/IWESEP49350.2019.00017"},{"key":"e_1_3_2_1_41_1","volume-title":"European conference on information retrieval. Springer, 29--41","author":"Yi Xing","year":"2009","unstructured":"Xing Yi and James Allan. 2009. A comparative study of utilizing topic models for information retrieval. In European conference on information retrieval. Springer, 29--41."},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1109\/SANER.2017.7884605"},{"key":"e_1_3_2_1_43_1","volume-title":"2019 IEEE International Conference on Data Mining (ICDM). IEEE, 876--885","author":"Zhang Yu","year":"2019","unstructured":"Yu Zhang, Frank F Xu, Sha Li, Yu Meng, Xuan Wang, Qi Li, and Jiawei Han. 2019. Higitclass: Keyword-driven hierarchical classification of github repositories. In 2019 IEEE International Conference on Data Mining (ICDM). IEEE, 876--885."}],"event":{"name":"FinanSE '24: 1st IEEE\/ACM Workshop on Software Engineering Challenges in Financial Firms","sponsor":["SIGSOFT ACM Special Interest Group on Software Engineering","IEEE CS","Faculty of Engineering of University of Porto"],"location":"Lisbon Portugal","acronym":"FinanSE '24"},"container-title":["Proceedings of the 1st IEEE\/ACM Workshop on Software Engineering Challenges in Financial Firms"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3643665.3648047","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3643665.3648047","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T00:05:32Z","timestamp":1750291532000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3643665.3648047"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,4,16]]},"references-count":43,"alternative-id":["10.1145\/3643665.3648047","10.1145\/3643665"],"URL":"https:\/\/doi.org\/10.1145\/3643665.3648047","relation":{},"subject":[],"published":{"date-parts":[[2024,4,16]]},"assertion":[{"value":"2024-08-09","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}