{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,25]],"date-time":"2026-02-25T17:11:34Z","timestamp":1772039494870,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":36,"publisher":"ACM","funder":[{"name":"European Union","award":["101189664"],"award-info":[{"award-number":["101189664"]}]},{"name":"PRIN 2020","award":["2020W3A5FY"],"award-info":[{"award-number":["2020W3A5FY"]}]},{"name":"PRIN 2022","award":["P2022553SL"],"award-info":[{"award-number":["P2022553SL"]}]},{"name":"PRIN 2022","award":["2022LKJWHC"],"award-info":[{"award-number":["2022LKJWHC"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,6,17]]},"DOI":"10.1145\/3756681.3757042","type":"proceedings-article","created":{"date-parts":[[2025,12,24]],"date-time":"2025-12-24T08:30:04Z","timestamp":1766565004000},"page":"653-658","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":1,"title":["Detecting Malicious Source Code in PyPI Packages with LLMs: Does RAG Come in Handy"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0005-0308-8997","authenticated-orcid":false,"given":"Motunrayo Osatohanmen","family":"Ibiyo","sequence":"first","affiliation":[{"name":"Department of Information Engineering, Computer Science and Mathematics, University of L'Aquila, L'Aquila, L'Aquila, Italy"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-2883-3119","authenticated-orcid":false,"given":"Thinakone","family":"Louangdy","sequence":"additional","affiliation":[{"name":"Department of Information Engineering, Computer Science and Mathematics, University of L'Aquila, L'Aquila, L'Aquila, Italy"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3666-4162","authenticated-orcid":false,"given":"Phuong T.","family":"Nguyen","sequence":"additional","affiliation":[{"name":"Department of Information Engineering, Computer Science and Mathematics, University of L'Aquila, L'Aquila, L'Aquila, Italy"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9872-9542","authenticated-orcid":false,"given":"Claudio","family":"Di Sipio","sequence":"additional","affiliation":[{"name":"Department of Information Engineering, Computer Science and Mathematics, University of L'Aquila, L'Aquila, L'Aquila, Italy"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5077-6793","authenticated-orcid":false,"given":"Davide","family":"Di Ruscio","sequence":"additional","affiliation":[{"name":"Department of Information Engineering, Computer Science and Mathematics, University of L'Aquila, L'Aquila, L'Aquila, Italy"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2025,12,24]]},"reference":[{"key":"e_1_3_3_2_2_2","doi-asserted-by":"publisher","DOI":"10.1145\/3643651.3659898"},{"key":"e_1_3_3_2_3_2","doi-asserted-by":"publisher","DOI":"10.1109\/INFOCOMWKSHPS61880.2024.10620804"},{"key":"e_1_3_3_2_4_2","unstructured":"Xueying Du Geng Zheng Kaixin Wang Jiayi Feng Wentai Deng Mingwei Liu Bihuan Chen Xin Peng Tao Ma and Yiling Lou. 2024. Vul-RAG: Enhancing LLM-based Vulnerability Detection via Knowledge-level RAG. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2406.11147 (2024)."},{"key":"e_1_3_3_2_5_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICSE-FoSE59343.2023.00008"},{"key":"e_1_3_3_2_6_2","unstructured":"Florian Roth. 2024. YARA Style Guide - Best Practices for Writing YARA Rules. https:\/\/github.com\/Neo23x0\/YARA-Style-Guide\/ Accessed: 2025-02-20."},{"key":"e_1_3_3_2_7_2","doi-asserted-by":"publisher","DOI":"10.1145\/3524842.3528452"},{"key":"e_1_3_3_2_8_2","unstructured":"Yunfan Gao Yun Xiong Xinyu Gao Kangxiang Jia Jinliu Pan Yuxi Bi Yi Dai Jiawei Sun and Haofen Wang. 2023. Retrieval-augmented generation for large language models: A survey. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2312.10997 (2023)."},{"key":"e_1_3_3_2_9_2","unstructured":"GitHub Security Advisories. 2024. GitHub Security Advisories - Vulnerability Database. https:\/\/github.com\/advisories Accessed: 2025-02-20."},{"key":"e_1_3_3_2_10_2","doi-asserted-by":"publisher","DOI":"10.1109\/ASE56229.2023.00135"},{"key":"e_1_3_3_2_11_2","doi-asserted-by":"publisher","unstructured":"Junjie Huang and Quanyan Zhu. 2024. PenHeal: A Two-Stage LLM Framework for Automated Pentesting and Optimal Remediation. 10.48550\/arXiv.2407.17788arXiv:https:\/\/arXiv.org\/abs\/2407.17788 [cs] Read_Status: New Read_Status_Date: 2025-03-10T14:45:21.375Z.","DOI":"10.48550\/arXiv.2407.17788"},{"key":"e_1_3_3_2_12_2","doi-asserted-by":"publisher","DOI":"10.1145\/3691620.3695492"},{"key":"e_1_3_3_2_13_2","unstructured":"Motunrayo\u00a0Osatohanmen Ibiyo Thinakone Louangdy Phuong\u00a0T. Nguyen Claudio Di Sipio and Davide Di Ruscio. 2025. Replication package for \u201cDetecting Malicious Source Code in PyPI Packages with LLMs: Does RAG Come in Handy?\u201d. https:\/\/github.com\/malexternalsc\/mal-LLM"},{"key":"e_1_3_3_2_14_2","doi-asserted-by":"publisher","DOI":"10.1109\/TPS-ISA62245.2024.00036"},{"key":"e_1_3_3_2_15_2","doi-asserted-by":"publisher","unstructured":"Xue Jiang Yihong Dong Lecheng Wang Zheng Fang Qiwei Shang Ge Li Zhi Jin and Wenpin Jiao. 2024. Self-Planning Code Generation with Large Language Models. ACM Trans. Softw. Eng. Methodol. 33 7 Article 182 (Sept. 2024) 30\u00a0pages. 10.1145\/3672456","DOI":"10.1145\/3672456"},{"key":"e_1_3_3_2_16_2","doi-asserted-by":"publisher","DOI":"10.1145\/3643795.3648384"},{"key":"e_1_3_3_2_17_2","unstructured":"Datadog\u00a0Security Labs. 2023. Open-Source Dataset of Malicious Software Packages. https:\/\/github.com\/datadog\/malicious-software-packages-dataset Accessed: 2025-02-19."},{"key":"e_1_3_3_2_18_2","doi-asserted-by":"publisher","DOI":"10.1109\/eIT60633.2024.10609922"},{"key":"e_1_3_3_2_19_2","series-title":"(NIPS \u201920)","volume-title":"Proceedings of the 34th International Conference on Neural Information Processing Systems","author":"Lewis Patrick","year":"2020","unstructured":"Patrick Lewis, Ethan Perez, Aleksandra Piktus, Fabio Petroni, Vladimir Karpukhin, Naman Goyal, Heinrich K\u00fcttler, Mike Lewis, Wen-tau Yih, Tim Rockt\u00e4schel, Sebastian Riedel, and Douwe Kiela. 2020. Retrieval-augmented generation for knowledge-intensive NLP tasks. In Proceedings of the 34th International Conference on Neural Information Processing Systems (Vancouver, BC, Canada) (NIPS \u201920). Curran Associates Inc., Red Hook, NY, USA, Article 793, 16\u00a0pages."},{"key":"e_1_3_3_2_20_2","doi-asserted-by":"publisher","DOI":"10.1145\/3643916.3644434"},{"key":"e_1_3_3_2_21_2","doi-asserted-by":"publisher","DOI":"10.1109\/TrustCom53373.2021.00091"},{"key":"e_1_3_3_2_22_2","doi-asserted-by":"crossref","unstructured":"Nitin Naik Paul Jenkins Nick Savage Longzhi Yang Tossapon Boongoen Natthakan Iam-On Kshirasagar Naik and Jingping Song. 2021. Embedded YARA rules: strengthening YARA rules utilising fuzzy hashing and fuzzy rules for malware analysis. Complex & Intelligent Systems 7 (2021) 687\u2013702.","DOI":"10.1007\/s40747-020-00233-5"},{"key":"e_1_3_3_2_23_2","doi-asserted-by":"publisher","unstructured":"Ipek Ozkaya. 2023. Application of Large Language Models to Software Engineering Tasks: Opportunities Risks and Implications. IEEE Softw. 40 3 (2023) 4\u20138. 10.1109\/MS.2023.3248401","DOI":"10.1109\/MS.2023.3248401"},{"key":"e_1_3_3_2_24_2","doi-asserted-by":"crossref","unstructured":"Ya Pan Xiuting Ge Chunrong Fang and Yong Fan. 2020. A systematic literature review of android malware detection using static analysis. IEEE Access 8 (2020) 116363\u2013116379.","DOI":"10.1109\/ACCESS.2020.3002842"},{"key":"e_1_3_3_2_25_2","doi-asserted-by":"publisher","unstructured":"Juri\u00a0Di Rocco Davide\u00a0Di Ruscio Claudio\u00a0Di Sipio Phuong\u00a0Thanh Nguyen and Riccardo Rubei. 2021. Development of recommendation systems for software engineering: the CROSSMINER experience. Empir. Softw. Eng. 26 4 (2021) 69. 10.1007\/S10664-021-09963-7","DOI":"10.1007\/S10664-021-09963-7"},{"key":"e_1_3_3_2_26_2","doi-asserted-by":"publisher","unstructured":"Ze Sheng Fenghua Wu Xiangwu Zuo Chao Li Yuxin Qiao and Lei Hang. 2024. LProtector: An LLM-driven Vulnerability Detection System. 10.48550\/arXiv.2411.06493arXiv:https:\/\/arXiv.org\/abs\/2411.06493 [cs] Read_Status: New Read_Status_Date: 2025-03-10T14:31:16.455Z.","DOI":"10.48550\/arXiv.2411.06493"},{"key":"e_1_3_3_2_27_2","doi-asserted-by":"crossref","unstructured":"Jagsir Singh and Jaswinder Singh. 2021. A survey on machine learning-based malware detection in executable files. Journal of Systems Architecture 112 (2021) 101861.","DOI":"10.1016\/j.sysarc.2020.101861"},{"key":"e_1_3_3_2_28_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-70896-1_3"},{"key":"e_1_3_3_2_29_2","volume-title":"YARA Documentation","author":"al. Victor Alvarez et","year":"2024","unstructured":"Victor Alvarez et al.2024. YARA Documentation. https:\/\/yara.readthedocs.io\/en\/latest\/ Accessed: 2025-02-20."},{"key":"e_1_3_3_2_30_2","unstructured":"Fangzhou Wu Ning Zhang Somesh Jha Patrick McDaniel and Chaowei Xiao. 2024. A new era in llm security: Exploring security concerns in real-world llm-based systems. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2402.18649 (2024)."},{"key":"e_1_3_3_2_31_2","doi-asserted-by":"crossref","unstructured":"Shi-Qi Yan Jia-Chen Gu Yun Zhu and Zhen-Hua Ling. 2024. Corrective retrieval augmented generation. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2401.15884 (2024).","DOI":"10.2139\/ssrn.5267341"},{"key":"e_1_3_3_2_32_2","unstructured":"YARA HQ. 2024. YARA Forge - A Rule-Sharing Platform for YARA. https:\/\/github.com\/YARAHQ\/yara-forge Accessed: 2025-02-20."},{"key":"e_1_3_3_2_33_2","unstructured":"Jeffy Yu. 2024. Retrieval Augmented Generation Integrated Large Language Models in Smart Contract Vulnerability Detection. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2407.14838 (2024)."},{"key":"e_1_3_3_2_34_2","doi-asserted-by":"publisher","DOI":"10.1109\/SGEE64306.2024.10865871"},{"key":"e_1_3_3_2_35_2","unstructured":"Nusrat Zahan Philipp Burckhardt Mikola Lysenko Feross Aboukhadijeh and Laurie Williams. 2024. Shifting the Lens: Detecting Malware in npm Ecosystem with Large Language Models. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2403.12196 (2024)."},{"key":"e_1_3_3_2_36_2","doi-asserted-by":"publisher","unstructured":"He Zhang Muhammad\u00a0Ali Babar and Paolo Tell. 2011. Identifying relevant studies in software engineering. Information and Software Technology 53 6 (2011) 625\u2013637. 10.1016\/j.infsof.2010.12.010Special Section: Best papers from the APSEC.","DOI":"10.1016\/j.infsof.2010.12.010"},{"key":"e_1_3_3_2_37_2","unstructured":"Junan Zhang Kaifeng Huang Bihuan Chen Chong Wang Zhenhao Tian and Xin Peng. 2023. Malicious Package Detection in NPM and PyPI using a Single Model of Malicious Behavior Sequence. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2309.02637 (2023)."}],"event":{"name":"EASE '25: Evaluation and Assessment in Software Engineering","location":"Istanbul Turkiye","acronym":"EASE '25"},"container-title":["Proceedings of the 29th International Conference on Evaluation and Assessment in Software Engineering"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3756681.3757042","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,24]],"date-time":"2025-12-24T08:39:19Z","timestamp":1766565559000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3756681.3757042"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,6,17]]},"references-count":36,"alternative-id":["10.1145\/3756681.3757042","10.1145\/3756681"],"URL":"https:\/\/doi.org\/10.1145\/3756681.3757042","relation":{},"subject":[],"published":{"date-parts":[[2025,6,17]]},"assertion":[{"value":"2025-12-24","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}