{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T05:06:49Z","timestamp":1750309609668,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":64,"publisher":"ACM","license":[{"start":{"date-parts":[[2025,3,31]],"date-time":"2025-03-31T00:00:00Z","timestamp":1743379200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,3,31]]},"DOI":"10.1145\/3672608.3707756","type":"proceedings-article","created":{"date-parts":[[2025,5,14]],"date-time":"2025-05-14T18:26:21Z","timestamp":1747247181000},"page":"1617-1626","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["A Machine Learning-Based Approach For Detecting Malicious PyPI Packages"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-4192-718X","authenticated-orcid":false,"given":"Haya","family":"Samaana","sequence":"first","affiliation":[{"name":"An Najah National University, Nablus, Palestine, Palestine"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7084-2594","authenticated-orcid":false,"given":"Diego Elias","family":"Costa","sequence":"additional","affiliation":[{"name":"Concordia University, Montreal, Canada"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1285-9878","authenticated-orcid":false,"given":"Emad","family":"Shihab","sequence":"additional","affiliation":[{"name":"Concordia University, Montreal, Canada"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1863-9147","authenticated-orcid":false,"given":"Ahmad","family":"Abdellatif","sequence":"additional","affiliation":[{"name":"University of Calgary, Calgary, Canada"}]}],"member":"320","published-online":{"date-parts":[[2025,5,14]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"(accessed: 23.07.2024). Bertusk. https:\/\/bertusk.medium.com\/discord-token-stealer-discovered-in-pypi-repository-e65ed9c3de06"},{"key":"e_1_3_2_1_2_1","unstructured":"(accessed: 23.07.2024). dateutil. https:\/\/snyk.io\/blog\/malicious-packages-found-to-be-typo-squatting-in-pypi\/"},{"key":"e_1_3_2_1_3_1","unstructured":"Accessed on 21\/7\/2024. Libraries- the open source discovery service. https:\/\/libraries.io\/"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1109\/TSE.2020.2967380"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1109\/SANER50967.2021.00048"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1093\/bioinformatics\/btq134"},{"key":"e_1_3_2_1_7_1","unstructured":"Anonymous. 2024. A Machine Learning-Based Approach For Detecting Malicious PYPI Packages | Zenodo. https:\/\/zenodo.org\/records\/13825064."},{"key":"e_1_3_2_1_8_1","unstructured":"bandit. (accessed: 27.07.2024). bandit. https:\/\/github.com\/PyCQA\/bandit"},{"key":"e_1_3_2_1_9_1","unstructured":"bandit4mal. (accessed: 20.07.2024). bandit4mal. https:\/\/github.com\/lyvd\/bandit4mal"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1145\/236156.236184"},{"key":"e_1_3_2_1_11_1","unstructured":"bleepingcomputer. (accessed: 12.08.2024). cryptocurrency. https:\/\/www.bleepingcomputer.com\/news\/security\/malicious-pypi-packages-hijack-dev-devices-to-mine-cryptocurrency\/"},{"key":"e_1_3_2_1_12_1","volume-title":"Pearson's r and coarsely categorized measures. American Sociological Review","author":"Bollen Kenneth A","year":"1981","unstructured":"Kenneth A Bollen and Kenney H Barb. 1981. Pearson's r and coarsely categorized measures. American Sociological Review (1981), 232\u2013239."},{"key":"e_1_3_2_1_13_1","unstructured":"botaa3. (accessed: 20.07.2024). botaa3. https:\/\/blog.sonatype.com\/another-day-of-malware-malicious-botaa3-pypi-package"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1145\/1963405.1963436"},{"key":"e_1_3_2_1_15_1","volume-title":"Takashi Ishio, and Kenichi Matsumoto.","author":"Chinthanet Bodin","year":"2021","unstructured":"Bodin Chinthanet, Brittany Reid, Christoph Treude, Markus Wagner, Raula Gaikovina Kula, Takashi Ishio, and Kenichi Matsumoto. 2021. What makes a good Node. js package? Investigating Users, Contributors, and Runnability. arXiv preprint arXiv:2106.12239 (2021)."},{"key":"e_1_3_2_1_16_1","volume-title":"Keras: The python deep learning library. Astrophysics source code library","author":"Fran\u00e7ois Chollet","year":"2018","unstructured":"Fran\u00e7ois Chollet et al. 2018. Keras: The python deep learning library. Astrophysics source code library (2018), ascl-1806."},{"key":"e_1_3_2_1_17_1","unstructured":"colourama. (accessed: 23.07.2024). colourama. https:\/\/bertusk.medium.com\/cryptocurrency-clipboard-hijacker-discovered-in-pypi-repository-b66b8a534a8"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1145\/3196398.3196401"},{"key":"e_1_3_2_1_19_1","unstructured":"domains. (accessed: 20.06.2024). domains. http:\/\/s3.amazonaws.com\/alexa-static\/top-1m.csv.zip"},{"key":"e_1_3_2_1_20_1","volume-title":"Ryan Elder, Brendan Saltaformaggio, and Wenke Lee.","author":"Duan Ruian","year":"2020","unstructured":"Ruian Duan, Omar Alrawi, Ranjita Pai Kasturi, Ryan Elder, Brendan Saltaformaggio, and Wenke Lee. 2020. Measuring and preventing supply chain attacks on package managers. arXiv preprint arXiv:2002.01139 (2020), 18\u201352."},{"key":"e_1_3_2_1_21_1","volume-title":"Ryan Elder, Brendan Saltaformaggio, and Wenke Lee.","author":"Duan Ruian","year":"2020","unstructured":"Ruian Duan, Omar Alrawi, Ranjita Pai Kasturi, Ryan Elder, Brendan Saltaformaggio, and Wenke Lee. 2020. Towards measuring supply chain attacks on package managers for interpreted languages. arXiv preprint arXiv:2002.01139 (2020)."},{"key":"e_1_3_2_1_22_1","unstructured":"encode32. (accessed: 23.07.2024). encode32. https:\/\/jfrog.com\/blog\/jfrog-discloses-3-remote-access-trojans-in-pypi\/"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICSE-NIER.2019.00012"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.jss.2021.110911"},{"volume-title":"Understanding dependencies: A study of the coordination challenges in software development","author":"Grinter Rebecca Elizabeth","key":"e_1_3_2_1_25_1","unstructured":"Rebecca Elizabeth Grinter. 1996. Understanding dependencies: A study of the coordination challenges in software development. University of California, Irvine."},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1109\/SP46215.2023.10179332"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1145\/3589334.3645543"},{"key":"e_1_3_2_1_28_1","volume-title":"Malicious web content detection by machine learning. expert systems with applications 37, 1","author":"Hou Yung-Tsung","year":"2010","unstructured":"Yung-Tsung Hou, Yimeng Chang, Tsuhan Chen, Chi-Sung Laih, and Chia-Mei Chen. 2010. Malicious web content detection by machine learning. expert systems with applications 37, 1 (2010), 55\u201360."},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-87839-9_6"},{"key":"e_1_3_2_1_30_1","first-page":"19","article-title":"Suspicious malicious web site detection with strength analysis of a javascript obfuscation","volume":"26","author":"Kim Byung-Ik","year":"2011","unstructured":"Byung-Ik Kim, Chae-Tae Im, and Hyun-Chul Jung. 2011. Suspicious malicious web site detection with strength analysis of a javascript obfuscation. International Journal of Advanced Science and Technology 26 (2011), 19\u201332.","journal-title":"International Journal of Advanced Science and Technology"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1109\/SP46215.2023.10179304"},{"key":"e_1_3_2_1_32_1","volume-title":"Malicious Packages Lurking in User-Friendly Python Package Index. In 2021 IEEE 20th International Conference on Trust, Security and Privacy in Computing and Communications (TrustCom). IEEE, 606\u2013613","author":"Liang Genpei","year":"2021","unstructured":"Genpei Liang, Xiangyu Zhou, Qingyu Wang, Yutong Du, and Cheng Huang. 2021. Malicious Packages Lurking in User-Friendly Python Package Index. In 2021 IEEE 20th International Conference on Trust, Security and Privacy in Computing and Communications (TrustCom). IEEE, 606\u2013613."},{"key":"e_1_3_2_1_33_1","unstructured":"licenses. (accessed: 20.07.2024). licenses. https:\/\/blog.inedo.com\/python\/python-package-licenses"},{"key":"e_1_3_2_1_34_1","unstructured":"maloss. (accessed: 1.08.2024). maloss tool. https:\/\/github.com\/osssanitizer\/maloss"},{"key":"e_1_3_2_1_35_1","unstructured":"malwarecheck. (accessed: 20.07.2024). malwarecheck. https:\/\/warehouse.pypa.io\/development\/malware-checks.html"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.5555\/998675.999433"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1145\/3538969.3544415"},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-52683-2_2"},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1145\/3407023.3409183"},{"key":"e_1_3_2_1_40_1","unstructured":"OSSGadget. (accessed: 20.07.2024). OSSGadget. https:\/\/github.com\/microsoft\/OSSGadget"},{"key":"e_1_3_2_1_41_1","unstructured":"packj. (accessed: 20.07.2024). packj. https:\/\/github.com\/ossillate-inc\/packj"},{"key":"e_1_3_2_1_42_1","volume-title":"Scikit-learn: Machine learning in Python. the Journal of machine Learning research 12","author":"Pedregosa Fabian","year":"2011","unstructured":"Fabian Pedregosa, Ga\u00ebl Varoquaux, Alexandre Gramfort, Vincent Michel, Bertrand Thirion, Olivier Grisel, Mathieu Blondel, Peter Prettenhofer, Ron Weiss, Vincent Dubourg, et al. 2011. Scikit-learn: Machine learning in Python. the Journal of machine Learning research 12 (2011), 2825\u20132830."},{"key":"e_1_3_2_1_43_1","unstructured":"pymafka. (accessed: 20.08.2024). pymafka. https:\/\/www.bleepingcomputer.com\/news\/security\/malicious-pypi-package-opens-backdoors-on-windows-linux-and-macs\/"},{"volume-title":"The Hitchhiker's guide to Python: best practices for development. \"O'Reilly Media","author":"Reitz Kenneth","key":"e_1_3_2_1_44_1","unstructured":"Kenneth Reitz and Tanya Schlusser. 2016. The Hitchhiker's guide to Python: best practices for development. \"O'Reilly Media, Inc.\"."},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.5555\/2011216.2011217"},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.1109\/PST52912.2021.9647791"},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.1145\/3538969.3543815"},{"key":"e_1_3_2_1_48_1","volume-title":"Practical Automated Detection of Malicious npm Packages. arXiv preprint arXiv:2202.13953","author":"Sejfia Adriana","year":"2022","unstructured":"Adriana Sejfia and Max Sch\u00e4fer. 2022. Practical Automated Detection of Malicious npm Packages. arXiv preprint arXiv:2202.13953 (2022)."},{"key":"e_1_3_2_1_49_1","volume-title":"Lorenzo De Carli, and Vaibhav Rastogi","author":"Taylor Matthew","year":"2020","unstructured":"Matthew Taylor, Ruturaj K Vaidya, Drew Davidson, Lorenzo De Carli, and Vaibhav Rastogi. 2020. Spellbound: Defending against package typosquatting. arXiv preprint arXiv:2003.03471 (2020)."},{"key":"e_1_3_2_1_50_1","unstructured":"thehackernews. (accessed: 12.07.2024). stealing. https:\/\/thehackernews.com\/2022\/08\/10-credential-stealing-python-libraries.html"},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"publisher","DOI":"10.1145\/2884781.2884800"},{"key":"e_1_3_2_1_53_1","volume-title":"Drew Davidson, and Vaibhav Rastogi.","author":"Vaidya Ruturaj K","year":"2019","unstructured":"Ruturaj K Vaidya, Lorenzo De Carli, Drew Davidson, and Vaibhav Rastogi. 2019. Security issues in language-based software ecosystems. arXiv preprint arXiv:1903.02613 (2019)."},{"key":"e_1_3_2_1_54_1","doi-asserted-by":"publisher","DOI":"10.5281\/zenodo.6947954"},{"key":"e_1_3_2_1_55_1","doi-asserted-by":"publisher","DOI":"10.1145\/3468264.3468592"},{"key":"e_1_3_2_1_56_1","volume-title":"A Benchmark Comparison of Python Malware Detection Approaches. arXiv preprint arXiv:2209.13288","author":"Vu Duc-Ly","year":"2022","unstructured":"Duc-Ly Vu, Zachary Newman, and John Speed Meyers. 2022. A Benchmark Comparison of Python Malware Detection Approaches. arXiv preprint arXiv:2209.13288 (2022)."},{"key":"e_1_3_2_1_57_1","volume-title":"Bad Snakes: Understanding and Improving Python Package Index Malware Scanning. In 2023 IEEE\/ACM 45th International Conference on Software Engineering (ICSE). IEEE, 499\u2013511","author":"Vu Duc-Ly","year":"2023","unstructured":"Duc-Ly Vu, Zachary Newman, and John Speed Meyers. 2023. Bad Snakes: Understanding and Improving Python Package Index Malware Scanning. In 2023 IEEE\/ACM 45th International Conference on Software Engineering (ICSE). IEEE, 499\u2013511."},{"key":"e_1_3_2_1_58_1","doi-asserted-by":"publisher","DOI":"10.1145\/3372297.3420015"},{"key":"e_1_3_2_1_59_1","doi-asserted-by":"publisher","DOI":"10.1109\/EuroSPW51379.2020.00074"},{"key":"e_1_3_2_1_60_1","volume-title":"Understanding and Remediating Open-Source License Incompatibilities in the PyPI Ecosystem. arXiv preprint arXiv:2308.05942","author":"Xu Weiwei","year":"2023","unstructured":"Weiwei Xu, Hao He, Kai Gao, and Minghui Zhou. 2023. Understanding and Remediating Open-Source License Incompatibilities in the PyPI Ecosystem. arXiv preprint arXiv:2308.05942 (2023)."},{"key":"e_1_3_2_1_61_1","doi-asserted-by":"publisher","DOI":"10.1109\/MALWARE.2012.6461002"},{"key":"e_1_3_2_1_62_1","doi-asserted-by":"publisher","DOI":"10.1145\/2435349.2435364"},{"key":"e_1_3_2_1_63_1","volume-title":"2022 IEEE\/ACM 44th International Conference on Software Engineering: Software Engineering in Practice (ICSE-SEIP). IEEE, 331\u2013340","author":"Zahan Nusrat","year":"2022","unstructured":"Nusrat Zahan, Thomas Zimmermann, Patrice Godefroid, Brendan Murphy, Chandra Maddila, and Laurie Williams. 2022. What are weak links in the NPM supply chain?. In 2022 IEEE\/ACM 44th International Conference on Software Engineering: Software Engineering in Practice (ICSE-SEIP). IEEE, 331\u2013340."},{"key":"e_1_3_2_1_64_1","volume-title":"Malicious Package Detection in NPM and PyPI using a Single Model of Malicious Behavior Sequence. arXiv preprint arXiv:2309.02637","author":"Zhang Junan","year":"2023","unstructured":"Junan Zhang, Kaifeng Huang, Bihuan Chen, Chong Wang, Zhenhao Tian, and Xin Peng. 2023. Malicious Package Detection in NPM and PyPI using a Single Model of Malicious Behavior Sequence. arXiv preprint arXiv:2309.02637 (2023)."},{"key":"e_1_3_2_1_65_1","volume-title":"28th USENIX Security Symposium (USENIX Security 19)","author":"Zimmermann Markus","year":"2019","unstructured":"Markus Zimmermann, Cristian-Alexandru Staicu, Cam Tenny, and Michael Pradel. 2019. Small world with high risks: A study of security threats in the npm ecosystem. In 28th USENIX Security Symposium (USENIX Security 19). 995\u20131010."}],"event":{"name":"SAC '25: 40th ACM\/SIGAPP Symposium on Applied Computing","sponsor":["SIGAPP ACM Special Interest Group on Applied Computing"],"location":"Catania International Airport Catania Italy","acronym":"SAC '25"},"container-title":["Proceedings of the 40th ACM\/SIGAPP Symposium on Applied Computing"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3672608.3707756","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3672608.3707756","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T01:57:32Z","timestamp":1750298252000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3672608.3707756"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,3,31]]},"references-count":64,"alternative-id":["10.1145\/3672608.3707756","10.1145\/3672608"],"URL":"https:\/\/doi.org\/10.1145\/3672608.3707756","relation":{},"subject":[],"published":{"date-parts":[[2025,3,31]]},"assertion":[{"value":"2025-05-14","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}