{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,18]],"date-time":"2026-03-18T02:27:03Z","timestamp":1773800823056,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":111,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,4,15]],"date-time":"2024-04-15T00:00:00Z","timestamp":1713139200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"DOI":"10.13039\/100000001","name":"NSF (National Science Foundation)","doi-asserted-by":"publisher","award":["2107230"],"award-info":[{"award-number":["2107230"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000001","name":"NSF (National Science Foundation)","doi-asserted-by":"publisher","award":["2229703"],"award-info":[{"award-number":["2229703"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000001","name":"NSF (National Science Foundation)","doi-asserted-by":"publisher","award":["2107020"],"award-info":[{"award-number":["2107020"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000001","name":"NSF (National Science Foundation)","doi-asserted-by":"publisher","award":["2104319"],"award-info":[{"award-number":["2104319"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Faculty Research Participation Program at Argonne National Laboratory"},{"name":"NSERC (Natural Sciences and Engineering Research Council of Canada)","award":["RGPIN-2019-0507"],"award-info":[{"award-number":["RGPIN-2019-0507"]}]},{"name":"U.S. DOE Office of Science-Advanced Scientific Computing Research Program","award":["DE- AC02-06CH11357"],"award-info":[{"award-number":["DE- AC02-06CH11357"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,4,15]]},"DOI":"10.1145\/3643991.3644907","type":"proceedings-article","created":{"date-parts":[[2024,7,2]],"date-time":"2024-07-02T13:05:13Z","timestamp":1719925513000},"page":"431-443","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":11,"title":["PeaTMOSS: A Dataset and Initial Analysis of Pre-Trained Models in Open-Source Software"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-2608-8576","authenticated-orcid":false,"given":"Wenxin","family":"Jiang","sequence":"first","affiliation":[{"name":"Purdue University, West Lafayette, Indiana, United States"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-9831-714X","authenticated-orcid":false,"given":"Jerin","family":"Yasmin","sequence":"additional","affiliation":[{"name":"Queen's University, Kingston, Ontario, Canada"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-7088-0597","authenticated-orcid":false,"given":"Jason","family":"Jones","sequence":"additional","affiliation":[{"name":"Purdue University, West Lafayette, Indiana, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0413-4594","authenticated-orcid":false,"given":"Nicholas","family":"Synovic","sequence":"additional","affiliation":[{"name":"Loyola University Chicago, Chicago, Illinois, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0000-1431-2249","authenticated-orcid":false,"given":"Jiashen","family":"Kuo","sequence":"additional","affiliation":[{"name":"Purdue University, West Lafayette, Indiana, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-1453-9168","authenticated-orcid":false,"given":"Nathaniel","family":"Bielanski","sequence":"additional","affiliation":[{"name":"Purdue University, West Lafayette, Indiana, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2208-3893","authenticated-orcid":false,"given":"Yuan","family":"Tian","sequence":"additional","affiliation":[{"name":"Queen's University, Kingston, Ontario, Canada"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0452-5571","authenticated-orcid":false,"given":"George K.","family":"Thiruvathukal","sequence":"additional","affiliation":[{"name":"Loyola University Chicago, Chicago, Illinois, United States of America"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2495-686X","authenticated-orcid":false,"given":"James C.","family":"Davis","sequence":"additional","affiliation":[{"name":"Purdue University, West Lafayette, Indiana, United States of America"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2024,7,2]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"[n. d.]. ChatGPT. https:\/\/chat.openai.com"},{"key":"e_1_3_2_1_2_1","unstructured":"[n. d.]. CodeScan: Code Quality and Security for Salesforce. https:\/\/www.codescan.io."},{"key":"e_1_3_2_1_3_1","unstructured":"[n. d.]. npm: Build Amazing Things. https:\/\/www.npmjs.com\/."},{"key":"e_1_3_2_1_4_1","unstructured":"[n. d.]. PyPI: The Python Package Index. https:\/\/pypi.org\/."},{"key":"e_1_3_2_1_5_1","unstructured":"2022. License compatibility. https:\/\/en.wikipedia.org\/wiki\/License_compatibility"},{"key":"e_1_3_2_1_6_1","unstructured":"2023. NGC Catalog - GPU-optimized AI Machine Learning & HPC Software. https:\/\/catalog.ngc.nvidia.com."},{"key":"e_1_3_2_1_7_1","unstructured":"Meta AI. 2024. Papers With Code. https:\/\/paperswithcode.com\/about"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/SANER56733.2023.00080"},{"key":"e_1_3_2_1_9_1","volume-title":"Software Engineering for Machine Learning: A Case Study. In International Conference on Software Engineering: Software Engineering in Practice (ICSE-SEIP).","author":"Amershi Saleema","year":"2019","unstructured":"Saleema Amershi, Andrew Begel, Christian Bird, Robert DeLine, and Harald Gall. 2019. Software Engineering for Machine Learning: A Case Study. In International Conference on Software Engineering: Software Engineering in Practice (ICSE-SEIP)."},{"key":"e_1_3_2_1_10_1","volume-title":"Data augmentation generative adversarial networks. arXiv:1711.04340","author":"Antoniou Antreas","year":"2017","unstructured":"Antreas Antoniou, Amos Storkey, and Harrison Edwards. 2017. Data augmentation generative adversarial networks. arXiv:1711.04340 (2017)."},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1147\/JRD.2019.2942288"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1145\/3196398.3196430"},{"key":"e_1_3_2_1_13_1","volume-title":"Usage and attribution of Stack Over-flow code snippets in GitHub projects. EMSE","author":"Baltes Sebastian","year":"2019","unstructured":"Sebastian Baltes and Stephan Diehl. 2019. Usage and attribution of Stack Over-flow code snippets in GitHub projects. EMSE (2019)."},{"key":"e_1_3_2_1_14_1","volume-title":"My Tests Broke the Build: An Explorative Analysis of Travis CI with GitHub. In International Conference on Mining Software Repositories (MSR).","author":"Beller Moritz","year":"2017","unstructured":"Moritz Beller, Georgios Gousios, and Andy Zaidman. 2017. Oops, My Tests Broke the Build: An Explorative Analysis of Travis CI with GitHub. In International Conference on Mining Software Repositories (MSR)."},{"key":"e_1_3_2_1_15_1","volume-title":"TravisTorrent: Synthesizing Travis CI and GitHub for Full-Stack Research on Continuous Integration. In International Conference on Mining Software Repositories (MSR).","author":"Beller Moritz","year":"2017","unstructured":"Moritz Beller, Georgios Gousios, and Andy Zaidman. 2017. TravisTorrent: Synthesizing Travis CI and GitHub for Full-Stack Research on Continuous Integration. In International Conference on Mining Software Repositories (MSR)."},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1007\/s10664-022-10282-8"},{"key":"e_1_3_2_1_17_1","volume-title":"Conf. on Software Maintenance and Evolution (ICSME). IEEE.","author":"Borges Hudson","year":"2016","unstructured":"Hudson Borges, Andre Hora, and Marco Tulio Valente. 2016. Understanding the factors that impact the popularity of GitHub repositories. In Internat'l. Conf. on Software Maintenance and Evolution (ICSME). IEEE."},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.jss.2020.110542"},{"key":"e_1_3_2_1_19_1","volume-title":"Yuanzhi Li, Scott Lundberg, Harsha Nori, Hamid Palangi, Marco Tulio Ribeiro, and Yi Zhang.","author":"Bubeck S\u00e9bastien","year":"2023","unstructured":"S\u00e9bastien Bubeck, Varun Chandrasekaran, Ronen Eldan, Johannes Gehrke, Eric Horvitz, Ece Kamar, Peter Lee, Yin Tat Lee, Yuanzhi Li, Scott Lundberg, Harsha Nori, Hamid Palangi, Marco Tulio Ribeiro, and Yi Zhang. 2023. Sparks of Artificial General Intelligence: Early experiments with GPT-4. arXiv:2303.12712"},{"key":"e_1_3_2_1_20_1","volume-title":"Prevalence of Code Smells in Reinforcement Learning Projects. arXiv:2303.10236","author":"Cardozo Nicol\u00e1s","year":"2023","unstructured":"Nicol\u00e1s Cardozo, Ivana Dusparic, and Christian Cabrera. 2023. Prevalence of Code Smells in Reinforcement Learning Projects. arXiv:2303.10236 (2023)."},{"key":"e_1_3_2_1_21_1","volume-title":"Analyzing the Evolution and Maintenance of ML Models on Hugging Face. arXiv","author":"Casta\u00f1o Joel","year":"2023","unstructured":"Joel Casta\u00f1o, Silverio Mart\u00ednez-Fern\u00e1ndez, Xavier Franch, and Justus Bogner. 2023. Analyzing the Evolution and Maintenance of ML Models on Hugging Face. arXiv (2023). https:\/\/arxiv.org\/abs\/2311.13380"},{"key":"e_1_3_2_1_22_1","volume-title":"Exploring the Carbon Footprint of Hugging Face's ML Models: A Repository Mining Study. arXiv","author":"Casta\u00f1o Joel","year":"2023","unstructured":"Joel Casta\u00f1o, Silverio Mart\u00ednez-Fern\u00e1ndez, Xavier Franch, and Justus Bogner. 2023. Exploring the Carbon Footprint of Hugging Face's ML Models: A Repository Mining Study. arXiv (2023). https:\/\/arxiv.org\/pdf\/2305.11164.pdf"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1145\/3531146.3533143"},{"key":"e_1_3_2_1_24_1","volume-title":"Javier L C\u00e1novas Izquierdo, and Jordi Cabot","author":"Cosentino Valerio","year":"2017","unstructured":"Valerio Cosentino, Javier L C\u00e1novas Izquierdo, and Jordi Cabot. 2017. A systematic mapping study of software development with GitHub. IEEE Access (2017)."},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.14722\/ndss.2024.241015"},{"key":"e_1_3_2_1_26_1","volume-title":"Proceedings of the IEEE John Vincent Atanasoff Symposium on Modern Computing (JVA'23)","author":"Davis James C.","unstructured":"James C. Davis, Purvish Jajal, Wenxin Jiang, Taylor R. Schorlemmer, Nicholas Synovic, and George K. Thiruvathukal. 2023. Reusing Deep Learning Models: Challenges and Directions in Software Engineering. In Proceedings of the IEEE John Vincent Atanasoff Symposium on Modern Computing (JVA'23)."},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1109\/JPROC.2020.2976475"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1145\/3453478"},{"key":"e_1_3_2_1_29_1","volume-title":"Automatic Labeling of Data for Transfer Learning. In IEEE Conference on Computer Vision and Pattern Recognition (CVPR) Workshops.","author":"Dube Parijat","year":"2019","unstructured":"Parijat Dube, Bishwaranjan Bhattacharjee, Siyu Huo, Patrick Watson, and Brian Belgodere. 2019. Automatic Labeling of Data for Transfer Learning. In IEEE Conference on Computer Vision and Pattern Recognition (CVPR) Workshops."},{"key":"e_1_3_2_1_30_1","volume-title":"Structured information extraction from complex scientific text with fine-tuned large language models. arXiv preprint arXiv:2212.05238","author":"Dunn Alexander","year":"2022","unstructured":"Alexander Dunn, John Dagdelen, Nicholas Walker, Sanghoon Lee, Andrew S Rosen, Gerbrand Ceder, Kristin Persson, and Anubhav Jain. 2022. Structured information extraction from complex scientific text with fine-tuned large language models. arXiv preprint arXiv:2212.05238 (2022)."},{"key":"e_1_3_2_1_31_1","volume-title":"ACM Join Meeting on European Software Eng. Conf. and Sympos. on the Found. of Software Eng.(ESEC\/FSE).","author":"Eck M.","unstructured":"M. Eck, F. Palomba, M. Castelluccio, and A. Bacchelli. 2019. Understanding flaky tests: The developer's perspective. In ACM Join Meeting on European Software Eng. Conf. and Sympos. on the Found. of Software Eng.(ESEC\/FSE)."},{"key":"e_1_3_2_1_32_1","volume-title":"International Symposium on Software Testing and Analysis (ISSTA).","author":"Elsner D.","unstructured":"D. Elsner, F. Hauer, A. Pretschner, and S. Reimer. 2021. Empirically evaluating readily available information for regression test optimization in continuous integration. In International Symposium on Software Testing and Analysis (ISSTA)."},{"key":"e_1_3_2_1_33_1","unstructured":"Hugging Face. 2021. Hugging Face - The AI community building the future. https:\/\/huggingface.co\/"},{"key":"e_1_3_2_1_34_1","unstructured":"Hugging Face. 2023. Hugging Face Hub Library Documentation. https:\/\/github.com\/HuggingFace\/hub-docs\/blob\/main\/js\/src\/lib\/\/interfaces\/Libraries.ts"},{"key":"e_1_3_2_1_35_1","first-page":"1","article-title":"What makes a popular academic AI repository","volume":"26","author":"Fan Yuanrui","year":"2021","unstructured":"Yuanrui Fan, Xin Xia, David Lo, Ahmed E Hassan, and Shanping Li. 2021. What makes a popular academic AI repository? EMSE 26 (2021), 1--35.","journal-title":"EMSE"},{"key":"e_1_3_2_1_36_1","volume-title":"International Conf. on Machine Learning (ICML).","author":"Geiping Jonas","year":"2023","unstructured":"Jonas Geiping and Tom Goldstein. 2023. Cramming: Training a Language Model on a single GPU in one day. In International Conf. on Machine Learning (ICML)."},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1109\/MS.2012.50"},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICPC.2010.48"},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1073\/pnas.2305016120"},{"key":"e_1_3_2_1_40_1","unstructured":"GitHub. 2022. GitHub REST API Documentation on Licenses. https:\/\/docs.github.com\/en\/rest\/licenses\/licenses. API version: 2022-11-28."},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1145\/3379597.3387455"},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1145\/3569934"},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"crossref","unstructured":"Georgios Gousios and Diomidis Spinellis. 2012. GHTorrent: Github's data from a firehose. In Internat'l Working Conf. on Mining Software Repositories (MSR).","DOI":"10.1109\/MSR.2012.6224294"},{"key":"e_1_3_2_1_44_1","volume-title":"Naming Practices in Object-oriented Programming: An Empirical Study. Journal of Software Engineering Research and Development","author":"Gresta Remo","year":"2023","unstructured":"Remo Gresta, Vinicius Durelli, and Elder Cirilo. 2023. Naming Practices in Object-oriented Programming: An Empirical Study. Journal of Software Engineering Research and Development (2023), 5--1."},{"key":"e_1_3_2_1_45_1","volume-title":"Threats to pre-trained language models: Survey and taxonomy. arXiv preprint arXiv:2202.06862","author":"Guo Shangwei","year":"2022","unstructured":"Shangwei Guo, Chunlong Xie, Jiwei Li, Lingjuan Lyu, and Tianwei Zhang. 2022. Threats to pre-trained language models: Survey and taxonomy. arXiv preprint arXiv:2202.06862 (2022)."},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.aiopen.2021.08.002"},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11219-016-9344-4"},{"key":"e_1_3_2_1_48_1","unstructured":"Hugging Face. 2023. Repositories and Licenses. https:\/\/huggingface.co\/docs\/hub\/repositories-licenses."},{"key":"e_1_3_2_1_49_1","volume-title":"Analysis of Failures and Risks in Deep Learning Model Converters: A Case Study in the ONNX Ecosystem. arXiv","author":"Jajal Purvish","year":"2023","unstructured":"Purvish Jajal, Wenxin Jiang, Arav Tewari, Joseph Woo, Yung-Hsiang Lu, George K Thiruvathukal, and James C Davis. 2023. Analysis of Failures and Risks in Deep Learning Model Converters: A Case Study in the ONNX Ecosystem. arXiv (2023). https:\/\/arxiv.org\/abs\/2303.17708"},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICAA58325.2023.00029"},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICSE43902.2021.00107"},{"key":"e_1_3_2_1_52_1","doi-asserted-by":"crossref","unstructured":"W. Jiang V. Banna N. Vivek A. Goel N. Synovic G.K. Thiruvathukal and J.C. Davis. 2023. Challenges and practices of deep learning model reengineering: A case study on computer vision. arXiv (2023).","DOI":"10.1007\/s10664-024-10521-0"},{"key":"e_1_3_2_1_53_1","volume-title":"Exploring Naming Conventions (and Defects) of Pre-trained Deep Learning Models in Hugging Face and Other Model Hubs. arXiv:2310","author":"Jiang Wenxin","year":"2023","unstructured":"Wenxin Jiang, Chingwo Cheung, George K. Thiruvathukal, and James C. Davis. 2023. Exploring Naming Conventions (and Defects) of Pre-trained Deep Learning Models in Hugging Face and Other Model Hubs. arXiv:2310.01642 (2023)."},{"key":"e_1_3_2_1_54_1","volume-title":"Davis","author":"Schorlemmer Taylor R.","year":"2023","unstructured":"Wenxin. Jiang, Nicholas. Synovic, Matt. Hyatt, Taylor R. Schorlemmer, Rohan. Sethi, Yung-Hsiang Lu, George K. Thiruvathukal, and James C. Davis. 2023. An Empirical Study of Pre-Trained Model Reuse in the Hugging Face Deep Learning Model Registry. In ICSE."},{"key":"e_1_3_2_1_55_1","doi-asserted-by":"crossref","unstructured":"W. Jiang N. Synovic P. Jajal T.R. Schorlemmer A. Tewari B. Pareek G.K. Thiruvathukal and J.C. Davis. 2023. PTMTorrent: A Dataset for Mining Open-source Pre-trained Model Packages. MSR (2023).","DOI":"10.1109\/MSR59073.2023.00021"},{"key":"e_1_3_2_1_56_1","volume-title":"An Empirical Study of Artifacts and Security Risks in the Pre-Trained Model Supply Chain. In ACM Workshop on Software Supply Chain Offensive Research and Ecosystem Defenses (SCORED).","author":"Jiang W.","unstructured":"W. Jiang, N. Synovic, R. Sethi, A. Indarapu, M. Hyatt, T.R. Schorlemmer, G.K. Thiruvathukal, and J.C. Davis. 2022. An Empirical Study of Artifacts and Security Risks in the Pre-Trained Model Supply Chain. In ACM Workshop on Software Supply Chain Offensive Research and Ecosystem Defenses (SCORED)."},{"key":"e_1_3_2_1_57_1","volume-title":"Ammus: A survey of transformer-based pretrained models in natural language processing. arXiv","author":"Kalyan Katikapalli Subramanyam","year":"2021","unstructured":"Katikapalli Subramanyam Kalyan, Ajit Rajasekharan, and Sivanesan Sangeetha. 2021. Ammus: A survey of transformer-based pretrained models in natural language processing. arXiv (2021)."},{"key":"e_1_3_2_1_58_1","unstructured":"Andrej Karpathy. 2017. Software 2.0. https:\/\/karpathy.medium.com\/software-2-0-a64152b37c35. (2017) 1--8."},{"key":"e_1_3_2_1_59_1","volume-title":"Intern. Conf. on Intelligence and Security Informatics.","author":"Kathikar A.","unstructured":"A. Kathikar, A. Nair, B. Lazarine, A. Sachdeva, and S. Samtani. 2023. Assessing the vulnerabilities of the open-source artificial intelligence (AI) landscape: A large-scale analysis of the Hugging Face platform. In Intern. Conf. on Intelligence and Security Informatics."},{"key":"e_1_3_2_1_60_1","unstructured":"P. Kuckertz J. G\u00f6pfert O. Karras D. Neuroth J. Sch\u00f6nau R. Pueblas S. Ferenz F. Engel N. Pflugradt J.M. Weinand A. Nie\u00dfe S. Auer and D. Stolten. 2023. A Metadata-Based Ecosystem to Improve the FAIRness of Research Software. http:\/\/arxiv.org\/abs\/2306.10620"},{"key":"e_1_3_2_1_61_1","volume-title":"Understanding open source and free software licensing: guide to navigating licensing issues in existing & new software","author":"St Laurent Andrew","unstructured":"Andrew MSt Laurent. 2004. Understanding open source and free software licensing: guide to navigating licensing issues in existing & new software. O'Reilly Media."},{"key":"e_1_3_2_1_62_1","doi-asserted-by":"publisher","DOI":"10.1038\/nature14539"},{"key":"e_1_3_2_1_63_1","unstructured":"P. Lewis E. Perez A. Piktus F. Petroni V. Karpukhin N. Goyal et al. 2020. Retrieval-augmented generation for knowledge-intensive nlp tasks. Adv. in Neural Information Processing Systems (NeurIPS) (2020)."},{"key":"e_1_3_2_1_64_1","volume-title":"Scalpel: The Python Static Analysis Framework. arXiv preprint arXiv:2202.11840","author":"Li Li","year":"2022","unstructured":"Li Li, Jiawei Wang, and Haowei Quan. 2022. Scalpel: The Python Static Analysis Framework. arXiv preprint arXiv:2202.11840 (2022)."},{"key":"e_1_3_2_1_65_1","volume-title":"Testing Machine Learning Systems in Industry: An Empirical Study. In International Conference on Software Engineering: Software Engineering in Practice (ICSE-SEIP). 263--272","author":"Li S.","unstructured":"S. Li, J. Guo, J.G. Lou, M. Fan, T. Liu, and D. Zhang. 2022. Testing Machine Learning Systems in Industry: An Empirical Study. In International Conference on Software Engineering: Software Engineering in Practice (ICSE-SEIP). 263--272."},{"key":"e_1_3_2_1_66_1","volume-title":"ACM SIGSOFT International Symposium on Software Testing and Analysis (ISSTA). 139--151","author":"Li Y.","unstructured":"Y. Li, Z. Zhang, B. Liu, Z. Yang, and Y. Liu. 2021. ModelDiff: Testing-based DNN similarity comparison for model reuse detection. In ACM SIGSOFT International Symposium on Software Testing and Analysis (ISSTA). 139--151."},{"key":"e_1_3_2_1_67_1","unstructured":"Ziyu Li Rihan Hai Alessandro Bozzon and Asterios Katsifodimos. 2022. Metadata Representations for Queryable ML Model Zoos. arXiv:2207.09315"},{"key":"e_1_3_2_1_68_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11219-019-09476-z"},{"key":"e_1_3_2_1_69_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-08129-3_6"},{"key":"e_1_3_2_1_70_1","unstructured":"Pedro Marcelino. 2022. Transfer learning from pre-trained models. https:\/\/towardsdatascience.com\/transfer-learning-from-pre-trained-models-f2393f124751"},{"key":"e_1_3_2_1_71_1","doi-asserted-by":"crossref","unstructured":"Diego Montes Pongpatapee Peerapatanapokin Jeff Schultz Chengjun Guo Wenxin Jiang and James C Davis. 2022. Discrepancies among pre-trained deep neural networks: a new threat to model zoo reliability. In ESEC\/FSE-IVR track.","DOI":"10.1145\/3540250.3560881"},{"key":"e_1_3_2_1_72_1","volume-title":"Bug Characterization in Machine Learning-based Systems. arXiv","author":"Morovati Mohammad Mehdi","year":"2023","unstructured":"Mohammad Mehdi Morovati, Amin Nikanjam, Florian Tambon, Foutse Khomh, and Zhen Ming. 2023. Bug Characterization in Machine Learning-based Systems. arXiv (2023). https:\/\/arxiv.org\/abs\/2307.14512"},{"key":"e_1_3_2_1_73_1","volume-title":"A Dataset and Analysis of Open-Source Machine Learning Products. arXiv preprint arXiv:2308.04328","author":"Nahar Nadia","year":"2023","unstructured":"Nadia Nahar, Haoran Zhang, Grace Lewis, Shurui Zhou, and Christian K\u00e4stner. 2023. A Dataset and Analysis of Open-Source Machine Learning Products. arXiv preprint arXiv:2308.04328 (2023)."},{"key":"e_1_3_2_1_74_1","volume-title":"A Brief Timeline of NLP: From Bag of Words to the Transformer Family. Medium","year":"2021","unstructured":"NLPlanet. 2021. A Brief Timeline of NLP: From Bag of Words to the Transformer Family. Medium (2021). https:\/\/medium.com\/nlplanet\/a-brief-timeline-of-nlp-from-bag-of-words-to-the-transformer-family-7caad8bbba56"},{"key":"e_1_3_2_1_76_1","doi-asserted-by":"publisher","unstructured":"David Patterson Joseph Gonzalez Quoc Le Chen Liang Lluis-Miquel Munguia Daniel Rothchild David So Maud Texier and Jeff Dean. 2021. Carbon Emissions and Large Neural Network Training. 10.48550\/arXiv.2104.10350","DOI":"10.48550\/arXiv.2104.10350"},{"key":"e_1_3_2_1_77_1","volume-title":"Quantitative ai risk assessments: Opportunities and challenges. arXiv","author":"Piorkowski David","year":"2023","unstructured":"David Piorkowski, Michael Hind, and John Richards. 2023. Quantitative ai risk assessments: Opportunities and challenges. arXiv (2023)."},{"key":"e_1_3_2_1_78_1","unstructured":"Pytorch. 2021. PyTorch Hub. https:\/\/pytorch.org\/hub\/"},{"key":"e_1_3_2_1_79_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICSE48619.2023.00090"},{"key":"e_1_3_2_1_80_1","doi-asserted-by":"publisher","DOI":"10.2197\/ipsjjip.29.296"},{"key":"e_1_3_2_1_81_1","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.1906.07154"},{"key":"e_1_3_2_1_82_1","doi-asserted-by":"publisher","DOI":"10.1145\/3351095.3372873"},{"key":"e_1_3_2_1_83_1","doi-asserted-by":"publisher","DOI":"10.3390\/info11040193"},{"key":"e_1_3_2_1_84_1","volume-title":"Open source licensing. Software Freedom and Intellectual Property Law","author":"Rosen Lawrence","year":"2005","unstructured":"Lawrence Rosen. 2005. Open source licensing. Software Freedom and Intellectual Property Law (2005)."},{"key":"e_1_3_2_1_85_1","volume-title":"Interpersonal Trust in OSS: Exploring Dimensions of Trust in GitHub Pull Requests. In International Conference on Software Engineering: New Ideas and Emerging Results (ICSE-NIER).","author":"Sajadi A.","unstructured":"A. Sajadi, K. Damevski, and P. Chatterjee. 2023. Interpersonal Trust in OSS: Exploring Dimensions of Trust in GitHub Pull Requests. In International Conference on Software Engineering: New Ideas and Emerging Results (ICSE-NIER)."},{"key":"e_1_3_2_1_86_1","volume-title":"Conference on Neural Information Processing Systems (NeurIPS).","author":"Schelter S.","unstructured":"S. Schelter, J.H. Boese, J. Kirschnick, T. Klein, and S. Seufert. 2017. Automatically tracking metadata and provenance of machine learning experiments. In Conference on Neural Information Processing Systems (NeurIPS)."},{"key":"e_1_3_2_1_87_1","volume-title":"Kyung Myung Ko, et al","author":"Schorlemmer Taylor R","year":"2024","unstructured":"Taylor R Schorlemmer, Kelechi G Kalu, Luke Chigges, Kyung Myung Ko, et al. 2024. Signing in Four Public Software Package Registries: Quantity, Quality, and Influencing Factors. arXiv:2401.14635"},{"key":"e_1_3_2_1_88_1","unstructured":"Y. Shen K. Song X. Tan D. Li W. Lu and Y. Zhuang. 2023. HuggingGPT: Solving AI tasks with ChatGPT and its Friends in HuggingFace. arXiv:2303.17580 (2023)."},{"key":"e_1_3_2_1_89_1","doi-asserted-by":"crossref","unstructured":"S.S. Sohail F. Farhat Y. Himeur M. Nadeem D.O. Madsen Y. Singh S. Atalla and W. Mansoor. 2023. Decoding ChatGPT: a taxonomy of existing research current challenges and possible future directions. Journal of King Saud University-Computer and Information Sciences (2023).","DOI":"10.2139\/ssrn.4413921"},{"key":"e_1_3_2_1_90_1","first-page":"287","article-title":"Understanding the drivers of unethical programming behavior: The inappropriate reuse of internet-accessible code. Journal of Management Info","volume":"31","author":"Sojer M.","year":"2014","unstructured":"M. Sojer, O. Alexy, S. Kleinknecht, and J. Henkel. 2014. Understanding the drivers of unethical programming behavior: The inappropriate reuse of internet-accessible code. Journal of Management Info. Systems 31, 3 (2014), 287--325.","journal-title":"Systems"},{"key":"e_1_3_2_1_91_1","unstructured":"Sourcegraph. 2023. https:\/\/docs.sourcegraph.com\/cli"},{"key":"e_1_3_2_1_92_1","doi-asserted-by":"publisher","DOI":"10.1145\/3510003.3510199"},{"key":"e_1_3_2_1_93_1","doi-asserted-by":"crossref","unstructured":"Mina Taraghi Gianolli Dorcelus Armstrong Foundjem Florian Tambon and Foutse Khomh. 2024. Deep Learning Model Reuse in the HuggingFace Community: Challenges Benefit and Trends. arXiv:2401.13177","DOI":"10.1109\/SANER60148.2024.00059"},{"key":"e_1_3_2_1_94_1","unstructured":"The Linux Foundation. 2019. Fulfilling Open Source License Obligations: Can Checklists Help? https:\/\/events19.linuxfoundation.org\/wp-content\/uploads\/2018\/07\/OSLS-2019-Fulfilling-Open-Source-license-obligations-Can-checklists-help.pdf."},{"key":"e_1_3_2_1_95_1","doi-asserted-by":"publisher","DOI":"10.1145\/3379597.3387448"},{"key":"e_1_3_2_1_96_1","doi-asserted-by":"publisher","DOI":"10.1007\/s10664-022-10206-6"},{"key":"e_1_3_2_1_97_1","doi-asserted-by":"crossref","unstructured":"S. Van Der Burg E. Dolstra S. McIntosh J. Davies D.M. German and A. Hemel. 2014. Tracing software build processes to uncover license compliance inconsistencies. In Automated software engineering (ASE). 731--742.","DOI":"10.1145\/2642937.2643013"},{"key":"e_1_3_2_1_98_1","doi-asserted-by":"publisher","DOI":"10.1109\/WAIN52551.2021.00011"},{"key":"e_1_3_2_1_99_1","unstructured":"Chengcheng Wan Shicheng Liu Henry Hoffmann Michael Maire and Shan Lu. 2021. Are machine learning cloud apis used correctly?. In ICSE."},{"key":"e_1_3_2_1_100_1","doi-asserted-by":"publisher","DOI":"10.1109\/TSC.2020.3000900"},{"key":"e_1_3_2_1_101_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.cose.2022.102807"},{"key":"e_1_3_2_1_102_1","volume-title":"Open source license inconsistencies on github. ACM TOSEM 32, 5","author":"Wolter Thomas","year":"2023","unstructured":"Thomas Wolter, Ann Barcomb, Dirk Riehle, and Nikolay Harutyunyan. 2023. Open source license inconsistencies on github. ACM TOSEM 32, 5 (2023)."},{"key":"e_1_3_2_1_103_1","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2023\/760"},{"key":"e_1_3_2_1_104_1","volume-title":"Understanding and Remediating Open-Source License Incompatibilities in the PyPI Ecosystem. arXiv preprint arXiv:2308.05942","author":"Xu Weiwei","year":"2023","unstructured":"Weiwei Xu, Hao He, Kai Gao, and Minghui Zhou. 2023. Understanding and Remediating Open-Source License Incompatibilities in the PyPI Ecosystem. arXiv preprint arXiv:2308.05942 (2023)."},{"key":"e_1_3_2_1_105_1","volume-title":"What Do Users Ask in Open-Source AI Repositories? An Empirical Study of GitHub Issues. arXiv","author":"Yang Zhou","year":"2023","unstructured":"Zhou Yang, Chenyu Wang, Jieke Shi, Thong Hoang, Pavneet Kochhar, Qinghua Lu, Zhenchang Xing, and David Lo. 2023. What Do Users Ask in Open-Source AI Repositories? An Empirical Study of GitHub Issues. arXiv (2023)."},{"key":"e_1_3_2_1_106_1","doi-asserted-by":"publisher","DOI":"10.1145\/3324884.3416640"},{"key":"e_1_3_2_1_107_1","volume-title":"Conference on Human Factors in Computing Systems (CHI).","author":"Zamfirescu-Pereira J.D.","unstructured":"J.D. Zamfirescu-Pereira, R.Y. Wong, B. Hartmann, and Q. Yang. 2023. Why Johnny can't prompt: how non-AI experts try (and fail) to design LLM prompts. In Conference on Human Factors in Computing Systems (CHI)."},{"key":"e_1_3_2_1_108_1","volume-title":"How Open Source Projects Use Static Code Analysis Tools in Continuous Integration Pipelines. In International Conference on Mining Software Repositories (MSR).","author":"Zampetti F.","unstructured":"F. Zampetti, S. Scalabrino, R. Oliveto, G. Canfora, and M. Di Penta. 2017. How Open Source Projects Use Static Code Analysis Tools in Continuous Integration Pipelines. In International Conference on Mining Software Repositories (MSR)."},{"key":"e_1_3_2_1_109_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-90421-4_6"},{"key":"e_1_3_2_1_110_1","doi-asserted-by":"crossref","unstructured":"Haiyin Zhang Lu\u00eds Cruz and Arie Van Deursen. 2022. Code smells for machine learning applications. In Internat'l Conf. on AI Eng.: Software Eng. for AI. 217--228.","DOI":"10.1145\/3522664.3528620"},{"key":"e_1_3_2_1_111_1","doi-asserted-by":"publisher","DOI":"10.1145\/3510003.3510191"},{"key":"e_1_3_2_1_112_1","doi-asserted-by":"publisher","DOI":"10.1109\/JPROC.2020.3004555"}],"event":{"name":"MSR '24: 21st International Conference on Mining Software Repositories","location":"Lisbon Portugal","acronym":"MSR '24","sponsor":["SIGSOFT ACM Special Interest Group on Software Engineering","IEEE CS"]},"container-title":["Proceedings of the 21st International Conference on Mining Software Repositories"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3643991.3644907","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3643991.3644907","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3643991.3644907","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T23:56:44Z","timestamp":1750291004000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3643991.3644907"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,4,15]]},"references-count":111,"alternative-id":["10.1145\/3643991.3644907","10.1145\/3643991"],"URL":"https:\/\/doi.org\/10.1145\/3643991.3644907","relation":{},"subject":[],"published":{"date-parts":[[2024,4,15]]},"assertion":[{"value":"2024-07-02","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}