{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,18]],"date-time":"2026-03-18T02:27:52Z","timestamp":1773800872821,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":22,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,4,16]],"date-time":"2024-04-16T00:00:00Z","timestamp":1713225600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,4,16]]},"DOI":"10.1145\/3643664.3648204","type":"proceedings-article","created":{"date-parts":[[2024,8,9]],"date-time":"2024-08-09T15:48:13Z","timestamp":1723218493000},"page":"1-6","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":9,"title":["Lessons Learned from Mining the Hugging Face Repository"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0005-6385-9255","authenticated-orcid":false,"given":"Joel","family":"Casta\u00f1o","sequence":"first","affiliation":[{"name":"Universitat Politecnica de Catalunya, Barcelona, Spain"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9928-133X","authenticated-orcid":false,"given":"Silverio","family":"Mart\u00ednez-Fern\u00e1ndez","sequence":"additional","affiliation":[{"name":"Universitat Politecnica de Catalunya, Barcelona, Spain"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9733-8830","authenticated-orcid":false,"given":"Xavier","family":"Franch","sequence":"additional","affiliation":[{"name":"Universitat Politecnica de Catalunya, Barcelona, Spain"}]}],"member":"320","published-online":{"date-parts":[[2024,8,9]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"Hugging Face Inc. \"Hugging Face Hub Documentation \" https:\/\/huggingface.co\/docs\/hub\/index 2023."},{"key":"e_1_3_2_1_2_1","volume-title":"Exploring the Carbon Footprint of Hugging Face's ML Models: A Repository Mining Study,\" in ACM\/IEEE International Symposium on Empirical Software Engineering and Measurement (ESEM)","author":"Casta\u00f1o J.","year":"2023","unstructured":"J. Casta\u00f1o, S. Mart\u00ednez-Fern\u00e1ndez, X. Franch, and J. Bogner, \"Exploring the Carbon Footprint of Hugging Face's ML Models: A Repository Mining Study,\" in ACM\/IEEE International Symposium on Empirical Software Engineering and Measurement (ESEM). New Orleans, LA, USA: IEEE, 2023."},{"key":"e_1_3_2_1_3_1","volume-title":"Analyzing the Evolution and Maintenance of ML Models on Hugging Face,\" in Proceedings of the 2024 IEEE\/ACM 21st International Conference on Mining Software Repositories (MSR)","author":"Casta\u00f1o J.","year":"2023","unstructured":"J. Casta\u00f1o, S. Mart\u00ednez-Fern\u00e1ndez, X. Franch, and J. Bogner, \"Analyzing the Evolution and Maintenance of ML Models on Hugging Face,\" in Proceedings of the 2024 IEEE\/ACM 21st International Conference on Mining Software Repositories (MSR), 2023."},{"key":"e_1_3_2_1_4_1","first-page":"1","volume-title":"Cohort studies in software engineering: A vision of the future,\" in Proceedings of the 14th ACM\/IEEE International Symposium on Empirical Software Engineering and Measurement (ESEM)","author":"Saarim\u00e4ki N.","year":"2020","unstructured":"N. Saarim\u00e4ki, V. Lenarduzzi, S. Vegas, N. Juristo, and D. Taibi, \"Cohort studies in software engineering: A vision of the future,\" in Proceedings of the 14th ACM\/IEEE International Symposium on Empirical Software Engineering and Measurement (ESEM), 2020, pp. 1--6."},{"key":"e_1_3_2_1_5_1","volume-title":"Lenarduzzi et al., \"Does microservices adoption impact the development velocity? a cohort study. a registered report,\" arXiv preprint arXiv:2306.02034","author":"Saarimaki N.","year":"2023","unstructured":"N. Saarimaki, M. R. Manero, N. Juristo, D. Taibi, V. Lenarduzzi et al., \"Does microservices adoption impact the development velocity? a cohort study. a registered report,\" arXiv preprint arXiv:2306.02034, 2023."},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.5281\/zenodo.10292130"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"crossref","first-page":"106791","DOI":"10.1016\/j.infsof.2021.106791","article-title":"A systematic process for mining software repositories: Results from a systematic literature review","volume":"144","author":"Vidoni M.","year":"2022","unstructured":"M. Vidoni, \"A systematic process for mining software repositories: Results from a systematic literature review,\" Information and Software Technology, vol. 144, p. 106791, 2022.","journal-title":"Information and Software Technology"},{"key":"e_1_3_2_1_8_1","volume-title":"Assessing the Vulnerabilities of the Open-Source Artificial Intelligence (AI) Landscape: A Large-Scale Analysis of the Hugging Face Platform,\" in IEEE Intelligence and Security Informatics","author":"Kathikar A.","year":"2023","unstructured":"A. Kathikar, A. Nair, B. Lazarine, A. Sachdeva, and S. Samtani, \"Assessing the Vulnerabilities of the Open-Source Artificial Intelligence (AI) Landscape: A Large-Scale Analysis of the Hugging Face Platform,\" in IEEE Intelligence and Security Informatics. Charlotte, NC, USA: IEEE, Oct. 2023."},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"crossref","first-page":"103079","DOI":"10.1016\/j.scico.2024.103079","article-title":"Hfcommunity: An extraction process and relational database to analyze hugging face hub data","volume":"234","author":"Ait A.","year":"2024","unstructured":"A. Ait, J. L. C\u00e1novas Izquierdo, and J. Cabot, \"Hfcommunity: An extraction process and relational database to analyze hugging face hub data,\" Science of Computer Programming, vol. 234, p. 103079, 2024. [Online]. Available: https:\/\/www.sciencedirect.com\/science\/article\/pii\/S0167642324000029","journal-title":"Science of Computer Programming"},{"key":"e_1_3_2_1_10_1","first-page":"2463","volume-title":"Australia: IEEE","author":"Jiang W.","year":"2023","unstructured":"W. Jiang, N. Synovic, M. Hyatt, T. R. Schorlemmer, R. Sethi, Y.-H. Lu, G. K. Thiruvathukal, and J. C. Davis, \"An Empirical Study of Pre-Trained Model Reuse in the Hugging Face Deep Learning Model Registry,\" in 2023 IEEE\/ACM 45th International Conference on Software Engineering (ICSE). Melbourne, Australia: IEEE, May 2023, pp. 2463--2475. [Online]. Available: https:\/\/ieeexplore.ieee.org\/document\/10172757\/"},{"key":"e_1_3_2_1_11_1","volume-title":"bias, and legal issues in pretrained models: an empirical study,\" in EMELIOT Workshop at ISSSE","author":"Pepe F.","year":"2023","unstructured":"F. Pepe and M. Di Penta, \"Fairness, bias, and legal issues in pretrained models: an empirical study,\" in EMELIOT Workshop at ISSSE, 2023."},{"key":"e_1_3_2_1_12_1","volume-title":"Exploring naming conventions (and defects) of pre-trained deep learning models in hugging face and other model hubs,\" arXiv preprint arXiv:2310.01642","author":"Jiang W.","year":"2023","unstructured":"W. Jiang, C. Cheung, G. K. Thiruvathukal, and J. C. Davis, \"Exploring naming conventions (and defects) of pre-trained deep learning models in hugging face and other model hubs,\" arXiv preprint arXiv:2310.01642, 2023."},{"issue":"1","key":"e_1_3_2_1_13_1","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1186\/s40411-015-0023-0","article-title":"Investigating probabilistic sampling approaches for large-scale surveys in software engineering","volume":"3","author":"de Mello R. M.","year":"2015","unstructured":"R. M. de Mello, P. C. Da Silva, and G. H. Travassos, \"Investigating probabilistic sampling approaches for large-scale surveys in software engineering,\" Journal of Software Engineering Research and Development, vol. 3, no. 1, pp. 1--26, 2015.","journal-title":"Journal of Software Engineering Research and Development"},{"key":"e_1_3_2_1_14_1","volume-title":"Sampling techniques. john wiley & sons","author":"Cochran W. G.","year":"1977","unstructured":"W. G. Cochran, Sampling techniques. john wiley & sons, 1977."},{"key":"e_1_3_2_1_15_1","first-page":"383","volume-title":"Epidemiology in medicine,\" in Epidemiology in medicine","author":"Hennekens C. H.","year":"1987","unstructured":"C. H. Hennekens and J. E. Buring, \"Epidemiology in medicine,\" in Epidemiology in medicine, 1987, pp. 383--383."},{"issue":"11","key":"e_1_3_2_1_16_1","doi-asserted-by":"crossref","first-page":"4229","DOI":"10.1109\/TSE.2021.3113558","article-title":"Use and misuse of the term \"experiment\" in mining software repositories research","volume":"48","author":"Ayala C.","year":"2021","unstructured":"C. Ayala, B. Turhan, X. Franch, and N. Juristo, \"Use and misuse of the term \"experiment\" in mining software repositories research,\" IEEE Transactions on Software Engineering, vol. 48, no. 11, pp. 4229--4248, 2021.","journal-title":"IEEE Transactions on Software Engineering"},{"key":"e_1_3_2_1_17_1","first-page":"728","volume-title":"Macao: IEEE","author":"Ait A.","year":"2023","unstructured":"A. Ait, J. L. C. Izquierdo, and J. Cabot, \"HFCommunity: A Tool to Analyze the Hugging Face Hub Community,\" in 2023 IEEE International Conference on Software Analysis, Evolution and Reengineering (SANER). Taipa, Macao: IEEE, Mar. 2023, pp. 728--732. [Online]. Available: https:\/\/ieeexplore.ieee.org\/document\/10123660\/"},{"key":"e_1_3_2_1_18_1","first-page":"37","volume-title":"IEEE","author":"Sarwar M. U.","year":"2020","unstructured":"M. U. Sarwar, S. Zafar, M. W. Mkaouer, G. S. Walia, and M. Z. Malik, \"Multi-label classification of commit messages using transfer learning,\" in 2020 IEEE International Symposium on Software Reliability Engineering Workshops (ISSREW). IEEE, 2020, pp. 37--42."},{"key":"e_1_3_2_1_19_1","unstructured":"E. B. Swanson \"The dimensions of maintenance \" in Proceedings of the 2nd international conference on Software engineering 1976 pp. 492--497."},{"key":"e_1_3_2_1_20_1","unstructured":"\"Models - Hugging Face \" https:\/\/huggingface.co\/models Accessed: 01-02-2024."},{"issue":"5","key":"e_1_3_2_1_21_1","doi-asserted-by":"crossref","first-page":"17","DOI":"10.1145\/571681.571686","article-title":"Principles of survey research: part 5: populations and samples","volume":"27","author":"Kitchenham B.","year":"2002","unstructured":"B. Kitchenham and S. L. Pfleeger, \"Principles of survey research: part 5: populations and samples,\" ACM SIGSOFT Software Engineering Notes, vol. 27, no. 5, pp. 17--20, 2002.","journal-title":"ACM SIGSOFT Software Engineering Notes"},{"key":"e_1_3_2_1_22_1","volume-title":"Statistical methods for meta-analysis","author":"Hedges L. V.","year":"2014","unstructured":"L. V. Hedges and I. Olkin, Statistical methods for meta-analysis. Academic press, 2014."}],"event":{"name":"WSESE '24: 1st IEEE\/ACM International Workshop on Methodological Issues with Empirical Studies in Software Engineering","location":"Lisbon Portugal","acronym":"WSESE '24","sponsor":["SIGSOFT ACM Special Interest Group on Software Engineering","IEEE CS","Faculty of Engineering of University of Porto"]},"container-title":["Proceedings of the 1st IEEE\/ACM International Workshop on Methodological Issues with Empirical Studies in Software Engineering"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3643664.3648204","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3643664.3648204","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T00:05:32Z","timestamp":1750291532000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3643664.3648204"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,4,16]]},"references-count":22,"alternative-id":["10.1145\/3643664.3648204","10.1145\/3643664"],"URL":"https:\/\/doi.org\/10.1145\/3643664.3648204","relation":{},"subject":[],"published":{"date-parts":[[2024,4,16]]},"assertion":[{"value":"2024-08-09","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}