{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,16]],"date-time":"2026-06-16T11:51:45Z","timestamp":1781610705904,"version":"3.54.5"},"reference-count":87,"publisher":"Springer Science and Business Media LLC","issue":"4","license":[{"start":{"date-parts":[[2026,3,17]],"date-time":"2026-03-17T00:00:00Z","timestamp":1773705600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2026,3,17]],"date-time":"2026-03-17T00:00:00Z","timestamp":1773705600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100021856","name":"Ministero dell\u2019Universit\u00e0 e della Ricerca","doi-asserted-by":"publisher","award":["PRIN 2020 Project EMELIOT \u201cEngineered MachinE Learning-intensive IoT system\u201d, ID 2020W3A5FY"],"award-info":[{"award-number":["PRIN 2020 Project EMELIOT \u201cEngineered MachinE Learning-intensive IoT system\u201d, ID 2020W3A5FY"]}],"id":[{"id":"10.13039\/501100021856","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100000780","name":"European Commission","doi-asserted-by":"publisher","award":["Next Generation EU PNRR DM 352\/2022 Italian Grant for Ph.D. scholarships"],"award-info":[{"award-number":["Next Generation EU PNRR DM 352\/2022 Italian Grant for Ph.D. scholarships"]}],"id":[{"id":"10.13039\/501100000780","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Swiss National Foundation","award":["219294"],"award-info":[{"award-number":["219294"]}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Empir Software Eng"],"published-print":{"date-parts":[[2026,7]]},"DOI":"10.1007\/s10664-026-10843-1","type":"journal-article","created":{"date-parts":[[2026,3,17]],"date-time":"2026-03-17T14:53:18Z","timestamp":1773759198000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Datasets, bias, licenses, and terms of use: A large and longitudinal study on the documentation of hugging face machine learning models"],"prefix":"10.1007","volume":"31","author":[{"given":"Federica","family":"Pepe","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Vittoria","family":"Nardone","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Antonio","family":"Mastropaolo","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Gerardo","family":"Canfora","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Gabriele","family":"Bavota","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0340-9747","authenticated-orcid":false,"given":"Massimiliano","family":"Di Penta","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"297","published-online":{"date-parts":[[2026,3,17]]},"reference":[{"key":"10843_CR1","unstructured":"Aetheris-AI (2025) AIBOM generator. https:\/\/github.com\/aetheris-ai\/aibom-generator\/, accessed: 19 Dec 2025"},{"key":"10843_CR2","first-page":"728","volume-title":"2023 IEEE International Conference on Software Analysis","author":"A Ait","year":"2023","unstructured":"Ait A, Izquierdo JLC, Cabot J (2023a) Hfcommunity: A tool to analyze the hugging face hub community. 2023 IEEE International Conference on Software Analysis. Evolution and Reengineering (SANER), IEEE, pp 728\u2013732"},{"key":"10843_CR3","unstructured":"Ait A, Izquierdo JLC, Cabot J (2023b) On the suitability of hugging face hub for empirical studies. arXiv preprint arXiv:2307.14841"},{"issue":"3","key":"10843_CR4","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1007\/s10664-025-10631-3","volume":"30","author":"A Ajibode","year":"2025","unstructured":"Ajibode A, Bangash AA, Cogo FR, Adams B, Hassan AE (2025) Towards semantic versioning of open pre-trained language model releases on hugging face. Empir Softw Eng 30(3):1\u201363","journal-title":"Empir Softw Eng"},{"key":"10843_CR5","doi-asserted-by":"crossref","unstructured":"Bhat A, Coursey A, Hu G, Li S, Nahar N, Zhou S, K\u00e4stner C, Guo JLC (2023) Aspirations and practice of ML model documentation: Moving the needle with nudging and traceability. In: Proceedings of the 2023 CHI conference on human factors in computing systems, CHI 2023, Hamburg, Germany, April 23\u201328, 2023, ACM, pp 749:1\u2013749:17","DOI":"10.1145\/3544548.3581518"},{"key":"10843_CR6","unstructured":"BigScience (2022) BigScience Large Open-science Open-access Multilingual Language Model https:\/\/huggingface.co\/bigscience\/bloom"},{"key":"10843_CR7","unstructured":"Biswas S, Rajan H (2022) Fairify: Fairness verification of neural networks. arXiv preprint arXiv:2212.06140"},{"key":"10843_CR8","doi-asserted-by":"publisher","unstructured":"Brun Y, Meliou A (2018) Software fairness. In: Proceedings of the 2018 26th ACM joint meeting on European software engineering conference and symposium on the foundations of software engineering, Association for Computing Machinery, New York, NY, USA, ESEC\/FSE 2018, pp 754\u2013759. https:\/\/doi.org\/10.1145\/3236024.3264838","DOI":"10.1145\/3236024.3264838"},{"key":"10843_CR9","doi-asserted-by":"crossref","unstructured":"Casta\u00f1o J, Mart\u00ednez-Fern\u00e1ndez S, Franch X, Bogner J (2023) Exploring the carbon footprint of hugging face\u2019s ml models: A repository mining study. In: Proceedings of the ACM\/IEEE international symposium on empirical software engineering and measurement (ESEM), Mon 23 - Fri 27 October 2023 New Orleans, Louisiana, United States","DOI":"10.1109\/ESEM56168.2023.10304801"},{"key":"10843_CR10","unstructured":"Casta\u00f1o J, Mart\u00ednez-Fern\u00e1ndez S, Franch X (2024a) Lessons learned from mining the hugging face repository. In: Proceedings of the 1st IEEE\/ACM international workshop on methodological issues with empirical studies in software engineering, pp 1\u20136"},{"key":"10843_CR11","doi-asserted-by":"crossref","unstructured":"Casta\u00f1o J, Mart\u00ednez-Fern\u00e1ndez S, Franch X, Bogner J (2024b) Analyzing the evolution and maintenance of ml models on hugging face. In: 2024 IEEE\/ACM 21st international conference on mining software repositories (MSR), IEEE, pp 607\u2013618","DOI":"10.1145\/3643991.3644898"},{"key":"10843_CR12","doi-asserted-by":"crossref","unstructured":"Chakraborty J, Majumder S, Yu Z, Menzies T (2020) Fairway: a way to build fair ml software. In: Proceedings of the 28th ACM joint meeting on european software engineering conference and symposium on the foundations of software engineering, pp 654\u2013665","DOI":"10.1145\/3368089.3409697"},{"key":"10843_CR13","doi-asserted-by":"crossref","unstructured":"Chang J, Custis C (2022) Understanding implementation challenges in machine learning documentation. In: Proceedings of the 2nd ACM conference on equity and access in algorithms, mechanisms, and optimization, pp 1\u20138","DOI":"10.1145\/3551624.3555301"},{"key":"10843_CR14","doi-asserted-by":"crossref","unstructured":"Chaparro O, Bernal-C\u00e1rdenas C, Lu J, Moran K, Marcus A, Di Penta M, Poshyvanyk D, Ng V (2019) Assessing the quality of the steps to reproduce in bug reports. In: Proceedings of the ACM joint meeting on european software engineering conference and symposium on the foundations of software engineering, ESEC\/SIGSOFT FSE 2019, Tallinn, Estonia, August 26\u201330, 2019, ACM, pp 86\u201396","DOI":"10.1145\/3338906.3338947"},{"key":"10843_CR15","doi-asserted-by":"publisher","unstructured":"Chen Z, Zhang JM, Hort M, Harman M, Sarro F (2024) Fairness testing: A comprehensive survey and analysis of trends. ACM Trans Softw Eng Methodol 33(5). https:\/\/doi.org\/10.1145\/3652155","DOI":"10.1145\/3652155"},{"key":"10843_CR16","unstructured":"Choose an open source license (2023). https:\/\/choosealicense.com"},{"key":"10843_CR17","doi-asserted-by":"publisher","unstructured":"Cin\u00e0 AE, Grosse K, Demontis A, Vascon S, Zellinger W, Moser BA, Oprea A, Biggio B, Pelillo M, Roli F (2023) Wild patterns reloaded: A survey of machine learning security against training data poisoning. ACM Comput Surv 55(13s). https:\/\/doi.org\/10.1145\/3585385","DOI":"10.1145\/3585385"},{"key":"10843_CR18","doi-asserted-by":"crossref","unstructured":"Cohen J (1960) A coefficient of agreement for nominal scales. Educ Psychol Meas","DOI":"10.1177\/001316446002000104"},{"key":"10843_CR19","unstructured":"Confident AI (2025) DeepEval: The LLM Evaluation Framework. https:\/\/github.com\/confident-ai\/deepeval, accessed: 05 Apr 2025"},{"key":"10843_CR20","unstructured":"Contractor D, Ferrandis CM (2022) BigScience Large Open-science Open-access Multilingual Language Model https:\/\/bigscience.huggingface.co\/blog\/the-bigscience-rail-license"},{"key":"10843_CR21","unstructured":"Council of the European Union (2024) Proposal for a regulation of the European parliament and of the council laying down harmonised rules on artificial intelligence (Artificial Intelligence Act) and amending certain Union legislative acts. https:\/\/digital-strategy.ec.europa.eu\/en\/policies\/regulatory-framework-ai%7D%7D"},{"key":"10843_CR22","doi-asserted-by":"publisher","unstructured":"Crisan A, Drouhard M, Vig J, Rajani N (2022) Interactive model cards: A human-centered approach to model documentation. In: Proceedings of the 2022 ACM Conference on Fairness, Accountability, and Transparency, Association for Computing Machinery, New York, NY, USA, FAccT \u201922, pp 427\u2013439. https:\/\/doi.org\/10.1145\/3531146.3533108","DOI":"10.1145\/3531146.3533108"},{"key":"10843_CR23","doi-asserted-by":"crossref","unstructured":"d\u2019Aloisio G, D\u2019Angelo A, Di Marco A, Stilo G (2023) Debiaser for multiple variables to enhance fairness in classification tasks. Inf Process Manag 60(2):103226. https:\/\/doi.org\/10.1016\/j.ipm.2022.103226, https:\/\/www.sciencedirect.com\/science\/article\/pii\/S0306457322003272","DOI":"10.1016\/j.ipm.2022.103226"},{"key":"10843_CR24","doi-asserted-by":"publisher","unstructured":"D\u2019Avino R, Nocera S, Bifolco D, Pepe F, Di Penta M, Scanniello G (2025) ALOHA: A(IBoM) tooL generatOr for Hugging fAce. In: Proceedings of the 29th international conference on evaluation and assessment in software engineering, Association for Computing Machinery, New York, NY, USA, EASE \u201925, pp 929\u2013937. https:\/\/doi.org\/10.1145\/3756681.3756998","DOI":"10.1145\/3756681.3756998"},{"key":"10843_CR25","unstructured":"Deepchecks (2025) Deepchecks - Continuous Validation for AI & ML: Testing, CI & Monitoring. https:\/\/github.com\/deepchecks\/deepchecks, accessed: 05 Apr 2025"},{"key":"10843_CR26","doi-asserted-by":"crossref","unstructured":"Due\u00f1as S, Cosentino V, Robles G, Gonz\u00e1lez-Barahona JM (2018) Perceval: software project data at your will. In: Proceedings of the 40th international conference on software engineering: companion proceeedings, ICSE 2018, Gothenburg, Sweden, May 27 - June 03, 2018, ACM, pp 1\u20134","DOI":"10.1145\/3183440.3183475"},{"issue":"1","key":"10843_CR27","doi-asserted-by":"publisher","first-page":"9","DOI":"10.1007\/s10664-023-10402-y","volume":"29","author":"C Ferrara","year":"2024","unstructured":"Ferrara C, Sellitto G, Ferrucci F, Palomba F, De Lucia A (2024) Fairness-aware machine learning engineering: how far are we? Empir Softw Eng 29(1):9","journal-title":"Empir Softw Eng"},{"issue":"1","key":"10843_CR28","doi-asserted-by":"publisher","first-page":"87","DOI":"10.2307\/2340521","volume":"85","author":"RA Fisher","year":"1922","unstructured":"Fisher RA (1922) On the interpretation of chi-square from contingency tables, and the calculation of p. J Roy Stat Soc 85(1):87\u201394","journal-title":"J Roy Stat Soc"},{"key":"10843_CR29","doi-asserted-by":"crossref","unstructured":"Galhotra S, Brun Y, Meliou A (2017) Fairness testing: testing software for discrimination. In: Proceedings of the 2017 11th Joint meeting on foundations of software engineering, pp 498\u2013510","DOI":"10.1145\/3106237.3106277"},{"key":"10843_CR30","doi-asserted-by":"crossref","unstructured":"Germ\u00e1n DM, Hassan AE (2009) License integration patterns: Addressing license mismatches in component-based development. In: 31st International conference on software engineering, ICSE 2009, May 16\u201324, 2009, Vancouver, Canada, Proceedings, IEEE, pp 188\u2013198","DOI":"10.1109\/ICSE.2009.5070520"},{"key":"10843_CR31","doi-asserted-by":"crossref","unstructured":"Germ\u00e1n DM, Di Penta M, Davies J (2010) Understanding and auditing the licensing of open source software distributions. In: The 18th IEEE international conference on program comprehension, ICPC 2010, Braga, Minho, Portugal, June 30-July 2, 2010, IEEE Computer Society, pp 84\u201393","DOI":"10.1109\/ICPC.2010.48"},{"key":"10843_CR32","unstructured":"Giskard AI (2025) The evaluation & testing framework for AI systems. https:\/\/github.com\/Giskard-AI\/giskard, accessed: 05 Apr 2025"},{"key":"10843_CR33","doi-asserted-by":"crossref","unstructured":"Gohar U, Biswas S, Rajan H (2022) Towards understanding fairness and its composition in ensemble machine learning. arXiv preprint arXiv:2212.04593","DOI":"10.1109\/ICSE48619.2023.00133"},{"key":"10843_CR34","unstructured":"Grissom RJ, Kim JJ (2005) Effect sizes for research: A broad practical approach, 2nd edn. Lawrence Earlbaum Associates"},{"issue":"CSCW2","key":"10843_CR35","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3555760","volume":"6","author":"AK Heger","year":"2022","unstructured":"Heger AK, Marquis LB, Vorvoreanu M, Wallach H, Wortman Vaughan J (2022) Understanding machine learning practitioners\u2019 data documentation perceptions, needs, challenges, and desiderata. Proc ACM Human-Comput Interact 6(CSCW2):1\u201329","journal-title":"Proc ACM Human-Comput Interact"},{"key":"10843_CR36","doi-asserted-by":"crossref","unstructured":"Hendrick S (2022) Software bill of materials (SBOM) and cybersecurity readiness. https:\/\/tinyurl.com\/293v3xte","DOI":"10.70828\/RYTL5793"},{"key":"10843_CR37","first-page":"107","volume-title":"2023 IEEE\/ACM 45th international conference on software engineering: companion proceedings (ICSE-Companion)","author":"A Hoag","year":"2023","unstructured":"Hoag A, Kostas JE, da Silva B, Thomas PS, Brun Y (2023) Seldonian toolkit: Building software with safe and fair machine learning. 2023 IEEE\/ACM 45th international conference on software engineering: companion proceedings (ICSE-Companion). IEEE Computer Society, Los Alamitos, CA, USA, pp 107\u2013111"},{"key":"10843_CR38","first-page":"65","volume":"6","author":"S Holm","year":"1979","unstructured":"Holm S (1979) A simple sequentially rejective Bonferroni test procedure. Scand J Stat 6:65\u201370","journal-title":"Scand J Stat"},{"key":"10843_CR39","unstructured":"Hugging Face (2023a) Hugging Face - Licenses https:\/\/huggingface.co\/docs\/hub\/repositories-licenses"},{"key":"10843_CR40","unstructured":"Hugging Face (2023b) Hugging Face Hub API Endopoints https:\/\/huggingface.co\/docs\/hub\/api"},{"key":"10843_CR41","unstructured":"Google Inc (2023) TensorFlow Hub https:\/\/www.tensorflow.org\/hub"},{"key":"10843_CR42","unstructured":"Hugging Face Inc (2023) Hugging face https:\/\/huggingface.cohttps:\/\/huggingface.co"},{"key":"10843_CR43","doi-asserted-by":"crossref","unstructured":"Jiang W, Synovic N, Hyatt M, Schorlemmer TR, Sethi R, Lu YH, Thiruvathukal GK, Davis JC (2023) An empirical study of pre-trained model reuse in the hugging face deep learning model registry. arXiv preprint arXiv:2303.02552","DOI":"10.1109\/ICSE48619.2023.00206"},{"key":"10843_CR44","doi-asserted-by":"crossref","unstructured":"Jiang W, Yasmin J, Jones J, Synovic N, Kuo J, Bielanski N, Tian Y, Thiruvathukal GK, Davis JC (2024) Peatmoss: A dataset and initial analysis of pre-trained models in open-source software. In: 2024 IEEE\/ACM 21st international conference on mining software repositories (MSR), IEEE, pp 431\u2013443","DOI":"10.1145\/3643991.3644907"},{"key":"10843_CR45","doi-asserted-by":"crossref","unstructured":"Jones J, Jiang W, Synovic N, Thiruvathukal G, Davis J (2024) What do we know about hugging face? a systematic literature review and quantitative validation of qualitative claims. In: Proceedings of the 18th ACM\/IEEE international symposium on empirical software engineering and measurement, pp 13\u201324","DOI":"10.1145\/3674805.3686665"},{"issue":"5","key":"10843_CR46","doi-asserted-by":"publisher","first-page":"2035","DOI":"10.1007\/s10664-015-9393-5","volume":"21","author":"E Kalliamvakou","year":"2016","unstructured":"Kalliamvakou E, Gousios G, Blincoe K, Singer L, Germ\u00e1n DM, Damian DE (2016) An in-depth study of the promises and perils of mining github. Empir Softw Eng 21(5):2035\u20132071","journal-title":"Empir Softw Eng"},{"issue":"5","key":"10843_CR47","doi-asserted-by":"publisher","first-page":"919","DOI":"10.1109\/TSE.2019.2909021","volume":"47","author":"GM Kapitsaki","year":"2021","unstructured":"Kapitsaki GM, Charalambous G (2021) Modeling and recommending open source licenses with findosslicense. IEEE Trans Software Eng 47(5):919\u2013935","journal-title":"IEEE Trans Software Eng"},{"key":"10843_CR48","unstructured":"Kapitsaki GM, Kramer F (2015) Open source license violation check for SPDX files. In: Software reuse for dynamic systems in the cloud and beyond - 14th international conference on software reuse, ICSR 2015, Miami, FL, USA, January 4\u20136, 2015. Proceeding, Springer, Lecture Notes in Computer Science, vol 8919, pp 90\u2013105"},{"key":"10843_CR49","doi-asserted-by":"publisher","first-page":"386","DOI":"10.1016\/j.jss.2016.06.064","volume":"131","author":"GM Kapitsaki","year":"2017","unstructured":"Kapitsaki GM, Kramer F, Tselikas ND (2017) Automating the license compatibility process in open source software with SPDX. J Syst Softw 131:386\u2013401","journal-title":"J Syst Softw"},{"key":"10843_CR50","doi-asserted-by":"crossref","unstructured":"Kapitsaki GM, Paphitou AC, Achilleos A (2022) Towards open source software licenses compatibility check. In: Proceedings of the 26th pan-hellenic conference on informatics, PCI 2022, Athens, Greece, November 25\u201327, 2022, ACM, pp 96\u2013101","DOI":"10.1145\/3575879.3575973"},{"key":"10843_CR51","unstructured":"Koh JY (2023) Model Zoo: Discover open source deep learning code and pretrained models https:\/\/modelzoo.co"},{"issue":"1","key":"10843_CR52","doi-asserted-by":"publisher","first-page":"61","DOI":"10.1177\/001316447003000105","volume":"30","author":"K Krippendorff","year":"1970","unstructured":"Krippendorff K (1970) Estimating the reliability, systematic error, and random error of interval data. Educ Psychol Measur 30(1):61\u201370","journal-title":"Educ Psychol Measur"},{"key":"10843_CR53","doi-asserted-by":"publisher","unstructured":"Li CT, Hsu C, Zhang Y (2022) FairSR: Fairness-aware Sequential Recommendation through Multi-Task Learning with Preference Graph Embeddings. ACM Trans Intell Syst Technol 13(1):16:1\u201316:21. https:\/\/doi.org\/10.1145\/3495163","DOI":"10.1145\/3495163"},{"issue":"1","key":"10843_CR54","doi-asserted-by":"publisher","first-page":"14","DOI":"10.1145\/3468507.3468511","volume":"23","author":"K Makhlouf","year":"2021","unstructured":"Makhlouf K, Zhioua S, Palamidessi C (2021) On the applicability of machine learning fairness notions. ACM SIGKDD Explorations Newsl 23(1):14\u201323","journal-title":"ACM SIGKDD Explorations Newsl"},{"key":"10843_CR55","doi-asserted-by":"crossref","unstructured":"Mehrabi N, Morstatter F, Saxena N, Lerman K, Galstyan A (2022) A survey on bias and fairness in machine learning. ACM Comput Surv 54(6):115:1\u2013115:35","DOI":"10.1145\/3457607"},{"key":"10843_CR56","unstructured":"Meta Inc (2024) Llama - acceptable use policy. https:\/\/ai.meta.com\/llama\/use-policy\/"},{"key":"10843_CR57","doi-asserted-by":"publisher","unstructured":"Mitchell M, Wu S, Zaldivar A, Barnes P, Vasserman L, Hutchinson B, Spitzer E, Raji ID, Gebru T (2019) Model cards for model reporting. In: Proceedings of the Conference on Fairness, Accountability, and Transparency, Association for Computing Machinery, New York, NY, USA, FAT* \u201919, p 220\u2013229. https:\/\/doi.org\/10.1145\/3287560.3287596","DOI":"10.1145\/3287560.3287596"},{"key":"10843_CR58","doi-asserted-by":"crossref","unstructured":"Monjezi V, Trivedi A, Tan G, Tizpaz-Niari S (2023) Information-theoretic testing and debugging of fairness defects in deep neural networks. In: Proceedings of the 45th international conference on software engineering, IEEE Press, ICSE \u201923, pp 1571\u20131582","DOI":"10.1109\/ICSE48619.2023.00136"},{"key":"10843_CR59","doi-asserted-by":"publisher","unstructured":"Naghiaei M, Rahmani HA, Deldjoo Y (2022) CPFair: Personalized Consumer and Producer Fairness Re-ranking for Recommender Systems. In: Proceedings of the 45th International ACM SIGIR conference on research and development in information retrieval, ACM, Madrid Spain, pp 770\u2013779. https:\/\/doi.org\/10.1145\/3477495.3531959","DOI":"10.1145\/3477495.3531959"},{"key":"10843_CR60","unstructured":"Nguyen PT, Rubei R, Di Rocco J, Sipio CD, Di Ruscio D, Di Penta M (2023) Dealing with popularity bias in recommender systems for third-party libraries: How far are we? In: 20th IEEE\/ACM international conference on mining software repositories, MSR 2023, Melbourne, Australia, May 15\u201316, 2023, pp 12\u201324"},{"key":"10843_CR61","unstructured":"NIST (2021) Improving the nation\u2019s cybersecurity: Nist\u2019s responsibilities under the May 2021 executive order. https:\/\/www.nist.gov\/itl\/executive-order-14028-improving-nations-cybersecurity"},{"key":"10843_CR62","doi-asserted-by":"publisher","unstructured":"Openja M, Laberge G, Khomh F (2024) Detection and evaluation of bias-inducing features in machine learning. Empir Softw Eng 29(1):22. https:\/\/doi.org\/10.1007\/S10664-023-10409-5","DOI":"10.1007\/S10664-023-10409-5"},{"issue":"4","key":"10843_CR63","doi-asserted-by":"publisher","first-page":"2426","DOI":"10.1109\/TSE.2022.3220713","volume":"49","author":"K Peng","year":"2022","unstructured":"Peng K, Chakraborty J, Menzies T (2022) Fairmask: Better fairness via model-based rebalancing of protected attributes. IEEE Trans Softw Eng 49(4):2426\u20132439","journal-title":"IEEE Trans Softw Eng"},{"key":"10843_CR64","doi-asserted-by":"crossref","unstructured":"Di Penta M, Germ\u00e1n DM, Gu\u00e9h\u00e9neuc Y, Antoniol G (2010) An exploratory study of the evolution of software licensing. In: ICSE (1), ACM, pp 145\u2013154","DOI":"10.1145\/1806799.1806824"},{"key":"10843_CR65","doi-asserted-by":"crossref","unstructured":"Pepe F, Nardone V, Mastropaolo A, Bavota G, Canfora G, Di Penta M (2024) How do hugging face models document datasets, bias, and licenses? an empirical study. In: Proceedings of the 32nd IEEE\/ACM international conference on program comprehension, pp 370\u2013381","DOI":"10.1145\/3643916.3644412"},{"key":"10843_CR66","doi-asserted-by":"publisher","unstructured":"Pepe F, Nardone V, Mastropaolo A, Canfora G, Bavota G, Di Penta M (2025) Replication package for the paper: \u201cdatasets, bias, licenses, and terms of use: A large and longitudinal study on the documentation of hugging face machine learning models\u201d. https:\/\/doi.org\/10.5281\/zenodo.15187256","DOI":"10.5281\/zenodo.15187256"},{"key":"10843_CR67","unstructured":"Responsible AI (2022) Big science open Rail-M license. https:\/\/www.licenses.ai\/blog\/2022\/8\/26\/bigscience-open-rail-m-license"},{"key":"10843_CR68","doi-asserted-by":"crossref","unstructured":"Sallami D, A\u00efmeur E (2024) Fairframe: a fairness framework for bias detection and mitigation in news. AI and Ethics pp 1\u201317","DOI":"10.1007\/s43681-024-00568-6"},{"issue":"3","key":"10843_CR69","doi-asserted-by":"publisher","first-page":"258","DOI":"10.1038\/s42256-022-00458-8","volume":"4","author":"P Schramowski","year":"2022","unstructured":"Schramowski P, Turan C, Andersen N, Rothkopf CA, Kersting K (2022) Large pre-trained language models contain human-like biases of what is right and wrong to do. Nat Mach Intell 4(3):258\u2013268","journal-title":"Nat Mach Intell"},{"key":"10843_CR70","doi-asserted-by":"crossref","unstructured":"Di Sipio C, Rubei R, Di Rocco J, Di Ruscio D, Nguyen PT (2024) Automated categorization of pre-trained models for software engineering: A case study with a hugging face dataset. arXiv preprint arXiv:2405.13185","DOI":"10.1145\/3661167.3661215"},{"key":"10843_CR71","unstructured":"Spencer D (2009) Card sorting: Designing usable categories. Rosenfeld Media"},{"key":"10843_CR72","unstructured":"Spoletini P, Parizi RM (2018) Bias-aware guidelines and fairness-preserving Taxonomy in software engineering education. In: 2018 IEEE Frontiers in Education Conference (FIE), pp 1\u20134, iSSN: 2377\u2013634X"},{"key":"10843_CR73","unstructured":"Stalnaker T, Wintersgill N, Chaparro O, Heymann LA, Di Penta M, German DM, Poshyvanyk D (2025b) The ml supply chain in the era of software 2.0: Lessons learned from hugging face. arXiv preprint arXiv:2502.04484"},{"key":"10843_CR74","doi-asserted-by":"publisher","unstructured":"Stalnaker T, Wintersgill N, Chaparro O, Heymann LA, Di Penta M, German DM, Poshyvanyk D (2025) Developer perspectives on licensing and copyright issues arising from generative ai for software development. ACM Trans Softw Eng Methodol. https:\/\/doi.org\/10.1145\/3743133","DOI":"10.1145\/3743133"},{"key":"10843_CR75","first-page":"512","volume-title":"2024 IEEE international conference on software analysis","author":"M Taraghi","year":"2024","unstructured":"Taraghi M, Dorcelus G, Foundjem A, Tambon F, Khomh F (2024) Deep learning model reuse in the huggingface community: Challenges, benefit and trends. 2024 IEEE international conference on software analysis. Evolution and Reengineering (SANER), IEEE, pp 512\u2013523"},{"key":"10843_CR76","unstructured":"The Linux Foundation (2023) PyTorch Hub https:\/\/pytorch.org\/hub"},{"key":"10843_CR77","doi-asserted-by":"crossref","unstructured":"Tsay J, Braz A, Hirzel M, Shinnar A, Mummert TW (2020) AIMMX: artificial intelligence model metadata extractor. In: MSR \u201920: 17th International Conference on Mining Software Repositories, Seoul, Republic of Korea, 29\u201330 June, 2020, ACM, pp 81\u201392","DOI":"10.1145\/3379597.3387448"},{"key":"10843_CR78","doi-asserted-by":"crossref","unstructured":"Vendome C, V\u00e1squez ML, Bavota G, Di Penta M, Germ\u00e1n DM, Poshyvanyk D (2015) When and why developers adopt and change software licenses. In: 2015 IEEE international conference on software maintenance and evolution, ICSME 2015, Bremen, Germany, September 29 - October 1, 2015, IEEE Computer Society, pp 31\u201340","DOI":"10.1109\/ICSM.2015.7332449"},{"issue":"3","key":"10843_CR79","doi-asserted-by":"publisher","first-page":"1537","DOI":"10.1007\/s10664-016-9438-4","volume":"22","author":"C Vendome","year":"2017","unstructured":"Vendome C, Bavota G, Di Penta M, V\u00e1squez ML, Germ\u00e1n DM, Poshyvanyk D (2017) License usage and changes: a large-scale study on GitHub. Empir Softw Eng 22(3):1537\u20131577","journal-title":"Empir Softw Eng"},{"key":"10843_CR80","doi-asserted-by":"crossref","unstructured":"Vendome C, Germ\u00e1n DM, Di Penta M, Bavota G, V\u00e1squez ML, Poshyvanyk D (2018) To distribute or not to distribute?: why licensing bugs matter. In: Proceedings of the 40th International Conference on Software Engineering, ICSE 2018, Gothenburg, Sweden, May 27 - June 03, 2018, ACM, pp 268\u2013279","DOI":"10.1145\/3180155.3180221"},{"key":"10843_CR81","unstructured":"White House Office of Science and Technology Policy (2022) Blueprint for an AI Bill of Rights: Making Automated Systems Work for the American People. https:\/\/www.whitehouse.gov\/ostp\/ai-bill-of-rights\/"},{"issue":"6","key":"10843_CR82","doi-asserted-by":"publisher","first-page":"80","DOI":"10.2307\/3001968","volume":"1","author":"F Wilcoxon","year":"1945","unstructured":"Wilcoxon F (1945) Individual comparisons by ranking methods. Biometrics Bulletin 1(6):80\u201383","journal-title":"Biometrics Bulletin"},{"issue":"3","key":"10843_CR83","doi-asserted-by":"publisher","first-page":"1194","DOI":"10.1007\/s10664-016-9487-8","volume":"22","author":"Y Wu","year":"2017","unstructured":"Wu Y, Manabe Y, Kanda T, Germ\u00e1n DM, Inoue K (2017) Analysis of license inconsistency in large collections of open source projects. Empir Softw Eng 22(3):1194\u20131222","journal-title":"Empir Softw Eng"},{"key":"10843_CR84","doi-asserted-by":"crossref","unstructured":"Xia B, Bi T, Xing Z, Lu Q, Zhu L (2023) An empirical study on software bill of materials: Where we stand and the road ahead. In: 45th IEEE\/ACM International Conference on Software Engineering, ICSE 2023, Melbourne, Australia, May 14\u201320, 2023, IEEE, pp 2630\u20132642","DOI":"10.1109\/ICSE48619.2023.00219"},{"key":"10843_CR85","unstructured":"Yang X, Liang W, Zou J (2024) Navigating dataset documentations in ai: A large-scale analysis of dataset cards on hugging face. arXiv preprint arXiv:2401.13822"},{"key":"10843_CR86","doi-asserted-by":"crossref","unstructured":"Zahan N, Lin E, Tamanna M, Enck W, Williams L (2023) Software bills of materials are required. are we there yet? IEEE Secur Privacy 21(2):82\u201388","DOI":"10.1109\/MSEC.2023.3237100"},{"issue":"5","key":"10843_CR87","doi-asserted-by":"publisher","first-page":"618","DOI":"10.1109\/TSE.2010.63","volume":"36","author":"T Zimmermann","year":"2010","unstructured":"Zimmermann T, Premraj R, Bettenburg N, Just S, Schr\u00f6ter A, Weiss C (2010) What makes a good bug report? IEEE Trans Software Eng 36(5):618\u2013643","journal-title":"IEEE Trans Software Eng"}],"container-title":["Empirical Software Engineering"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10664-026-10843-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10664-026-10843-1","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10664-026-10843-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,6,16]],"date-time":"2026-06-16T11:00:02Z","timestamp":1781607602000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10664-026-10843-1"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,3,17]]},"references-count":87,"journal-issue":{"issue":"4","published-print":{"date-parts":[[2026,7]]}},"alternative-id":["10843"],"URL":"https:\/\/doi.org\/10.1007\/s10664-026-10843-1","relation":{},"ISSN":["1382-3256","1573-7616"],"issn-type":[{"value":"1382-3256","type":"print"},{"value":"1573-7616","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026,3,17]]},"assertion":[{"value":"15 April 2025","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"3 March 2026","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"17 March 2026","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"Ethics approval does not apply as the research did not involve human participants.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethical Approval"}},{"value":"Informed consent does not apply as the research did not involve human participants.","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Informed Consent"}},{"value":"The authors have no conflicts of interest to declare that are relevant to the content of this article.","order":4,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of Interest"}},{"value":"Clinical trial number: not applicable.","order":5,"name":"Ethics","group":{"name":"EthicsHeading","label":"Clinical Trial Number"}}],"article-number":"95"}}