{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,13]],"date-time":"2026-04-13T17:02:16Z","timestamp":1776099736848,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":55,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,4,15]],"date-time":"2024-04-15T00:00:00Z","timestamp":1713139200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"name":"Italian Ministry of University and Research","award":["PRIN 2020 EMELIOT - 2020W3A5FY"],"award-info":[{"award-number":["PRIN 2020 EMELIOT - 2020W3A5FY"]}]},{"name":"Italian Ministry of University and Research","award":["DM 352\/2022"],"award-info":[{"award-number":["DM 352\/2022"]}]},{"name":"European Research Council (ERC)","award":["851720"],"award-info":[{"award-number":["851720"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,4,15]]},"DOI":"10.1145\/3643916.3644412","type":"proceedings-article","created":{"date-parts":[[2024,6,13]],"date-time":"2024-06-13T12:40:20Z","timestamp":1718282420000},"page":"370-381","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":21,"title":["How do Hugging Face Models Document Datasets, Bias, and Licenses? An Empirical Study"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0008-3038-3977","authenticated-orcid":false,"given":"Federica","family":"Pepe","sequence":"first","affiliation":[{"name":"University of Sannio, Benevento, Italy"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7888-6620","authenticated-orcid":false,"given":"Vittoria","family":"Nardone","sequence":"additional","affiliation":[{"name":"University of Molise, Campobasso, Italy"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7965-7712","authenticated-orcid":false,"given":"Antonio","family":"Mastropaolo","sequence":"additional","affiliation":[{"name":"Software Institute, Universita della Svizzera italiana (USI), Lugano, Switzerland"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2216-3148","authenticated-orcid":false,"given":"Gabriele","family":"Bavota","sequence":"additional","affiliation":[{"name":"Software Institute, Universita della Svizzera italiana (USI), Lugano, Switzerland"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0049-1279","authenticated-orcid":false,"given":"Gerardo","family":"Canfora","sequence":"additional","affiliation":[{"name":"University of Sannio, Benevento, Italy"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0340-9747","authenticated-orcid":false,"given":"Massimiliano","family":"Di Penta","sequence":"additional","affiliation":[{"name":"University of Sannio, Benevento, Italy"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2024,6,13]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"2023. Choose an open source license https:\/\/choosealicense.com."},{"key":"e_1_3_2_1_2_1","unstructured":"2023. Open Source Initiative https:\/\/opensource.org."},{"key":"e_1_3_2_1_3_1","volume-title":"Proceedings of the 2023 CHI Conference on Human Factors in Computing Systems, CHI 2023","author":"Bhat Avinash","year":"2023","unstructured":"Avinash Bhat, Austin Coursey, Grace Hu, Sixian Li, Nadia Nahar, Shurui Zhou, Christian K\u00e4stner, and Jin L. C. Guo. 2023. Aspirations and Practice of ML Model Documentation: Moving the Needle with Nudging and Traceability. In Proceedings of the 2023 CHI Conference on Human Factors in Computing Systems, CHI 2023, Hamburg, Germany, April 23--28, 2023. ACM, 749:1--749:17."},{"key":"e_1_3_2_1_4_1","unstructured":"BigScience. 2022. BigScience Large Open-science Open-access Multilingual Language Model https:\/\/huggingface.co\/bigscience\/bloom."},{"key":"e_1_3_2_1_5_1","volume-title":"Fairify: Fairness Verification of Neural Networks. arXiv preprint arXiv:2212.06140","author":"Biswas Sumon","year":"2022","unstructured":"Sumon Biswas and Hridesh Rajan. 2022. Fairify: Fairness Verification of Neural Networks. arXiv preprint arXiv:2212.06140 (2022)."},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1145\/3236024.3264838"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1109\/ESEM56168.2023.10304801"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1145\/3368089.3409697"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1145\/3338906.3338947"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"crossref","unstructured":"J Cohen. 1960. A coefficient of agreement for nominal scales. Educ Psychol Meas. (1960).","DOI":"10.1177\/001316446002000104"},{"key":"e_1_3_2_1_11_1","unstructured":"Danish Contractor and Carlos Mu\u00f1oz Ferrandis. 2022. BigScience Large Open-science Open-access Multilingual Language Model https:\/\/bigscience.huggingface.co\/blog\/the-bigscience-rail-license."},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1145\/3531146.3533108"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.ipm.2022.103226"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"crossref","unstructured":"Massimiliano Di Penta Daniel M. Germ\u00e1n Yann-Ga\u00ebl Gu\u00e9h\u00e9neuc and Giuliano Antoniol. 2010. An exploratory study of the evolution of software licensing. In ICSE (1). ACM 145--154.","DOI":"10.1145\/1806799.1806824"},{"key":"e_1_3_2_1_15_1","volume-title":"Proceedings of the 40th International Conference on Software Engineering: Companion Proceeedings, ICSE 2018","author":"Due\u00f1as Santiago","year":"2018","unstructured":"Santiago Due\u00f1as, Valerio Cosentino, Gregorio Robles, and Jes\u00fas M. Gonz\u00e1lez-Barahona. 2018. Perceval: software project data at your will. In Proceedings of the 40th International Conference on Software Engineering: Companion Proceeedings, ICSE 2018, Gothenburg, Sweden, May 27 - June 03, 2018. ACM, 1--4."},{"key":"e_1_3_2_1_16_1","volume-title":"Understanding and Auditing the Licensing of Open Source Software Distributions. In The 18th IEEE International Conference on Program Comprehension, ICPC 2010","author":"Germ\u00e1n Daniel M.","year":"2010","unstructured":"Daniel M. Germ\u00e1n, Massimiliano Di Penta, and Julius Davies. 2010. Understanding and Auditing the Licensing of Open Source Software Distributions. In The 18th IEEE International Conference on Program Comprehension, ICPC 2010, Braga, Minho, Portugal, June 30-July 2, 2010. IEEE Computer Society, 84--93."},{"key":"e_1_3_2_1_17_1","volume-title":"31st International Conference on Software Engineering, ICSE 2009, May 16--24, 2009, Vancouver, Canada, Proceedings. IEEE, 188--198","author":"Daniel","unstructured":"Daniel M. Germ\u00e1n and Ahmed E. Hassan. 2009. License integration patterns: Addressing license mismatches in component-based development. In 31st International Conference on Software Engineering, ICSE 2009, May 16--24, 2009, Vancouver, Canada, Proceedings. IEEE, 188--198."},{"key":"e_1_3_2_1_18_1","volume-title":"Towards understanding fairness and its composition in ensemble machine learning. arXiv preprint arXiv:2212.04593","author":"Gohar Usman","year":"2022","unstructured":"Usman Gohar, Sumon Biswas, and Hridesh Rajan. 2022. Towards understanding fairness and its composition in ensemble machine learning. arXiv preprint arXiv:2212.04593 (2022)."},{"key":"e_1_3_2_1_19_1","unstructured":"Google Inc. 2023. TensorFlow Hub https:\/\/www.tensorflow.org\/hub."},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICTAI50040.2020.00068"},{"key":"e_1_3_2_1_21_1","unstructured":"Stephen Hendrick. 2022. Software Bill of Materials (SBOM) and Cybersecurity Readiness. https:\/\/tinyurl.com\/293v3xte."},{"key":"e_1_3_2_1_22_1","volume-title":"Seldonian Toolkit: Building Software with Safe and Fair Machine Learning. In 2023 IEEE\/ACM 45th International Conference on Software Engineering: Companion Proceedings (ICSE-Companion)","author":"Hoag A.","year":"2023","unstructured":"A. Hoag, J. E. Kostas, B. da Silva, P. S. Thomas, and Y. Brun. 2023. Seldonian Toolkit: Building Software with Safe and Fair Machine Learning. In 2023 IEEE\/ACM 45th International Conference on Software Engineering: Companion Proceedings (ICSE-Companion). IEEE Computer Society, Los Alamitos, CA, USA, 107--111."},{"key":"e_1_3_2_1_23_1","unstructured":"Hugging Face. 2023. Hugging Face - Licenses https:\/\/huggingface.co\/docs\/hub\/repositories-licenses."},{"key":"e_1_3_2_1_24_1","unstructured":"Hugging Face. 2023. Hugging Face Hub API Endopoints https:\/\/huggingface.co\/docs\/hub\/api."},{"key":"e_1_3_2_1_25_1","unstructured":"Hugging Face Inc. 2023. Hugging Face https:\/\/huggingface.co. https:\/\/huggingface.co"},{"key":"e_1_3_2_1_26_1","volume-title":"An empirical study of pre-trained model reuse in the hugging face deep learning model registry. arXiv preprint arXiv:2303.02552","author":"Jiang Wenxin","year":"2023","unstructured":"Wenxin Jiang, Nicholas Synovic, Matt Hyatt, Taylor R Schorlemmer, Rohan Sethi, Yung-Hsiang Lu, George K Thiruvathukal, and James C Davis. 2023. An empirical study of pre-trained model reuse in the hugging face deep learning model registry. arXiv preprint arXiv:2303.02552 (2023)."},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1109\/TSE.2019.2909021"},{"key":"e_1_3_2_1_28_1","volume-title":"ICSR 2015","author":"Georgia","year":"2015","unstructured":"Georgia M. Kapitsaki and Frederik Kramer. 2015. Open Source License Violation Check for SPDX Files. In Software Reuse for Dynamic Systems in the Cloud and Beyond - 14th International Conference on Software Reuse, ICSR 2015, Miami, FL, USA, January 4--6, 2015. Proceeding (Lecture Notes in Computer Science, Vol. 8919). Springer, 90--105."},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.jss.2016.06.064"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1145\/3575879.3575973"},{"key":"e_1_3_2_1_31_1","volume-title":"Model Zoo: Discover open source deep learning code and pretrained models https:\/\/modelzoo.co.","author":"Koh Jing Yu","year":"2023","unstructured":"Jing Yu Koh. 2023. Model Zoo: Discover open source deep learning code and pretrained models https:\/\/modelzoo.co."},{"key":"e_1_3_2_1_32_1","unstructured":"Klaus Krippendorff. 2011. Computing Krippendorff's alpha-reliability. (2011)."},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1145\/3495163"},{"key":"e_1_3_2_1_34_1","volume-title":"A Survey on Bias and Fairness in Machine Learning. ACM Comput. Surv. 54, 6","author":"Mehrabi Ninareh","year":"2022","unstructured":"Ninareh Mehrabi, Fred Morstatter, Nripsuta Saxena, Kristina Lerman, and Aram Galstyan. 2022. A Survey on Bias and Fairness in Machine Learning. ACM Comput. Surv. 54, 6 (2022), 115:1--115:35."},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1145\/3287560.3287596"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICSE48619.2023.00136"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1145\/3477495.3531959"},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1109\/MSR59073.2023.00016"},{"key":"e_1_3_2_1_39_1","volume-title":"Improving the Nation's Cybersecurity: NIST's Responsibilities Under the","author":"NIST.","year":"2021","unstructured":"NIST. 2021. Improving the Nation's Cybersecurity: NIST's Responsibilities Under the May 2021 Executive Order. https:\/\/www.nist.gov\/itl\/executive-order-14028-improving-nations-cybersecurity"},{"key":"e_1_3_2_1_40_1","first-page":"4","article-title":"FairMask: Better Fairness via Model-Based Rebalancing of Protected Attributes","volume":"49","author":"Peng Kewen","year":"2022","unstructured":"Kewen Peng, Joymallya Chakraborty, and Tim Menzies. 2022. FairMask: Better Fairness via Model-Based Rebalancing of Protected Attributes. IEEE Trans. Softw. Eng. 49, 4 (nov 2022), 2426--2439.","journal-title":"IEEE Trans. Softw. Eng."},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","unstructured":"Federica Pepe Vittoria Nardone Antonio Mastropaolo Gerardo Canfora Gabriele Bavota and Massimiliano Di Penta. 2023. Dataset of the paper: \"How do Hugging Face Models Document Datasets Bias and Licenses? An Empirical Study\". 10.5281\/zenodo.8200098","DOI":"10.5281\/zenodo.8200098"},{"key":"e_1_3_2_1_42_1","unstructured":"Responsible AI. 2022. Big Science Open Rail-M License https:\/\/www.licenses.ai\/blog\/2022\/8\/26\/bigscience-open-rail-m-license."},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1038\/s42256-022-00458-8"},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1145\/3609502"},{"key":"e_1_3_2_1_45_1","unstructured":"Donna Spencer. 2009. Card sorting: Designing usable categories. Rosenfeld Media."},{"key":"e_1_3_2_1_46_1","volume-title":"2018 IEEE Frontiers in Education Conference (FIE). 1--4. ISSN: 2377-634X.","author":"Spoletini Paola","unstructured":"Paola Spoletini and Reza M. Parizi. 2018. Bias-aware guidelines and fairness-preserving Taxonomy in software engineering education. In 2018 IEEE Frontiers in Education Conference (FIE). 1--4. ISSN: 2377-634X."},{"key":"e_1_3_2_1_47_1","unstructured":"The Linux Foundation. 2023. PyTorch Hub https:\/\/pytorch.org\/hub."},{"key":"e_1_3_2_1_48_1","volume-title":"AIMMX: Artificial Intelligence Model Metadata Extractor. In MSR '20: 17th International Conference on Mining Software Repositories","author":"Tsay Jason","year":"2020","unstructured":"Jason Tsay, Alan Braz, Martin Hirzel, Avraham Shinnar, and Todd W. Mummert. 2020. AIMMX: Artificial Intelligence Model Metadata Extractor. In MSR '20: 17th International Conference on Mining Software Repositories, Seoul, Republic of Korea, 29--30 June, 2020. ACM, 81--92."},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"publisher","DOI":"10.1007\/s10664-016-9438-4"},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"publisher","DOI":"10.1145\/3180155.3180221"},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICSM.2015.7332449"},{"key":"e_1_3_2_1_52_1","doi-asserted-by":"publisher","DOI":"10.1007\/s10664-016-9487-8"},{"key":"e_1_3_2_1_53_1","volume-title":"An Empirical Study on Software Bill of Materials: Where We Stand and the Road Ahead. In 45th IEEE\/ACM International Conference on Software Engineering, ICSE 2023","author":"Xia Boming","year":"2023","unstructured":"Boming Xia, Tingting Bi, Zhenchang Xing, Qinghua Lu, and Liming Zhu. 2023. An Empirical Study on Software Bill of Materials: Where We Stand and the Road Ahead. In 45th IEEE\/ACM International Conference on Software Engineering, ICSE 2023, Melbourne, Australia, May 14--20, 2023. IEEE, 2630--2642."},{"key":"e_1_3_2_1_54_1","doi-asserted-by":"publisher","DOI":"10.1109\/MSEC.2023.3237100"},{"key":"e_1_3_2_1_55_1","doi-asserted-by":"publisher","DOI":"10.1109\/TSE.2010.63"}],"event":{"name":"ICPC '24: 32nd IEEE\/ACM International Conference on Program Comprehension","location":"Lisbon Portugal","acronym":"ICPC '24","sponsor":["SIGSOFT ACM Special Interest Group on Software Engineering","IEEE CS"]},"container-title":["Proceedings of the 32nd IEEE\/ACM International Conference on Program Comprehension"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3643916.3644412","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3643916.3644412","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T23:57:35Z","timestamp":1750291055000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3643916.3644412"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,4,15]]},"references-count":55,"alternative-id":["10.1145\/3643916.3644412","10.1145\/3643916"],"URL":"https:\/\/doi.org\/10.1145\/3643916.3644412","relation":{},"subject":[],"published":{"date-parts":[[2024,4,15]]},"assertion":[{"value":"2024-06-13","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}