{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,9,11]],"date-time":"2025-09-11T20:39:46Z","timestamp":1757623186546,"version":"3.44.0"},"publisher-location":"Cham","reference-count":33,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783032042064","type":"print"},{"value":"9783032042071","type":"electronic"}],"license":[{"start":{"date-parts":[[2025,9,9]],"date-time":"2025-09-09T00:00:00Z","timestamp":1757376000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,9,9]],"date-time":"2025-09-09T00:00:00Z","timestamp":1757376000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2026]]},"DOI":"10.1007\/978-3-032-04207-1_22","type":"book-chapter","created":{"date-parts":[[2025,9,9]],"date-time":"2025-09-09T03:31:11Z","timestamp":1757388671000},"page":"327-344","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Generate with\u00a0CodeXHug: A Dataset to\u00a0Enhance Model Cards with\u00a0Code Usage Patterns"],"prefix":"10.1007","author":[{"given":"Stefano","family":"Palombo","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9872-9542","authenticated-orcid":false,"given":"Claudio","family":"Di Sipio","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7909-3902","authenticated-orcid":false,"given":"Juri","family":"Di Rocco","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5077-6793","authenticated-orcid":false,"given":"Davide","family":"Di Ruscio","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2025,9,9]]},"reference":[{"key":"22_CR1","unstructured":"Mongodb. https:\/\/www.mongodb.com\/. Accessed 11 Mar 2024"},{"key":"22_CR2","unstructured":"Mysql connector\/python. https:\/\/pypi.org\/project\/mysql-connector-python\/. Accessed 11 Mar 2024"},{"key":"22_CR3","unstructured":"Pygithub documentation. https:\/\/pygithub.readthedocs.io\/en\/stable\/. Accessed 11 Mar 2024"},{"key":"22_CR4","doi-asserted-by":"publisher","unstructured":"Abid, S., Shamail, S., Basit, H.A., Nadi, S.: FACER: An API usage-based code-example recommender for opportunistic reuse. Empir. Softw. Eng. 26(6), 1\u201358 (2021). https:\/\/doi.org\/10.1007\/s10664-021-10000-w","DOI":"10.1007\/s10664-021-10000-w"},{"key":"22_CR5","doi-asserted-by":"publisher","unstructured":"Abid, S., Shamail, S., Basit, H.A., Nadi, S.: FACER: An API usage-based code-example recommender for opportunistic reuse. Empir. Softw. Eng. 26(6), 1\u201358 (2021). https:\/\/doi.org\/10.1007\/s10664-021-10000-w","DOI":"10.1007\/s10664-021-10000-w"},{"key":"22_CR6","doi-asserted-by":"publisher","unstructured":"Ait, A., Izquierdo, J.L.C., Cabot, J.: HFCommunity: a tool to analyze the hugging face hub community. In: 2023 IEEE International Conference on Software Analysis, Evolution and Reengineering (SANER), pp. 728\u2013732, March 2023. https:\/\/doi.org\/10.1109\/SANER56733.2023.00080, https:\/\/ieeexplore.ieee.org\/document\/10123660, iSSN: 2640-7574","DOI":"10.1109\/SANER56733.2023.00080"},{"key":"22_CR7","doi-asserted-by":"publisher","unstructured":"Casta\u00f1o, J., Mart\u00ednez-Fern\u00e1ndez, S., Franch, X., Bogner, J.: Analyzing the Evolution and Maintenance of ML Models on Hugging Face, November 2023. https:\/\/doi.org\/10.48550\/arXiv.2311.13380, arXiv:2311.13380 [cs]","DOI":"10.48550\/arXiv.2311.13380"},{"key":"22_CR8","doi-asserted-by":"publisher","unstructured":"Di\u00a0Sipio, C., Di\u00a0Rocco, J., Di\u00a0Ruscio, D., Palombo, S.: Codexhug: a curated dataset of huggingface pre- trained models exploited in the github ecosystem, December 2024. https:\/\/doi.org\/10.5281\/zenodo.14267550","DOI":"10.5281\/zenodo.14267550"},{"key":"22_CR9","doi-asserted-by":"publisher","unstructured":"Di\u00a0Sipio, C., Rubei, R., Di\u00a0Rocco, J., Di\u00a0Ruscio, D., Nguyen, P.T.: Automated categorization of pre-trained models in software engineering: A case study with a hugging face dataset. In: Proceedings of the 28th International Conference on Evaluation and Assessment in Software Engineering, pp. 351\u2013356. EASE \u201924. Association for Computing Machinery, New York (2024). https:\/\/doi.org\/10.1145\/3661167.3661215","DOI":"10.1145\/3661167.3661215"},{"key":"22_CR10","doi-asserted-by":"publisher","unstructured":"Ding, Z., Li, H., Shang, W., Chen, T.-H.P.: Can pre-trained code embeddings improve model performance? Revisiting the use of code embeddings in software engineering tasks. Empir. Softw. Eng. 27(3), 1\u201338 (2022). https:\/\/doi.org\/10.1007\/s10664-022-10118-5","DOI":"10.1007\/s10664-022-10118-5"},{"key":"22_CR11","doi-asserted-by":"crossref","unstructured":"Feng, Z., et al.: Codebert: a pre-trained model for programming and natural languages (2020). https:\/\/arxiv.org\/abs\/2002.08155","DOI":"10.18653\/v1\/2020.findings-emnlp.139"},{"key":"22_CR12","doi-asserted-by":"publisher","unstructured":"Fowkes, J., Sutton, C.: Parameter-free Probabilistic API Mining Across GitHub. In: 24th ACM SIGSOFT International Symposium on Foundations of Software Engineering, pp. 254\u2013265. ACM, New York (2016). https:\/\/doi.org\/10.1145\/2950290.2950319","DOI":"10.1145\/2950290.2950319"},{"key":"22_CR13","doi-asserted-by":"publisher","unstructured":"Gao, H., Zahedi, M., Treude, C., Rosenstock, S., Cheong, M.: Documenting ethical considerations in open source ai models. In: Proceedings of the 18th ACM\/IEEE International Symposium on Empirical Software Engineering and Measurement, ESEM \u201924, pp. 177\u2013188. Association for Computing Machinery, New York (2024). https:\/\/doi.org\/10.1145\/3674805.3686679","DOI":"10.1145\/3674805.3686679"},{"key":"22_CR14","doi-asserted-by":"publisher","unstructured":"Gong, L., Zhang, J., Wei, M., Zhang, H., Huang, Z.: What is the intended usage context of this model? an exploratory study of pre-trained models on various model repositories. ACM Trans. Softw. Eng. Methodol. 32(3), 69:1\u201369:57 (2023). https:\/\/doi.org\/10.1145\/3569934","DOI":"10.1145\/3569934"},{"key":"22_CR15","unstructured":"Grattafiori, A., Dubey, A., Jauhri, A., Pandey, A., et\u00a0al.: The llama 3 herd of models (2024). https:\/\/arxiv.org\/abs\/2407.21783"},{"key":"22_CR16","doi-asserted-by":"publisher","first-page":"225","DOI":"10.1016\/j.aiopen.2021.08.002","volume":"2","author":"X Han","year":"2021","unstructured":"Han, X., Zhang, Z., Ding, N., Gu, Y., Liu, X., et al.: Pre-trained models: past, present and future. AI Open 2, 225\u2013250 (2021). https:\/\/doi.org\/10.1016\/j.aiopen.2021.08.002","journal-title":"AI Open"},{"key":"22_CR17","doi-asserted-by":"publisher","DOI":"10.1145\/3695988","author":"X Hou","year":"2024","unstructured":"Hou, X., et al.: Large language models for software engineering: a systematic literature review. ACM Trans. Softw. Eng. Methodol. (2024). https:\/\/doi.org\/10.1145\/3695988. just Accepted","journal-title":"ACM Trans. Softw. Eng. Methodol."},{"key":"22_CR18","doi-asserted-by":"publisher","unstructured":"Jain, N., Vaidyanath, S., Iyer, A., Natarajan, N., Parthasarathy, S., Rajamani, S., Sharma, R.: Jigsaw: large language models meet program synthesis. In: Proceedings of the 44th International Conference on Software Engineering, ICSE \u201922, pp. 1219\u20131231. Association for Computing Machinery, New York, July 2022. https:\/\/doi.org\/10.1145\/3510003.3510203, https:\/\/dl.acm.org\/doi\/10.1145\/3510003.3510203","DOI":"10.1145\/3510003.3510203"},{"issue":"3","key":"22_CR19","doi-asserted-by":"publisher","first-page":"243","DOI":"10.1007\/s10664-011-9171-y","volume":"17","author":"F Khomh","year":"2012","unstructured":"Khomh, F., Penta, M.D., Gu\u00e9h\u00e9neuc, Y.G., Antoniol, G.: An exploratory study of the impact of antipatterns on class change- and fault-proneness. Empir. Softw. Eng. 17(3), 243\u2013275 (2012). https:\/\/doi.org\/10.1007\/s10664-011-9171-y","journal-title":"Empir. Softw. Eng."},{"key":"22_CR20","doi-asserted-by":"publisher","unstructured":"Mitchell, M., Wu, S., Zaldivar, A., Barnes, P., Vasserman, L., et\u00a0al.: Model cards for model reporting. In: Proceedings of the Conference on Fairness, Accountability, and Transparency, FAT\u204e\u201919, pp. 220\u2013229. Association for Computing Machinery, New York (2019). https:\/\/doi.org\/10.1145\/3287560.3287596, https:\/\/doi-org.univaq.idm.oclc.org\/10.1145\/3287560.3287596","DOI":"10.1145\/3287560.3287596"},{"key":"22_CR21","doi-asserted-by":"publisher","unstructured":"Montes, D., Peerapatanapokin, P., Schultz, J., Guo, C., Jiang, W., et\u00a0al.: Discrepancies among pre-trained deep neural networks: a new threat to model zoo reliability. In: Proceedings of the 30th ACM Joint European Software Engineering Conference and Symposium on the Foundations of Software Engineering, pp. 1605\u20131609. ESEC\/FSE 2022. Association for Computing Machinery, New York, November 2022. https:\/\/doi.org\/10.1145\/3540250.3560881, https:\/\/dl.acm.org\/doi\/10.1145\/3540250.3560881","DOI":"10.1145\/3540250.3560881"},{"key":"22_CR22","doi-asserted-by":"publisher","unstructured":"Nguyen, P.T., Di\u00a0Rocco, J., Di\u00a0Ruscio, D., et\u00a0al.: FOCUS: a recommender system for mining API function calls and usage patterns. In: Proceedings of the 41st International Conference on Software Engineering, ICSE \u201919, pp. 1050\u20131060. IEEE Press, Piscataway (2019). https:\/\/doi.org\/10.1109\/ICSE.2019.00109","DOI":"10.1109\/ICSE.2019.00109"},{"key":"22_CR23","doi-asserted-by":"publisher","unstructured":"Nguyen, T.D., Nguyen, A.T., Phan, H.D., Nguyen, T.N.: Exploring API embedding for API usages and applications. In: 2017 IEEE\/ACM 39th International Conference on Software Engineering (ICSE), pp. 438\u2013449. IEEE, Buenos Aires, May 2017. https:\/\/doi.org\/10.1109\/ICSE.2017.47, http:\/\/ieeexplore.ieee.org\/document\/7985683\/","DOI":"10.1109\/ICSE.2017.47"},{"key":"22_CR24","doi-asserted-by":"publisher","unstructured":"Palomba, F., Bavota, G., Di\u00a0Penta, M., Fasano, F., Oliveto, R., De\u00a0Lucia, A.: On the diffuseness and the impact on maintainability of code smells: a large scale empirical investigation. In: Proceedings of the 40th International Conference on Software Engineering, ICSE \u201918, p.\u00a0482. Association for Computing Machinery, New York (2018). https:\/\/doi.org\/10.1145\/3180155.3182532","DOI":"10.1145\/3180155.3182532"},{"key":"22_CR25","first-page":"2825","volume":"12","author":"F Pedregosa","year":"2011","unstructured":"Pedregosa, F., et al.: Scikit-learn: machine learning in Python. J. Mach. Learn. Res. 12, 2825\u20132830 (2011)","journal-title":"J. Mach. Learn. Res."},{"key":"22_CR26","doi-asserted-by":"publisher","unstructured":"Pepe, F., Nardone, V., Mastropaolo, A., Bavota, G., Canfora, G., Di\u00a0Penta, M.: How do hugging face models document datasets, bias, and licenses? an empirical study. In: Proceedings of the 32nd IEEE\/ACM International Conference on Program Comprehension, ICPC \u201924, pp. 370\u2013381. Association for Computing Machinery, New York (2024). https:\/\/doi.org\/10.1145\/3643916.3644412","DOI":"10.1145\/3643916.3644412"},{"key":"22_CR27","doi-asserted-by":"publisher","DOI":"10.1016\/j.jss.2024.112151","volume":"216","author":"G Recupito","year":"2024","unstructured":"Recupito, G., Pecorelli, F., Catolino, G., Lenarduzzi, V., Taibi, D., Di Nucci, D., Palomba, F.: Technical debt in ai-enabled systems: On the prevalence, severity, impact, and management strategies for code and architecture. J. Syst. Softw. 216, 112151 (2024). https:\/\/doi.org\/10.1016\/j.jss.2024.112151","journal-title":"J. Syst. Softw."},{"key":"22_CR28","doi-asserted-by":"publisher","unstructured":"Tufano, R., Masiero, S., Mastropaolo, A., Pascarella, L., Poshyvanyk, D., et\u00a0al.: Using pre-trained models to boost code review automation. In: Proceedings of the 44th International Conference on Software Engineering, ICSE \u201922, pp. 2291\u20132302. Association for Computing Machinery, New York, July 2022. https:\/\/doi.org\/10.1145\/3510003.3510621","DOI":"10.1145\/3510003.3510621"},{"key":"22_CR29","doi-asserted-by":"crossref","unstructured":"Vinutha, H., Poornima, B., Sagar, B.: Detection of outliers using interquartile range technique from intrusion dataset. In: Information and Decision Sciences: Proceedings of the 6th International Conference on Ficta, pp. 511\u2013518. Springer (2018)","DOI":"10.1007\/978-981-10-7563-6_53"},{"key":"22_CR30","doi-asserted-by":"publisher","unstructured":"Wang, J., Dang, Y., Zhang, H., Chen, K., Xie, T., Zhang, D.: Mining succinct and high-coverage api usage patterns from source code. In: 2013 10th Working Conference on Mining Software Repositories (MSR), pp. 319\u2013328 (2013). https:\/\/doi.org\/10.1109\/MSR.2013.6624045","DOI":"10.1109\/MSR.2013.6624045"},{"issue":"6","key":"22_CR31","doi-asserted-by":"publisher","first-page":"3524","DOI":"10.1109\/TSE.2023.3265362","volume":"49","author":"Y Xiao","year":"2023","unstructured":"Xiao, Y., Song, W., Qi, J., Viswanath, B., McDaniel, P., Yao, D.: Specializing neural networks for cryptographic code completion applications. IEEE Trans. Software Eng. 49(6), 3524\u20133535 (2023). https:\/\/doi.org\/10.1109\/TSE.2023.3265362. conference Name: IEEE Transactions on Software Engineering","journal-title":"IEEE Trans. Software Eng."},{"key":"22_CR32","doi-asserted-by":"publisher","unstructured":"Zhang, J., Mytkowicz, T., Kaufman, M., Piskac, R., Lahiri, S.K.: Using pre-trained language models to resolve textual and semantic merge conflicts (experience paper). In: Proceedings of the 31st ACM SIGSOFT International Symposium on Software Testing and Analysis, pp. 77\u201388. ISSTA 2022. Association for Computing Machinery, New York, July 2022. https:\/\/doi.org\/10.1145\/3533767.3534396, https:\/\/dl.acm.org\/doi\/10.1145\/3533767.3534396","DOI":"10.1145\/3533767.3534396"},{"key":"22_CR33","doi-asserted-by":"publisher","unstructured":"Zhong, H., Xie, T., Zhang, L., Pei, J., Mei, H.: MAPO: mining and recommending API usage patterns. In: Drossopoulou, S. (ed.) ECOOP 2009. LNCS, vol. 5653, pp. 318\u2013343. Springer, Heidelberg (2009). https:\/\/doi.org\/10.1007\/978-3-642-03013-0_15","DOI":"10.1007\/978-3-642-03013-0_15"}],"container-title":["Lecture Notes in Computer Science","Software Engineering and Advanced Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-032-04207-1_22","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,9]],"date-time":"2025-09-09T03:31:19Z","timestamp":1757388679000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-032-04207-1_22"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,9,9]]},"ISBN":["9783032042064","9783032042071"],"references-count":33,"URL":"https:\/\/doi.org\/10.1007\/978-3-032-04207-1_22","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,9,9]]},"assertion":[{"value":"9 September 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"SEAA","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Euromicro Conference on Software Engineering and Advanced Applications","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Salerno","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"10 September 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"12 September 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"51","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"seaa-12025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/dsd-seaa.com\/seaa2025\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}