{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,3]],"date-time":"2025-10-03T12:41:39Z","timestamp":1759495299665,"version":"build-2065373602"},"publisher-location":"Berlin, Heidelberg","reference-count":48,"publisher":"Springer Berlin Heidelberg","isbn-type":[{"value":"9783662722428","type":"print"},{"value":"9783662722435","type":"electronic"}],"license":[{"start":{"date-parts":[[2025,10,4]],"date-time":"2025-10-04T00:00:00Z","timestamp":1759536000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,10,4]],"date-time":"2025-10-04T00:00:00Z","timestamp":1759536000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2026]]},"DOI":"10.1007\/978-3-662-72243-5_17","type":"book-chapter","created":{"date-parts":[[2025,10,3]],"date-time":"2025-10-03T12:14:44Z","timestamp":1759493684000},"page":"292-309","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["A Benchmark to\u00a0Evaluate LLMs\u2019 Proficiency on\u00a0Italian Student Competencies"],"prefix":"10.1007","author":[{"given":"Fabio","family":"Mercorio","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Mario","family":"Mezzanzanica","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Daniele","family":"Potert\u00ec","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Antonio","family":"Serino","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Andrea","family":"Seveso","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2025,10,4]]},"reference":[{"key":"17_CR1","unstructured":"Achiam, J., et\u00a0al.: Gpt-4 technical report. arXiv preprint arXiv:2303.08774 (2023)"},{"key":"17_CR2","unstructured":"AI@Meta: Llama 3 model card (2024). https:\/\/github.com\/meta-llama\/llama3\/blob\/main\/MODEL_CARD.md"},{"key":"17_CR3","unstructured":"Armengol-Estap\u00e9, J., Bonet, O.D.G., Melero, M.: On the multilingual capabilities of very large-scale english language models. arXiv preprint arXiv:2108.13349 (2021)"},{"key":"17_CR4","unstructured":"Bacciu, A., Trappolini, G., Santilli, A., Rodol\u00e0, E., Silvestri, F.: Fauno: the Italian large language model that will leave you senza parole! arXiv preprint arXiv:2306.14457 (2023)"},{"key":"17_CR5","unstructured":"Basile, P., Musacchio, E., Polignano, M., Siciliani, L., Fiameni, G., Semeraro, G.: Llamantino: Llama 2 models for effective text generation in Italian language. arXiv preprint arXiv:2312.09993 (2023)"},{"key":"17_CR6","doi-asserted-by":"crossref","unstructured":"Basile, V., Bioglio, L., Bosca, A., Bosco, C., Patti, V.: Uinauil: a unified benchmark for italian natural language understanding. In: Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics, vol. 3: System Demonstrations, pp. 348\u2013356 (2023)","DOI":"10.18653\/v1\/2023.acl-demo.33"},{"key":"17_CR7","unstructured":"Bolondi, G., Gambini, A., Ferretti, F.: Il database gestinv delle prove standardizzate invalsi: Uno strumento per la ricerca: Alcuni esempi di utilizzo nell\u2019ambito della matematica. In: I dati INVALSI: Uno strumento per la ricerca, pp. 43\u201348. Franco Angeli (2017)"},{"key":"17_CR8","doi-asserted-by":"crossref","unstructured":"Carey, S., Markman, E.M.: Cognitive development. In: Cognitive Science, pp. 201\u2013254. Elsevier (1999)","DOI":"10.1016\/B978-012601730-4\/50007-X"},{"key":"17_CR9","unstructured":"Chang, Y., et al.: A survey on evaluation of large language models (2023). http:\/\/arxiv.org\/abs\/2307.03109"},{"key":"17_CR10","doi-asserted-by":"crossref","unstructured":"Chung, H.W., Garrette, D., Tan, K.C., Riesa, J.: Improving multilingual models with language-clustered vocabularies. arXiv preprint arXiv:2010.12777 (2020)","DOI":"10.18653\/v1\/2020.emnlp-main.367"},{"key":"17_CR11","unstructured":"Clark, P., et al.: Think you have solved question answering? Try arc, the ai2 reasoning challenge. arXiv preprint arXiv:1803.05457 (2018)"},{"key":"17_CR12","unstructured":"Cobbe, K., et\u00a0al.: Training verifiers to solve math word problems (2021). https:\/\/arxiv.org\/abs\/2110.14168"},{"key":"17_CR13","first-page":"46","volume":"10","author":"C Corsini","year":"2013","unstructured":"Corsini, C.: La validit\u00e0 di contenuto delle prove invalsi di comprensione della lettura. Ital. J. Educ. Res. 10, 46\u201361 (2013)","journal-title":"Ital. J. Educ. Res."},{"key":"17_CR14","doi-asserted-by":"crossref","unstructured":"Corsini, C., Losito, B.: Le rilevazioni invalsi: a che cosa servono? Cadmo: giornale italiano di pedagogia sperimentale: 2, 2013, pp. 55\u201376 (2013)","DOI":"10.3280\/CAD2013-002006"},{"key":"17_CR15","doi-asserted-by":"crossref","unstructured":"De\u00a0Mattei, L., Cafagna, M., Dell\u2019Orletta, F., Nissim, M., Guerini, M.: Geppetto carves italian into a language model. arXiv preprint arXiv:2004.14253 (2020)","DOI":"10.4000\/books.aaccademia.8438"},{"key":"17_CR16","unstructured":"Dettmers, T., Pagnoni, A., Holtzman, A., Zettlemoyer, L.: Qlora: efficient finetuning of quantized llms. Adv. Neural Inf. Process. Syst. 36 (2024)"},{"key":"17_CR17","unstructured":"Guzzo, G.: La competenza grammaticale nelle prove invalsi (2023)"},{"key":"17_CR18","unstructured":"Hu, E.J., et al.: Lora: low-rank adaptation of large language models. arXiv preprint arXiv:2106.09685 (2021)"},{"key":"17_CR19","unstructured":"Jiang, A.Q., et\u00a0al.: Mistral 7b. arXiv preprint arXiv:2310.06825 (2023)"},{"key":"17_CR20","unstructured":"Jiang, A.Q., et\u00a0al.: Mixtral of experts. arXiv preprint arXiv:2401.04088 (2024)"},{"key":"17_CR21","doi-asserted-by":"publisher","first-page":"50","DOI":"10.1162\/tacl_a_00447","volume":"10","author":"J Kreutzer","year":"2022","unstructured":"Kreutzer, J., et al.: Quality at a glance: an audit of web-crawled multilingual datasets. Trans. Assoc. Comput. Linguist. 10, 50\u201372 (2022)","journal-title":"Trans. Assoc. Comput. Linguist."},{"key":"17_CR22","unstructured":"Lai, M., Menini, S., Polignano, M., Russo, V., Sprugnoli, R., Venturi, G., et\u00a0al.: Evalita 2023: overview of the 8th evaluation campaign of natural language processing and speech tools for italian. In: Proceedings of the Eighth Evaluation Campaign of Natural Language Processing and Speech Tools for Italian. Final Workshop (EVALITA 2023). CEUR. org, Parma (2023)"},{"issue":"5","key":"17_CR23","doi-asserted-by":"publisher","first-page":"228","DOI":"10.3390\/info13050228","volume":"13","author":"N Landro","year":"2022","unstructured":"Landro, N., Gallo, I., La Grassa, R., Federici, E.: Two new datasets for italian-language abstractive text summarization. Information 13(5), 228 (2022)","journal-title":"Information"},{"key":"17_CR24","doi-asserted-by":"publisher","first-page":"576","DOI":"10.1162\/tacl_a_00655","volume":"12","author":"J Li","year":"2024","unstructured":"Li, J., Zhou, H., Huang, S., Cheng, S., Chen, J.: Eliciting the translation ability of large language models via multilingual finetuning with translation instructions. Trans. Assoc. Comput. Linguist. 12, 576\u2013592 (2024)","journal-title":"Trans. Assoc. Comput. Linguist."},{"key":"17_CR25","doi-asserted-by":"crossref","unstructured":"Liang, D., et al.: Xlm-v: overcoming the vocabulary bottleneck in multilingual masked language models. arXiv preprint arXiv:2301.10472 (2023)","DOI":"10.18653\/v1\/2023.emnlp-main.813"},{"key":"17_CR26","unstructured":"Liang, P., et\u00a0al.: Holistic evaluation of language models. arXiv preprint arXiv:2211.09110 (2022)"},{"key":"17_CR27","doi-asserted-by":"crossref","unstructured":"Lin, S., Hilton, J., Evans, O.: Truthfulqa: measuring how models mimic human falsehoods. arXiv preprint arXiv:2109.07958 (2021)","DOI":"10.18653\/v1\/2022.acl-long.229"},{"key":"17_CR28","unstructured":"Mangrulkar, S., Gugger, S., Debut, L., Belkada, Y., Paul, S., Bossan, B.: Peft: state-of-the-art parameter-efficient fine-tuning methods (2022). https:\/\/github.com\/huggingface\/peft"},{"key":"17_CR29","unstructured":"Mercorio, F., Potert\u00ec, D., Serino, A., Seveso, A., et\u00a0al.: Beep-best driver\u2019s license performer: a calamita challenge. In: CEUR Workshop Proceedings, vol.\u00a03878 (2024)"},{"key":"17_CR30","unstructured":"Orlando, R., et\u00a0al.: Minerva llms: the first family of large language models trained from scratch on Italian data. In: Proceedings of the Tenth Italian Conference on Computational Linguistics (CLiC-it 2024) (2024)"},{"key":"17_CR31","unstructured":"Pastore, S., Freddano, M., et\u00a0al.: \u201cquestione di feedback\u201d: dati invalsi e pratiche di valutazione in classe. In: I dati INVALSI: uno strumento per la ricerca, pp. 89\u2013100. FrancoAngeli (2017)"},{"key":"17_CR32","doi-asserted-by":"crossref","unstructured":"Pires, T., Schlinger, E., Garrette, D.: How multilingual is multilingual bert? arXiv preprint arXiv:1906.01502 (2019)","DOI":"10.18653\/v1\/P19-1493"},{"key":"17_CR33","unstructured":"Polignano, M., Basile, P., Semeraro, G.: Advanced natural-based interaction for the Italian language: Llamantino-3-anita (2024)"},{"key":"17_CR34","unstructured":"Puccetti, G., Cassese, M., Esuli, A.: The invalsi benchmarks: measuring the linguistic and mathematical understanding of large language models in Italian. In: Rambow, O., Wanner, L., Apidianaki, M., Al-Khalifa, H., Eugenio, B.D., Schockaert, S. (eds.) Proceedings of the 31st International Conference on Computational Linguistics, pp. 6782\u20136797. Association for Computational Linguistics, Abu Dhabi (2025). https:\/\/aclanthology.org\/2025.coling-main.453\/"},{"key":"17_CR35","unstructured":"Radford, A., Narasimhan, K., Salimans, T., Sutskever, I.: Improving language understanding by generative pre-training (2018)"},{"key":"17_CR36","doi-asserted-by":"crossref","unstructured":"Rajpurkar, P., Zhang, J., Lopyrev, K., Liang, P.: Squad: 100,000+ questions for machine comprehension of text. arXiv preprint arXiv:1606.05250 (2016)","DOI":"10.18653\/v1\/D16-1264"},{"key":"17_CR37","unstructured":"Reid, M., et\u00a0al.: Gemini 1.5: unlocking multimodal understanding across millions of tokens of context. arXiv preprint arXiv:2403.05530 (2024)"},{"key":"17_CR38","doi-asserted-by":"crossref","unstructured":"Ruder, S., et\u00a0al.: Xtreme-r: towards more challenging and nuanced multilingual evaluation. In: Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing. Association for Computational Linguistics (2021)","DOI":"10.18653\/v1\/2021.emnlp-main.802"},{"key":"17_CR39","unstructured":"Santilli, A., Rodol\u00e0, E.: Camoscio: an Italian instruction-tuned llama. arXiv preprint arXiv:2307.16456 (2023)"},{"key":"17_CR40","doi-asserted-by":"crossref","unstructured":"Seveso, A., Potert\u00ec, D., Federici, E., Mezzanzanica, M., Mercorio, F., et\u00a0al.: Italic: an Italian culture-aware natural language benchmark. In: Proceedings of the 2025 Conference of the Nations of the Americas Chapter of the Association for Computational Linguistics: Human Language Technologies, 29 April\u20134 May 2025, vol. 1: Long Papers), pp. 1469\u20131478 (2025)","DOI":"10.18653\/v1\/2025.naacl-long.68"},{"key":"17_CR41","unstructured":"Srivastava, A., et\u00a0al.: Beyond the imitation game: quantifying and extrapolating the capabilities of language models. arXiv preprint arXiv:2206.04615 (2022)"},{"key":"17_CR42","doi-asserted-by":"crossref","unstructured":"Talat, Z., et\u00a0al.: You reap what you sow: on the challenges of bias evaluation under multilingual settings. In: Proceedings of BigScience Episode# 5\u2013Workshop on Challenges & Perspectives in Creating Large Language Models, pp. 26\u201341 (2022)","DOI":"10.18653\/v1\/2022.bigscience-1.3"},{"key":"17_CR43","first-page":"277","volume":"12","author":"Z T\u00f3th","year":"2023","unstructured":"T\u00f3th, Z.: Riflettere sulle parole: la formazione delle parole nelle prove invalsi. Lingue antiche e moderne 12, 277\u2013298 (2023)","journal-title":"Lingue antiche e moderne"},{"key":"17_CR44","unstructured":"Touvron, H., et\u00a0al.: Llama 2: open foundation and fine-tuned chat models. arXiv preprint arXiv:2307.09288 (2023)"},{"key":"17_CR45","unstructured":"Trinchero, R.: Il servizio nazionale di valutazione e le prove invalsi. stato dell\u2019arte e proposte per una valutazione come agente di cambiamento. Form@ re-Open Journal per la formazione in rete 14(4), 34\u201349 (2014)"},{"key":"17_CR46","unstructured":"Vaswani, A., et al.: Attention is all you need. Adv. Neural Inf. Process. Syst. 30 (2017)"},{"key":"17_CR47","doi-asserted-by":"crossref","unstructured":"Zellers, R., Holtzman, A., Bisk, Y., Farhadi, A., Choi, Y.: Hellaswag: can a machine really finish your sentence? arXiv preprint arXiv:1905.07830 (2019)","DOI":"10.18653\/v1\/P19-1472"},{"key":"17_CR48","unstructured":"Zhang, T., Kishore, V., Wu, F., Weinberger, K.Q., Artzi, Y.: Bertscore: evaluating text generation with bert. arXiv preprint arXiv:1904.09675 (2019)"}],"container-title":["Lecture Notes in Computer Science","Machine Learning and Knowledge Discovery in Databases. Research Track and Applied Data Science Track"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-662-72243-5_17","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,10,3]],"date-time":"2025-10-03T12:15:06Z","timestamp":1759493706000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-662-72243-5_17"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,4]]},"ISBN":["9783662722428","9783662722435"],"references-count":48,"URL":"https:\/\/doi.org\/10.1007\/978-3-662-72243-5_17","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,10,4]]},"assertion":[{"value":"4 October 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECML PKDD","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Joint European Conference on Machine Learning and Knowledge Discovery in Databases","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Porto","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Portugal","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"15 September 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"19 September 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ecml2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/ecmlpkdd.org\/2025\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}