{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,31]],"date-time":"2025-10-31T08:22:01Z","timestamp":1761898921936,"version":"build-2065373602"},"publisher-location":"Cham","reference-count":38,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783032037046","type":"print"},{"value":"9783032037053","type":"electronic"}],"license":[{"start":{"date-parts":[[2025,11,1]],"date-time":"2025-11-01T00:00:00Z","timestamp":1761955200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,11,1]],"date-time":"2025-11-01T00:00:00Z","timestamp":1761955200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2026]]},"DOI":"10.1007\/978-3-032-03705-3_6","type":"book-chapter","created":{"date-parts":[[2025,10,31]],"date-time":"2025-10-31T07:39:14Z","timestamp":1761896354000},"page":"60-71","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Evaluating Polish Linguistic and\u00a0Cultural Competency in\u00a0Large Language Models"],"prefix":"10.1007","author":[{"given":"S\u0142awomir","family":"Dadas","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ma\u0142gorzata","family":"Gr\u0229bowiec","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Micha\u0142","family":"Pere\u0142kiewicz","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Rafa\u0142","family":"Po\u015bwiata","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2025,11,1]]},"reference":[{"key":"6_CR1","unstructured":"Anthropic: The Claude 3 model family: Opus, Sonnet, Haiku (2024). https:\/\/api.semanticscholar.org\/CorpusID:268232499"},{"key":"6_CR2","unstructured":"Aryabumi, V., et\u00a0al.: Aya 23: open weight releases to further multilingual progress. arXiv preprint arXiv:2405.15032 (2024)"},{"key":"6_CR3","doi-asserted-by":"publisher","unstructured":"Chang, Y., et al.: A survey on evaluation of large language models. ACM Trans. Intell. Syst. Technol. 15(3) (2024). https:\/\/doi.org\/10.1145\/3641289","DOI":"10.1145\/3641289"},{"key":"6_CR4","unstructured":"Chen, M., et\u00a0al.: Evaluating large language models trained on code. arXiv preprint arXiv:2107.03374 (2021)"},{"key":"6_CR5","unstructured":"Chiang, W.L., et\u00a0al.: Chatbot arena: an open platform for evaluating LLMs by human preference. arXiv preprint arXiv:2403.04132 (2024)"},{"key":"6_CR6","unstructured":"Chiu, Y.Y., et al.: CulturalBench: a robust, diverse and challenging benchmark on measuring the (lack of) cultural knowledge of LLMs (2024)"},{"key":"6_CR7","unstructured":"Cobbe, K., et al.: Training verifiers to solve math word problems. arXiv preprint arXiv:2110.14168 (2021)"},{"key":"6_CR8","unstructured":"Dadas, S., Pere\u0142kiewicz, M., Po\u015bwiata, R.: Evaluation of sentence representations in Polish. In: Proceedings of the Twelfth Language Resources and Evaluation Conference, pp. 1674\u20131680 (2020)"},{"key":"6_CR9","unstructured":"Dubey, A., et\u00a0al.: The llama 3 herd of models. arXiv preprint arXiv:2407.21783 (2024)"},{"key":"6_CR10","unstructured":"Dubois, Y., Liang, P., Hashimoto, T.: Length-controlled AlpacaEval: a simple debiasing of automatic evaluators. In: First Conference on Language Modeling (2024). https:\/\/openreview.net\/forum?id=CybBmzWBX0"},{"key":"6_CR11","unstructured":"Etxaniz, J., Azkune, G., Soroa, A., de\u00a0Lacalle, O.L., Artetxe, M.: BertaQA: how much do language models know about local culture? (2024)"},{"key":"6_CR12","unstructured":"Gomez, A.: Introducing command R+: a scalable LLM built for business\u2014cohere.com (2024). https:\/\/cohere.com\/blog\/command-r-plus-microsoft-azure. Accessed 26 Sept 2024"},{"key":"6_CR13","unstructured":"Grzybowski, \u0141., Pokrywka, J., Ciesi\u00f3\u0142ka, M., Kaczmarek, J.I., Kubis, M.: Polish medical exams: a new dataset for cross-lingual medical knowledge transfer assessment (2024). https:\/\/arxiv.org\/abs\/2412.00559"},{"key":"6_CR14","unstructured":"Hendrycks, D., et al.: Measuring massive multitask language understanding. In: Proceedings of the International Conference on Learning Representations (ICLR) (2021a)"},{"key":"6_CR15","unstructured":"Hendrycks, D., et al.: Measuring mathematical problem solving with the math dataset. In: NeurIPS (2021b)"},{"key":"6_CR16","unstructured":"Jiang, A.Q., et\u00a0al.: Mistral 7b. arXiv preprint arXiv:2310.06825 (2023)"},{"key":"6_CR17","unstructured":"Jiang, A.Q., et\u00a0al.: Mixtral of experts. arXiv preprint arXiv:2401.04088 (2024)"},{"key":"6_CR18","unstructured":"Kazienko, P., et al.: PLLuM: towards polish large language model. In: PP-RAI 2024 - 5th Polish Conference on Artificial Intelligence (2024)"},{"key":"6_CR19","doi-asserted-by":"crossref","unstructured":"Laskar, M.T.R., et al.: A systematic survey and critical review on evaluating large language models: challenges, limitations, and recommendations. arXiv preprint arXiv:2407.04069 (2024)","DOI":"10.18653\/v1\/2024.emnlp-main.764"},{"key":"6_CR20","unstructured":"Liu, X., et al.: AgentBench: evaluating LLMs as agents. In: The Twelfth International Conference on Learning Representations (2024). https:\/\/openreview.net\/forum?id=zAdUB0aCTQ"},{"key":"6_CR21","doi-asserted-by":"crossref","unstructured":"McIntosh, T.R., Susnjak, T., Liu, T., Watters, P., Halgamuge, M.N.: Inadequacies of large language model benchmarks in the era of generative artificial intelligence. arXiv preprint arXiv:2402.09880 (2024)","DOI":"10.1109\/TAI.2025.3569516"},{"key":"6_CR22","unstructured":"Mousi, B., et al.: AraDiCE: benchmarks for dialectal and cultural capabilities in LLMs (2024). https:\/\/arxiv.org\/abs\/2409.11404"},{"key":"6_CR23","unstructured":"Myung, J., et al.: BLEnd: a benchmark for LLMs on everyday knowledge in diverse cultures and languages. In: The Thirty-Eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track (2024)"},{"key":"6_CR24","unstructured":"Ociepa, K., Flis, \u0141., Wr\u00f3bel, K., Gwo\u017adziej, A., Kinas, R.: Bielik 7B v0.1: a Polish language model \u2013 development, insights, and evaluation (2024)"},{"key":"6_CR25","unstructured":"OpenAI: GPT-4 technical report (2024). https:\/\/arxiv.org\/abs\/2303.08774"},{"key":"6_CR26","unstructured":"Reid, M., et\u00a0al.: Gemini 1.5: unlocking multimodal understanding across millions of tokens of context. arXiv preprint arXiv:2403.05530 (2024)"},{"key":"6_CR27","unstructured":"Rein, D., et al.: GPQA: a graduate-level google-proof Q &A benchmark. In: First Conference on Language Modeling (2024)"},{"key":"6_CR28","doi-asserted-by":"crossref","unstructured":"Rybak, P., Mroczkowski, R., Tracz, J., Gawlik, I.: KLEJ: comprehensive benchmark for polish language understanding. In: Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics, pp. 1191\u20131201 (2020)","DOI":"10.18653\/v1\/2020.acl-main.111"},{"key":"6_CR29","unstructured":"Rybak, P., Przyby\u0142a, P., Ogrodniczuk, M.: PolQA: Polish question answering dataset. In: Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024), pp. 12846\u201312855 (2024)"},{"key":"6_CR30","unstructured":"Sprague, Z.R., Ye, X., Bostrom, K., Chaudhuri, S., Durrett, G.: MuSR: testing the limits of chain-of-thought with multistep soft reasoning. In: The Twelfth International Conference on Learning Representations (2024)"},{"key":"6_CR31","unstructured":"Sta\u0144, I.: Why is the linguistic context important while working on textual data?, November 2023. https:\/\/medium.com\/@izab.sta\/why-is-the-linguistic-context-important-ba9069cf34de"},{"key":"6_CR32","doi-asserted-by":"crossref","unstructured":"Suzgun, M., et al.: Challenging big-bench tasks and whether chain-of-thought can solve them. arXiv preprint arXiv:2210.09261 (2022)","DOI":"10.18653\/v1\/2023.findings-acl.824"},{"key":"6_CR33","doi-asserted-by":"publisher","unstructured":"Tuora, R., Zwierzchowska, A., Zawadzka-Paluektau, N., Klamra, C., Kobyli\u0144ski, L.: PoQuAD - the Polish question answering dataset - description and analysis. In: Proceedings of the 12th Knowledge Capture Conference 2023, K-CAP 2023, pp. 105\u2013113. Association for Computing Machinery, New York, NY, USA (2023). https:\/\/doi.org\/10.1145\/3587259.3627548","DOI":"10.1145\/3587259.3627548"},{"key":"6_CR34","unstructured":"Wang, Y., et\u00a0al.: MMLU-Pro: a more robust and challenging multi-task language understanding benchmark. arXiv preprint arXiv:2406.01574 (2024)"},{"key":"6_CR35","unstructured":"Yang, A., et\u00a0al.: Qwen2 technical report. arXiv preprint arXiv:2407.10671 (2024)"},{"key":"6_CR36","unstructured":"Zheng, L., et al.: Judging LLM-as-a-judge with MT-bench and chatbot arena. In: Proceedings of the 37th International Conference on Neural Information Processing Systems, NIPS 2023. Curran Associates Inc., Red Hook, NY, USA (2024)"},{"key":"6_CR37","unstructured":"Zhou, J., et al.: Instruction-following evaluation for large language models. arXiv preprint arXiv:2311.07911 (2023)"},{"key":"6_CR38","unstructured":"Zhuo, T.Y., et\u00a0al.: BigCodeBench: benchmarking code generation with diverse function calls and complex instructions. arXiv preprint arXiv:2406.15877 (2024)"}],"container-title":["Lecture Notes in Computer Science","Artificial Intelligence and Soft Computing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-032-03705-3_6","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,10,31]],"date-time":"2025-10-31T07:39:43Z","timestamp":1761896383000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-032-03705-3_6"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,11,1]]},"ISBN":["9783032037046","9783032037053"],"references-count":38,"URL":"https:\/\/doi.org\/10.1007\/978-3-032-03705-3_6","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,11,1]]},"assertion":[{"value":"1 November 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ICAISC","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Artificial Intelligence and Soft Computing","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Zakopane","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Poland","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"22 June 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"26 June 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"24","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"icaisc2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/icaisc.eu\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}