{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,14]],"date-time":"2026-05-14T02:42:58Z","timestamp":1778726578752,"version":"3.51.4"},"publisher-location":"Cham","reference-count":47,"publisher":"Springer International Publishing","isbn-type":[{"value":"9783030615338","type":"print"},{"value":"9783030615345","type":"electronic"}],"license":[{"start":{"date-parts":[[2020,1,1]],"date-time":"2020-01-01T00:00:00Z","timestamp":1577836800000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2020,1,1]],"date-time":"2020-01-01T00:00:00Z","timestamp":1577836800000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2020]]},"DOI":"10.1007\/978-3-030-61534-5_27","type":"book-chapter","created":{"date-parts":[[2020,10,20]],"date-time":"2020-10-20T08:04:34Z","timestamp":1603181074000},"page":"301-314","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":19,"title":["Pre-training Polish Transformer-Based Language Models at Scale"],"prefix":"10.1007","author":[{"given":"S\u0142awomir","family":"Dadas","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Micha\u0142","family":"Pere\u0142kiewicz","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Rafa\u0142","family":"Po\u015bwiata","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2020,10,7]]},"reference":[{"key":"27_CR1","unstructured":"Akbik, A., Blythe, D., Vollgraf, R.: Contextual string embeddings for sequence labeling. In: Proceedings of the 27th International Conference on Computational Linguistics, pp. 1638\u20131649 (2018)"},{"key":"27_CR2","unstructured":"Aleksander Wawer, E.M.: Results of the PolEval 2018 shared task 2: named entity recognition. In: Proceedings of the PolEval 2018 Workshop, pp. 53\u201362 (2018)"},{"key":"27_CR3","unstructured":"Antoun, W., Baly, F., Hajj, H.: AraBERT: transformer-based model for Arabic language understanding. arXiv preprint arXiv:2003.00104 (2020)"},{"key":"27_CR4","doi-asserted-by":"crossref","unstructured":"Arkhipov, M., Trofimova, M., Kuratov, Y., Sorokin, A.: Tuning multilingual transformers for language-specific named entity recognition. In: Proceedings of the 7th Workshop on Balto-Slavic Natural Language Processing, Florence, Italy, pp. 89\u201393. Association for Computational Linguistics, August 2019. https:\/\/www.aclweb.org\/anthology\/W19-3712","DOI":"10.18653\/v1\/W19-3712"},{"key":"27_CR5","doi-asserted-by":"publisher","first-page":"135","DOI":"10.1162\/tacl_a_00051","volume":"5","author":"P Bojanowski","year":"2017","unstructured":"Bojanowski, P., Grave, E., Joulin, A., Mikolov, T.: Enriching word vectors with subword information. Trans. Assoc. Comput. Linguist. 5, 135\u2013146 (2017)","journal-title":"Trans. Assoc. Comput. Linguist."},{"key":"27_CR6","unstructured":"Ca\u00f1ete, J., Chaperon, G., Fuentes, R., P\u00e9rez, J.: Spanish pre-trained Bert model and evaluation data. In: Practical ML for Developing Countries Workshop @ ICLR 2020 (2020)"},{"key":"27_CR7","doi-asserted-by":"crossref","unstructured":"Conneau, A., et al.: Unsupervised cross-lingual representation learning at scale. arXiv preprint arXiv:1911.02116 (2019)","DOI":"10.18653\/v1\/2020.acl-main.747"},{"key":"27_CR8","unstructured":"Conneau, A., Lample, G.: Cross-lingual language model pretraining. In: Wallach, H., Larochelle, H., Beygelzimer, A., d\u2019 Alch\u00e9-Buc, F., Fox, E., Garnett, R. (eds.) Advances in Neural Information Processing Systems, vol. 32, pp. 7059\u20137069. Curran Associates, Inc. (2019). http:\/\/papers.nips.cc\/paper\/8928-cross-lingual-language-model-pretraining.pdf"},{"key":"27_CR9","unstructured":"Cui, Y., Che, W., Liu, T., Qin, B., Yang, Z., Wang, S., Hu, G.: Pre-training with whole word masking for Chinese BERT. arXiv preprint arXiv:1906.08101 (2019)"},{"key":"27_CR10","unstructured":"Czapla, P., Gugger, S., Howard, J., Kardas, M.: Universal language model fine-tuning for polish hate speech detection. In: Proceedings of the PolEval 2019 Workshop, p. 149 (2019)"},{"key":"27_CR11","series-title":"Lecture Notes in Computer Science (Lecture Notes in Artificial Intelligence)","doi-asserted-by":"publisher","first-page":"39","DOI":"10.1007\/978-3-030-20912-4_4","volume-title":"Artificial Intelligence and Soft Computing","author":"S Dadas","year":"2019","unstructured":"Dadas, S.: Combining neural and knowledge-based approaches to named entity recognition in polish. In: Rutkowski, L., Scherer, R., Korytkowski, M., Pedrycz, W., Tadeusiewicz, R., Zurada, J.M. (eds.) ICAISC 2019. LNCS (LNAI), vol. 11508, pp. 39\u201350. Springer, Cham (2019). https:\/\/doi.org\/10.1007\/978-3-030-20912-4_4"},{"key":"27_CR12","unstructured":"Dadas, S., Pere\u0142kiewicz, M., Po\u015bwiata, R.: Evaluation of sentence representations in polish. In: Proceedings of The 12th Language Resources and Evaluation Conference, Marseille, France, pp. 1674\u20131680. European Language Resources Association, May 2020. https:\/\/www.aclweb.org\/anthology\/2020.lrec-1.207"},{"key":"27_CR13","doi-asserted-by":"crossref","unstructured":"Dai, Z., Yang, Z., Yang, Y., Carbonell, J., Le, Q., Salakhutdinov, R.: Transformer-XL: attentive language models beyond a fixed-length context. In: Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics, , Florence, Italy, pp. 2978\u20132988. Association for Computational Linguistics, July 2019. https:\/\/www.aclweb.org\/anthology\/P19-1285","DOI":"10.18653\/v1\/P19-1285"},{"key":"27_CR14","doi-asserted-by":"crossref","unstructured":"Delobelle, P., Winters, T., Berendt, B.: Robbert: a Dutch roberta-based language model. arXiv preprint arXiv:2001.06286 (2020)","DOI":"10.18653\/v1\/2020.findings-emnlp.292"},{"key":"27_CR15","unstructured":"Devlin, J., Chang, M.W., Lee, K., Toutanova, K.: BERT: pre-training of deep bidirectional transformers for language understanding. In: Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long and Short Papers), Minneapolis, Minnesota, pp. 4171\u20134186. Association for Computational Linguistics, June 2019. https:\/\/www.aclweb.org\/anthology\/N19-1423"},{"key":"27_CR16","unstructured":"Heafield, K.: KenLM: faster and smaller language model queries. In: Proceedings of the Sixth Workshop on Statistical Machine Translation, pp. 187\u2013197 (2011)"},{"key":"27_CR17","doi-asserted-by":"crossref","unstructured":"Howard, J., Ruder, S.: Universal language model fine-tuning for text classification. In: Proceedings of the 56th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), pp. 328\u2013339 (2018)","DOI":"10.18653\/v1\/P18-1031"},{"key":"27_CR18","doi-asserted-by":"crossref","unstructured":"Jawahar, G., Sagot, B., Seddah, D.: What does BERT learn about the structure of language? In: ACL 2019\u201357th Annual Meeting of the Association for Computational Linguistics. Florence, Italy, July 2019. https:\/\/hal.inria.fr\/hal-02131630","DOI":"10.18653\/v1\/P19-1356"},{"key":"27_CR19","unstructured":"Kitaev, N., Kaiser, L., Levskaya, A.: Reformer: the efficient transformer. In: International Conference on Learning Representations (2020)"},{"key":"27_CR20","doi-asserted-by":"crossref","unstructured":"Koco\u0144, J., Mi\u0142kowski, P., Za\u015bko-Zieli\u0144ska, M.: Multi-level sentiment analysis of PolEmo 2.0: extended corpus of multi-domain consumer reviews. In: Proceedings of the 23rd Conference on Computational Natural Language Learning (CoNLL), Hong Kong, China, pp. 980\u2013991. Association for Computational Linguistics, November 2019. https:\/\/www.aclweb.org\/anthology\/K19-1092","DOI":"10.18653\/v1\/K19-1092"},{"key":"27_CR21","doi-asserted-by":"crossref","unstructured":"Kovaleva, O., Romanov, A., Rogers, A., Rumshisky, A.: Revealing the dark secrets of BERT. In: Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing (EMNLP-IJCNLP), Hong Kong, China, pp. 4365\u20134374. Association for Computational Linguistics, November 2019. https:\/\/www.aclweb.org\/anthology\/D19-1445","DOI":"10.18653\/v1\/D19-1445"},{"key":"27_CR22","doi-asserted-by":"crossref","unstructured":"Kudo, T., Richardson, J.: SentencePiece: a simple and language independent subword tokenizer and detokenizer for neural text processing. In: Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing: System Demonstrations, Brussels, Belgium, pp. 66\u201371. Association for Computational Linguistics, November 2018. https:\/\/www.aclweb.org\/anthology\/D18-2012","DOI":"10.18653\/v1\/D18-2012"},{"key":"27_CR23","unstructured":"Kuratov, Y., Arkhipov, M.: Adaptation of deep bidirectional multilingual transformers for Russian language. arXiv preprint arXiv:1905.07213 (2019)"},{"key":"27_CR24","unstructured":"Lan, Z., Chen, M., Goodman, S., Gimpel, K., Sharma, P., Soricut, R.: Albert: a lite BERT for self-supervised learning of language representations. In: International Conference on Learning Representations (2020)"},{"key":"27_CR25","unstructured":"Le, H., et al.: Flaubert: unsupervised language model pre-training for French. arXiv preprint arXiv:1912.05372 (2019)"},{"key":"27_CR26","unstructured":"Liu, Y., et al.: Roberta: a robustly optimized BERT pretraining approach. arXiv preprint arXiv:1907.11692 (2019)"},{"key":"27_CR27","unstructured":"Marci\u0144czuk, M., Ptak, M., Radziszewski, A., Piasecki, M.: Open dataset for development of polish question answering systems. In: Vetulani, Z., Uszkoreit, H. (eds.) Proceedings of Human Language Technologies as a Challenge for Computer Science and Linguistics 2013, pp. 479\u2013483. Fundacja UAM, Pozna\u0144 (2013)"},{"key":"27_CR28","unstructured":"Marelli, M., Menini, S., Baroni, M., Bentivogli, L., Bernardi, R., Zamparelli, R.: A SICK cure for the evaluation of compositional distributional semantic models. In: Proceedings of the Ninth International Conference on Language Resources and Evaluation (LREC 2014), Reykjavik, Iceland, pp. 216\u2013223. European Language Resources Association (ELRA), May 2014. http:\/\/www.lrec-conf.org\/proceedings\/lrec2014\/pdf\/363_Paper.pdf"},{"key":"27_CR29","doi-asserted-by":"crossref","unstructured":"Martin, L., et al.: CamemBERT: a tasty French language model. arXiv preprint arXiv:1911.03894 (2019)","DOI":"10.18653\/v1\/2020.acl-main.645"},{"key":"27_CR30","unstructured":"Mikolov, T., Sutskever, I., Chen, K., Corrado, G.S., Dean, J.: Distributed representations of words and phrases and their compositionality. In: Advances in Neural Information Processing Systems, pp. 3111\u20133119 (2013)"},{"key":"27_CR31","doi-asserted-by":"crossref","unstructured":"Nguyen, D.Q., Nguyen, A.T.: Phobert: pre-trained language models for Vietnamese. arXiv preprint arXiv:2003.00744 (2020)","DOI":"10.18653\/v1\/2020.findings-emnlp.92"},{"key":"27_CR32","unstructured":"Ogrodniczuk, M., Kope\u0107, M.: The polish summaries corpus. In: Chair), N.C.C., Choukri, K., Declerck, T., Loftsson, H., Maegaard, B., Mariani, J., Moreno, A., Odijk, J., Piperidis, S. (eds.) Proceedings of the Ninth International Conference on Language Resources and Evaluation (LREC 2014), Reykjavik, Iceland. European Language Resources Association (ELRA), May 2014"},{"key":"27_CR33","unstructured":"Peng, T.: The staggering cost of training SOTA AI models, June 2019. https:\/\/syncedreview.com\/2019\/06\/27\/the-staggering-cost-of-training-sota-ai-models\/"},{"key":"27_CR34","doi-asserted-by":"crossref","unstructured":"Pennington, J., Socher, R., Manning, C.: Glove: global vectors for word representation. In: Proceedings of the 2014 conference on empirical methods in natural language processing (EMNLP), pp. 1532\u20131543 (2014)","DOI":"10.3115\/v1\/D14-1162"},{"key":"27_CR35","doi-asserted-by":"crossref","unstructured":"Peters, M., Neumann, M., Iyyer, M., Gardner, M., Clark, C., Lee, K., Zettlemoyer, L.: Deep contextualized word representations. In: Proceedings of the 2018 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long Papers), vol. 1, pp. 2227\u20132237 (2018)","DOI":"10.18653\/v1\/N18-1202"},{"key":"27_CR36","unstructured":"Przepi\u00f3rkowski, A., Banko, M., G\u00f3rski, R.L., Lewandowska-Tomaszczyk, B.: Narodowy Korpus Jezyka Polskiego [Eng.: National Corpus of Polish]. Wydawnictwo Naukowe PWN, Warsaw (2012)"},{"key":"27_CR37","unstructured":"Ptaszynski, M., Pieciukiewicz, A., Dyba\u0142a, P.: Results of the PolEval 2019 shared task 6: first dataset and open shared task for automatic cyberbullying detection in polish Twitter. In: Proceedings of the PolEval 2019 Workshop, p. 89 (2019)"},{"key":"27_CR38","doi-asserted-by":"crossref","unstructured":"Rybak, P., Mroczkowski, R., Tracz, J., Gawlik, I.: KLEJ: comprehensive benchmark for polish language understanding. arXiv preprint arXiv:2005.00630 (2020)","DOI":"10.18653\/v1\/2020.acl-main.111"},{"key":"27_CR39","doi-asserted-by":"crossref","unstructured":"Shibuya, T., Hovy, E.: Nested named entity recognition via second-best sequence learning and decoding. arXiv preprint arXiv:1909.02250 (2019)","DOI":"10.1162\/tacl_a_00334"},{"key":"27_CR40","unstructured":"Souza, F., Nogueira, R., Lotufo, R.: Portuguese named entity recognition using BERT-CRF. arXiv preprint arXiv:1909.10649 (2019). http:\/\/arxiv.org\/abs\/1909.10649"},{"key":"27_CR41","doi-asserted-by":"crossref","unstructured":"Sun, Y., et al.: Ernie 2.0: a continual pre-training framework for language understanding. arXiv preprint arXiv:1907.12412 (2019)","DOI":"10.1609\/aaai.v34i05.6428"},{"key":"27_CR42","unstructured":"Vaswani, A., et al.: Attention is all you need. In: Advances in Neural Information Processing Systems, pp. 5998\u20136008 (2017)"},{"key":"27_CR43","unstructured":"Virtanen, A., et al.: Multilingual is not enough: Bert for finnish. arXiv preprint arXiv:1912.07076 (2019)"},{"key":"27_CR44","unstructured":"de Vries, W., van Cranenburgh, A., Bisazza, A., Caselli, T., van Noord, G., Nissim, M.: Bertje: a Dutch BERT model. arXiv preprint arXiv:1912.09582 (2019)"},{"key":"27_CR45","doi-asserted-by":"crossref","unstructured":"Wr\u00f3blewska, A., Krasnowska-Kiera\u015b, K.: Polish evaluation dataset for compositional distributional semantics models. In: Proceedings of the 55th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), Vancouver, Canada, pp. 784\u2013792. Association for Computational Linguistics, July 2017. https:\/\/www.aclweb.org\/anthology\/P17-1073","DOI":"10.18653\/v1\/P17-1073"},{"key":"27_CR46","unstructured":"Xu, L., Zhang, X., Dong, Q.: CLUECorpus2020: a large-scale Chinese corpus for pre-training language model. arXiv preprint arXiv:2003.01355 (2020)"},{"key":"27_CR47","unstructured":"Yang, Z., Dai, Z., Yang, Y., Carbonell, J., Salakhutdinov, R.R., Le, Q.V.: XLNet: generalized autoregressive pretraining for language understanding. In: Advances in Neural Information Processing Systems, pp. 5754\u20135764 (2019)"}],"container-title":["Lecture Notes in Computer Science","Artificial Intelligence and Soft Computing"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-61534-5_27","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,11,23]],"date-time":"2022-11-23T23:54:06Z","timestamp":1669247646000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-030-61534-5_27"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020]]},"ISBN":["9783030615338","9783030615345"],"references-count":47,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-61534-5_27","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2020]]},"assertion":[{"value":"7 October 2020","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ICAISC","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Artificial Intelligence and Soft Computing","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Zakopane","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Poland","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2020","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"12 October 2020","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"14 October 2020","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"19","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"icaisc2020","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/www.icaisc.eu\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}