{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,26]],"date-time":"2026-03-26T20:40:56Z","timestamp":1774557656910,"version":"3.50.1"},"publisher-location":"Cham","reference-count":29,"publisher":"Springer International Publishing","isbn-type":[{"value":"9783031164996","type":"print"},{"value":"9783031165009","type":"electronic"}],"license":[{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2022]]},"DOI":"10.1007\/978-3-031-16500-9_14","type":"book-chapter","created":{"date-parts":[[2022,11,1]],"date-time":"2022-11-01T10:02:31Z","timestamp":1667296951000},"page":"162-172","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":3,"title":["Training Dataset and\u00a0Dictionary Sizes Matter in\u00a0BERT Models: The Case of\u00a0Baltic Languages"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-3943-5568","authenticated-orcid":false,"given":"Matej","family":"Ul\u010dar","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1232-3320","authenticated-orcid":false,"given":"Marko","family":"Robnik-\u0160ikonja","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2022,11,2]]},"reference":[{"key":"14_CR1","unstructured":"Bommasani, R., et al.: On the opportunities and risks of foundation models. ArXiv preprint 2108.07258 (2021)"},{"key":"14_CR2","unstructured":"Brown, T., et al.: Language models are few-shot learners. In: Advances in Neural Information Processing Systems. vol. 33, pp. 1877\u20131901 (2020)"},{"key":"14_CR3","doi-asserted-by":"crossref","unstructured":"Conneau, A., et al.: Unsupervised cross-lingual representation learning at scale. arXiv preprint arXiv:1911.02116 (2019)","DOI":"10.18653\/v1\/2020.acl-main.747"},{"key":"14_CR4","unstructured":"Dargis, R., Auzin\u0327a, I., Boj\u0101rs, U., Paikens, P., Znotin\u0327\u0161, A.: Annotation of the corpus of the Saeima with multilingual standards. In: Proceedings of the Eleventh International Conference on Language Resources and Evaluation (LREC) (2018)"},{"key":"14_CR5","doi-asserted-by":"publisher","unstructured":"Devlin, J., Chang, M.W., Lee, K., Toutanova, K.: BERT: pre-training of deep bidirectional transformers for language understanding. In: Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long and Short Papers), pp. 4171\u20134186 (2019). https:\/\/doi.org\/10.18653\/v1\/N19-1423","DOI":"10.18653\/v1\/N19-1423"},{"key":"14_CR6","unstructured":"Ginter, F., Haji\u010d, J., Luotolahti, J., Straka, M., Zeman, D.: CoNLL 2017 shared task - automatically annotated raw texts and word embeddings (2017). http:\/\/hdl.handle.net\/11234\/1-1989, LINDAT\/CLARIN digital library"},{"key":"14_CR7","unstructured":"Jakub\u00ed\u010dek, M., Kilgarriff, A., Kov\u00e1\u0159, V., Rychl\u1ef3, P., Suchomel, V.: The TenTen corpus family. In: 7th International Corpus Linguistics Conference CL, pp. 
125\u2013127 (2013)"},{"key":"14_CR8","unstructured":"Kuratov, Y., Arkhipov, M.: Adaptation of deep bidirectional multilingual transformers for Russian language. ArXiv preprint arXiv:1905.07213 (2019)"},{"key":"14_CR9","unstructured":"Laur, S.: Nime\u00fcksuste korpus. Center of Estonian Language Resources (2013)"},{"key":"14_CR10","unstructured":"Liu, Y., et al.: Roberta: a robustly optimized BERT pretraining approach. ArXiv preprint 1907.11692 (2019)"},{"key":"14_CR11","unstructured":"Malmsten, M., B\u00f6rjeson, L., Haffenden, C.: Playing with Words at the National Library of Sweden - Making a Swedish BERT. ArXiv preprint 2007.01658 (2020)"},{"key":"14_CR12","unstructured":"Marcus, G., Davis, E.: Has AI found a new foundation? The Gradient, 11 September 2021 (2021)"},{"key":"14_CR13","unstructured":"Muischnek, K., M\u00fc\u00fcrisep, K., Puolakainen, T.: Estonian dependency treebank: from constraint grammar tagset to universal dependencies. In: Proceedings of LREC (2016)"},{"issue":"4","key":"14_CR14","doi-asserted-by":"publisher","first-page":"513","DOI":"10.1162\/coli.07-056-R1-07-027","volume":"34","author":"J Nivre","year":"2008","unstructured":"Nivre, J.: Algorithms for Deterministic Incremental Dependency Parsing. Comput. Linguist. 34(4), 513\u2013553 (2008). https:\/\/doi.org\/10.1162\/coli.07-056-R1-07-027","journal-title":"Comput. Linguist."},{"key":"14_CR15","unstructured":"Nivre, J., Abrams, M., Agi\u0107, \u017d.: Universal dependencies 2.3 (2018). http:\/\/hdl.handle.net\/11234\/1-2895"},{"key":"14_CR16","doi-asserted-by":"crossref","unstructured":"Ott, M., et al.: Fairseq: a fast, extensible toolkit for sequence modeling. In: Proceedings of NAACL-HLT 2019: Demonstrations (2019)","DOI":"10.18653\/v1\/N19-4009"},{"key":"14_CR17","doi-asserted-by":"publisher","unstructured":"Paikens, P., Auzin\u0327a, I., Garkaje, G., Paegle, M.: Towards named entity annotation of Latvian national library corpus. Front. Artif. Intell. Appl. 247, 169\u2013175 (2012). https:\/\/doi.org\/10.3233\/978-1-61499-133-5-169","DOI":"10.3233\/978-1-61499-133-5-169"},{"key":"14_CR18","unstructured":"Pinnis, M.: Latvian and Lithuanian named entity recognition with TildeNER. In: Proceedings of the 8th International Conference on Language Resources and Evaluation LREC 2012, pp. 1258\u20131265 (2012)"},{"key":"14_CR19","first-page":"1","volume":"21","author":"C Raffel","year":"2020","unstructured":"Raffel, C., et al.: Exploring the limits of transfer learning with a unified text-to-text transformer. J. Mach. Learn. Res. 21, 1\u201367 (2020)","journal-title":"J. Mach. Learn. Res."},{"key":"14_CR20","unstructured":"Rosa, R.: Plaintext Wikipedia dump 2018 (2018). http:\/\/hdl.handle.net\/11234\/1-2735, LINDAT\/CLARIAH-CZ digital library"},{"key":"14_CR21","unstructured":"Steinberger, R., Eisele, A., Klocek, S., Pilos, S., Schl\u00fcter, P.: DGT-TM: a freely available translation memory in 22 languages. In: Proceedings of the 8th International Conference on Language Resources and Evaluation LREC (2012)"},{"key":"14_CR22","doi-asserted-by":"crossref","unstructured":"Straka, M., N\u00e1plava, J., Strakov\u00e1, J., Samuel, D.: RobeCzech: Czech RoBERTa, a monolingual contextualized language representation model (2021)","DOI":"10.1007\/978-3-030-83527-9_17"},{"key":"14_CR23","doi-asserted-by":"publisher","unstructured":"Strzyz, M., Vilares, D., G\u00f3mez-Rodr\u00edguez, C.: Viable dependency parsing as sequence labeling. 
In: Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long and Short Papers), pp. 717\u2013723 (2019). https:\/\/doi.org\/10.18653\/v1\/N19-1077","DOI":"10.18653\/v1\/N19-1077"},{"key":"14_CR24","unstructured":"Tanvir, H., Kittask, C., Sirts, K.: EstBERT: a pretrained language-specific BERT for Estonian. arXiv preprint 2011.04784 (2020)"},{"key":"14_CR25","unstructured":"Ul\u010dar, M., Vaik, K., Lindstr\u00f6m, J., Dailid\u0117nait\u0117, M., Robnik-\u0160ikonja, M.: Multilingual culture-independent word analogy datasets. In: Proceedings of the 12th Language Resources and Evaluation Conference, pp. 4067\u20134073 (2020)"},{"key":"14_CR26","series-title":"Lecture Notes in Computer Science (Lecture Notes in Artificial Intelligence)","doi-asserted-by":"publisher","first-page":"104","DOI":"10.1007\/978-3-030-58323-1_11","volume-title":"Text, Speech, and Dialogue","author":"M Ul\u010dar","year":"2020","unstructured":"Ul\u010dar, M., Robnik-\u0160ikonja, M.: FinEst BERT and CroSloEngual BERT. In: Sojka, P., Kope\u010dek, I., Pala, K., Hor\u00e1k, A. (eds.) TSD 2020. LNCS (LNAI), vol. 12284, pp. 104\u2013111. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58323-1_11"},{"key":"14_CR27","unstructured":"Vaswani, A., et al.: Attention is all you need. In: Advances in Neural Information Processing Systems, pp. 5998\u20136008 (2017)"},{"key":"14_CR28","unstructured":"Virtanen, A., et al.: Multilingual is not enough: BERT for Finnish. arXiv preprint arXiv:1912.07076 (2019)"},{"key":"14_CR29","doi-asserted-by":"crossref","unstructured":"Znotin\u0327\u0161, A., Barzdin\u0327\u0161, G.: LVBERT: transformer-based model for Latvian language understanding. In: Human Language Technologies-The Baltic Perspective: Proceedings of the Ninth International Conference Baltic HLT 2020, vol. 328, p. 
111 (2020)","DOI":"10.3233\/FAIA200610"}],"container-title":["Lecture Notes in Computer Science","Analysis of Images, Social Networks and Texts"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-16500-9_14","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,11,1]],"date-time":"2022-11-01T10:04:25Z","timestamp":1667297065000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-16500-9_14"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022]]},"ISBN":["9783031164996","9783031165009"],"references-count":29,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-16500-9_14","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2022]]},"assertion":[{"value":"2 November 2022","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"AIST","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Analysis of Images, Social Networks and Texts","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Tbilisi","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Georgia","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2021","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"16 December 2021","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18 December 2021","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"10","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"aist2021","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/aistconf.org\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"EasyChair","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"118","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"20","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review 
Information (provided by the conference organizers)"}},{"value":"5","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"17% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"2.79","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Out of the 118 submissions, 26 were rejected before being sent to peer review.","order":10,"name":"additional_info_on_review_process","label":"Additional Info on Review Process","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}
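
The record above is a Crossref work object for the chapter, and its peer-review assertions spell out how the 17% acceptance rate is derived: full papers accepted divided by submissions sent for review, times 100, rounded (20 / 118 * 100 ≈ 16.95, rounded to 17%). As a minimal, illustrative sketch of working with this data, assuming network access, the public Crossref REST API at api.crossref.org, and the third-party `requests` package, the same record can be fetched by its DOI and the rate recomputed from the assertion fields shown above:

```python
# Minimal sketch: retrieve the chapter's Crossref record by DOI and recompute
# the acceptance rate exactly as the record's own assertion describes it
# ("Number of Full Papers Accepted / Number of Submissions Sent for Review * 100",
#  rounded to a whole number). Assumes the `requests` package is installed.
import requests

DOI = "10.1007/978-3-031-16500-9_14"
resp = requests.get(f"https://api.crossref.org/works/{DOI}", timeout=30)
resp.raise_for_status()
work = resp.json()["message"]  # the "message" object mirrors the record above

# Flatten the conference assertions into a simple name -> value lookup.
assertions = {a["name"]: a["value"] for a in work.get("assertion", [])}

accepted = int(assertions["number_of_full_papers_accepted"])         # 20
reviewed = int(assertions["number_of_submissions_sent_for_review"])  # 118
rate = round(accepted / reviewed * 100)                              # 20 / 118 * 100 -> 17

print(work["title"][0])
print(f"Acceptance rate of full papers: {rate}%")
```

This is only a sketch against the fields visible in the record; if the assertion entries were absent from a given work, the dictionary lookups would raise a KeyError, so a production script would guard them.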