{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,7,21]],"date-time":"2026-07-21T04:04:41Z","timestamp":1784606681069,"version":"3.55.0"},"publisher-location":"Cham","reference-count":76,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031453915","type":"print"},{"value":"9783031453922","type":"electronic"}],"license":[{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023]]},"DOI":"10.1007\/978-3-031-45392-2_15","type":"book-chapter","created":{"date-parts":[[2023,10,11]],"date-time":"2023-10-11T20:17:29Z","timestamp":1697055449000},"page":"226-240","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":68,"title":["Sabi\u00e1: Portuguese Large Language Models"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-0023-1971","authenticated-orcid":false,"given":"Ramon","family":"Pires","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5208-0290","authenticated-orcid":false,"given":"Hugo","family":"Abonizio","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-9568-9331","authenticated-orcid":false,"given":"Thales 
Sales","family":"Almeida","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2600-6035","authenticated-orcid":false,"given":"Rodrigo","family":"Nogueira","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"297","published-online":{"date-parts":[[2023,10,12]]},"reference":[{"key":"15_CR1","doi-asserted-by":"crossref","unstructured":"Almeida, T.S., Laitz, T., Bon\u00e1s, G.K., Nogueira, R.: Bluex: A benchmark based on Brazilian leading universities entrance exams. To appear (2023)","DOI":"10.1007\/978-3-031-45368-7_22"},{"key":"15_CR2","unstructured":"Antoun, W., Baly, F., Hajj, H.: AraBERT: Transformer-based model for Arabic language understanding. In: Proceedings of the 4th Workshop on Open-Source Arabic Corpora and Processing Tools, with a Shared Task on Offensive Language Detection. pp. 9\u201315. European Language Resource Association, Marseille, France (2020)"},{"key":"15_CR3","unstructured":"Barros, T.M.d., et al.: Employing transformers and emoji to perform sentiment classification of social media texts: Utilizando transformers e emoji na classifica\u00e7\u00e3o de sentimento de textos oriundos de redes sociais (2021)"},{"key":"15_CR4","doi-asserted-by":"publisher","unstructured":"Bhattacharjee, A., et al.: BanglaBERT: Language model pretraining and benchmarks for low-resource language understanding evaluation in Bangla. In: Findings of the Association for Computational Linguistics: NAACL 2022, pp. 1318\u20131327. Association for Computational Linguistics, Seattle, United States (2022). https:\/\/doi.org\/10.18653\/v1\/2022.findings-naacl.98","DOI":"10.18653\/v1\/2022.findings-naacl.98"},{"key":"15_CR5","unstructured":"Brum, H., Volpe Nunes, M.d.G.: Building a sentiment corpus of tweets in Brazilian Portuguese. In: Proceedings of the Eleventh International Conference on Language Resources and Evaluation (LREC 2018). 
European Language Resources Association (ELRA), Miyazaki, Japan (May 2018)"},{"key":"15_CR6","unstructured":"Ca\u00f1ete, J., Chaperon, G., Fuentes, R., Ho, J.H., Kang, H., P\u00e9rez, J.: Spanish pre-trained BERT model and evaluation data. In: PML4DC at ICLR 2020 (2020)"},{"key":"15_CR7","unstructured":"Carmo, D., Piau, M., Campiotti, I., Nogueira, R., Lotufo, R.: Ptt5: Pretraining and validating the t5 model on brazilian portuguese data. arXiv preprint arXiv:2008.09144 (2020)"},{"key":"15_CR8","doi-asserted-by":"publisher","unstructured":"Chan, B., Schweter, S., M\u00f6ller, T.: German\u2019s next language model. In: Proceedings of the 28th International Conference on Computational Linguistics, pp. 6788\u20136796. International Committee on Computational Linguistics, Barcelona, Spain (Online) (2020). https:\/\/doi.org\/10.18653\/v1\/2020.coling-main.598","DOI":"10.18653\/v1\/2020.coling-main.598"},{"key":"15_CR9","doi-asserted-by":"publisher","unstructured":"Chaves Rodrigues, R., Tanti, M., Agerri, R.: Evaluation of Portuguese Language Models (2023). https:\/\/doi.org\/10.5281\/zenodo.7781848, https:\/\/github.com\/ruanchaves\/eplm","DOI":"10.5281\/zenodo.7781848"},{"key":"15_CR10","unstructured":"Chowdhery, A., et al.: Palm: Scaling language modeling with pathways. arXiv preprint arXiv:2204.02311 (2022)"},{"key":"15_CR11","doi-asserted-by":"publisher","unstructured":"Clark, C., Lee, K., Chang, M.W., Kwiatkowski, T., Collins, M., Toutanova, K.: BoolQ: Exploring the surprising difficulty of natural yes\/no questions. In: Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long and Short Papers), pp. 2924\u20132936. Association for Computational Linguistics, Minneapolis, Minnesota (2019). 
https:\/\/doi.org\/10.18653\/v1\/N19-1300","DOI":"10.18653\/v1\/N19-1300"},{"key":"15_CR12","doi-asserted-by":"crossref","unstructured":"Conneau, A., et al.: Unsupervised cross-lingual representation learning at scale. In: Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics, pp. 8440\u20138451 (2020)","DOI":"10.18653\/v1\/2020.acl-main.747"},{"key":"15_CR13","doi-asserted-by":"crossref","unstructured":"Ebrahimi, A., Kann, K.: How to adapt your pretrained multilingual model to 1600 languages. In: Proceedings of the 59th Annual Meeting of the Association for Computational Linguistics and the 11th International Joint Conference on Natural Language Processing (Volume 1: Long Papers), pp. 4555\u20134567. Association for Computational Linguistics, Online (Aug 2021). 10.18653\/v1\/2021.acl-long.351","DOI":"10.18653\/v1\/2021.acl-long.351"},{"key":"15_CR14","doi-asserted-by":"crossref","unstructured":"FitzGerald, J., et al.: MASSIVE: A 1m-example multilingual natural language understanding dataset with 51 typologically-diverse languages (2022)","DOI":"10.18653\/v1\/2023.acl-long.235"},{"key":"15_CR15","unstructured":"Fu, Y., Peng, H., Ou, L., Sabharwal, A., Khot, T.: Specializing smaller language models towards multi-step reasoning. arXiv preprint arXiv:2301.12726 (2023)"},{"key":"15_CR16","unstructured":"Gao, L., et al.: The pile: An 800gb dataset of diverse text for language modeling. arXiv preprint arXiv:2101.00027 (2020)"},{"key":"15_CR17","unstructured":"Gururangan, S., et al.: Scaling expert language models with unsupervised domain discovery. arXiv preprint arXiv:2303.14177 (2023)"},{"key":"15_CR18","unstructured":"Hoffmann, J., et al.: Training compute-optimal large language models. arXiv preprint arXiv:2203.15556 (2022)"},{"key":"15_CR19","unstructured":"Houlsby, N., et al.: Parameter-efficient transfer learning for NLP. In: International Conference on Machine Learning, pp. 2790\u20132799. 
PMLR (2019)"},{"key":"15_CR20","unstructured":"Hu, E.J., et al.: LoRA: Low-rank adaptation of large language models. In: International Conference on Learning Representations (2022). https:\/\/openreview.net\/forum?id=nZeVKeeFYf9"},{"key":"15_CR21","doi-asserted-by":"crossref","unstructured":"Kalyan, K.S., Rajasekharan, A., Sangeetha, S.: Ammus: a survey of transformer-based pretrained models in natural language processing. arXiv preprint arXiv:2108.05542 (2021)","DOI":"10.1016\/j.jbi.2021.103982"},{"key":"15_CR22","doi-asserted-by":"crossref","unstructured":"Kim, B., et al.: What changes can large-scale language models bring? intensive study on hyperclova: Billions-scale korean generative pretrained transformers. In: Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing, pp. 3405\u20133424 (2021)","DOI":"10.18653\/v1\/2021.emnlp-main.274"},{"key":"15_CR23","doi-asserted-by":"publisher","unstructured":"Kudo, T., Richardson, J.: SentencePiece: A simple and language independent subword tokenizer and detokenizer for neural text processing. In: Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing: System Demonstrations, pp. 66\u201371. Association for Computational Linguistics, Brussels, Belgium (2018). https:\/\/doi.org\/10.18653\/v1\/D18-2012","DOI":"10.18653\/v1\/D18-2012"},{"key":"15_CR24","unstructured":"Le, H., et al.: FlauBERT: Unsupervised language model pre-training for French. In: Proceedings of the Twelfth Language Resources and Evaluation Conference, pp. 2479\u20132490. European Language Resources Association, Marseille, France (2020)"},{"key":"15_CR25","doi-asserted-by":"crossref","unstructured":"Lee, H., Yoon, J., Hwang, B., Joe, S., Min, S., Gwon, Y.: Korealbert: Pretraining a lite bert model for korean language understanding. In: 2020 25th International Conference on Pattern Recognition (ICPR), pp. 5551\u20135557. 
IEEE (2021)","DOI":"10.1109\/ICPR48806.2021.9412023"},{"key":"15_CR26","unstructured":"Lewkowycz, A., et al.: Solving quantitative reasoning problems with language models. arXiv preprint arXiv:2206.14858 (2022)"},{"key":"15_CR27","unstructured":"Lin, X.V., et al.: Few-shot learning with multilingual generative language models. In: Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing, pp. 9019\u20139052 (2022)"},{"key":"15_CR28","doi-asserted-by":"publisher","first-page":"726","DOI":"10.1162\/tacl_a_00343","volume":"8","author":"Y Liu","year":"2020","unstructured":"Liu, Y., et al.: Multilingual denoising pre-training for neural machine translation. Trans. Assoc. Comput. Linguist. 8, 726\u2013742 (2020)","journal-title":"Trans. Assoc. Comput. Linguist."},{"key":"15_CR29","doi-asserted-by":"publisher","first-page":"1389","DOI":"10.1162\/tacl_a_00433","volume":"9","author":"S Longpre","year":"2021","unstructured":"Longpre, S., Lu, Y., Daiber, J.: MKQA: a linguistically diverse benchmark for multilingual open domain question answering. Trans. Assoc. Comput. Linguist. 9, 1389\u20131406 (2021)","journal-title":"Trans. Assoc. Comput. Linguist."},{"key":"15_CR30","unstructured":"Loshchilov, I., Hutter, F.: Decoupled weight decay regularization. In: International Conference on Learning Representations (2019)"},{"key":"15_CR31","unstructured":"Maas, A.L., Daly, R.E., Pham, P.T., Huang, D., Ng, A.Y., Potts, C.: Learning word vectors for sentiment analysis. In: Proceedings of the 49th Annual Meeting of the Association for Computational Linguistics: Human Language Technologies, pp. 142\u2013150. Association for Computational Linguistics, Portland, Oregon, USA (2011)"},{"key":"15_CR32","doi-asserted-by":"publisher","unstructured":"Martin, L., et al.: CamemBERT: a tasty French language model. In: Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics. pp. 7203\u20137219. 
Association for Computational Linguistics, Online (2020). https:\/\/doi.org\/10.18653\/v1\/2020.acl-main.645","DOI":"10.18653\/v1\/2020.acl-main.645"},{"key":"15_CR33","doi-asserted-by":"crossref","unstructured":"de Melo, G., Imaizumi, V., Cozman, F.: Winograd schemas in portuguese. In: Anais do XVI Encontro Nacional de Intelig\u00eancia Artificial e Computacional, pp. 787\u2013798. SBC (2019)","DOI":"10.5753\/eniac.2019.9334"},{"key":"15_CR34","unstructured":"Moraes, G., Bonif\u00e1cio, L.H., Rodrigues de Souza, L., Nogueira, R., Lotufo, R.: A cost-benefit analysis of cross-lingual transfer methods. arXiv preprint arXiv:2105.06813 (2021). https:\/\/arxiv.org\/abs\/2105.06813"},{"key":"15_CR35","doi-asserted-by":"crossref","unstructured":"Muennighoff, N., et al.: Crosslingual generalization through multitask finetuning (2022)","DOI":"10.18653\/v1\/2023.acl-long.891"},{"key":"15_CR36","doi-asserted-by":"publisher","unstructured":"Nguyen, D.Q., Tuan Nguyen, A.: PhoBERT: Pre-trained language models for Vietnamese. In: Findings of the Association for Computational Linguistics: EMNLP 2020, pp. 1037\u20131042. Association for Computational Linguistics, Online (2020). https:\/\/doi.org\/10.18653\/v1\/2020.findings-emnlp.92","DOI":"10.18653\/v1\/2020.findings-emnlp.92"},{"key":"15_CR37","unstructured":"Nunes, D., Primi, R., Pires, R., Lotufo, R., Nogueira, R.: Evaluating gpt-3.5 and gpt-4 models on brazilian university admission exams (2023)"},{"key":"15_CR38","doi-asserted-by":"crossref","unstructured":"Ogueji, K., Zhu, Y., Lin, J.: Small data? no problem! exploring the viability of pretrained multilingual language models for low-resourced languages. In: Proceedings of the 1st Workshop on Multilingual Representation Learning, pp. 116\u2013126. 
Association for Computational Linguistics, Punta Cana, Dominican Republic (2021)","DOI":"10.18653\/v1\/2021.mrl-1.11"},{"key":"15_CR39","unstructured":"OpenAI: Gpt-4 technical report (2023)"},{"key":"15_CR40","doi-asserted-by":"crossref","unstructured":"Overwijk, A., Xiong, C., Callan, J.: Clueweb 22: 10 billion web documents with rich information. In: Proceedings of the 45th International ACM SIGIR Conference on Research and Development in Information Retrieval, pp. 3360\u20133362 (2022)","DOI":"10.1145\/3477495.3536321"},{"key":"15_CR41","doi-asserted-by":"crossref","unstructured":"Overwijk, A., Xiong, C., Liu, X., VandenBerg, C., Callan, J.: Clueweb 22: 10 billion web documents with visual and semantic information (2022)","DOI":"10.1145\/3477495.3536321"},{"key":"15_CR42","doi-asserted-by":"publisher","unstructured":"Pfeiffer, J., Kamath, A., R\u00fcckl\u00e9, A., Cho, K., Gurevych, I.: AdapterFusion: Non-destructive task composition for transfer learning. In: Proceedings of the 16th Conference of the European Chapter of the Association for Computational Linguistics: Main Volume, pp. 487\u2013503. Association for Computational Linguistics, Online (2021). https:\/\/doi.org\/10.18653\/v1\/2021.eacl-main.39","DOI":"10.18653\/v1\/2021.eacl-main.39"},{"key":"15_CR43","doi-asserted-by":"crossref","unstructured":"Pfeiffer, J., Vuli\u0107, I., Gurevych, I., Ruder, S.: Mad-x: An adapter-based framework for multi-task cross-lingual transfer. arXiv preprint arXiv:2005.00052 (2020)","DOI":"10.18653\/v1\/2020.emnlp-main.617"},{"issue":"8","key":"15_CR44","first-page":"9","volume":"1","author":"A Radford","year":"2019","unstructured":"Radford, A., Wu, J., Child, R., Luan, D., Amodei, D., Sutskever, I., et al.: Language models are unsupervised multitask learners. OpenAI blog 1(8), 9 (2019)","journal-title":"OpenAI blog"},{"key":"15_CR45","unstructured":"Rae, J.W., et al.: Scaling language models: Methods, analysis & insights from training gopher. 
arXiv preprint arXiv:2112.11446 (2021)"},{"issue":"1","key":"15_CR46","first-page":"5485","volume":"21","author":"C Raffel","year":"2020","unstructured":"Raffel, C., et al.: Exploring the limits of transfer learning with a unified text-to-text transformer. J. Mach. Learn. Res. 21(1), 5485\u20135551 (2020)","journal-title":"J. Mach. Learn. Res."},{"key":"15_CR47","series-title":"Lecture Notes in Computer Science (Lecture Notes in Artificial Intelligence)","doi-asserted-by":"publisher","first-page":"406","DOI":"10.1007\/978-3-030-41505-1_39","volume-title":"Computational Processing of the Portuguese Language","author":"L Real","year":"2020","unstructured":"Real, L., Fonseca, E., Gon\u00e7alo Oliveira, H.: The ASSIN 2 shared task: a quick overview. In: Quaresma, P., Vieira, R., Alu\u00edsio, S., Moniz, H., Batista, F., Gon\u00e7alves, T. (eds.) PROPOR 2020. LNCS (LNAI), vol. 12037, pp. 406\u2013412. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-41505-1_39"},{"key":"15_CR48","unstructured":"Roberts, A., et al.: Scaling up models and data with t5x and seqio. arXiv preprint arXiv:2203.17189 13 (2022)"},{"key":"15_CR49","unstructured":"Rosa, G.M., Bonifacio, L.H., de Souza, L.R., Lotufo, R., Nogueira, R.: A cost-benefit analysis of cross-lingual transfer methods. arXiv preprint arXiv:2105.06813 (2021)"},{"key":"15_CR50","unstructured":"la Rosa, J.D., Fern\u00e1ndez, A.: Zero-shot reading comprehension and reasoning for spanish with BERTIN GPT-J-6B. In: y G\u00f3mez, M.M., (eds.) Proceedings of the Iberian Languages Evaluation Forum (IberLEF 2022). CEUR Workshop Proceedings (2022)"},{"issue":"9","key":"15_CR51","doi-asserted-by":"publisher","first-page":"99","DOI":"10.1145\/3474381","volume":"64","author":"K Sakaguchi","year":"2021","unstructured":"Sakaguchi, K., Bras, R.L., Bhagavatula, C., Choi, Y.: Winogrande: an adversarial winograd schema challenge at scale. Commun. ACM 64(9), 99\u2013106 (2021)","journal-title":"Commun. 
ACM"},{"key":"15_CR52","unstructured":"Sarti, G., Nissim, M.: It5: Large-scale text-to-text pretraining for italian language understanding and generation. arXiv preprint arXiv:2203.03759 (2022)"},{"key":"15_CR53","doi-asserted-by":"publisher","unstructured":"Sayama, H.F., Araujo, A.V., Fernandes, E.R.: FaQuAD: Reading comprehension dataset in the domain of brazilian higher education. In: 2019 8th Brazilian Conference on Intelligent Systems (BRACIS), pp. 443\u2013448 (2019). https:\/\/doi.org\/10.1109\/BRACIS.2019.00084","DOI":"10.1109\/BRACIS.2019.00084"},{"key":"15_CR54","unstructured":"Scao, T.L., et al.: Bloom: A 176b-parameter open-access multilingual language model. arXiv preprint arXiv:2211.05100 (2022)"},{"key":"15_CR55","unstructured":"Shazeer, N., Stern, M.: Adafactor: Adaptive learning rates with sublinear memory cost. In: International Conference on Machine Learning, pp. 4596\u20134604. PMLR (2018)"},{"key":"15_CR56","unstructured":"Shliazhko, O., Fenogenova, A., Tikhonova, M., Mikhailov, V., Kozlova, A., Shavrina, T.: MGPT: Few-shot learners go multilingual. arXiv preprint arXiv:2204.07580 (2022)"},{"key":"15_CR57","doi-asserted-by":"publisher","unstructured":"Silveira, I.C., Maua, D.D.: Advances in automatically solving the enem. In: 2018 7th Brazilian Conference on Intelligent Systems (BRACIS), pp. 43\u201348. IEEE Computer Society, Los Alamitos, CA, USA (oct 2018). https:\/\/doi.org\/10.1109\/BRACIS.2018.00016","DOI":"10.1109\/BRACIS.2018.00016"},{"key":"15_CR58","unstructured":"Socher, R., et al.: Recursive deep models for semantic compositionality over a sentiment treebank. In: Proceedings of the 2013 Conference on Empirical Methods in Natural Language Processing, pp. 1631\u20131642. 
Association for Computational Linguistics, Seattle, Washington, USA (2013)"},{"key":"15_CR59","series-title":"Lecture Notes in Computer Science (Lecture Notes in Artificial Intelligence)","doi-asserted-by":"publisher","first-page":"403","DOI":"10.1007\/978-3-030-61377-8_28","volume-title":"Intelligent Systems","author":"F Souza","year":"2020","unstructured":"Souza, F., Nogueira, R., Lotufo, R.: BERTimbau: pretrained BERT models for Brazilian Portuguese. In: Cerri, R., Prati, R.C. (eds.) BRACIS 2020. LNCS (LNAI), vol. 12319, pp. 403\u2013417. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-61377-8_28"},{"key":"15_CR60","unstructured":"Srivastava, A., et al.: Beyond the imitation game: Quantifying and extrapolating the capabilities of language models. arXiv preprint arXiv:2206.04615 (2022)"},{"key":"15_CR61","unstructured":"Su, J., Lu, Y., Pan, S., Wen, B., Liu, Y.: Roformer: Enhanced transformer with rotary position embedding. arXiv preprint arXiv:2104.09864 (2021)"},{"key":"15_CR62","unstructured":"Taylor, R. et al.: Galactica: A large language model for science. arXiv preprint arXiv:2211.09085 (2022)"},{"key":"15_CR63","unstructured":"Touvron, H., et al.: Llama: Open and efficient foundation language models. arXiv preprint arXiv:2302.13971 (2023)"},{"key":"15_CR64","unstructured":"Vaswani, A., et al.: Attention is all you need. In: Advances in Neural Information Processing Systems 30 (2017)"},{"key":"15_CR65","unstructured":"Wang, B.: Mesh-Transformer-JAX: Model-Parallel Implementation of Transformer Language Model with JAX. https:\/\/github.com\/kingoflolz\/mesh-transformer-jax (2021)"},{"key":"15_CR66","unstructured":"Wang, B., Komatsuzaki, A.: GPT-J-6B: A 6 Billion Parameter Autoregressive Language Model (2021)"},{"key":"15_CR67","unstructured":"Wei, J., et al.: Emergent abilities of large language models. 
Transactions on Machine Learning Research (2022), survey Certification"},{"key":"15_CR68","unstructured":"Wu, S., et al.: BloombergGPT: A large language model for finance (2023)"},{"key":"15_CR69","doi-asserted-by":"publisher","first-page":"291","DOI":"10.1162\/tacl_a_00461","volume":"10","author":"L Xue","year":"2022","unstructured":"Xue, L., et al.: Byt5: towards a token-free future with pre-trained byte-to-byte models. Trans. Assoc. Comput. Linguist. 10, 291\u2013306 (2022)","journal-title":"Trans. Assoc. Comput. Linguist."},{"key":"15_CR70","doi-asserted-by":"crossref","unstructured":"Xue, L., et al.: mt5: A massively multilingual pre-trained text-to-text transformer. arXiv preprint arXiv:2010.11934 (2020)","DOI":"10.18653\/v1\/2021.naacl-main.41"},{"key":"15_CR71","volume-title":"XLNet: Generalized Autoregressive Pretraining for Language Understanding","author":"Z Yang","year":"2019","unstructured":"Yang, Z., Dai, Z., Yang, Y., Carbonell, J., Salakhutdinov, R., Le, Q.V.: XLNet: Generalized Autoregressive Pretraining for Language Understanding. Curran Associates Inc., Red Hook, NY, USA (2019)"},{"key":"15_CR72","doi-asserted-by":"crossref","unstructured":"Yong, Z.X., et al.: Bloom+ 1: Adding language support to bloom for zero-shot prompting. arXiv preprint arXiv:2212.09535 (2022)","DOI":"10.18653\/v1\/2023.acl-long.653"},{"key":"15_CR73","unstructured":"Zeng, A., et al.: Glm-130b: An open bilingual pre-trained model. arXiv preprint arXiv:2210.02414 (2022)"},{"key":"15_CR74","unstructured":"Zhang, S., et al.: Opt: Open pre-trained transformer language models. arXiv preprint arXiv:2205.01068 (2022)"},{"key":"15_CR75","unstructured":"Zhang, X., Zhao, J.J., LeCun, Y.: Character-level convolutional networks for text classification. In: NIPS (2015)"},{"key":"15_CR76","doi-asserted-by":"crossref","unstructured":"Zoph, B.: Designing effective sparse expert models. In: 2022 IEEE International Parallel and Distributed Processing Symposium Workshops (IPDPSW), p. 1044. 
IEEE (2022)","DOI":"10.1109\/IPDPSW55747.2022.00171"}],"container-title":["Lecture Notes in Computer Science","Intelligent Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-45392-2_15","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,3,13]],"date-time":"2024-03-13T16:48:42Z","timestamp":1710348522000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-45392-2_15"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023]]},"ISBN":["9783031453915","9783031453922"],"references-count":76,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-45392-2_15","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2023]]},"assertion":[{"value":"12 October 2023","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"BRACIS","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Brazilian Conference on Intelligent Systems","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Belo Horizonte","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Brazil","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2023","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"25 September 2023","order":7,"name":"conference_start_date","label":"Conference Start 
Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 September 2023","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"12","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"bracis2023","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/www.bracis.dcc.ufmg.br","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"JEMS","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"242","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"90","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"37% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole 
number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"4","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"5","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}