{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,27]],"date-time":"2025-03-27T11:07:04Z","timestamp":1743073624661,"version":"3.40.3"},"publisher-location":"Cham","reference-count":17,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783031217524"},{"type":"electronic","value":"9783031217531"}],"license":[{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2022]]},"DOI":"10.1007\/978-3-031-21753-1_28","type":"book-chapter","created":{"date-parts":[[2022,11,20]],"date-time":"2022-11-20T10:02:32Z","timestamp":1668938552000},"page":"280-288","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":3,"title":["Generating a\u00a0European Portuguese BERT Based Model Using Content from\u00a0Arquivo.pt Archive"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-3202-2242","authenticated-orcid":false,"given":"Nuno","family":"Miquelina","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5086-059X","authenticated-orcid":false,"given":"Paulo","family":"Quaresma","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0793-0003","authenticated-orcid":false,"given":"V\u00edtor Beires","family":"Nogueira","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2022,11,21]]},"reference":[{"key":"28_CR1","doi-asserted-by":"crossref","unstructured":"Barbaresi, A.: Trafilatura: a web scraping library and command-line tool for text discovery and extraction. In: Proceedings of the Joint Conference of the 59th Annual Meeting of the Association for Computational Linguistics and the 11th International Joint Conference on Natural Language Processing: System Demonstrations, pp. 122\u2013131. Association for Computational Linguistics (2021). https:\/\/aclanthology.org\/2021.acl-demo.15","DOI":"10.18653\/v1\/2021.acl-demo.15"},{"key":"28_CR2","doi-asserted-by":"crossref","unstructured":"Bojanowski, P., Grave, E., Joulin, A., Mikolov, T.: Enriching word vectors with subword information. Trans. Assoc. Comput. Linguist. 5, 135\u2013146 (2017). https:\/\/aclanthology.org\/Q17-1010","DOI":"10.1162\/tacl_a_00051"},{"key":"28_CR3","unstructured":"Brown, T., et al.: Language models are few-shot learners. In: Larochelle, H., Ranzato, M., Hadsell, R., Balcan, M., Lin, H. (eds.) Advances in Neural Information Processing Systems, vol. 33, pp. 1877\u20131901. Curran Associates, Inc. (2020). https:\/\/proceedings.neurips.cc\/paper\/2020\/file\/1457c0d6bfcb4967418bfb8ac142f64a-Paper.pdf"},{"key":"28_CR4","doi-asserted-by":"publisher","unstructured":"Devlin, J., Chang, M.W., Lee, K., Toutanova, K.: BERT: pre-training of deep bidirectional transformers for language understanding. In: Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, vol. 1 (Long and Short Papers), pp. 4171\u20134186. Association for Computational Linguistics, Minneapolis (2019). https:\/\/doi.org\/10.18653\/v1\/N19-1423, https:\/\/aclanthology.org\/N19-1423","DOI":"10.18653\/v1\/N19-1423"},{"key":"28_CR5","doi-asserted-by":"publisher","unstructured":"Diouf, R., Sarr, E., Sall, O., Birregah, B., Bousso, M., Mbaye, S.: Web scraping: state-of-the-art and areas of application, pp. 6040\u20136042 (2019). https:\/\/doi.org\/10.1109\/BigData47090.2019.9005594","DOI":"10.1109\/BigData47090.2019.9005594"},{"key":"28_CR6","unstructured":"Gomes, D., Nogueira, A., Miranda, J., Costa, M.: Introducing the Portuguese web archive initiative. In: 8th International Web Archiving Workshop. Springer, Heidelberg (2009)"},{"key":"28_CR7","doi-asserted-by":"publisher","unstructured":"Joshi, V., Peters, M., Hopkins, M.: Extending a parser to distant domains using a few dozen partially annotated examples. In: Proceedings of the 56th Annual Meeting of the Association for Computational Linguistics, vol. 1: Long Papers, pp. 1190\u20131199. Association for Computational Linguistics, Melbourne (2018). https:\/\/doi.org\/10.18653\/v1\/P18-1110, https:\/\/aclanthology.org\/P18-1110","DOI":"10.18653\/v1\/P18-1110"},{"key":"28_CR8","unstructured":"Le, H., et al.: Flaubert: unsupervised language model pre-training for French. CoRR abs\/1912.05372 (2019). http:\/\/arxiv.org\/abs\/1912.05372"},{"key":"28_CR9","unstructured":"Lejeune, G., Barbaresi, A.: Bien choisir son outil d\u2019extraction de contenu \u00e0 partir du web. In: 6e conf\u00e9rence conjointe Journ\u00e9es d\u2019\u00c9tudes sur la Parole (JEP, 33e \u00e9dition), Traitement Automatique des Langues Naturelles (TALN, 27e \u00e9dition), Rencontre des \u00c9tudiants Chercheurs en Informatique pour le Traitement Automatique des Langues (R\u00c9CITAL, 22e \u00e9dition), volume 4: D\u00e9monstrations et r\u00e9sum\u00e9s d\u2019articles internationaux, pp. 46\u201349. ATALA, AFCP (2020)"},{"key":"28_CR10","unstructured":"Liu, Y., et al.: Roberta: a robustly optimized BERT pretraining approach. CoRR abs\/1907.11692 (2019). http:\/\/arxiv.org\/abs\/1907.11692"},{"key":"28_CR11","doi-asserted-by":"crossref","unstructured":"Martin, L., et al.: CamemBERT: a tasty French language model. In: Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics, pp. 7203\u20137219. Association for Computational Linguistics, Online (2020). https:\/\/www.aclweb.org\/anthology\/2020.acl-main.645","DOI":"10.18653\/v1\/2020.acl-main.645"},{"key":"28_CR12","unstructured":"Mattmann, C.A., Zitting, J.L.: Tika in action (2012)"},{"key":"28_CR13","unstructured":"McCandless, M., Hatcher, E., Gospodneti\u0107, O., Gospodneti\u0107, O.: Lucene in Action, vol. 2. Manning Greenwich (2010)"},{"key":"28_CR14","unstructured":"Radford, A., Wu, J., Child, R., Luan, D., Amodei, D., Sutskever, I.: Language models are unsupervised multitask learners (2019). https:\/\/openai.com\/blog\/better-language-models\/"},{"key":"28_CR15","series-title":"Lecture Notes in Computer Science (Lecture Notes in Artificial Intelligence)","doi-asserted-by":"publisher","first-page":"403","DOI":"10.1007\/978-3-030-61377-8_28","volume-title":"Intelligent Systems","author":"F Souza","year":"2020","unstructured":"Souza, F., Nogueira, R., Lotufo, R.: BERTimbau: pretrained BERT models for Brazilian Portuguese. In: Cerri, R., Prati, R.C. (eds.) BRACIS 2020. LNCS (LNAI), vol. 12319, pp. 403\u2013417. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-61377-8_28"},{"key":"28_CR16","doi-asserted-by":"publisher","unstructured":"Tripathy, J.K., et al.: Comprehensive analysis of embeddings and pre-training in nlp. Comput. Sci. Rev. 42(C) (2021). https:\/\/doi.org\/10.1016\/j.cosrev.2021.100433","DOI":"10.1016\/j.cosrev.2021.100433"},{"key":"28_CR17","unstructured":"Vaswani, A., et al.: Attention is all you need. In: Guyon, I., Luxburg, U.V., Bengio, S., Wallach, H., Fergus, R., Vishwanathan, S., Garnett, R. (eds.) Advances in Neural Information Processing Systems, vol. 30. Curran Associates, Inc. (2017). https:\/\/proceedings.neurips.cc\/paper\/2017\/file\/3f5ee243547dee91fbd053c1c4a845aa-Paper.pdf"}],"container-title":["Lecture Notes in Computer Science","Intelligent Data Engineering and Automated Learning \u2013 IDEAL 2022"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-21753-1_28","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,3,13]],"date-time":"2024-03-13T12:14:42Z","timestamp":1710332082000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-21753-1_28"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022]]},"ISBN":["9783031217524","9783031217531"],"references-count":17,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-21753-1_28","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2022]]},"assertion":[{"value":"21 November 2022","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"IDEAL","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Intelligent Data Engineering and Automated Learning","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Manchester","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"United Kingdom","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2022","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"24 November 2022","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"26 November 2022","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"23","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ideal2022","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/ideal-conf.com\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Single-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Easychair","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"79","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"52","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"66% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"2.9","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"2.1","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"No","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}