{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,12]],"date-time":"2026-03-12T14:44:06Z","timestamp":1773326646700,"version":"3.50.1"},"publisher-location":"Cham","reference-count":23,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031353192","type":"print"},{"value":"9783031353208","type":"electronic"}],"license":[{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023]]},"DOI":"10.1007\/978-3-031-35320-8_17","type":"book-chapter","created":{"date-parts":[[2023,6,13]],"date-time":"2023-06-13T05:01:53Z","timestamp":1686632513000},"page":"243-256","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":4,"title":["Evaluation of\u00a0Transformer-Based Models for\u00a0Punctuation and\u00a0Capitalization Restoration in\u00a0Spanish and\u00a0Portuguese"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0009-0008-7317-7145","authenticated-orcid":false,"given":"Ronghao","family":"Pan","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3651-2660","authenticated-orcid":false,"given":"Jos\u00e9 Antonio","family":"Garc\u00eda-D\u00edaz","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2457-1791","authenticated-orcid":false,"given":"Rafael","family":"Valencia-Garc\u00eda","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2023,6,14]]},"reference":[{"key":"17_CR1","doi-asserted-by":"publisher","unstructured":"Alam, T., Khan, A., Alam, F.: Punctuation restoration using transformer models for high-and low-resource languages. In: Proceedings of the Sixth Workshop on Noisy User-Generated Text (W-NUT 2020), pp. 132\u2013142. Association for Computational Linguistics, Online (2020). https:\/\/doi.org\/10.18653\/v1\/2020.wnut-1.18","DOI":"10.18653\/v1\/2020.wnut-1.18"},{"key":"17_CR2","doi-asserted-by":"publisher","unstructured":"Bannard, C., Callison-Burch, C.: Paraphrasing with bilingual parallel corpora. In: Proceedings of the 43rd Annual Meeting of the Association for Computational Linguistics (ACL 2005), pp. 597\u2013604. Association for Computational Linguistics, Ann Arbor, Michigan (2005). https:\/\/doi.org\/10.3115\/1219840.1219914","DOI":"10.3115\/1219840.1219914"},{"key":"17_CR3","doi-asserted-by":"publisher","unstructured":"Ba\u00f1\u00f3n, M., et al.: ParaCrawl: web-scale acquisition of parallel corpora. In: Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics, pp. 4555\u20134567. Association for Computational Linguistics, Online (2020). https:\/\/doi.org\/10.18653\/v1\/2020.acl-main.417","DOI":"10.18653\/v1\/2020.acl-main.417"},{"key":"17_CR4","series-title":"Studies in Computational Intelligence","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-14206-7","volume-title":"Harmonization and Development of Resources and Tools for Italian Natural Language Processing within the PARLI Project","year":"2015","unstructured":"Basili, R., Bosco, C., Delmonte, R., Moschitti, A., Simi, M. (eds.): Harmonization and Development of Resources and Tools for Italian Natural Language Processing within the PARLI Project. SCI, vol. 589. Springer, Cham (2015). https:\/\/doi.org\/10.1007\/978-3-319-14206-7"},{"key":"17_CR5","doi-asserted-by":"crossref","unstructured":"Bostrom, K., Durrett, G.: Byte pair encoding is suboptimal for language model pretraining. CoRR abs\/2004.03720 (2020). https:\/\/arxiv.org\/abs\/2004.03720","DOI":"10.18653\/v1\/2020.findings-emnlp.414"},{"key":"17_CR6","unstructured":"Ca\u00f1ete, J., Chaperon, G., Fuentes, R., Ho, J.H., Kang, H., P\u00e9rez, J.: Spanish pre-trained BERT model and evaluation data. PML4DC ICLR 2020(2020), 1\u201310 (2020)"},{"key":"17_CR7","unstructured":"Ca\u00f1ete, J., Donoso, S., Bravo-Marquez, F., Carvallo, A., Araujo, V.: ALBETO and DistilBETO: lightweight Spanish language models. In: Proceedings of the Thirteenth Language Resources and Evaluation Conference, pp. 4291\u20134298. Marseille, France (2022)"},{"key":"17_CR8","doi-asserted-by":"publisher","unstructured":"Conneau, A., et al.: Unsupervised cross-lingual representation learning at scale. In: Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics, pp. 8440\u20138451. Association for Computational Linguistics, Online (2020). https:\/\/doi.org\/10.18653\/v1\/2020.acl-main.747","DOI":"10.18653\/v1\/2020.acl-main.747"},{"key":"17_CR9","doi-asserted-by":"publisher","unstructured":"Courtland, M., Faulkner, A., McElvain, G.: Efficient automatic punctuation restoration using bidirectional transformers with robust inference. In: Proceedings of the 17th International Conference on Spoken Language Translation, pp. 272\u2013279. Association for Computational Linguistics, Online (2020). https:\/\/doi.org\/10.18653\/v1\/2020.iwslt-1.33","DOI":"10.18653\/v1\/2020.iwslt-1.33"},{"key":"17_CR10","doi-asserted-by":"publisher","unstructured":"Fandi\u00f1o, A.G., et al.: Maria: Spanish language models. Procesamiento del Lenguaje Natural 68 (2022). https:\/\/doi.org\/10.26342\/2022-68-3","DOI":"10.26342\/2022-68-3"},{"key":"17_CR11","unstructured":"Federico, M., Cettolo, M., Bentivogli, L., Paul, M., St\u00fcker, S.: Overview of the IWSLT 2012 evaluation campaign. In: Proceedings of the 9th International Workshop on Spoken Language Translation: Evaluation Campaign, pp. 12\u201333. Hong Kong, Table of contents (2012). https:\/\/aclanthology.org\/2012.iwslt-evaluation.1"},{"key":"17_CR12","first-page":"59","volume":"67","author":"A Gonz\u00e1lez-Docasal","year":"2021","unstructured":"Gonz\u00e1lez-Docasal, A., Garc\u00eda-Pablos, A., Arzelus, H., \u00c1lvarez, A.: AutoPunct: a BERT-based automatic punctuation and capitalisation system for Spanish and basque. Procesamiento del Lenguaje Natural 67, 59\u201368 (2021). http:\/\/journal.sepln.org\/sepln\/ojs\/ojs\/index.php\/pln\/article\/view\/6377","journal-title":"Procesamiento del Lenguaje Natural"},{"key":"17_CR13","doi-asserted-by":"publisher","unstructured":"Jones, D., et al.: Measuring the readability of automatic speech-to-text transcripts. In: 8th European Conference on Speech Communication and Technology, EUROSPEECH 2003 - INTERSPEECH 2003, Geneva, Switzerland, 1\u20134 September 2003. ISCA (2003). https:\/\/doi.org\/10.21437\/Eurospeech","DOI":"10.21437\/Eurospeech"},{"key":"17_CR14","doi-asserted-by":"publisher","unstructured":"Lima, T.B.D., et al.: Sequence labeling algorithms for punctuation restoration in Brazilian Portuguese texts. In: Xavier-Junior, J.C., Rios, R.A. (eds.) Intelligent Systems (BRACIS 2022). LNCS, vol. 13654, pp. 616\u2013630. Springer, Cham (2022). https:\/\/doi.org\/10.1007\/978-3-031-21689-3_43","DOI":"10.1007\/978-3-031-21689-3_43"},{"key":"17_CR15","first-page":"27","volume":"70","author":"R Pan","year":"2023","unstructured":"Pan, R., Garc\u00eda-D\u00edaz, J.A., Vicente, P.J.V., Valencia-Garc\u00eda, R.: Evaluation of transformer-based models for punctuation and capitalization restoration in Catalan and Galician. Proces. del Leng. Natural 70, 27\u201338 (2023). http:\/\/journal.sepln.org\/sepln\/ojs\/ojs\/index.php\/pln\/article\/view\/6476","journal-title":"Proces. del Leng. Natural"},{"key":"17_CR16","unstructured":"De la Rosa, J.G., Ponferrada, E., Romero, M., Villegas, P., Gonz\u00e1lez de Prado Salas, P., Grandury, M.: Bertin: efficient pre-training of a Spanish language model using perplexity sampling. Procesamiento del Lenguaje Natural 68, 13\u201323 (2022). http:\/\/journal.sepln.org\/sepln\/ojs\/ojs\/index.php\/pln\/article\/view\/6403"},{"key":"17_CR17","doi-asserted-by":"crossref","unstructured":"Souza, F., Nogueira, R., Lotufo, R.: BERTimbau: pretrained BERT models for Brazilian Portuguese. In: 9th Brazilian Conference on Intelligent Systems, BRACIS, Rio Grande do Sul, Brazil, 20\u201323 October (2020). (To appear)","DOI":"10.1007\/978-3-030-61377-8_28"},{"key":"17_CR18","doi-asserted-by":"publisher","unstructured":"Tilk, O., Alum\u00e4e, T.: LSTM for punctuation restoration in speech transcripts. In: Sixteenth Annual Conference of the International Speech Communication Association (2015). https:\/\/doi.org\/10.21437\/Interspeech","DOI":"10.21437\/Interspeech"},{"key":"17_CR19","doi-asserted-by":"publisher","unstructured":"T\u00fcndik, M.A., Szasz\u00e1k, G.: Joint word- and character-level embedding CNN-RNN models for punctuation restoration. In: 2018 9th IEEE International Conference on Cognitive Infocommunications (CogInfoCom), pp. 000135\u2013000140 (2018). https:\/\/doi.org\/10.1109\/CogInfoCom.2018.8639876","DOI":"10.1109\/CogInfoCom.2018.8639876"},{"key":"17_CR20","doi-asserted-by":"publisher","unstructured":"Yi, J., Tao, J.: Self-attention based model for punctuation prediction using word and speech embeddings. In: 2019 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP 2019), pp. 7270\u20137274 (2019). https:\/\/doi.org\/10.1109\/ICASSP.2019.8682260","DOI":"10.1109\/ICASSP.2019.8682260"},{"key":"17_CR21","doi-asserted-by":"publisher","unstructured":"Yi, J., Tao, J., Bai, Y., Tian, Z., Fan, C.: Adversarial transfer learning for punctuation restoration (2020). https:\/\/doi.org\/10.48550\/ARXIV.2004.00248","DOI":"10.48550\/ARXIV.2004.00248"},{"key":"17_CR22","unstructured":"Zhang, D., Wu, S., Yang, N., Li, M.: Punctuation prediction with transition-based parsing. In: Proceedings of the 51st Annual Meeting of the Association for Computational Linguistics, vol. 1, pp. 752\u2013760. Association for Computational Linguistics, Sofia, Bulgaria (2013). https:\/\/aclanthology.org\/P13-1074"},{"key":"17_CR23","doi-asserted-by":"publisher","unstructured":"Zhu, X., Gardiner, S., Rossouw, D., Rold\u00e1n, T., Corston-Oliver, S.: Punctuation restoration in Spanish customer support transcripts using transfer learning. In: Proceedings of the Third Workshop on Deep Learning for Low-Resource Natural Language Processing, pp. 80\u201389. Association for Computational Linguistics, Hybrid (2022). https:\/\/doi.org\/10.18653\/v1\/2022.deeplo-1.9","DOI":"10.18653\/v1\/2022.deeplo-1.9"}],"container-title":["Lecture Notes in Computer Science","Natural Language Processing and Information Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-35320-8_17","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,6,13]],"date-time":"2023-06-13T06:06:28Z","timestamp":1686636388000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-35320-8_17"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023]]},"ISBN":["9783031353192","9783031353208"],"references-count":23,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-35320-8_17","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2023]]},"assertion":[{"value":"14 June 2023","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"NLDB","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Applications of Natural Language to Information Systems","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Derby","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"United Kingdom","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2023","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"21 June 2023","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"23 June 2023","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"28","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"nldb2023","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/www.derby.ac.uk\/events\/latest-events\/nldb-2023\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Single-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Easy Chair","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"89","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"31","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"14","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"35% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"No","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}