{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,3]],"date-time":"2026-04-03T15:25:44Z","timestamp":1775229944616,"version":"3.50.1"},"publisher-location":"Cham","reference-count":31,"publisher":"Springer International Publishing","isbn-type":[{"value":"9783030983048","type":"print"},{"value":"9783030983055","type":"electronic"}],"license":[{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2022]]},"DOI":"10.1007\/978-3-030-98305-5_31","type":"book-chapter","created":{"date-parts":[[2022,3,17]],"date-time":"2022-03-17T08:05:37Z","timestamp":1647504337000},"page":"333-343","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":12,"title":["Brazilian Portuguese Speech Recognition Using Wav2vec 2.0"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-2099-5004","authenticated-orcid":false,"given":"Lucas Rafael","family":"Stefanel Gris","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0160-7173","authenticated-orcid":false,"given":"Edresson","family":"Casanova","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5885-6747","authenticated-orcid":false,"given":"Frederico Santos","family":"de Oliveira","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2967-6077","authenticated-orcid":false,"given":"Anderson","family":"da Silva Soares","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5647-0891","authenticated-orcid":false,"given":"Arnaldo","family":"Candido Junior","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2022,3,16]]},"reference":[{"key":"31_CR1","doi-asserted-by":"publisher","unstructured":"Aguiar de Lima, T., Da Costa-Abreu, M.: A survey on automatic speech recognition systems for Portuguese language and its variations. Comput. Speech Lang. 62, 101055 (2020). https:\/\/doi.org\/10.1016\/j.csl.2019.101055. https:\/\/www.sciencedirect.com\/science\/article\/pii\/S0885230819302992","DOI":"10.1016\/j.csl.2019.101055"},{"key":"31_CR2","doi-asserted-by":"crossref","unstructured":"Alencar, V., Alcaim, A.: LSF and LPC-derived features for large vocabulary distributed continuous speech recognition in Brazilian Portuguese. In: 2008 42nd Asilomar Conference on Signals, Systems and Computers, pp. 1237\u20131241. IEEE (2008)","DOI":"10.1109\/ACSSC.2008.5074614"},{"key":"31_CR3","unstructured":"Amodei, D., et al.: Deep speech 2: end-to-end speech recognition in English and mandarin. In: International Conference on Machine Learning, pp. 173\u2013182. PMLR (2016)"},{"key":"31_CR4","unstructured":"Ba, J.L., Kiros, J.R., Hinton, G.E.: Layer normalization. arXiv preprint arXiv:1607.06450 (2016)"},{"key":"31_CR5","unstructured":"Baevski, A., Auli, M.: Adaptive input representations for neural language modeling. In: International Conference on Learning Representations (2018)"},{"key":"31_CR6","unstructured":"Baevski, A., Schneider, S., Auli, M.: vq-wav2vec: self-supervised learning of discrete speech representations. In: International Conference on Learning Representations (ICLR) (2020). https:\/\/openreview.net\/pdf?id=rylwJxrYDS"},{"key":"31_CR7","unstructured":"Baevski, A., Zhou, Y., Mohamed, A., Auli, M.: wav2vec 2.0: a framework for self-supervised learning of speech representations. In: Larochelle, H., Ranzato, M., Hadsell, R., Balcan, M.F., Lin, H. (eds.) Advances in Neural Information Processing Systems, vol. 33, pp. 12449\u201312460. Curran Associates, Inc. (2020). https:\/\/proceedings.neurips.cc\/paper\/2020\/file\/92d1e1eb1cd6f9fba3227870bb6d7f07-Paper.pdf"},{"key":"31_CR8","doi-asserted-by":"publisher","unstructured":"Batista, C., Dias, A.L., Sampaio Neto, N.: Baseline acoustic models for Brazilian Portuguese using Kaldi tools. In: Proceedings of IberSPEECH 2018, pp. 77\u201381 (2018). https:\/\/doi.org\/10.21437\/IberSPEECH.2018-17","DOI":"10.21437\/IberSPEECH.2018-17"},{"key":"31_CR9","doi-asserted-by":"crossref","unstructured":"Conneau, A., et al.: Unsupervised cross-lingual representation learning at scale. In: Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics, pp. 8440\u20138451 (2020)","DOI":"10.18653\/v1\/2020.acl-main.747"},{"issue":"6","key":"31_CR10","doi-asserted-by":"publisher","first-page":"637","DOI":"10.1121\/1.1906946","volume":"24","author":"KH Davis","year":"1952","unstructured":"Davis, K.H., Biddulph, R., Balashek, S.: Automatic recognition of spoken digits. J. Acoust. Soc. Am. 24(6), 637\u2013642 (1952)","journal-title":"J. Acoust. Soc. Am."},{"key":"31_CR11","volume-title":"Deep Learning","author":"I Goodfellow","year":"2016","unstructured":"Goodfellow, I., Bengio, Y., Courville, A.: Deep Learning. MIT Press, Cambridge (2016)"},{"key":"31_CR12","doi-asserted-by":"crossref","unstructured":"Graves, A., Fern\u00e1ndez, S., Gomez, F., Schmidhuber, J.: Connectionist temporal classification: labelling unsegmented sequence data with recurrent neural networks. In: Proceedings of the 23rd International Conference on Machine Learning, pp. 369\u2013376 (2006)","DOI":"10.1145\/1143844.1143891"},{"key":"31_CR13","doi-asserted-by":"crossref","unstructured":"Gris, L.R.S., Casanova, E., de Oliveira, F.S., da Silva Soares, A., Candido-Junior, A.: Desenvolvimento de um modelo de reconhecimento de voz para o Portugu\u00eas Brasileiro com poucos dados utilizando o Wav2vec 2.0. In: Anais do XV Brazilian e-Science Workshop, pp. 129\u2013136. SBC (2021)","DOI":"10.5753\/bresci.2021.15798"},{"key":"31_CR14","unstructured":"Heafield, K.: KenLM: faster and smaller language model queries. In: Proceedings of the Sixth Workshop on Statistical Machine Translation, pp. 187\u2013197 (2011)"},{"key":"31_CR15","unstructured":"Hendrycks, D., Gimpel, K.: Gaussian error linear units (GELUs) (2020)"},{"issue":"8","key":"31_CR16","doi-asserted-by":"publisher","first-page":"1735","DOI":"10.1162\/neco.1997.9.8.1735","volume":"9","author":"S Hochreiter","year":"1997","unstructured":"Hochreiter, S., Schmidhuber, J.: Long short-term memory. Neural Comput. 9(8), 1735\u20131780 (1997)","journal-title":"Neural Comput."},{"issue":"4","key":"31_CR17","first-page":"393","volume":"9","author":"S Karpagavalli","year":"2016","unstructured":"Karpagavalli, S., Chandra, E.: A review on automatic speech recognition architecture and approaches. Int. J. Sig. Process. Image Process. Pattern Recogn. 9(4), 393\u2013404 (2016)","journal-title":"Int. J. Sig. Process. Image Process. Pattern Recogn."},{"key":"31_CR18","doi-asserted-by":"crossref","unstructured":"Ko, T., Peddinti, V., Povey, D., Seltzer, M.L., Khudanpur, S.: A study on data augmentation of reverberant speech for robust speech recognition. In: 2017 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 5220\u20135224. IEEE (2017)","DOI":"10.1109\/ICASSP.2017.7953152"},{"key":"31_CR19","doi-asserted-by":"crossref","unstructured":"Li, J.: Recent advances in end-to-end automatic speech recognition. arXiv preprint arXiv:2111.01690 (2021)","DOI":"10.1561\/116.00000050"},{"issue":"1","key":"31_CR20","doi-asserted-by":"publisher","first-page":"53","DOI":"10.1007\/s13173-010-0023-1","volume":"17","author":"N Neto","year":"2011","unstructured":"Neto, N., Patrick, C., Klautau, A., Trancoso, I.: Free tools and resources for Brazilian Portuguese speech recognition. J. Braz. Comput. Soc. 17(1), 53\u201368 (2011)","journal-title":"J. Braz. Comput. Soc."},{"key":"31_CR21","series-title":"Lecture Notes in Computer Science (Lecture Notes in Artificial Intelligence)","doi-asserted-by":"publisher","first-page":"256","DOI":"10.1007\/978-3-540-85980-2_33","volume-title":"Computational Processing of the Portuguese Language","author":"N Neto","year":"2008","unstructured":"Neto, N., Silva, P., Klautau, A., Adami, A.: Spoltech and OGI-22 baseline systems for speech recognition in Brazilian Portuguese. In: Teixeira, A., de Lima, V.L.S., de Oliveira, L.C., Quaresma, P. (eds.) PROPOR 2008. LNCS (LNAI), vol. 5190, pp. 256\u2013259. Springer, Heidelberg (2008). https:\/\/doi.org\/10.1007\/978-3-540-85980-2_33"},{"key":"31_CR22","doi-asserted-by":"publisher","unstructured":"Park, D.S., et al.: SpecAugment: a simple data augmentation method for automatic speech recognition. In: INTERSPEECH 2019, September 2019. https:\/\/doi.org\/10.21437\/Interspeech.2019-2680","DOI":"10.21437\/Interspeech.2019-2680"},{"key":"31_CR23","unstructured":"Povey, D., et al.: The Kaldi speech recognition toolkit. In: IEEE 2011 Workshop on Automatic Speech Recognition and Understanding, No. CONF. IEEE Signal Processing Society (2011)"},{"key":"31_CR24","doi-asserted-by":"publisher","unstructured":"Pratap, V., Xu, Q., Sriram, A., Synnaeve, G., Collobert, R.: MLS: a large-scale multilingual dataset for speech research. In: INTERSPEECH 2020, October 2020. https:\/\/doi.org\/10.21437\/Interspeech.2020-2826","DOI":"10.21437\/Interspeech.2020-2826"},{"key":"31_CR25","unstructured":"Quintanilha, I.M.: End-to-end speech recognition applied to Brazilian Portuguese using deep learning. MSc dissertation (2017)"},{"issue":"1","key":"31_CR26","first-page":"230","volume":"35","author":"IM Quintanilha","year":"2020","unstructured":"Quintanilha, I.M., Netto, S.L., Biscainho, L.W.P.: An open-source end-to-end ASR system for Brazilian Portuguese using DNNs built from newly assembled corpora. J. Commun. Inf. Syst. 35(1), 230\u2013242 (2020)","journal-title":"J. Commun. Inf. Syst."},{"key":"31_CR27","doi-asserted-by":"crossref","unstructured":"Schneider, S., Baevski, A., Collobert, R., Auli, M.: wav2vec: unsupervised pre-training for speech recognition. In: INTERSPEECH (2019)","DOI":"10.21437\/Interspeech.2019-1873"},{"key":"31_CR28","unstructured":"Schramm, M., Freitas, L., Zanuz, A., Barone, D.: CSLU: Spoltech Brazilian Portuguese version 1.0 ldc2006s16 (2006)"},{"key":"31_CR29","doi-asserted-by":"crossref","unstructured":"Snyder, D., Garcia-Romero, D., Sell, G., Povey, D., Khudanpur, S.: X-vectors: robust DNN embeddings for speaker recognition. In: 2018 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 5329\u20135333. IEEE (2018)","DOI":"10.1109\/ICASSP.2018.8461375"},{"key":"31_CR30","unstructured":"Vaswani, A., et al.: Attention is all you need. In: Neural Information Processing Systems (NIPS) (2017)"},{"key":"31_CR31","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4471-5779-3","volume-title":"Automatic Speech Recognition","author":"D Yu","year":"2015","unstructured":"Yu, D., Deng, L.: Automatic Speech Recognition. Springer, London (2015). https:\/\/doi.org\/10.1007\/978-1-4471-5779-3"}],"container-title":["Lecture Notes in Computer Science","Computational Processing of the Portuguese Language"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-98305-5_31","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,1,29]],"date-time":"2023-01-29T10:11:10Z","timestamp":1674987070000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-030-98305-5_31"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022]]},"ISBN":["9783030983048","9783030983055"],"references-count":31,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-98305-5_31","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2022]]},"assertion":[{"value":"16 March 2022","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"PROPOR","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Computational Processing of the Portuguese Language","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Fortaleza","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Brazil","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2022","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"21 March 2022","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"23 March 2022","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"15","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"propor2022","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/sites.universidadedefortaleza.com\/propor2022\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Easychair","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"88","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"36","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"4","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"41% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3.16","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"2.87","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"No","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"A Scientific Review Committee of 97 researchers reviewed all papers. Conference was held online due to COVID-19.","order":10,"name":"additional_info_on_review_process","label":"Additional Info on Review Process","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}