{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,3]],"date-time":"2026-04-03T15:27:24Z","timestamp":1775230044041,"version":"3.50.1"},"publisher-location":"Cham","reference-count":25,"publisher":"Springer International Publishing","isbn-type":[{"value":"9783030835262","type":"print"},{"value":"9783030835279","type":"electronic"}],"license":[{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021]]},"DOI":"10.1007\/978-3-030-83527-9_7","type":"book-chapter","created":{"date-parts":[[2021,8,29]],"date-time":"2021-08-29T23:04:59Z","timestamp":1630278299000},"page":"86-94","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":9,"title":["Transformer-Based Automatic Punctuation Prediction and Word Casing Reconstruction of the ASR Output"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-8362-5927","authenticated-orcid":false,"given":"Jan","family":"\u0160vec","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3889-8069","authenticated-orcid":false,"given":"Jan","family":"Lehe\u010dka","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8169-2410","authenticated-orcid":false,"given":"Lubo\u0161","family":"\u0160m\u00eddl","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6967-1687","authenticated-orcid":false,"given":"Pavel","family":"Ircing","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2021,8,30]]},"reference":[{"key":"7_CR1","doi-asserted-by":"crossref","unstructured":"Batista, F., Caseiro, D., Mamede, N.J., Trancoso, I.: Recovering punctuation marks for automatic speech recognition. In: Proceedings of Interspeech, vol. 2007, pp. 2153\u20132156 (2007)","DOI":"10.21437\/Interspeech.2007-581"},{"key":"7_CR2","unstructured":"Devlin, J., Chang, M.W., Lee, K., Toutanova, K.: BERT: pre-training of deep bidirectional transformers for language understanding. In: Proceedings of the 2019 Conference of the NAACL: HLT, vol. 1, pp. 4171\u20134186. ACL, Minneapolis, Minnesota (2019)"},{"key":"7_CR3","doi-asserted-by":"publisher","unstructured":"\u017belasko, P., Szyma\u0144ski, P., Mizgajski, J., Szymczak, A., Carmiel, Y., Dehak, N.: Punctuation prediction model for conversational speech. In: Proceedings of Interspeech 2018, pp. 2633\u20132637 (2018). https:\/\/doi.org\/10.21437\/Interspeech","DOI":"10.21437\/Interspeech"},{"key":"7_CR4","series-title":"Lecture Notes in Computer Science (Lecture Notes in Artificial Intelligence)","doi-asserted-by":"publisher","first-page":"249","DOI":"10.1007\/978-3-642-40585-3_32","volume-title":"Text, Speech, and Dialogue","author":"Z Hanzl\u00ed\u010dek","year":"2013","unstructured":"Hanzl\u00ed\u010dek, Z., Matou\u0161ek, J., Tihelka, D.: Experiments on reducing footprint of unit selection TTS system. In: Habernal, I., Matou\u0161ek, V. (eds.) TSD 2013. LNCS (LNAI), vol. 8082, pp. 249\u2013256. Springer, Heidelberg (2013). https:\/\/doi.org\/10.1007\/978-3-642-40585-3_32"},{"key":"7_CR5","series-title":"Lecture Notes in Computer Science (Lecture Notes in Artificial Intelligence)","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58323-1","volume-title":"Text, Speech, and Dialogue","year":"2020","unstructured":"Sojka, P., Kope\u010dek, I., Pala, K., Hor\u00e1k, A. (eds.): TSD 2020. LNCS (LNAI), vol. 12284. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58323-1"},{"key":"7_CR6","unstructured":"Houlsby, N., et al.: Parameter-efficient transfer learning for NLP. In: Proceedings of the 36th International Conference on Machine Learning, vol. 97, pp. 2790\u20132799. PMLR, Long Beach, California, USA (2019)"},{"key":"7_CR7","doi-asserted-by":"crossref","unstructured":"Kala, J., Matou\u0161ek, J.: Very fast unit selection using viterbi search with zero-concatenation-cost chains. In: Proceedings of IEEE ICASSP, pp. 2569\u20132573 (2014)","DOI":"10.1109\/ICASSP.2014.6854064"},{"key":"7_CR8","doi-asserted-by":"crossref","unstructured":"Kol\u00e1r, J., Lamel, L.: Development and evaluation of automatic punctuation for French and English speech-to-text. In: Proceedings of Interspeech, vol. 2012, pp. 1376\u20131379 (2012)","DOI":"10.21437\/Interspeech.2012-396"},{"key":"7_CR9","doi-asserted-by":"crossref","unstructured":"Kudo, T., Richardson, J.: Sentencepiece: a simple and language independent subword tokenizer and detokenizer for neural text processing. arXiv preprint arXiv:1808.06226 (2018)","DOI":"10.18653\/v1\/D18-2012"},{"key":"7_CR10","series-title":"Lecture Notes in Computer Science (Lecture Notes in Artificial Intelligence)","doi-asserted-by":"publisher","first-page":"307","DOI":"10.1007\/978-3-319-24033-6_35","volume-title":"Text, Speech, and Dialogue","author":"J Lehe\u010dka","year":"2015","unstructured":"Lehe\u010dka, J., \u0160vec, J.: Improving multi-label document classification of Czech news articles. In: Kr\u00e1l, P., Matou\u0161ek, V. (eds.) TSD 2015. LNCS (LNAI), vol. 9302, pp. 307\u2013315. Springer, Cham (2015). https:\/\/doi.org\/10.1007\/978-3-319-24033-6_35"},{"key":"7_CR11","series-title":"Lecture Notes in Computer Science (Lecture Notes in Artificial Intelligence)","doi-asserted-by":"publisher","first-page":"214","DOI":"10.1007\/978-3-030-58323-1_23","volume-title":"Text, Speech, and Dialogue","author":"J Lehe\u010dka","year":"2020","unstructured":"Lehe\u010dka, J., \u0160vec, J., Ircing, P., \u0160m\u00eddl, L.: Adjusting BERT\u2019s pooling layer for large-scale multi-label text classification. In: Sojka, P., Kope\u010dek, I., Pala, K., Hor\u00e1k, A. (eds.) TSD 2020. LNCS (LNAI), vol. 12284, pp. 214\u2013221. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58323-1_23"},{"key":"7_CR12","doi-asserted-by":"crossref","unstructured":"Makhija, K., Ho, T.N., Chng, E.S.: Transfer learning for punctuation prediction. In: Asia-Pacific Signal and Information Processing Association Annual Conference, pp. 268\u2013273. IEEE (2019)","DOI":"10.1109\/APSIPAASC47483.2019.9023200"},{"key":"7_CR13","unstructured":"Makhoul, J., Kubala, F., Schwartz, R., Weischedel, R.: Performance measures for information extraction. In: Proceedings of DARPA Broadcast News Workshop (08 2000)"},{"key":"7_CR14","doi-asserted-by":"crossref","unstructured":"Matou\u0161ek, J., Tihelka, D.: Annotation errors detection in TTS corpora. In: INTERSPEECH, Lyon, France, pp. 1511\u20131515 (2013)","DOI":"10.21437\/Interspeech.2013-305"},{"key":"7_CR15","doi-asserted-by":"crossref","unstructured":"Panayotov, V., Chen, G., Povey, D., Khudanpur, S.: Librispeech: an ASR corpus based on public domain audio books. In: Proceedings of IEEE ICASSP, pp. 5206\u20135210 (2015)","DOI":"10.1109\/ICASSP.2015.7178964"},{"issue":"140","key":"7_CR16","first-page":"1","volume":"21","author":"C Raffel","year":"2020","unstructured":"Raffel, C., et al.: Exploring the limits of transfer learning with a unified text-to-text transformer. J. Mach. Learn. Res. 21(140), 1\u201367 (2020)","journal-title":"J. Mach. Learn. Res."},{"key":"7_CR17","series-title":"Lecture Notes in Computer Science (Lecture Notes in Artificial Intelligence)","doi-asserted-by":"publisher","first-page":"356","DOI":"10.1007\/978-3-642-23538-2_45","volume-title":"Text, Speech and Dialogue","author":"J \u0160vec","year":"2011","unstructured":"\u0160vec, J., Hoidekr, J., Soutner, D., Vavru\u0161ka, J.: Web Text data mining for building large scale language modelling corpus. In: Habernal, I., Matou\u0161ek, V. (eds.) TSD 2011. LNCS (LNAI), vol. 6836, pp. 356\u2013363. Springer, Heidelberg (2011). https:\/\/doi.org\/10.1007\/978-3-642-23538-2_45"},{"issue":"2","key":"7_CR18","doi-asserted-by":"publisher","first-page":"227","DOI":"10.1007\/s10579-013-9246-z","volume":"48","author":"J \u0160vec","year":"2013","unstructured":"\u0160vec, J., et al.: General framework for mining, processing and storing large amounts of electronic texts for language modeling purposes. Lang. Resour. Eval. 48(2), 227\u2013248 (2013). https:\/\/doi.org\/10.1007\/s10579-013-9246-z","journal-title":"Lang. Resour. Eval."},{"key":"7_CR19","series-title":"Lecture Notes in Computer Science (Lecture Notes in Artificial Intelligence)","doi-asserted-by":"publisher","first-page":"321","DOI":"10.1007\/978-3-030-58323-1_35","volume-title":"Text, Speech, and Dialogue","author":"J \u0160vec","year":"2020","unstructured":"\u0160vec, J., Lehe\u010dka, J., \u0160m\u00eddl, L., Ircing, P.: Automatic correction of i\/y spelling in Czech ASR output. In: Sojka, P., Kope\u010dek, I., Pala, K., Hor\u00e1k, A. (eds.) TSD 2020. LNCS (LNAI), vol. 12284, pp. 321\u2013330. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58323-1_35"},{"key":"7_CR20","doi-asserted-by":"publisher","unstructured":"Szasz\u00e1k, G., \u00c1kos T\u00fcndik, M.: Leveraging a character, word and prosody triplet for an asr error robust and agglutination friendly punctuation approach. In: Proceedings of Interspeech 2019, pp. 2988\u20132992 (2019). https:\/\/doi.org\/10.21437\/Interspeech. 2019\u20132132","DOI":"10.21437\/Interspeech"},{"key":"7_CR21","doi-asserted-by":"crossref","unstructured":"Tilk, O., Alum\u00e4e, T.: LSTM for punctuation restoration in speech transcripts. In: Proceedings of Interspeech, vol. 2015, pp. 683\u2013687 (2015)","DOI":"10.21437\/Interspeech.2015-240"},{"key":"7_CR22","doi-asserted-by":"publisher","unstructured":"Tilk, O., Alum\u00e4e, T.: Bidirectional recurrent neural network with attention mechanism for punctuation restoration. In: Proceedings of Interspeech 2016, pp. 3047\u20133051 (2016). https:\/\/doi.org\/10.21437\/Interspeech. 2016\u20131517","DOI":"10.21437\/Interspeech"},{"key":"7_CR23","doi-asserted-by":"crossref","unstructured":"Ueffing, N., Bisani, M., Vozila, P.: Improved models for automatic punctuation prediction for spoken and written text. In: Proceedings of Interspeech, vol. 2013, pp. 3097\u20133101 (2013)","DOI":"10.21437\/Interspeech.2013-675"},{"key":"7_CR24","unstructured":"Vaswani, A., et al.: Attention is all you need. In: Advances in Neural Information Processing Systems 2017-Decem(Nips), pp. 5999\u20136009 (2017)"},{"key":"7_CR25","unstructured":"\u0160vec, J., Bul\u00edn, M., Pra\u017e\u00e1k, A., Ircing, P.: UWebASR - Web-based ASR engine for Czech and Slovak. In: CLARIN Annual Conference 2018 Proceedings (2018)"}],"container-title":["Lecture Notes in Computer Science","Text, Speech, and Dialogue"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-83527-9_7","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2021,8,29]],"date-time":"2021-08-29T23:08:45Z","timestamp":1630278525000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-030-83527-9_7"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021]]},"ISBN":["9783030835262","9783030835279"],"references-count":25,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-83527-9_7","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2021]]},"assertion":[{"value":"30 August 2021","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"TSD","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Text, Speech, and Dialogue","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Olomouc","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Czech Republic","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2021","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"6 September 2021","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"9 September 2021","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"24","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"tsd2021","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/www.kiv.zcu.cz\/tsd2021\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"TSDEngine 3.2","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"101","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"29","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"17","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"29% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"2,93","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}