{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,9]],"date-time":"2025-12-09T08:27:23Z","timestamp":1765268843511,"version":"3.40.3"},"publisher-location":"Cham","reference-count":20,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783031162695"},{"type":"electronic","value":"9783031162701"}],"license":[{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2022]]},"DOI":"10.1007\/978-3-031-16270-1_25","type":"book-chapter","created":{"date-parts":[[2022,9,15]],"date-time":"2022-09-15T09:30:11Z","timestamp":1663234211000},"page":"301-312","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["Transformer-Based Automatic Speech Recognition of\u00a0Formal and\u00a0Colloquial Czech in\u00a0MALACH Project"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-3889-8069","authenticated-orcid":false,"given":"Jan","family":"Lehe\u010dka","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4761-1645","authenticated-orcid":false,"given":"Josef V.","family":"Psutka","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0764-3207","authenticated-orcid":false,"given":"Josef","family":"Psutka","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2022,9,16]]},"reference":[{"key":"25_CR1","doi-asserted-by":"crossref","unstructured":"Babu, A., et al.: XLS-R: self-supervised cross-lingual speech representation learning at scale. arXiv preprint arXiv:2111.09296 (2021)","DOI":"10.21437\/Interspeech.2022-143"},{"key":"25_CR2","doi-asserted-by":"crossref","unstructured":"Baevski, A., Mohamed, A.: Effectiveness of self-supervised pre-training for ASR. In: ICASSP 2020\u20132020 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 7694\u20137698 (2020)","DOI":"10.1109\/ICASSP40776.2020.9054224"},{"key":"25_CR3","unstructured":"Baevski, A., Zhou, Y., Mohamed, A., Auli, M.: Wav2Vec 2.0: a framework for self-supervised learning of speech representations. Adv. Neural Inf. Process. Syst. 33, 12449\u201312460 (2020)"},{"key":"25_CR4","doi-asserted-by":"publisher","unstructured":"Byrne, W., et al.: Automatic recognition of spontaneous speech for access to multilingual oral history archives. IEEE Trans. Speech Audio Process. 12(4), 420\u2013435 (2004). https:\/\/doi.org\/10.1109\/TSA.2004.828702","DOI":"10.1109\/TSA.2004.828702"},{"key":"25_CR5","doi-asserted-by":"crossref","unstructured":"Chen, S., et al.: WavLM: large-scale self-supervised pre-training for full stack speech processing. arXiv preprint arXiv:2110.13900 (2021)","DOI":"10.1109\/JSTSP.2022.3188113"},{"key":"25_CR6","doi-asserted-by":"publisher","unstructured":"Conneau, A., Baevski, A., Collobert, R., Mohamed, A., Auli, M.: Unsupervised cross-lingual representation learning for speech recognition. In: Hermansky, H., Cernock\u00fd, H., Burget, L., Lamel, L., Scharenborg, O., Motl\u00edcek, P. (eds.) Interspeech 2021, 22nd Annual Conference of the International Speech Communication Association, Brno, Czechia, 30 August\u20133 September 2021, pp. 2426\u20132430. ISCA (2021). https:\/\/doi.org\/10.21437\/Interspeech. 2021\u2013329. https:\/\/doi.org\/10.21437\/Interspeech.2021-329","DOI":"10.21437\/Interspeech"},{"key":"25_CR7","unstructured":"Cummins, G.M.: Literary czech, common czech, and the instrumental plural. J. Slavic Linguist. 13(2), 271\u2013297 (2005), https:\/\/www.jstor.org\/stable\/24599659"},{"key":"25_CR8","unstructured":"Devlin, J., Chang, M.W., Lee, K., Toutanova, K.: BERT: pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805 (2018)"},{"key":"25_CR9","doi-asserted-by":"crossref","unstructured":"Graves, A., Fern\u00e1ndez, S., Gomez, F., Schmidhuber, J.: Connectionist temporal classification: labelling unsegmented sequence data with recurrent neural networks. In: Proceedings of the 23rd International Conference on Machine Learning, pp. 369\u2013376 (2006)","DOI":"10.1145\/1143844.1143891"},{"key":"25_CR10","unstructured":"Heafield, K.: KenLM: faster and smaller language model queries. In: Proceedings of the Sixth Workshop on Statistical Machine Translation, pp. 187\u2013197. Association for Computational Linguistics, Edinburgh, Scotland, July 2011. https:\/\/aclanthology.org\/W11-2123"},{"key":"25_CR11","doi-asserted-by":"publisher","first-page":"3451","DOI":"10.1109\/TASLP.2021.3122291","volume":"29","author":"WN Hsu","year":"2021","unstructured":"Hsu, W.N., Bolte, B., Tsai, Y.H.H., Lakhotia, K., Salakhutdinov, R., Mohamed, A.: Hubert: self-supervised speech representation learning by masked prediction of hidden units. IEEE\/ACM Trans. Audio Speech Lang. Process. 29, 3451\u20133460 (2021)","journal-title":"IEEE\/ACM Trans. Audio Speech Lang. Process."},{"key":"25_CR12","doi-asserted-by":"crossref","unstructured":"Liu, A.T., Li, S.W., Lee, H.Y.: TERA: self-supervised learning of transformer encoder representation for speech. IEEE\/ACM Trans. Audio Speech Lang. Process. 29, 2351\u20132366 (2021)","DOI":"10.1109\/TASLP.2021.3095662"},{"key":"25_CR13","unstructured":"Psutka, J., et al.: Issues in annotation of the Czech spontaneous speech corpus in the MALACH project. In: Proceedings of the Fourth International Conference on Language Resources and Evaluation (LREC 2004), pp. 607\u2013610. European Language Resources Association, Lisbon (2004)"},{"key":"25_CR14","doi-asserted-by":"crossref","unstructured":"Psutka, J., Ircing, P., Psutka, J.V., Haji\u010d, J., Byrne, W., M\u00edrovsk\u00fd, J.: Automatic transcription of Czech, Russian and Slovak spontaneous speech in the MALACH project. In: Eurospeech 2005, pp. 1349\u20131352. ISCA (2005)","DOI":"10.21437\/Interspeech.2005-489"},{"key":"25_CR15","unstructured":"Psutka, J., Radov\u00e1, V., Ircing, P., Matou\u0161ek, J., M\u00fcller, L.: USC-SFI MALACH Interviews and Transcripts Czech LDC2014S04 (2014). https:\/\/catalog.ldc.upenn.edu\/LDC2014S04"},{"key":"25_CR16","doi-asserted-by":"publisher","unstructured":"Psutka, J.V., Pra\u017e\u00e1k, A., Van\u011bk, J.: Recognition of heavily accented and emotional speech of English and Czech Holocaust survivors using various DNN architectures. In: Karpov, A., Potapova, R. (eds.) Speech and Computer, pp. 553\u2013564. Springer International Publishing, Cham (2021). https:\/\/doi.org\/10.1007\/978-3-030-87802-3_50","DOI":"10.1007\/978-3-030-87802-3_50"},{"key":"25_CR17","unstructured":"Tahal, K.: A Grammar of Czech as a foreign language. FACTUM CZ, s.r.o. (2010)"},{"key":"25_CR18","unstructured":"Vaswani, A., et al.: Attention is all you need. In: Proceedings of the 31st International Conference on Neural Information Processing Systems, pp. 6000\u20136010. NIPS 2017. Curran Associates Inc., Red Hook, NY, USA (2017)"},{"key":"25_CR19","doi-asserted-by":"crossref","unstructured":"Wang, C., et al.: VoxPopuli: a large-scale multilingual speech corpus for representation learning, semi-supervised learning and interpretation. In: Proceedings of the 59th Annual Meeting of the Association for Computational Linguistics and the 11th International Joint Conference on Natural Language Processing, vol. 1: Long Papers, pp. 993\u20131003. Association for Computational Linguistics, Online, August 2021. https:\/\/aclanthology.org\/2021.acl-long.80","DOI":"10.18653\/v1\/2021.acl-long.80"},{"key":"25_CR20","unstructured":"Wolf, T., et al.: Transformers: state-of-the-art natural language processing. In: Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing: System Demonstrations, pp. 38\u201345. Association for Computational Linguistics, Online, October 2020. https:\/\/www.aclweb.org\/anthology\/2020.emnlp-demos.6"}],"container-title":["Lecture Notes in Computer Science","Text, Speech, and Dialogue"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-16270-1_25","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,2,19]],"date-time":"2023-02-19T05:26:25Z","timestamp":1676784385000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-16270-1_25"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022]]},"ISBN":["9783031162695","9783031162701"],"references-count":20,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-16270-1_25","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2022]]},"assertion":[{"value":"16 September 2022","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"TSD","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Text, Speech, and Dialogue","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Brno","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Czech Republic","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2022","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"5 September 2022","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"9 September 2022","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"25","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"tsd2022","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/www.tsdconference.org\/tsd2022\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}