{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,27]],"date-time":"2025-03-27T04:38:39Z","timestamp":1743050319153,"version":"3.40.3"},"publisher-location":"Cham","reference-count":26,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783030835262"},{"type":"electronic","value":"9783030835279"}],"license":[{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021]]},"DOI":"10.1007\/978-3-030-83527-9_45","type":"book-chapter","created":{"date-parts":[[2021,8,29]],"date-time":"2021-08-29T23:04:59Z","timestamp":1630278299000},"page":"523-533","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["CNN-TDNN-Based Architecture for Speech Recognition Using Grapheme Models in Bilingual Czech-Slovak Task"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-4761-1645","authenticated-orcid":false,"given":"Josef V.","family":"Psutka","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8362-5927","authenticated-orcid":false,"given":"Jan","family":"\u0160vec","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9453-0034","authenticated-orcid":false,"given":"Ale\u0161","family":"Pra\u017e\u00e1k","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2021,8,30]]},"reference":[{"issue":"10","key":"45_CR1","doi-asserted-by":"publisher","first-page":"1533","DOI":"10.1109\/TASLP.2014.2339736","volume":"22","author":"O Abdel-Hamid","year":"2014","unstructured":"Abdel-Hamid, O., Mohamed, A., Jiang, H., Deng, L., Penn, G., Yu, D.: Convolutional neural networks for speech recognition. IEEE\/ACM Trans. Audio Speech Lang. Process. 22(10), 1533\u20131545 (2014). https:\/\/doi.org\/10.1109\/TASLP.2014.2339736","journal-title":"IEEE\/ACM Trans. Audio Speech Lang. Process."},{"key":"45_CR2","doi-asserted-by":"publisher","unstructured":"Bahl, L., Brown, P., de Souza, P., Mercer, R.: Maximum mutual information estimation of hidden Markov model parameters for speech recognition. In: ICASSP 1986, pp. 49\u201352 (1986). https:\/\/doi.org\/10.1109\/ICASSP.1986.1169179","DOI":"10.1109\/ICASSP.1986.1169179"},{"issue":"4","key":"45_CR3","doi-asserted-by":"publisher","first-page":"420","DOI":"10.1109\/TSA.2004.828702","volume":"12","author":"W Byrne","year":"2004","unstructured":"Byrne, W., et al.: Automatic recognition of spontaneous speech for access to multilingual oral history archives. IEEE Trans. Speech Audio Process. 12(4), 420\u2013435 (2004). https:\/\/doi.org\/10.1109\/TSA.2004.828702","journal-title":"IEEE Trans. Speech Audio Process."},{"key":"45_CR4","unstructured":"Czech SAMPA. https:\/\/www.phon.ucl.ac.uk\/home\/sampa\/czech-uni.htm"},{"key":"45_CR5","doi-asserted-by":"crossref","unstructured":"Kanthak, S., Ney, H.: Multilingual acoustic modeling using graphemes. In: Eurospeech 2003, pp. 1145\u20131148 (2003)","DOI":"10.21437\/Eurospeech.2003-373"},{"key":"45_CR6","doi-asserted-by":"crossref","unstructured":"Killer, M., St\u00fcker, S., Schultz, T.: Grapheme based speech recognition. In: Eurospeech 2003, pp. 3141\u20133144 (2003)","DOI":"10.21437\/Eurospeech.2003-785"},{"key":"45_CR7","doi-asserted-by":"crossref","unstructured":"Lihan, S., Juh\u00e1r, J., \u010ci\u017em\u00e1r, A.: Comparison of Slovak and Czech speech recognition based on grapheme and phoneme acoustic models. In: Interspeech 2006, pp. 149\u2013152 (2006)","DOI":"10.21437\/Interspeech.2006-38"},{"key":"45_CR8","unstructured":"MALACH project (2006). https:\/\/malach.umiacs.umd.edu\/"},{"key":"45_CR9","series-title":"Lecture Notes in Computer Science (Lecture Notes in Artificial Intelligence)","doi-asserted-by":"publisher","first-page":"242","DOI":"10.1007\/978-3-642-00525-1_24","volume-title":"Multimodal Signals: Cognitive and Algorithmic Issues","author":"M Mirilovi\u010d","year":"2009","unstructured":"Mirilovi\u010d, M., Juh\u00e1r, J., \u010ci\u017em\u00e1r, A.: Comparison of grapheme and phoneme based acoustic modeling in LVCSR task in Slovak. In: Esposito, A., Hussain, A., Marinaro, M., Martone, R. (eds.) Multimodal Signals: Cognitive and Algorithmic Issues. LNCS (LNAI), vol. 5398, pp. 242\u2013247. Springer, Heidelberg (2009). https:\/\/doi.org\/10.1007\/978-3-642-00525-1_24"},{"key":"45_CR10","doi-asserted-by":"crossref","unstructured":"Nouza, J., Silovsk\u00fd, J., Zd\u00e1nsk\u00fd, J., Cerva, P., Kroul, M., Chaloupka, J.: Czech-to-Slovak adapted broadcast news transcription system. In: Interspeech 2008, pp. 2683\u20132686. ISCA (2008)","DOI":"10.21437\/Interspeech.2008-665"},{"key":"45_CR11","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"225","DOI":"10.1007\/978-3-642-12397-9_19","volume-title":"Development of Multimodal Interfaces: Active Listening and Synchrony","author":"J Nouza","year":"2010","unstructured":"Nouza, J., Zdansky, J., Cerva, P., Silovsky, J.: Challenges in speech processing of Slavic languages (case studies in speech recognition of Czech and Slovak). In: Esposito, A., Campbell, N., Vogel, C., Hussain, A., Nijholt, A. (eds.) Development of Multimodal Interfaces: Active Listening and Synchrony. LNCS, vol. 5967, pp. 225\u2013241. Springer, Heidelberg (2010). https:\/\/doi.org\/10.1007\/978-3-642-12397-9_19"},{"issue":"6","key":"45_CR12","doi-asserted-by":"publisher","first-page":"907","DOI":"10.1017\/S1351324915000315","volume":"22","author":"JR Novak","year":"2016","unstructured":"Novak, J.R., Nobuaki, M., Keikichi, H.: Phonetisaurus: exploring grapheme-to-phoneme conversion with joint n-gram models in the WFST framework. Nat. Lang. Eng. 22(6), 907\u2013938 (2016). https:\/\/doi.org\/10.1017\/S1351324915000315","journal-title":"Nat. Lang. Eng."},{"key":"45_CR13","doi-asserted-by":"crossref","unstructured":"Peddinti, V., Povey, D., Khudanpur, S.: A time delay neural network architecture for efficient modeling of long temporal contexts. In: Interspeech 2015, pp. 3214\u20133218 (2015)","DOI":"10.21437\/Interspeech.2015-647"},{"key":"45_CR14","doi-asserted-by":"publisher","unstructured":"Picheny, M., T\u00fcske, Z., Kingsbury, B., Audhkhasi, K., Cui, X., Saon, G.: Challenging the boundaries of speech recognition: the MALACH corpus. In: Interspeech 2019, pp. 326\u2013330 (2019). https:\/\/doi.org\/10.21437\/Interspeech.2019-1907","DOI":"10.21437\/Interspeech.2019-1907"},{"key":"45_CR15","doi-asserted-by":"publisher","unstructured":"Povey, D., et al.: Semi-orthogonal low-rank matrix factorization for deep neural networks. In: Interspeech 2018, pp. 3743\u20133747 (2018). https:\/\/doi.org\/10.21437\/Interspeech.2018-1417","DOI":"10.21437\/Interspeech.2018-1417"},{"key":"45_CR16","unstructured":"Povey, D., et al.: The Kaldi speech recognition toolkit. In: IEEE 2011 Workshop on Automatic Speech Recognition and Understanding (2011)"},{"key":"45_CR17","doi-asserted-by":"publisher","unstructured":"Povey, D., et al.: Purely sequence-trained neural networks for ASR based on lattice-free MMI. In: Interspeech 2016, pp. 2751\u20132755 (2016). https:\/\/doi.org\/10.21437\/Interspeech.2016-595","DOI":"10.21437\/Interspeech.2016-595"},{"key":"45_CR18","doi-asserted-by":"publisher","unstructured":"Psutka, J., Haji\u010d, J., Byrne, W.: The development of ASR for Slavic languages in the MALACH project. In: ICASSP 2004, pp. iii\u2013749 (2004). https:\/\/doi.org\/10.1109\/ICASSP.2004.1326653","DOI":"10.1109\/ICASSP.2004.1326653"},{"key":"45_CR19","unstructured":"Psutka, J., Hoidekr, J., Ircing, P., Psutka, J.V.: Recognition of spontaneous speech - some problems and their solutions. In: CITSA 2006, pp. 169\u2013172. IIIS (2006)"},{"key":"45_CR20","doi-asserted-by":"crossref","unstructured":"Psutka, J., Ircing, P., Psutka, J.V., Haji\u010d, J., Byrne, W., M\u00edrovsk\u00fd, J.: Automatic transcription of Czech, Russian and Slovak spontaneous speech in the MALACH project. In: Eurospeech 2005, pp. 1349\u20131352. ISCA (2005)","DOI":"10.21437\/Interspeech.2005-489"},{"key":"45_CR21","unstructured":"Psutka, J.V., Psutka, J., Radov\u00e1, V., Ircing, P., Matou\u0161ek, J., M\u00fcller, L.: USC-SFI MALACH interviews and transcripts Czech (2014). https:\/\/catalog.ldc.upenn.edu\/LDC2014S04"},{"key":"45_CR22","unstructured":"Slovak SAMPA. http:\/\/www.ui.sav.sk\/pp\/speech\/sampa_sk.htm"},{"key":"45_CR23","doi-asserted-by":"publisher","unstructured":"\u0160vec, J., Psutka, J., Trmal, J., \u0160m\u00eddl, L., Ircing, P., Sedmidubsk\u00fd, J.: On the use of grapheme models for searching in large spoken archives. In: ICASSP 2018, pp. 6259\u20136263 (2018). https:\/\/doi.org\/10.1109\/ICASSP.2018.8461774","DOI":"10.1109\/ICASSP.2018.8461774"},{"key":"45_CR24","doi-asserted-by":"publisher","unstructured":"Trmal, J., et al.: The Kaldi OpenKWS system: improving low resource keyword search. In: Interspeech 2017, pp. 3597\u20133601 (2017). https:\/\/doi.org\/10.21437\/Interspeech.2017-601","DOI":"10.21437\/Interspeech.2017-601"},{"issue":"6","key":"45_CR25","doi-asserted-by":"publisher","first-page":"1818","DOI":"10.1109\/TASL.2012.2190928","volume":"20","author":"J Van\u011bk","year":"2012","unstructured":"Van\u011bk, J., Trmal, J., Psutka, J.V., Psutka, J.: Optimized acoustic likelihoods computation for NVIDIA and ATI\/AMD graphics processors. IEEE Trans. Audio Speech Lang. Process. 20(6), 1818\u20131828 (2012). https:\/\/doi.org\/10.1109\/TASL.2012.2190928","journal-title":"IEEE Trans. Audio Speech Lang. Process."},{"issue":"3","key":"45_CR26","doi-asserted-by":"publisher","first-page":"328","DOI":"10.1109\/29.21701","volume":"37","author":"A Waibel","year":"1989","unstructured":"Waibel, A., Hanazawa, T., Hinton, G., Shikano, K., Lang, K.J.: Phoneme recognition using time-delay neural networks. IEEE Trans. Acoust. Speech Sig. Process. 37(3), 328\u2013339 (1989). https:\/\/doi.org\/10.1109\/29.21701","journal-title":"IEEE Trans. Acoust. Speech Sig. Process."}],"container-title":["Lecture Notes in Computer Science","Text, Speech, and Dialogue"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-83527-9_45","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,1,8]],"date-time":"2023-01-08T07:42:13Z","timestamp":1673163733000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-030-83527-9_45"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021]]},"ISBN":["9783030835262","9783030835279"],"references-count":26,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-83527-9_45","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2021]]},"assertion":[{"value":"30 August 2021","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"TSD","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Text, Speech, and Dialogue","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Olomouc","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Czech Republic","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2021","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"6 September 2021","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"9 September 2021","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"24","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"tsd2021","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/www.kiv.zcu.cz\/tsd2021\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"TSDEngine 3.2","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"101","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"29","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"17","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"29% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"2,93","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}