{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,9,9]],"date-time":"2024-09-09T06:43:18Z","timestamp":1725864198922},"publisher-location":"Cham","reference-count":23,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783319455099"},{"type":"electronic","value":"9783319455105"}],"license":[{"start":{"date-parts":[[2016,1,1]],"date-time":"2016-01-01T00:00:00Z","timestamp":1451606400000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2016]]},"DOI":"10.1007\/978-3-319-45510-5_45","type":"book-chapter","created":{"date-parts":[[2016,9,2]],"date-time":"2016-09-02T07:03:04Z","timestamp":1472799784000},"page":"391-399","source":"Crossref","is-referenced-by-count":0,"title":["KALDI Recipes for the Czech Speech Recognition Under Various Conditions"],"prefix":"10.1007","author":[{"given":"Petr","family":"Mizera","sequence":"first","affiliation":[]},{"given":"Ji\u0159\u00ed","family":"Fiala","sequence":"additional","affiliation":[]},{"given":"Ale\u0161","family":"Brich","sequence":"additional","affiliation":[]},{"given":"Petr","family":"Pollak","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2016,9,3]]},"reference":[{"key":"45_CR1","doi-asserted-by":"crossref","unstructured":"Bola\u00f1os, D.: The BAVIECA open-source speech recognition toolkit. In: 2012 IEEE Spoken Language Technology Workshop (SLT), pp. 354\u2013359, December 2012","DOI":"10.1109\/SLT.2012.6424249"},{"key":"45_CR2","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"crossref","first-page":"241","DOI":"10.1007\/978-3-319-01931-4_32","volume-title":"Speech and Computer","author":"M Borsky","year":"2013","unstructured":"Borsky, M., Mizera, P., Pollak, P.: Noise and channel normalized cepstral features for far-speech recognition. In: \u017delezn\u00fd, M., Habernal, I., Ronzhin, A. (eds.) SPECOM 2013. LNCS, vol. 8113, pp. 241\u2013248. Springer, Heidelberg (2013)"},{"issue":"1","key":"45_CR3","doi-asserted-by":"crossref","first-page":"30","DOI":"10.1109\/TASL.2011.2134090","volume":"20","author":"G Dahl","year":"2012","unstructured":"Dahl, G., Yu, D., Deng, L., Acero, A.: Context-dependent pre-trained deep neural networks for large-vocabulary speech recognition. IEEE Trans. Audio Speech Lang. Process. 20(1), 30\u201342 (2012)","journal-title":"IEEE Trans. Audio Speech Lang. Process."},{"key":"45_CR4","unstructured":"Ernestus, M., Kockova-Amortova, L., Pollak, P.: The Nijmegen corpus of casual Czech. In: Proceedings of the LREC 2014: 9th International Conference on Language Resources and Evaluation, Reykjavik, Iceland, pp. 365\u2013370 (2014)"},{"key":"45_CR5","unstructured":"Fousek, P., Pollak, P.: Efficient and reliable measurement and simulation of noisy speech background. In: Proceedings of the EUROSPEECH 2003, 8-th European Conference on Speech Communication and Technology, Geneve, Switzerland (2003)"},{"key":"45_CR6","unstructured":"Fousek, P., Mizera, P., Pollak, P.: CtuCopy feature extraction tool. http:\/\/noel.feld.cvut.cz\/speechlab\/"},{"key":"45_CR7","doi-asserted-by":"crossref","first-page":"249","DOI":"10.1006\/csla.1996.0013","volume":"10","author":"MJF Gales","year":"1996","unstructured":"Gales, M.J.F., Woodland, P.C.: Mean and variance adaptation within the MLLR framework. Comput. Speech Lang. 10, 249\u2013264 (1996)","journal-title":"Comput. Speech Lang."},{"key":"45_CR8","unstructured":"Ghoshal, A., Povey, D.: Sequence-discriminative training of deep neural networks. In: Proceedings of INTERSPEECH (2013)"},{"issue":"6","key":"45_CR9","doi-asserted-by":"crossref","first-page":"82","DOI":"10.1109\/MSP.2012.2205597","volume":"29","author":"G Hinton","year":"2012","unstructured":"Hinton, G., Deng, L., Yu, D., Dahl, G., Mohamed, A., Jaitly, N., Senior, A., Vanhoucke, V., Nguyen, P., Sainath, T., Kingsbury, B.: Deep neural networks for acoustic modeling in speech recognition: the shared views of four research groups. IEEE Signal Process. Mag. 29(6), 82\u201397 (2012)","journal-title":"IEEE Signal Process. Mag."},{"key":"45_CR10","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"334","DOI":"10.1007\/978-3-319-24033-6_38","volume-title":"Text, Speech, and Dialogue","author":"O Klejch","year":"2015","unstructured":"Klejch, O., Pl\u00e1tek, O., \u017dilka, L., Jurc\u00edcek, F.: CloudASR: platform and service. In: Kr\u00e1l, P., et al. (eds.) TSD 2015. LNCS, vol. 9302, pp. 334\u2013341. Springer, Heidelberg (2015). doi: 10.1007\/978-3-319-24033-6_38"},{"key":"45_CR11","unstructured":"Korvas, M., Platek, O., Duvsek, O., Zilka, L., Jurcicek, F.: Free English and Czech telephone speech corpus shared under the CC-BY-SA 3.0 license. In: Proceedings of the LREC 2014: 9th International Conference on Language Resources and Evaluation, Reykjavik, Iceland (2014)"},{"key":"45_CR12","unstructured":"Lamere, P., Kwok, P., Gouvea, E., Raj, B., Singh, R., Walker, W., Warmuth, M., Wolf, P.: The CMU SPHINX-4 speech recognition system. In: IEEE International Conference on Acoustics, Speech and Signal Processing, ICASSP 2003, Hong Kong, China (2003)"},{"key":"45_CR13","unstructured":"Morbini, F., Audhkhasi, K., Sagae, K., Artstein, R., Can, D., Georgiou, P., Narayanan, S., Leuski, A., Traum, D.: Which ASR should I choose for my dialogue system? In: SIGDIAL, Reykjavik, Iceland (2013)"},{"key":"45_CR14","doi-asserted-by":"crossref","unstructured":"Nouza, J., Blavka, K., Bohac, M., \u010cerva, P., Malek, J.: System for producing subtitles to internet audio-visual documents. In: 2015 38th International Conference on Telecommunications and Signal Processing (TSP), pp. 1\u20135, July 2015","DOI":"10.1109\/TSP.2015.7296415"},{"key":"45_CR15","doi-asserted-by":"crossref","unstructured":"Nouza, J., \u017ddansky, J., \u010cerva, P.: System for automatic collection, annotation and indexing of Czech broadcast speech with full-text search. In: Proceedings of 15th IEEE MELECON Conference, pp. 202\u2013205, La Valleta, Malta (2010)","DOI":"10.1109\/MELCON.2010.5476306"},{"key":"45_CR16","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"433","DOI":"10.1007\/978-3-319-24033-6_49","volume-title":"Text, Speech, and Dialogue","author":"Z Patc","year":"2015","unstructured":"Patc, Z., Mizera, P., Pollak, P.: Phonetic segmentation using KALDI and reduced pronunciation detection in causal Czech speech. In: Kr\u00e1l, P., et al. (eds.) TSD 2015. LNCS, vol. 9302, pp. 433\u2013441. Springer, Heidelberg (2015). doi: 10.1007\/978-3-319-24033-6_49"},{"key":"45_CR17","unstructured":"Pollak, P., \u010cernock\u00fd, J.: Czech SPEECON adult database. Technical report, April 2004"},{"key":"45_CR18","unstructured":"Poll\u00e1k, P., Boudy, J., Choukri, K., Heuvel, H.V.D., Vicsi, K., Virag, A., Siemund, R., Majewski, W., Staroniewicz, P., Tropf, H., Kochanina, J., Ostroukhov, E., Rusko, M., Trnka, M.: SpeechDat(E)- Eastern European telephone speech databases. In: Proceedings of the XLDB 2000, Workshop on Very Large Telephone Speech Databases (2000)"},{"key":"45_CR19","unstructured":"Povey, D., Ghoshal, A., Boulianne, G., Burget, L., Glembek, O., Goel, N., Hannemann, M., Motlicek, P., Qian, Y., Schwarz, P., Silovsky, J., Stemmer, G., Vesely, K.: The KALDI speech recognition toolkit. In: IEEE 2011 Workshop on Automatic Speech Recognition and Understanding. IEEE Signal Processing Society, December 2011"},{"key":"45_CR20","first-page":"1002","volume":"20","author":"V Proch\u00e1zka","year":"2011","unstructured":"Proch\u00e1zka, V., Pollak, P., \u017ddansky, J., Nouza, J.: Performance of Czech speech recognition with language models created from public resources. Radioengineering 20, 1002\u20131008 (2011)","journal-title":"Radioengineering"},{"key":"45_CR21","unstructured":"Rybach, D., Hahn, S., Lehnen, P., Nolden, D., Sundermeyer, M., T\u00fcske, Z., Wiesler, S., Schl\u00fcter, R., Ney, H.: Rasr-the RWTH Aachen university open source speech recognition toolkit"},{"key":"45_CR22","doi-asserted-by":"crossref","unstructured":"Vesel\u00fd, K., Karafi\u00e1t, M., Grezl, F.: Convolutive bottleneck network features for LVCSR. In: 2011 IEEE Workshop on Automatic Speech Recognition and Understanding (2011)","DOI":"10.1109\/ASRU.2011.6163903"},{"key":"45_CR23","unstructured":"Young, S., et al.: The HTK Book, Version 3.4.1. Cambridge (2009)"}],"container-title":["Lecture Notes in Computer Science","Text, Speech, and Dialogue"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-319-45510-5_45","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2017,6,24]],"date-time":"2017-06-24T17:52:01Z","timestamp":1498326721000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-319-45510-5_45"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2016]]},"ISBN":["9783319455099","9783319455105"],"references-count":23,"URL":"https:\/\/doi.org\/10.1007\/978-3-319-45510-5_45","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2016]]}}}