{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,29]],"date-time":"2026-05-29T11:13:35Z","timestamp":1780053215554,"version":"3.54.0"},"publisher-location":"Cham","reference-count":67,"publisher":"Springer International Publishing","isbn-type":[{"value":"9783319664286","type":"print"},{"value":"9783319664293","type":"electronic"}],"license":[{"start":{"date-parts":[[2017,1,1]],"date-time":"2017-01-01T00:00:00Z","timestamp":1483228800000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2017]]},"DOI":"10.1007\/978-3-319-66429-3_51","type":"book-chapter","created":{"date-parts":[[2017,8,12]],"date-time":"2017-08-12T02:02:55Z","timestamp":1502503375000},"page":"512-524","source":"Crossref","is-referenced-by-count":15,"title":["Medical Speech Recognition: Reaching Parity with Humans"],"prefix":"10.1007","author":[{"given":"Erik","family":"Edwards","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Wael","family":"Salloum","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Greg P.","family":"Finley","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"James","family":"Fone","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Greg","family":"Cardiff","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Mark","family":"Miller","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"David","family":"Suendermann-Oeft","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"297","published-online":{"date-parts":[[2017,8,13]]},"reference":[{"issue":"3","key":"51_CR1","doi-asserted-by":"crossref","first-page":"585","DOI":"10.1148\/radiology.138.3.7465833","volume":"138","author":"B Leeming","year":"1981","unstructured":"Leeming, B., Porter, D., Jackson, J., Bleich, H., Simon, M.: Computerized radiologic reporting with voice data-entry. Radiology 138(3), 585\u2013588 (1981)","journal-title":"Radiology"},{"issue":"3","key":"51_CR2","doi-asserted-by":"crossref","first-page":"509","DOI":"10.1016\/S0094-1298(20)31576-5","volume":"13","author":"G Akers","year":"1986","unstructured":"Akers, G.: Using your voice: speech recognition technology in medicine and surgery. Clin. Plast. Surg. 13(3), 509\u2013511 (1986)","journal-title":"Clin. Plast. Surg."},{"issue":"3\u20134","key":"51_CR3","first-page":"243","volume":"12","author":"T Matumoto","year":"1987","unstructured":"Matumoto, T., Iinuma, T., Tateno, Y., Ikehira, H., Yamasaki, Y., Fukuhisa, K., Tsunemoto, H., Shishido, F., Kubo, Y., Inamura, K.: Automatic radiologic reporting system using speech recognition. Med. Prog. Technol. 12(3\u20134), 243\u2013257 (1987)","journal-title":"Med. Prog. Technol."},{"issue":"1\u20132","key":"51_CR4","doi-asserted-by":"crossref","first-page":"19","DOI":"10.1016\/0167-6393(95)00012-D","volume":"17","author":"V Steinbiss","year":"1995","unstructured":"Steinbiss, V., Ney, H., Essen, U., Tran, B.H., Aubert, X., Dugast, C., Kneser, R., Meier, H.G., Oerder, M., Haeb-Umbach, R., Geller, D., H\u00f6llerbauer, W., Bartosik, H.: Continuous speech dictation from theory to practice. Speech Commun. 17(1\u20132), 19\u201338 (1995)","journal-title":"Speech Commun."},{"issue":"7","key":"51_CR5","doi-asserted-by":"crossref","first-page":"1451","DOI":"10.1007\/s003300050869","volume":"9","author":"W Hundt","year":"1999","unstructured":"Hundt, W., Stark, O., Scharnberg, B., Hold, M., Kohz, P., Lienemann, A., Bon\u00e9l, H., Reiser, M.: Speech processing in radiology. Eur. Radiol. 9(7), 1451\u20131456 (1999)","journal-title":"Eur. Radiol."},{"issue":"3","key":"51_CR6","doi-asserted-by":"crossref","first-page":"195","DOI":"10.1136\/jamia.1999.0060195","volume":"6","author":"A Zafar","year":"1999","unstructured":"Zafar, A., Overhage, J., McDonald, C.: Continuous speech recognition for clinicians. J. Am. Med. Inf. Assoc. 6(3), 195\u2013204 (1999)","journal-title":"J. Am. Med. Inf. Assoc."},{"issue":"5","key":"51_CR7","doi-asserted-by":"crossref","first-page":"462","DOI":"10.1136\/jamia.2000.0070462","volume":"7","author":"E Devine","year":"2000","unstructured":"Devine, E., Gaehde, S., Curtis, A.: Comparative evaluation of three continuous speech recognition software packages in the generation of medical reports. J. Am. Med. Inf. Assoc. 7(5), 462\u2013468 (2000)","journal-title":"J. Am. Med. Inf. Assoc."},{"issue":"1","key":"51_CR8","doi-asserted-by":"crossref","first-page":"53","DOI":"10.1016\/j.jbi.2008.08.001","volume":"42","author":"J Paulett","year":"2009","unstructured":"Paulett, J., Langlotz, C.: Improving language models for radiology speech recognition. J. Biomed. Inf. 42(1), 53\u201358 (2009)","journal-title":"J. Biomed. Inf."},{"issue":"4","key":"51_CR9","doi-asserted-by":"crossref","first-page":"504","DOI":"10.1007\/s10278-012-9455-9","volume":"25","author":"C Hawkins","year":"2012","unstructured":"Hawkins, C., Hall, S., Hardin, J., Salisbury, S., Towbin, A.: Prepopulated radiology report templates: a prospective analysis of error rate and turnaround time. J. Digit Imaging 25(4), 504\u2013511 (2012)","journal-title":"J. Digit Imaging"},{"issue":"3","key":"51_CR10","doi-asserted-by":"crossref","first-page":"205","DOI":"10.1053\/sder.2002.34950","volume":"21","author":"K Smith","year":"2002","unstructured":"Smith, K.: A discrete speech recognition system for dermatology: 8 years of daily experience in a medical dermatology office. Semin. Cutan. Med. Surg. 21(3), 205\u2013208 (2002)","journal-title":"Semin. Cutan. Med. Surg."},{"issue":"2","key":"51_CR11","doi-asserted-by":"crossref","first-page":"168","DOI":"10.3414\/ME9301","volume":"49","author":"R Hippmann","year":"2010","unstructured":"Hippmann, R., Dost\u00e1lov\u00e1, T., Zv\u00e1rov\u00e1, J., Nagy, M., Seydlov\u00e1, M., Hanzl\u00edcek, P., Kriz, P., Sm\u00eddl, L., Trmal, J.: Voice-supported electronic health record for temporomandibular joint disorders. Methods Inf. Med. 49(2), 168\u2013172 (2010)","journal-title":"Methods Inf. Med."},{"issue":"e1","key":"51_CR12","doi-asserted-by":"crossref","first-page":"e169","DOI":"10.1093\/jamia\/ocv152","volume":"23","author":"T Hodgson","year":"2016","unstructured":"Hodgson, T., Coiera, E.: Risks and benefits of speech recognition for clinical documentation: a systematic review. J. Am. Med. Inf. Assoc. 23(e1), e169\u2013e179 (2016)","journal-title":"J. Am. Med. Inf. Assoc."},{"issue":"2","key":"51_CR13","doi-asserted-by":"crossref","first-page":"4","DOI":"10.1177\/183335831504400201","volume":"44","author":"I Hammana","year":"2015","unstructured":"Hammana, I., Lepanto, L., Poder, T., Bellemare, C., Ly, M.S.: Speech recognition in the radiology department: a systematic review. HIM. J. 44(2), 4\u201310 (2015)","journal-title":"HIM. J."},{"issue":"2","key":"51_CR14","doi-asserted-by":"crossref","first-page":"277","DOI":"10.1016\/j.jbi.2011.01.004","volume":"44","author":"YG Cao","year":"2011","unstructured":"Cao, Y.G., Liu, F., Simpson, P., Antieau, L., Bennett, A., Cimino, J., Ely, J., Yu, H.: Askhermes: an online question answering system for complex clinical questions. J. Biomed. Inf. 44(2), 277\u2013288 (2011)","journal-title":"J. Biomed. Inf."},{"issue":"5","key":"51_CR15","doi-asserted-by":"crossref","first-page":"625","DOI":"10.1136\/amiajnl-2010-000071","volume":"18","author":"F Liu","year":"2011","unstructured":"Liu, F., Tur, G., Hakkani-T\u00fcr, D., Yu, H.: Towards spoken clinical-question answering: evaluating and adapting automatic speech-recognition systems for spoken clinical questions. J. Am. Med. Inf. Assoc. 18(5), 625\u2013630 (2011)","journal-title":"J. Am. Med. Inf. Assoc."},{"key":"51_CR16","unstructured":"Luu, T., Phan, R., Davey, R., Hanlen, L., Chetty, G.: Automatic clinical speech recognition for CLEF 2015 ehealth challenge. Working notes report\/paper, University of Canberra (2015)"},{"key":"51_CR17","series-title":"IFMBE Proceedings","doi-asserted-by":"publisher","first-page":"96","DOI":"10.1007\/978-3-319-12967-9_26","volume-title":"16th Nordic-Baltic Conference on Biomedical Engineering","author":"A Paats","year":"2015","unstructured":"Paats, A., Alum\u00e4e, T., Meister, E., Fridolin, I.: Evaluation of automatic speech recognition prototype for estonian language in radiology domain: a pilot study. In: Mindedal, H., Persson, M. (eds.) 16th Nordic-Baltic Conference on Biomedical Engineering. IFMBE Proceedings, vol. 48, pp. 96\u201399. Springer, Cham (2015). doi: 10.1007\/978-3-319-12967-9_26"},{"key":"51_CR18","unstructured":"Alum\u00e4e, T.: Full-duplex speech-to-text system for Estonian. In: Proceedings of the Baltic HLT, Kaunas, Lithuania, pp. 3\u201310. IOS Press (2014)"},{"issue":"8","key":"51_CR19","first-page":"1","volume":"15","author":"J Toit du","year":"2015","unstructured":"du Toit, J., Hattingh, R., Pitcher, R.: The accuracy of radiology speech recognition reports in a multilingual south african teaching hospital. BMC Med. Imaging 15(8), 1 (2015)","journal-title":"BMC Med. Imaging"},{"issue":"5","key":"51_CR20","doi-asserted-by":"crossref","first-page":"411","DOI":"10.1111\/j.1754-9485.2010.02193.x","volume":"54","author":"R Strahan","year":"2010","unstructured":"Strahan, R., Schneider-Kolsky, M.: Voice recognition versus transcriptionist: error rates and productivity in MRI reporting. J. Med. Imaging Radiat. Oncol. 54(5), 411\u2013414 (2010)","journal-title":"J. Med. Imaging Radiat. Oncol."},{"issue":"4","key":"51_CR21","doi-asserted-by":"crossref","first-page":"295","DOI":"10.1053\/ajem.2001.24487","volume":"19","author":"R Zick","year":"2001","unstructured":"Zick, R., Olsen, J.: Voice recognition software versus a traditional transcription service for physician charting in the ed. Am. J. Emerg. Med. 19(4), 295\u2013298 (2001)","journal-title":"Am. J. Emerg. Med."},{"issue":"8","key":"51_CR22","first-page":"1","volume":"15","author":"J DuToit","year":"2015","unstructured":"DuToit, J., Hattingh, R., Pitcher, R.: The accuracy of radiology speech recognition reports in a multilingual South African teaching hospital. BMC Med. Imaging 15(8), 1\u20135 (2015)","journal-title":"BMC Med. Imaging"},{"issue":"4","key":"51_CR23","doi-asserted-by":"crossref","first-page":"923","DOI":"10.2214\/AJR.11.6691","volume":"197","author":"S Basma","year":"2011","unstructured":"Basma, S., Lord, B., Jacks, L., Rizk, M., Scaranelo, A.: Error rates in breast imaging reports: comparison of automatic speech recognition and dictation transcription. AJR Am. J. Roentgenol. 197(4), 923\u2013927 (2011)","journal-title":"AJR Am. J. Roentgenol."},{"key":"51_CR24","unstructured":"Suendermann-Oeft, D., Ghaffarzadegan, S., Edwards, E., Salloum, W., Miller, M.: A system for automated extraction of clinical standard codes in spoken medical reports. In: Proceedings of Workshop SLT, San Diego, CA. IEEE (2016)"},{"key":"51_CR25","doi-asserted-by":"crossref","unstructured":"Zechner, K.: What did they actually say? Agreement and disagreement among transcribers of non-native spontaneous speech responses in an English proficiency test. In: Proceedings of SLaTE, Warwickshire, UK, pp. 25\u201328. ISCA (2009)","DOI":"10.21437\/SLaTE.2009-7"},{"key":"51_CR26","unstructured":"Xiong, W., Droppo, J., Huang, X., Seide, F., Seltzer, M., Stolcke, A., Yu, D., Zweig, G.: Achieving human parity in conversational speech recognition, pp. 1\u201313 (2017). arXiv:1610.05256"},{"key":"51_CR27","unstructured":"Saon, G., Kurata, G., Sercu, T., Audhkhasi, K., Thomas, S., Dimitriadis, D., Cui, X., Ramabhadran, B., Picheny, M., Lim, L.L., Roomi, B., Hall, P.: English conversational telephone speech recognition by humans and machines, pp. 1\u20137 (2017). arXiv:1703.02136"},{"key":"51_CR28","doi-asserted-by":"crossref","first-page":"280","DOI":"10.1002\/9781118541241.ch10","volume-title":"Crowdsourcing for Speech Processing","author":"D Suendermann","year":"2013","unstructured":"Suendermann, D., Pieraccini, R.: Crowdsourcing for industrial spoken dialog systems. In: Esk\u00e9nazi, M., Levow, G.A., Meng, H., Parent, G., Suendermann, D. (eds.) Crowdsourcing for Speech Processing, pp. 280\u2013302. J. Wiley, Chichester (2013)"},{"key":"51_CR29","unstructured":"Salloum, W., Edwards, E., Ghaffarzadegan, S., Suendermann-Oeft, D., Miller, M.: Crowdsourced continuous improvement of medical speech recognition. In: Proceedings of AAAI Workshop Crowdsourcing, San Francisco, CA. AAAI (2017)"},{"key":"51_CR30","doi-asserted-by":"crossref","unstructured":"Glass, J., Hazen, T., Hetherington, I.: Real-time telephone-based speech recognition in the Jupiter domain. In: Proceedings of ICASSP, vol. 1, pp. 61\u201364. IEEE (1999)","DOI":"10.1109\/ICASSP.1999.758062"},{"issue":"1","key":"51_CR31","doi-asserted-by":"crossref","first-page":"69","DOI":"10.1006\/csla.2001.0184","volume":"16","author":"M Mohri","year":"2002","unstructured":"Mohri, M., Pereira, F., Riley, M.: Weighted finite-state transducers in speech recognition. Comput. Speech Lang. 16(1), 69\u201388 (2002)","journal-title":"Comput. Speech Lang."},{"key":"51_CR32","unstructured":"Walker, W., Lamere, P., Kwok, P., Raj, B., Singh, R., Gouv\u00eaa, E., Wolf, P., Woelfel, J.: Sphinx-4: a flexible open source framework for speech recognition. Technical report SMLI TR2004-0811, Sun Microsystems, Inc. (2004)"},{"key":"51_CR33","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"11","DOI":"10.1007\/978-3-540-76336-9_3","volume-title":"Implementation and Application of Automata","author":"C Allauzen","year":"2007","unstructured":"Allauzen, C., Riley, M., Schalkwyk, J., Skut, W., Mohri, M.: OpenFst: a general and efficient weighted finite-state transducer library. In: Holub, J., \u017dd\u2019\u00e1rek, J. (eds.) CIAA 2007. LNCS, vol. 4783, pp. 11\u201323. Springer, Heidelberg (2007). doi: 10.1007\/978-3-540-76336-9_3"},{"key":"51_CR34","unstructured":"Gorman, K.: Openfst library (2016). http:\/\/openfst.org"},{"key":"51_CR35","unstructured":"Povey, D., Boulianne, G., Burget, L., Glembek, O., Goel, N., Hannemann, M., Motl\u00edcek, P., Qian, Y., Schwarz, P., Silovsky, J.: The kaldi speech recognition toolkit. In: Proceedings of Workshop ASRU, 4 p. IEEE (2011)"},{"issue":"2","key":"51_CR36","doi-asserted-by":"crossref","first-page":"113","DOI":"10.1109\/TASSP.1979.1163209","volume":"27","author":"S Boll","year":"1979","unstructured":"Boll, S.: Suppression of acoustic noise in speech using spectral subtraction. IEEE Trans. Acoust. 27(2), 113\u2013120 (1979)","journal-title":"IEEE Trans. Acoust."},{"issue":"45821","key":"51_CR37","first-page":"1","volume":"2007","author":"K Hermus","year":"2007","unstructured":"Hermus, K., Wambacq, P., Van Hamme, H.: A review of signal subspace speech enhancement and its application to noise robust speech recognition. EURASIP J. Adv. Signal Process. 2007(45821), 1\u201315 (2007)","journal-title":"EURASIP J. Adv. Signal Process."},{"key":"51_CR38","unstructured":"Zwicker, E., Feldtkeller, R.: Das Ohr als Nachrichtenempf\u00e4nger, 2nd edn. Monographien der elektrischen Nachrichtentechnik; Bd. 19. Hirzel, Stuttgart (1967)"},{"key":"51_CR39","doi-asserted-by":"crossref","DOI":"10.1007\/978-3-540-68888-4","volume-title":"Psychoacoustics: Facts and Models","author":"H Fastl","year":"2007","unstructured":"Fastl, H., Zwicker, E.: Psychoacoustics: Facts and Models, 3rd edn. Springer, Berlin (2007)","edition":"3"},{"key":"51_CR40","doi-asserted-by":"crossref","unstructured":"Kim, C., Stern, R.: Power-normalized cepstral coefficients (PNCC) for robust speech recognition. In: Proceedings of ICASSP, pp. 4101\u20134104. IEEE (2012)","DOI":"10.1109\/ICASSP.2012.6288820"},{"issue":"7","key":"51_CR41","doi-asserted-by":"crossref","first-page":"1315","DOI":"10.1109\/TASLP.2016.2545928","volume":"24","author":"C Kim","year":"2016","unstructured":"Kim, C., Stern, R.: Power-normalized cepstral coefficients (PNCC) for robust speech recognition. IEEE\/ACM Trans. Audio Speech Lang. Process. 24(7), 1315\u20131329 (2016)","journal-title":"IEEE\/ACM Trans. Audio Speech Lang. Process."},{"key":"51_CR42","doi-asserted-by":"crossref","unstructured":"Imai, S.: Cepstral analysis synthesis on the mel frequency scale. In: Proceedings of ICASSP, vol. 8, pp. 93\u201396. IEEE (1983)","DOI":"10.1109\/ICASSP.1983.1172250"},{"issue":"3","key":"51_CR43","doi-asserted-by":"crossref","first-page":"329","DOI":"10.2307\/1417526","volume":"53","author":"S Stevens","year":"1940","unstructured":"Stevens, S., Volkmann, J.: The relation of pitch to frequency: a revised scale. Am. J. Psychol. 53(3), 329\u2013353 (1940)","journal-title":"Am. J. Psychol."},{"key":"51_CR44","doi-asserted-by":"crossref","unstructured":"Hermansky, H.: An efficient speaker-independent automatic speech recognition by simulation of some properties of human auditory perception. In: Proceedings of ICASSP, vol. 12, pp. 1159\u20131162. IEEE (1987)","DOI":"10.1109\/ICASSP.1987.1169803"},{"key":"51_CR45","doi-asserted-by":"crossref","unstructured":"Murthi, M., Rao, B.: Minimum variance distortionless response (MVDR) modeling of voiced speech. In: Proceedings of ICASSP, vol. 3, pp. 1687\u20131690. IEEE (1997)","DOI":"10.1109\/ICASSP.1997.598838"},{"key":"51_CR46","doi-asserted-by":"crossref","unstructured":"Yapanel, U., Dharanipragada, S., Hansen, J.: Perceptual mvdr-based cepstral coefficients (PMCCS) for high accuracy speech recognition. In: Proceedings of EUROSPEECH, pp. 1829\u20131832. ISCA (2003)","DOI":"10.21437\/Eurospeech.2003-553"},{"issue":"7","key":"51_CR47","doi-asserted-by":"crossref","first-page":"947","DOI":"10.1109\/TASSP.1987.1165237","volume":"35","author":"BH Juang","year":"1987","unstructured":"Juang, B.H., Rabiner, L., Wilpon, J.: On the use of bandpass liftering in speech recognition. IEEE Trans. Acoust. 35(7), 947\u2013954 (1987)","journal-title":"IEEE Trans. Acoust."},{"key":"51_CR48","doi-asserted-by":"crossref","unstructured":"Rath, S., Povey, D., Vesel\u00fd, K., Cernock\u00fd, J.: Improved feature processing for deep neural networks. In: Proceedings of INTERSPEECH, pp. 109\u2013113. ISCA (2013)","DOI":"10.21437\/Interspeech.2013-48"},{"issue":"9\/10","key":"51_CR49","first-page":"341","volume":"5","author":"P Boersma","year":"2002","unstructured":"Boersma, P., van Heuven, V.: Praat, a system for doing phonetics by computer. Glot. Int. 5(9\/10), 341\u2013345 (2002)","journal-title":"Glot. Int."},{"key":"51_CR50","unstructured":"Young, S., Evermann, G., Gales, M., Hain, T., Kershaw, D., Liu, X., Moore, G., Odell, J., Ollason, D., Povey, D., Valtchev, V., Woodland, P.: The HTK book (for HTK version 3.4). Book HTK Version 3.4, Cambridge University Engineering Department, March 2009"},{"key":"51_CR51","unstructured":"Lee, A.: The Julius Book. Nagoya Institute of Technology, May 2010"},{"key":"51_CR52","unstructured":"Rybach, D., Hahn, S., Lehnen, P., Nolden, D., Sundermeyer, M., T\u00fcske, Z., Wiesler, S., Schl\u00fcter, R., Ney, H.: RASR - the RWTH Aachen University open source speech recognition toolkit. In: Proceedings of ASRU Workshop, IEEE (2011)"},{"key":"51_CR53","unstructured":"Gaida, C., Lange, P., Petrick, R., Proba, P., Malatawy, A., Suendermann-Oeft, D.: Comparing open-source speech recognition toolkits. Technical report, DHBW, October 2014"},{"issue":"1","key":"51_CR54","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1162\/neco.1989.1.1.1","volume":"1","author":"R Lippmann","year":"1989","unstructured":"Lippmann, R.: Review of neural networks for speech recognition. Neural Comput. 1(1), 1\u201338 (1989)","journal-title":"Neural Comput."},{"issue":"2\u20133","key":"51_CR55","doi-asserted-by":"crossref","first-page":"237","DOI":"10.1016\/0167-6393(92)90018-3","volume":"11","author":"H Bourlard","year":"1992","unstructured":"Bourlard, H., Morgan, N., Renals, S.: Neural nets and hidden markov models: review and generalizations. Speech Commun. 11(2\u20133), 237\u2013246 (1992)","journal-title":"Speech Commun."},{"issue":"6","key":"51_CR56","doi-asserted-by":"crossref","first-page":"82","DOI":"10.1109\/MSP.2012.2205597","volume":"29","author":"G Hinton","year":"2006","unstructured":"Hinton, G., Deng, L., Yu, D., Dahl, G., Mohamed, A.R., Jaitly, N., Senior, A., Vanhoucke, V., Nguyen, P., Sainath, T., Kingsbury, B.: Deep neural networks for acoustic modeling in speech recognition: the shared views of four research groups. IEEE Signal Process. Mag. 29(6), 82\u201397 (2006)","journal-title":"IEEE Signal Process. Mag."},{"key":"51_CR57","unstructured":"Pl\u00e1tek, O.: Speech recognition using Kaldi. Masters thesis, Charles University (2014)"},{"key":"51_CR58","unstructured":"Gil, V.: Automatic speech recognition with Kaldi toolkit. Doctoral thesis, University Polit\u00e8cnica de Catalunya (2016)"},{"key":"51_CR59","doi-asserted-by":"crossref","unstructured":"Zhang, X., Trmal, J., Povey, D., Khudanpur, S.: Improving deep neural network acoustic models using generalized maxout networks. In: Proceedings of ICASSP, pp. 215\u2013219. IEEE (2014)","DOI":"10.1109\/ICASSP.2014.6853589"},{"key":"51_CR60","unstructured":"Miao, Y.: Kaldi + pdnn: building dnn-based ASR systems with kaldi and PDNN, 4 p. (2014). arXiv:1401.6984"},{"key":"51_CR61","doi-asserted-by":"crossref","unstructured":"Povey, D., Chu, S., Varadarajan, B.: Universal background model based speech recognition. In: Proceedings of ICASSP, pp. 4561\u20134564. IEEE (2008)","DOI":"10.1109\/ICASSP.2008.4518671"},{"key":"51_CR62","doi-asserted-by":"crossref","unstructured":"Snyder, D., Garcia-Romero, D., Povey, D.: Time delay deep neural network-based universal background models for speaker recognition. In: Proceedings of Workshop ASRU, pp. 92\u201397. IEEE (2015)","DOI":"10.1109\/ASRU.2015.7404779"},{"key":"51_CR63","doi-asserted-by":"crossref","unstructured":"Dehak, N., Dehak, R., Kenny, P., Br\u00fcmmer, N., Ouellet, P., Dumouchel, P.: Support vector machines versus fast scoring in the low-dimensional total variability space for speaker verification. In: Proceedings of INTERSPEECH, pp. 1559\u20131562. ISCA (2009)","DOI":"10.21437\/Interspeech.2009-385"},{"key":"51_CR64","doi-asserted-by":"crossref","unstructured":"Zhang, Y., Yan, Z.J., Huo, Q.: A new i-vector approach and its application to irrelevant variability normalization based acoustic model training. In: Proceedings of Workshop MLSP, pp. 1\u20136. IEEE (2011)","DOI":"10.1109\/MLSP.2011.6064637"},{"key":"51_CR65","doi-asserted-by":"crossref","unstructured":"Karafi\u00e1t, M., Burget, L., Matejka, P., Glembek, O., Cernock\u00fd, J.: ivector-based discriminative adaptation for automatic speech recognition. In: Proceedings of Workshop ASRU. IEEE (2011)","DOI":"10.1109\/ASRU.2011.6163922"},{"key":"51_CR66","doi-asserted-by":"crossref","unstructured":"Senior, A., Lopez-Moreno, I.: Improving DNN speaker independence with i-vector inputs. In: Proceedings of ICASSP, pp. 225\u2013229. IEEE (2014)","DOI":"10.1109\/ICASSP.2014.6853591"},{"key":"51_CR67","doi-asserted-by":"crossref","unstructured":"Peddinti, V., Chen, G., Manohar, V., Ko, T., Povey, D., Khudanpur, S.: Jhu aspire system: robust LVCSR with TDNNS, ivector adaptation and RNN-LMS. In: Proceedings of Workshop on ASRU, pp. 539\u2013546. IEEE (2015)","DOI":"10.1109\/ASRU.2015.7404842"}],"container-title":["Lecture Notes in Computer Science","Speech and Computer"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-319-66429-3_51","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,6,26]],"date-time":"2024-06-26T06:33:21Z","timestamp":1719383601000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-319-66429-3_51"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2017]]},"ISBN":["9783319664286","9783319664293"],"references-count":67,"URL":"https:\/\/doi.org\/10.1007\/978-3-319-66429-3_51","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2017]]}}}