{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2022,8,2]],"date-time":"2022-08-02T04:41:28Z","timestamp":1659415288247},"reference-count":39,"publisher":"Springer Science and Business Media LLC","issue":"4","license":[{"start":{"date-parts":[[2017,9,4]],"date-time":"2017-09-04T00:00:00Z","timestamp":1504483200000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int J Speech Technol"],"published-print":{"date-parts":[[2017,12]]},"DOI":"10.1007\/s10772-017-9453-x","type":"journal-article","created":{"date-parts":[[2017,9,4]],"date-time":"2017-09-04T03:37:43Z","timestamp":1504496263000},"page":"859-867","update-policy":"http:\/\/dx.doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Factored front-end CMLLR for joint speaker and environment normalization under DNN-HMM"],"prefix":"10.1007","volume":"20","author":[{"given":"Shakti P.","family":"Rath","sequence":"first","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2017,9,4]]},"reference":[{"key":"9453_CR1","doi-asserted-by":"publisher","first-page":"171","DOI":"10.1006\/csla.1995.0010","volume":"9","author":"CJ Leggetter","year":"1995","unstructured":"Leggetter, C. J., & Woodland, P. C. (1995). Maximum likelihood linear regression for speaker adaptation of continuous density hidden markov models. Computer Speech and Language, 9, 171\u2013185.","journal-title":"Computer Speech and Language"},{"key":"9453_CR2","doi-asserted-by":"publisher","first-page":"75","DOI":"10.1006\/csla.1998.0043","volume":"12","author":"MJF Gales","year":"1998","unstructured":"Gales, M. J. F. (1998). Maximum likelihood linear transformations for HMM-based speech recognition. Computer Speech and Language, 12, 75\u201398.","journal-title":"Computer Speech and Language"},{"key":"9453_CR3","doi-asserted-by":"crossref","unstructured":"Acero, A., & Deng, L., et\u00a0al. (2000). HMM adaptation using vector taylor series for noisy speech recognition. In Proceedings of ICSLP.","DOI":"10.21437\/ICSLP.2000-672"},{"key":"9453_CR4","unstructured":"Gales, M.\u00a0J.\u00a0F. (1995). Model-based techniques for noise robust speech recognition. In Ph.D. thesis, Cambridge: Cambridge University."},{"key":"9453_CR5","doi-asserted-by":"publisher","first-page":"82","DOI":"10.1109\/MSP.2012.2205597","volume":"29","author":"G Hinton","year":"2012","unstructured":"Hinton, G., Deng, L., et al. (2012). Deep Neural Networks for acoustic modeling in speech recognition: The shared views of four research groups. Signal Processing Magazine, IEEE, 29, 82\u201397.","journal-title":"Signal Processing Magazine, IEEE"},{"key":"9453_CR6","doi-asserted-by":"crossref","unstructured":"Seide, F., Li, G., Chen, X., & Yu, D. (2011). Feature engineering in context-dependent Deep Neural Networks for conversational speech transcription. In Proceedings of IEEE ASRU.","DOI":"10.1109\/ASRU.2011.6163899"},{"key":"9453_CR7","doi-asserted-by":"crossref","unstructured":"Jaitly, N., Nguyen, P., Senior, A., & Vanhoucke, V. (2012). Application of pretrained deep neural networks to large vocabulary speech recognition. In Proceedings of International Speech.","DOI":"10.21437\/Interspeech.2012-10"},{"key":"9453_CR8","doi-asserted-by":"crossref","unstructured":"Liao, H., McDermott, E., & Senior, A. (2013). Large scale Deep Neural Network acoustic modeling with semi-supervised training data for Youtube video transcription. In Proceedings of ASRU.","DOI":"10.1109\/ASRU.2013.6707758"},{"key":"9453_CR9","doi-asserted-by":"crossref","unstructured":"Abrash, V., Franco, H., Sankar, A., & Cohen, M. (1995). Connectionist speaker normalization and adaptation. In Proceedings of International Speech.","DOI":"10.21437\/Eurospeech.1995-414"},{"key":"9453_CR10","doi-asserted-by":"crossref","unstructured":"Gemello, R., Mana, F., Scanzio, S., Laface, P., & Mori, R.\u00a0D. (2006). Adaptation of hybrid ann\/hmm models using linear hidden transformations and conservative training. In Proceedings of ICASSP.","DOI":"10.1109\/ICASSP.2006.1660239"},{"key":"9453_CR11","doi-asserted-by":"crossref","unstructured":"Price, R., Iso, K., & Shinoda, K. (2014). Speaker adaptation of deep neural networks using a hierarchy of output layers. In Proceedings of IEEE SLT.","DOI":"10.1109\/SLT.2014.7078566"},{"key":"9453_CR12","doi-asserted-by":"crossref","unstructured":"Huang, Z., Li, J., Siniscalchi, S.\u00a0M., Chen, I.-F., Wu, J., & Lee, C.\u00a0H. (2015). Rapid adaptation for deep neural networks through multi-task learning. In Proceedings of International Speech.","DOI":"10.21437\/Interspeech.2015-719"},{"key":"9453_CR13","doi-asserted-by":"crossref","unstructured":"Yu, D., Yao, K., Su, H., Li, G., & Seide, F. (2013). Kl-divergence regularized deep neural network adaptation for improved large vocabulary speech recognition. In Proceedings of ICASSP.","DOI":"10.1109\/ICASSP.2013.6639201"},{"key":"9453_CR14","doi-asserted-by":"crossref","unstructured":"Huang, Y., Slaney, M., Seltzer, M.\u00a0L., & Gong, Y. (2014). Towards better performance with heterogeneous training data in acoustic modeling using deep neural networks. In Proceedings of International Speech.","DOI":"10.21437\/Interspeech.2014-214"},{"key":"9453_CR15","doi-asserted-by":"crossref","unstructured":"Tan, T., Qian, Y., Yin, M., Zhuang, Y., Yu, & K. (2015). Cluster adaptive training for deep neural network. In Proceedings of ICASSP.","DOI":"10.1109\/ICASSP.2015.7178787"},{"key":"9453_CR16","doi-asserted-by":"crossref","unstructured":"Miao, Y., Jiang, L., Zhang, H., & Metze, F. (2014). Improvements to speaker adaptive training of deep neural networks. In Proceedings of IEEE SLT.","DOI":"10.1109\/SLT.2014.7078568"},{"key":"9453_CR17","doi-asserted-by":"crossref","unstructured":"G.\u00a0Saon, H.\u00a0Soltau, D.\u00a0Nahamoo, Picheny, M. (2013) Speaker adaptation of Neural Network Acoustic models using I-vectors. In Proceedings of ASRU.","DOI":"10.1109\/ASRU.2013.6707705"},{"key":"9453_CR18","doi-asserted-by":"crossref","unstructured":"Senior, A., & Lopez-Moreno, I. (2014). Improving DNN Speaker Independence With I-Vector Inputs.In Proceedings of ICASSP.","DOI":"10.1109\/ICASSP.2014.6853591"},{"key":"9453_CR19","doi-asserted-by":"crossref","unstructured":"Garimella, S., Mandal, A., & Strom, N., et\u00a0al. (2015). Robust I-vector based adaptation of DNN Acoustic Model for speech recognition. In Proceedings of International Speech.","DOI":"10.21437\/Interspeech.2015-605"},{"key":"9453_CR20","doi-asserted-by":"crossref","unstructured":"Variani, E., Lei, X., & McDermott, E., et\u00a0al. (2014). Deep neural networks for small footprint text-dependent speaker verification. In Proceedings of ICASSP.","DOI":"10.1109\/ICASSP.2014.6854363"},{"key":"9453_CR21","doi-asserted-by":"crossref","unstructured":"Seltzer, M.\u00a0L., Yu, D., & Wang, Y. (2013). An investigation of deep neural networks for noise robust speech recognition. In Proceedings of ICASSP.","DOI":"10.1109\/ICASSP.2013.6639100"},{"key":"9453_CR22","doi-asserted-by":"crossref","unstructured":"Abdel-Hamid, O., & Jiang, H. (2013). Fast speaker adaptation of hybrid nn\/hmm model for speech recognition based on discriminative learning of speaker code. In Proceedings of ICASSP.","DOI":"10.1109\/ICASSP.2013.6639211"},{"key":"9453_CR23","doi-asserted-by":"crossref","unstructured":"Qian, Y., Tan, T., Yu, D., & Zhang, Y. (2016) Integrated adaptation with multi-factor joint-learning for far-field speech recognition. In Proceedings of ICASSP.","DOI":"10.1109\/ICASSP.2016.7472783"},{"key":"9453_CR24","doi-asserted-by":"crossref","unstructured":"Karanasou, P., Wang, M.\u00a0J. F.\u00a0G. Y., & Woodland, P.\u00a0C. (2014). Adaptation of deep neural network acoustic models using factorised i-vectors. In Proceedings of International Speech.","DOI":"10.21437\/Interspeech.2014-488"},{"key":"9453_CR25","doi-asserted-by":"crossref","unstructured":"Liao, H., & Gales, M.\u00a0J.\u00a0F. (2005). Joint uncertainty decoding for noise robust speech recognition. In Proceedings of International Speech.","DOI":"10.21437\/Interspeech.2005-265"},{"key":"9453_CR26","doi-asserted-by":"crossref","unstructured":"Rath, S.\u00a0P., Burget, L., Karafiat, M., Glembek, O., & Cernocky, J. (2013). A region-specific feature-space transformation for speaker adaptation and singularity analysis of Jacobian matrix. In Proceedings of International Speech.","DOI":"10.21437\/Interspeech.2013-332"},{"key":"9453_CR27","doi-asserted-by":"crossref","unstructured":"Gales, M.\u00a0J.\u00a0F., & Flego, F. (2012). Model-based approaches for degraded channel modelling in robust ASR. In Proceedings of International Speech.","DOI":"10.21437\/Interspeech.2012-371"},{"key":"9453_CR28","doi-asserted-by":"crossref","unstructured":"Rath, S., Sivadas, S., & Ma, B. (2015). Joint environment and speaker normalization using factored front-end CMLLR. In Proceedings of International Speech.","DOI":"10.21437\/Interspeech.2015-598"},{"key":"9453_CR29","doi-asserted-by":"crossref","unstructured":"Gales, M.\u00a0J.\u00a0F. (2001). Acoustic factorisation. In Proceedings of ASRU.","DOI":"10.1109\/ASRU.2001.1034593"},{"key":"9453_CR30","doi-asserted-by":"crossref","unstructured":"Wang, Y.\u00a0Q., & Gales, M.\u00a0J.\u00a0F. (2013). An explicit independence constraint for factorised adaptation in speech recognition. In Proceedings of International Speech.","DOI":"10.21437\/Interspeech.2013-333"},{"key":"9453_CR31","doi-asserted-by":"crossref","unstructured":"Seltzer, M., & Acero, A. (2011). Separating speaker and environmental variability using factored transforms. In Proceedings of International Speech.","DOI":"10.21437\/Interspeech.2011-415"},{"key":"9453_CR32","doi-asserted-by":"crossref","unstructured":"Seltzer, M., & Acero, A. (2012). Factored adaptation using a combination of feature-space and model-space transforms. In Proceedings of International Speech.","DOI":"10.21437\/Interspeech.2012-488"},{"key":"9453_CR33","doi-asserted-by":"crossref","unstructured":"Seo, H., Kang, H.-G., & Seltzer, M.\u00a0L. (2014). Factored adaptation of speaker and environment using orthogonal subspace transforms. In Proceedings of IEEE ICASSP.","DOI":"10.1109\/ICASSP.2014.6854201"},{"key":"9453_CR34","unstructured":"Parihar, N., & Picone, J. (2002). Aurora working group: DSR frontend LVCSR evaluation AU\/384\/02. In Technical Report, Institute for Signal and Information Processing, Mississippi: Mississippi State University."},{"key":"9453_CR35","unstructured":"Povey, D., & Ghoshal, A., et\u00a0al. (2011). The Kaldi Speech Recognition Toolkit. In Proceedings of IEEE ASRU."},{"key":"9453_CR36","doi-asserted-by":"crossref","unstructured":"Gopinath, R. (1998). Maximum likelihood modeling with Gaussian distributions for classification. In Proceedings IEEE ICASSP.","DOI":"10.1109\/ICASSP.1998.675351"},{"key":"9453_CR37","doi-asserted-by":"publisher","first-page":"272","DOI":"10.1109\/89.759034","volume":"7","author":"MJF Gales","year":"1999","unstructured":"Gales, M. J. F. (1999). Semi-tied covariance matrices for hidden Markov models. IEEE Transactions Speech and Audio Proceedings, 7, 272\u2013281.","journal-title":"IEEE Trans Speech and Audio Proc"},{"key":"9453_CR38","unstructured":"Hinton, G. (2010). A Practical Guide to Training Restricted Boltzmann Machines. https:\/\/www.cs.toronto.edu\/~hinton\/absps\/guideTR.pdf ."},{"key":"9453_CR39","doi-asserted-by":"crossref","unstructured":"Vesely, K., Karafiat, M., & Grezl, F. (2011). Convolutive Bottleneck Network features for LVCSR. In Proceedings of IEEE ASRU.","DOI":"10.1109\/ASRU.2011.6163903"}],"container-title":["International Journal of Speech Technology"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s10772-017-9453-x\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10772-017-9453-x.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10772-017-9453-x.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,8,2]],"date-time":"2022-08-02T04:09:50Z","timestamp":1659413390000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s10772-017-9453-x"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2017,9,4]]},"references-count":39,"journal-issue":{"issue":"4","published-print":{"date-parts":[[2017,12]]}},"alternative-id":["9453"],"URL":"https:\/\/doi.org\/10.1007\/s10772-017-9453-x","relation":{},"ISSN":["1381-2416","1572-8110"],"issn-type":[{"value":"1381-2416","type":"print"},{"value":"1572-8110","type":"electronic"}],"subject":[],"published":{"date-parts":[[2017,9,4]]}}}