{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,27]],"date-time":"2025-10-27T20:43:02Z","timestamp":1761597782148,"version":"3.37.3"},"reference-count":22,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2017,2,1]],"date-time":"2017-02-01T00:00:00Z","timestamp":1485907200000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"funder":[{"DOI":"10.13039\/501100003399","name":"Science and Technology Commission of Shanghai Municipality","doi-asserted-by":"publisher","award":["14YF1409300"],"award-info":[{"award-number":["14YF1409300"]}],"id":[{"id":"10.13039\/501100003399","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100003395","name":"Shanghai Municipal Education Commission","doi-asserted-by":"publisher","award":["ZZshsf14026"],"award-info":[{"award-number":["ZZshsf14026"]}],"id":[{"id":"10.13039\/501100003395","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int J Speech Technol"],"published-print":{"date-parts":[[2017,3]]},"DOI":"10.1007\/s10772-017-9399-z","type":"journal-article","created":{"date-parts":[[2017,2,1]],"date-time":"2017-02-01T15:37:04Z","timestamp":1485963424000},"page":"171-178","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":8,"title":["Domain adaptation of lattice-free MMI based TDNN models for speech recognition"],"prefix":"10.1007","volume":"20","author":[{"given":"Yanhua","family":"Long","sequence":"first","affiliation":[]},{"given":"Yijie","family":"Li","sequence":"additional","affiliation":[]},{"given":"Hone","family":"Ye","sequence":"additional","affiliation":[]},{"given":"Hongwei","family":"Mao","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2017,2,1]]},"reference":[{"key":"9399_CR1","doi-asserted-by":"crossref","unstructured":"Bell, P., Gales, M., Lanchantin, P., Liu, X., Long, Y., Renals, S., et al. (2012). Transcription of multi-genre media archives using out-of-domain data. In Proceedings of Workshop on Spoken Language Technology, IEEE (pp. 324\u2013329).","DOI":"10.1109\/SLT.2012.6424244"},{"key":"9399_CR2","doi-asserted-by":"crossref","unstructured":"Christensen, H., Aniol, M. B., Bell, P., Green, P., Hain, T., King, S., et al. (2013). Combining in-domain and out-of-domain speech data for automatic recognition of disordered speech. In Proceedings of Interspeech, ISCA (pp. 3642\u20133645).","DOI":"10.21437\/Interspeech.2013-324"},{"key":"9399_CR3","doi-asserted-by":"crossref","unstructured":"Fainberg, J., Bell, P., Lincoln, M., & Renals, S. (2016). Improving children\u2019s speech recognition through out-of-domain data augmentation. In Proceedings of Interspeech, ISCA (pp. 1598\u20131602).","DOI":"10.21437\/Interspeech.2016-1348"},{"key":"9399_CR4","doi-asserted-by":"crossref","unstructured":"Gauvain, J., & Lee, C. (1992). MAP estimation of continuous density HMM: Theory and applications. In Proceedings of Workshop on Speech and Natural Language, Association for Computational Linguistics (pp. 185\u2013190).","DOI":"10.3115\/1075527.1075568"},{"key":"9399_CR5","doi-asserted-by":"crossref","unstructured":"Huang, Y., Yu, D., Liu, C., & Gong, Y. (2014). Multi-accent deep neural network acoustic model with accent-specific top layer using the KLD-regularized model adaptation. In Proceedings of Interspeech, ISCA (pp. 2977\u20132981).","DOI":"10.21437\/Interspeech.2014-497"},{"key":"9399_CR6","doi-asserted-by":"crossref","unstructured":"Huang, Z., Tang, J., Xue, S., & Dai, L. (2016). Speaker adaptation of RNN-BLSTM for speech recognition based on speaker code. In Proceedings of ICASSP, IEEE (pp. 5305\u20135309).","DOI":"10.1109\/ICASSP.2016.7472690"},{"key":"9399_CR7","doi-asserted-by":"crossref","first-page":"171","DOI":"10.1006\/csla.1995.0010","volume":"9","author":"c Legetter","year":"1995","unstructured":"Legetter, c, & Woodland, P. (1995). Maximum likelihood linear regression for speaker adaptation of continuous density Hidden Markov models. Computer Speech and Language, 9, 171\u2013185.","journal-title":"Computer Speech and Language"},{"key":"9399_CR8","doi-asserted-by":"crossref","unstructured":"Mirsamadi, S., & Hansen, J. (2015). A study on deep neural network acoustic model adaptation for robust far-field speech recognition. In Proceedings of Interspeech, ISCA (pp. 2430\u20132434).","DOI":"10.21437\/Interspeech.2015-525"},{"key":"9399_CR9","unstructured":"Peddinti, V., Povey, D., & Khudanpur, S. (2015). A time delay neural network architecture for different modeling of long temporal contexts. In Proceedings of Interspeech, ISCA (pp. 3214\u20133218)."},{"key":"9399_CR10","unstructured":"Povey, D., (2005). Discriminative training for large vocabulary speech recognition. PhD dissertation, Cambridge University."},{"key":"9399_CR11","unstructured":"Povey, D., (2016). Kaldi code repository. Retrieved from https:\/\/github.com\/kaldi-asr\/kaldi ."},{"key":"9399_CR12","unstructured":"Povey, D., Ghoshal, A., Boulianne, G., Burget, L., Glembek, O., Goel, N., et. al. (2011). The Kaldi speech recognition toolkit. In Proceedings of ASRU, IEEE (pp. No. EPFL\u2013CONF\u2013192584)."},{"key":"9399_CR13","doi-asserted-by":"crossref","unstructured":"Povey, D., Peddinti, V., Galvez, D., Ghahrmani, P., Manohar, V., Na, X., et al. (2016). Purely sequence-trained neural networks for ASR based on lattice-free MMI. In Proceedings of Interspeech, ISCA (pp. 2751\u20132755).","DOI":"10.21437\/Interspeech.2016-595"},{"key":"9399_CR14","doi-asserted-by":"crossref","unstructured":"Qian, Y., Tan, T., Yu, D., & Zhang, Y. (2016). Integrated adaptation with multi-factor joint-learning for far-field speech recognition. In Proceedings of ICASSP, IEEE (pp. 5770\u20135774).","DOI":"10.1109\/ICASSP.2016.7472783"},{"key":"9399_CR15","doi-asserted-by":"crossref","unstructured":"Sak, H., Senior, A., Rao, K., & Beaufays, F. (2015). Fast and accurate recurrent neural network acoustic models for speech recognition. In Proceedings of Interspeech, ISCA (pp. 1468\u20131472).","DOI":"10.21437\/Interspeech.2015-350"},{"key":"9399_CR16","doi-asserted-by":"crossref","unstructured":"Saon, G., Soltau, H., Nahamoo, D., & Picheny, M. (2013). Speaker adaptation of neural network acoustic models using i-vectors. In Proceedings of ASRU, Olomouc (pp. 55\u201359).","DOI":"10.1109\/ASRU.2013.6707705"},{"key":"9399_CR17","doi-asserted-by":"crossref","unstructured":"Senior, A., & Lopez-Moreno, I. (2014). Improving DNN speaker independence with i-vector inputs. In Proceedings of ICASSP, IEEE (pp. 225\u2013229).","DOI":"10.1109\/ICASSP.2014.6853591"},{"key":"9399_CR18","unstructured":"Senior, A., Sak, H., de Chaumont Quitry, F., Sainath, T., & Rao, K. (2015). Acoustic modeling with CD-CTC-SMBR LSTM RNNs. In Proceedings of ASRU, IEEE (pp. 604\u2013609)."},{"key":"9399_CR19","doi-asserted-by":"crossref","unstructured":"Toth, L., & Gosztolya, G. (2016). Adaptation of DNN acoustic models using KL-divergence regularization and multi-task training, In Proceedings of SPECOM. (pp. 108\u2013115).","DOI":"10.1007\/978-3-319-43958-7_12"},{"key":"9399_CR20","doi-asserted-by":"crossref","first-page":"1713","DOI":"10.1109\/TASLP.2014.2346313","volume":"22","author":"S Xue","year":"2014","unstructured":"Xue, S., Abdel-Hamid, O., Jiang, H., Dai, L., & Liu, Q. (2014). Fast adaptation of deep neural network based on discriminant codes for speech recognition. IEEE\/ACM Transactions on Audio, Speech, and Language Processing, 22, 1713\u20131725.","journal-title":"IEEE\/ACM Transactions on Audio, Speech, and Language Processing"},{"key":"9399_CR21","volume-title":"Automatic speech recognition: A deep learning approach","author":"D Yu","year":"2014","unstructured":"Yu, D., & Deng, L. (2014). Automatic speech recognition: A deep learning approach (1st ed.). New York: Springer.","edition":"1"},{"key":"9399_CR22","doi-asserted-by":"crossref","unstructured":"Yu, D., Yao, K., Su, H., Li, G., & Seide, F. (2013). KL-divergence regularized deep neural network adaptation for improved large vocabulary speech recognition. In Proceedings of ICASSP, IEEE (pp. 7893\u20137897).","DOI":"10.1109\/ICASSP.2013.6639201"}],"container-title":["International Journal of Speech Technology"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10772-017-9399-z.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s10772-017-9399-z\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10772-017-9399-z.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,7,23]],"date-time":"2022-07-23T05:29:40Z","timestamp":1658554180000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s10772-017-9399-z"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2017,2,1]]},"references-count":22,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2017,3]]}},"alternative-id":["9399"],"URL":"https:\/\/doi.org\/10.1007\/s10772-017-9399-z","relation":{},"ISSN":["1381-2416","1572-8110"],"issn-type":[{"type":"print","value":"1381-2416"},{"type":"electronic","value":"1572-8110"}],"subject":[],"published":{"date-parts":[[2017,2,1]]}}}