{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,11]],"date-time":"2026-01-11T01:03:28Z","timestamp":1768093408279,"version":"3.49.0"},"reference-count":76,"publisher":"Springer Science and Business Media LLC","issue":"20","license":[{"start":{"date-parts":[[2018,3,27]],"date-time":"2018-03-27T00:00:00Z","timestamp":1522108800000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Multimed Tools Appl"],"published-print":{"date-parts":[[2018,10]]},"DOI":"10.1007\/s11042-018-5917-5","type":"journal-article","created":{"date-parts":[[2018,3,27]],"date-time":"2018-03-27T14:29:29Z","timestamp":1522160969000},"page":"27231-27267","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":21,"title":["Training deep neural networks with non-uniform frame-level cost function for automatic speech recognition"],"prefix":"10.1007","volume":"77","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-4274-4396","authenticated-orcid":false,"given":"Aldonso","family":"Becerra","sequence":"first","affiliation":[]},{"given":"J. Ismael","family":"de la Rosa","sequence":"additional","affiliation":[]},{"given":"Efr\u00e9n","family":"Gonz\u00e1lez","sequence":"additional","affiliation":[]},{"given":"A. David","family":"Pedroza","sequence":"additional","affiliation":[]},{"given":"N. Iracemi","family":"Escalante","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2018,3,27]]},"reference":[{"key":"5917_CR1","doi-asserted-by":"crossref","unstructured":"Ali A, Zhang Y, Cardinal P, Dahak N, Vogel S, Glass J (2014) A complete KALDI recipe for building Arabic speech recognition systems. In: Proceedings of spoken language technology (SLT), pp 525\u2013529","DOI":"10.1109\/SLT.2014.7078629"},{"key":"5917_CR2","doi-asserted-by":"crossref","unstructured":"Allauzen C, Riley M, Schalkwyk J, Skut W, Mohri M (2007) OpenFst: a general and efficient weighted finite-state transducer library. In: Proceedings of int. conf. on implementation and application of automata (CIAA), pp 11\u201323","DOI":"10.1007\/978-3-540-76336-9_3"},{"key":"5917_CR3","doi-asserted-by":"crossref","unstructured":"Bacchiani M, Senior A, Heigold G (2014) Asynchronous, Online, GMM-free training of a context dependent acoustic model for speech recognition. In: Proceedings of Interspeech, pp 1900\u20131904","DOI":"10.21437\/Interspeech.2014-430"},{"key":"5917_CR4","doi-asserted-by":"crossref","unstructured":"Becerra A, de la Rosa JI, Gonz\u00e1lez E (2016) A case study of speech recognition in Spanish: from conventional to deep approach. In: Proceedings of IEEE ANDESCON","DOI":"10.1109\/ANDESCON.2016.7836212"},{"issue":"12","key":"5917_CR5","doi-asserted-by":"publisher","first-page":"15875","DOI":"10.1007\/s11042-017-5160-5","volume":"77","author":"Aldonso Becerra","year":"2017","unstructured":"Becerra A, de la Rosa JI, Gonz\u00e1lez E (2017) Speech recognition in a dialog system: from conventional to deep processing. A case study applied to Spanish. Multimed Tools Appl. https:\/\/doi.org\/10.1007\/s11042-017-5160-5","journal-title":"Multimedia Tools and Applications"},{"issue":"1","key":"5917_CR6","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1561\/2200000006","volume":"2","author":"Y Bengio","year":"2009","unstructured":"Bengio Y (2009) Learning deep architectures for AI. Found Trends Mach Learn 2(1):1\u2013127. https:\/\/doi.org\/10.1561\/2200000006","journal-title":"Found Trends Mach Learn"},{"issue":"3","key":"5917_CR7","doi-asserted-by":"publisher","first-page":"869","DOI":"10.1093\/ietisy\/e89-d.3.869","volume":"E89-D","author":"J Bilmes","year":"2006","unstructured":"Bilmes J (2006) What HMMs can do. IEICE Trans Inf Syst E89-D(3):869\u2013891","journal-title":"IEICE Trans Inf Syst"},{"key":"5917_CR8","volume-title":"Pattern recognition and machine learning","author":"C Bishop","year":"2006","unstructured":"Bishop C (2006) Pattern recognition and machine learning. Springer, NY"},{"key":"5917_CR9","volume-title":"Connectionist speech recognition: a hybrid approach","author":"H Bourlard","year":"1993","unstructured":"Bourlard H, Morgan N (1993) Connectionist speech recognition: a hybrid approach. Kluwer Academic Publishers, Norwell"},{"issue":"3","key":"5917_CR10","doi-asserted-by":"publisher","first-page":"489","DOI":"10.1109\/TIT.1982.1056497","volume":"28","author":"J Burbea","year":"1982","unstructured":"Burbea J, Rao R (1982) On the convexity of some divergence measures based on entropy functions. IEEE Trans Inf Theory 28(3):489\u2013495","journal-title":"IEEE Trans Inf Theory"},{"key":"5917_CR11","doi-asserted-by":"crossref","unstructured":"Chen X, Eversole A, Li G, Yu D, Seide F (2012) Pipelined Back-Propagation for Context-Dependent deep neural networks. In: Proceedings of Interspeech","DOI":"10.21437\/Interspeech.2012-7"},{"issue":"1","key":"5917_CR12","doi-asserted-by":"publisher","first-page":"30","DOI":"10.1109\/TASL.2011.2134090","volume":"20","author":"GE Dahl","year":"2012","unstructured":"Dahl GE, Yu D, Deng L, Acero A (2012) Context-dependent pre-trained deep neural networks for large vocabulary speech recognition. IEEE Trans Audio Speech Lang Process 20(1):30\u201342","journal-title":"IEEE Trans Audio Speech Lang Process"},{"key":"5917_CR13","doi-asserted-by":"publisher","unstructured":"Deng L (2014) A tutorial survey of architectures, algorithms, and applications for deep learning. APSIPA Trans Signal Info Process 3(e2). https:\/\/doi.org\/10.1017\/atsip.2013.9","DOI":"10.1017\/atsip.2013.9"},{"issue":"5","key":"5917_CR14","doi-asserted-by":"publisher","first-page":"1060","DOI":"10.1109\/TASL.2013.2244083","volume":"21","author":"L Deng","year":"2013","unstructured":"Deng L, Li X (2013) Machine learning paradigms for speech recognition: an overview. IEEE Trans Audio Speech Lang Process 21(5):1060\u20131089","journal-title":"IEEE Trans Audio Speech Lang Process"},{"issue":"7","key":"5917_CR15","doi-asserted-by":"publisher","first-page":"1677","DOI":"10.1109\/78.134406","volume":"39","author":"L Deng","year":"1991","unstructured":"Deng L, Kenny P, Lennig M, Gupta V, Seitz F, Mermelstein P (1991) Phonemic hidden markov models with continuous mixture output densities for large vocabulary word recognition. IEEE Trans Signal Process 39(7):1677\u20131681","journal-title":"IEEE Trans Signal Process"},{"key":"5917_CR16","volume-title":"Pattern Classification","author":"R Duda","year":"2001","unstructured":"Duda R, Hart P, Stork D (2001) Pattern Classification. Wiley, NY"},{"issue":"3","key":"5917_CR17","doi-asserted-by":"publisher","first-page":"195","DOI":"10.1561\/2000000004","volume":"1","author":"MJF Gales","year":"2007","unstructured":"Gales MJF, Young SJ (2007) The application of hidden Markov models in speech recognition. Found Trends Signal Process 1(3):195\u2013304","journal-title":"Found Trends Signal Process"},{"issue":"2","key":"5917_CR18","doi-asserted-by":"publisher","first-page":"291","DOI":"10.1109\/89.279278","volume":"2","author":"J Gauvain","year":"1994","unstructured":"Gauvain J, Lee C h (1994) Maximum a posteriori estimation for multivariate gaussian mixture observations of markov chains. IEEE Trans Speech Audio Process 2 (2):291\u2013298","journal-title":"IEEE Trans Speech Audio Process"},{"key":"5917_CR19","doi-asserted-by":"crossref","unstructured":"Ge Z, Iyer AN, Cheluvaraja S, Sundaram R, Ganapathiraju A (2017) Neural network based speaker classification and verification systems with enhanced features. In: Proceedings of intelligent systems conference","DOI":"10.1109\/IntelliSys.2017.8324265"},{"key":"5917_CR20","volume-title":"Neural network design","author":"MT Hagan","year":"2014","unstructured":"Hagan MT, Demuth HB, Beale MH, De Jes\u00fas O (2014) Neural network design. CreateSpace, US"},{"issue":"12","key":"5917_CR21","doi-asserted-by":"publisher","first-page":"2616","DOI":"10.1109\/TASL.2013.2280234","volume":"21","author":"G Heigold","year":"2013","unstructured":"Heigold G, Ney H, Schl\u00fcter R (2013) Investigations on an EM-style optimization algorithm for discriminative training of HMMs. IEEE Trans Audio Speech Lang Process 21(12):2616\u20132626","journal-title":"IEEE Trans Audio Speech Lang Process"},{"issue":"7","key":"5917_CR22","doi-asserted-by":"publisher","first-page":"1527","DOI":"10.1162\/neco.2006.18.7.1527","volume":"18","author":"G Hinton","year":"2006","unstructured":"Hinton G, Osindero S, Teh Y (2006) A fast learning algorithm for deep belief nets. Neural Comput 18(7):1527\u20131554","journal-title":"Neural Comput"},{"issue":"6","key":"5917_CR23","doi-asserted-by":"publisher","first-page":"82","DOI":"10.1109\/MSP.2012.2205597","volume":"29","author":"G Hinton","year":"2012","unstructured":"Hinton G, Deng L, Yu D, Dahl GE, Mohamed A, Jaitly N, Senior A, Vanhoucke V, Nguyen P, Sainath TN, Kingsbury B (2012) Deep neural networks for acoustic modeling in speech recognition. IEEE Signal Proc Mag 29(6):82\u201397","journal-title":"IEEE Signal Proc Mag"},{"key":"5917_CR24","doi-asserted-by":"crossref","unstructured":"Huang Z, Li J, Weng Ch, Lee Ch (2014) Beyond cross-entropy: towards better frame-level objective functions for deep neural network training in automatic speech recognition. In: Proceedings of Interspeech, pp 1214\u20131218","DOI":"10.21437\/Interspeech.2014-306"},{"issue":"2","key":"5917_CR25","doi-asserted-by":"publisher","first-page":"307","DOI":"10.1109\/TIT.1986.1057145","volume":"IT-32","author":"BH Juang","year":"1986","unstructured":"Juang BH, Levinson SE, Sondhi M (1986) Maximum likelihood estimation for multivariate mixture observations of markov chains. IEEE Trans Inf Theory IT-32(2):307\u2013309","journal-title":"IEEE Trans Inf Theory"},{"key":"5917_CR26","volume-title":"Speech and language processing: an introduction to natural language processing, computational linguistics and speech recognition","author":"D Jurafsky","year":"2008","unstructured":"Jurafsky D, Martin J (2008) Speech and language processing: an introduction to natural language processing, computational linguistics and speech recognition. Pearson, NJ"},{"key":"5917_CR27","doi-asserted-by":"crossref","unstructured":"Kingsbury B, Sainath TN, Soltau H (2012) Scalable minimum Bayes risk training of deep neural network acoustic models using distributed Hessian-free optimization. In: Proceedings of InterSpeech","DOI":"10.21437\/Interspeech.2012-3"},{"issue":"1","key":"5917_CR28","doi-asserted-by":"publisher","first-page":"40","DOI":"10.1214\/14-STS430","volume":"30","author":"F Lad","year":"2015","unstructured":"Lad F, Sanfilippo G, Agr\u00f3 G (2015) Extropy: complementary dual of entropy. Stat Sci 30(1):40\u201358","journal-title":"Stat Sci"},{"key":"5917_CR29","doi-asserted-by":"publisher","first-page":"436","DOI":"10.1038\/nature14539","volume":"521","author":"Y LeCun","year":"2015","unstructured":"LeCun Y, Bengio Y, Hinton G (2015) Deep learning. Nature 521:436\u2013444","journal-title":"Nature"},{"key":"5917_CR30","doi-asserted-by":"publisher","unstructured":"Liao Y, Lee H, Lee L (2015) Towards structured deep neural network for automatic speech recognition. In: Proceedings of ASRU, https:\/\/doi.org\/10.1109\/ASRU.2015.7404786","DOI":"10.1109\/ASRU.2015.7404786"},{"key":"5917_CR31","doi-asserted-by":"crossref","unstructured":"Li X, Wu X (2014) Labeling unsegmented sequence data with DNN-HMM and its application for speech recognition. In: Proceedings of int. symp. on chinese spoken language processing (ISCSLP)","DOI":"10.1109\/ISCSLP.2014.6936622"},{"key":"5917_CR32","doi-asserted-by":"crossref","unstructured":"Li X, Hong C, Yang Y, Wu X (2013) Deep neural networks for syllable based acoustic modeling in Chinese speech recognition. In: Proceedings of signal and information processing association annu. summit and conf. (APSIPA)","DOI":"10.1109\/APSIPA.2013.6694176"},{"key":"5917_CR33","doi-asserted-by":"publisher","first-page":"251","DOI":"10.1016\/j.neucom.2014.07.087","volume":"170","author":"X Li","year":"2015","unstructured":"Li X, Yang Y, Pang Z, Wu X (2015) A comparative study on selecting acoustic modeling units in deep neural networks based large vocabulary Chinese speech recognition. Neurocomputing 170:251\u2013256","journal-title":"Neurocomputing"},{"key":"5917_CR34","volume-title":"Mixture models","author":"G McLachlan","year":"1988","unstructured":"McLachlan G (1988) Mixture models. Marcel Dekker, New York"},{"key":"5917_CR35","volume-title":"Elements of artificial neural networks","author":"k Mehrotra","year":"1997","unstructured":"Mehrotra k, Mohan Ch, Ranka S (1997) Elements of artificial neural networks. MIT Press, Cambridge"},{"key":"5917_CR36","doi-asserted-by":"crossref","unstructured":"Miao Y, Metze F, Improving low-resource CD-DNN-HMM using dropout and multilingual DNN training (2013). In: Proceedings of InterSpeech, pp 2237\u20132241","DOI":"10.21437\/Interspeech.2013-526"},{"issue":"1","key":"5917_CR37","doi-asserted-by":"publisher","first-page":"14","DOI":"10.1109\/TASL.2011.2109382","volume":"20","author":"A Mohamed","year":"2012","unstructured":"Mohamed A, Dahl GE, Hinton G (2012) Acoustic modeling using deep belief networks. IEEE Trans Audio Speech Lang Process 20(1):14\u201322","journal-title":"IEEE Trans Audio Speech Lang Process"},{"issue":"3","key":"5917_CR38","doi-asserted-by":"publisher","first-page":"25","DOI":"10.1109\/79.382443","volume":"12","author":"N Morgan","year":"1995","unstructured":"Morgan N, Bourlard H (1995) An introduction to hybrid HMM\/connectionist continuous speech recognition. IEEE Signal Proc Mag 12(3):25\u201342","journal-title":"IEEE Signal Proc Mag"},{"key":"5917_CR39","doi-asserted-by":"crossref","unstructured":"Pan J, Liu C, Wang Z, Hu Y, Jiang H (2012) Investigation of Deep Neural Networks (DNN) for large vocabulary continuous speech recognition: Why DNN surpass GMMs in acoustic modeling. In: Proceedings of int. symp. on chinese spoken language processing (ISCSLP), pp 301\u2013305","DOI":"10.1109\/ISCSLP.2012.6423452"},{"key":"5917_CR40","unstructured":"Povey S, Ghoshal A, Boulianne G, Burget L, Glembek O, Goel N, Hannemann M, Motlicek P, Qian Y, Schwarz P, Silovsky J, Stemmer G, Vesely K (2011) The Kaldi speech recognition toolkit. In: Proceedings of automatic speech recognition and understanding workshop (ASRU)"},{"key":"5917_CR41","doi-asserted-by":"publisher","first-page":"49","DOI":"10.1007\/978-94-009-6357-3_5","volume-title":"Multivariate Statistical Methods in Physical Anthropology","author":"C. Radhakrishna Rao","year":"1984","unstructured":"Rao R (1984) Use of diversity and distance measures in the analysis of qualitative data. In: Van Vark GN, Howells WW (eds) Multivariate statistical methods in physical anthropology. D. Reidel Publishing Company, Dordrecht, pp 49\u201367"},{"issue":"2","key":"5917_CR42","doi-asserted-by":"publisher","first-page":"257","DOI":"10.1109\/5.18626","volume":"77","author":"L Rabiner","year":"1989","unstructured":"Rabiner L (1989) A tutorial on hidden Markov models and selected applications in speech recognition. Proceedings of IEEE 77(2):257\u2013286","journal-title":"Proceedings of IEEE"},{"key":"5917_CR43","volume-title":"Fundamentals of speech recognition","author":"L Rabiner","year":"1993","unstructured":"Rabiner L, Juang B (1993) Fundamentals of speech recognition. Prentice-Hall, New Jersey"},{"key":"5917_CR44","doi-asserted-by":"crossref","unstructured":"Rath S, Povey D, Vesel K, Cernock J (2013) Improved feature processing for deep neural networks. In: Proceedings of Interspeech, pp 109\u2013113","DOI":"10.21437\/Interspeech.2013-48"},{"key":"5917_CR45","doi-asserted-by":"crossref","unstructured":"Ray J, Thompson B, Shen W (2014) Comparing a high and low-level deep neural network implementation for automatic speech recognition. In: Proceedings of workshop for high performance technical computing in dynamic languages (HPTCDL), pp 41\u201346","DOI":"10.1109\/HPTCDL.2014.12"},{"issue":"1","key":"5917_CR46","doi-asserted-by":"publisher","first-page":"19","DOI":"10.1006\/dspr.1999.0361","volume":"10","author":"DA Reynolds","year":"2000","unstructured":"Reynolds DA, Quatieri TF, Dunn TRB (2000) Speaker verification using adapted gaussian mixture models. Digital Signal Process 10(1):19\u201341","journal-title":"Digital Signal Process"},{"key":"5917_CR47","doi-asserted-by":"crossref","unstructured":"Sainath TN, Kingsbury B, Ramabhadran B, Fousek P, Novak P, Mohamed A (2011) Making deep belief networks effective for large vocabulary continuous speech recognition. In: Proceedings of automatic speech recognition and understanding workshop (ASRU)","DOI":"10.1109\/ASRU.2011.6163900"},{"issue":"11","key":"5917_CR48","doi-asserted-by":"publisher","first-page":"2267","DOI":"10.1109\/TASL.2013.2284378","volume":"21","author":"TN Sainath","year":"2013","unstructured":"Sainath TN, Kingsbury B, Soltau H, Ramabhadran B (2013) Optimization techniques to improve training speed of deep neural networks for large speech tasks. IEEE Trans Audio Speech Lang Process 21(11):2267\u20132276","journal-title":"IEEE Trans Audio Speech Lang Process"},{"key":"5917_CR49","unstructured":"Scowen R (1993) Extended bnf - generic base standards. In: Proceedings of software engineering standards symp., pp 25\u201334"},{"key":"5917_CR50","doi-asserted-by":"crossref","unstructured":"Seide F, Li G, Chen X, Yu D (2011) Feature engineering in context-dependent deep neural networks for conversational speech transcription. In: Proceedings of automatic speech recognition and understanding workshop (ASRU), pp 24\u201329","DOI":"10.1109\/ASRU.2011.6163899"},{"key":"5917_CR51","doi-asserted-by":"crossref","unstructured":"Seide F, Li G, Yu D (2011) Conversational speech transcription using context-dependent deep neural networks. In: Proceedings of Interspeech, pp 437\u2013440","DOI":"10.21437\/Interspeech.2011-169"},{"key":"5917_CR52","doi-asserted-by":"crossref","unstructured":"Seki H, Yamamoto K, Nakagawa S (2014) Comparison of syllable-based and phoneme-based DNN-HMM in Japanese speech recognition. In: Proceedings of int. conf. of advanced informatics concept, theory and application (ICAICTA), pp 249\u2013254","DOI":"10.1109\/ICAICTA.2014.7005949"},{"key":"5917_CR53","doi-asserted-by":"crossref","unstructured":"Seltzer ML, Yu D, Wang Y (2013) An investigation of deep neural networks for noise robust speech recognition. In: Proceedings of ICASSP, pp 7398\u20137402","DOI":"10.1109\/ICASSP.2013.6639100"},{"key":"5917_CR54","unstructured":"Senior A, Heigold G, Bacchiani M, Liao H (2014) GMM-free DNN training. In: Proceedings of ICASSP, pp 5639\u20135643"},{"key":"5917_CR55","doi-asserted-by":"publisher","first-page":"326","DOI":"10.1016\/j.neucom.2014.03.005","volume":"140","author":"SM Siniscalchi","year":"2014","unstructured":"Siniscalchi SM, Svendsen T, Lee Ch (2014) An artificial neural network approach to automatic speech processing. Neurocomputing 140:326\u2013338","journal-title":"Neurocomputing"},{"key":"5917_CR56","doi-asserted-by":"crossref","unstructured":"Su H, Li G, Yu D, Seide F (2013) Error back propagation for sequence training of context-dependent deep networks for conversational speech transcription. In: Proceeedings of ICASSP, pp 6664\u20136668","DOI":"10.1109\/ICASSP.2013.6638951"},{"issue":"6","key":"5917_CR57","doi-asserted-by":"publisher","first-page":"1122","DOI":"10.1109\/TNNLS.2015.2461554","volume":"27","author":"D Tao","year":"2016","unstructured":"Tao D, Cheng Y, Song M, Lin X (2016) Manifold Ranking-Based matrix factorization for saliency detection. IEEE Trans Neural Netw Learn Syst 27(6):1122\u20131134","journal-title":"IEEE Trans Neural Netw Learn Syst"},{"issue":"3","key":"5917_CR58","doi-asserted-by":"publisher","first-page":"756","DOI":"10.1109\/TCYB.2015.2414920","volume":"46","author":"D Tao","year":"2016","unstructured":"Tao D, Lin X, Jin L, Li X (2016) Principal component 2-D long short-term memory for font recognition on single chinese characters. IEEE Trans Cybern 46(3):756\u2013765","journal-title":"IEEE Trans Cybern"},{"issue":"6","key":"5917_CR59","doi-asserted-by":"publisher","first-page":"2726","DOI":"10.1109\/TIP.2016.2553446","volume":"25","author":"D Tao","year":"2016","unstructured":"Tao D, Guo Y, Song M, Li Y, Yu Z, Tang Y (2016) Person Re-identification by dual-regularized KISS metric learning. IEEE Trans Image Process 25(6):2726\u20132738","journal-title":"IEEE Trans Image Process"},{"issue":"1-4","key":"5917_CR60","doi-asserted-by":"publisher","first-page":"91","DOI":"10.1016\/S0925-2312(00)00308-8","volume":"37","author":"E Trentin","year":"2001","unstructured":"Trentin E, Gori M (2001) A survey of hybrid ANN\/HMM models for automatic speech recognition. Neurocomputing 37(1-4):91\u2013126","journal-title":"Neurocomputing"},{"key":"5917_CR61","doi-asserted-by":"crossref","unstructured":"Vesely K, Ghoshal A, Burget L, Povey D (2013) Sequence-discriminative training of deep neural networks. In: Proceedings of Interspeech, pp 2345\u20132349","DOI":"10.21437\/Interspeech.2013-548"},{"key":"5917_CR62","doi-asserted-by":"crossref","unstructured":"Vesely K, Hannemann M, Burget L (2013) Semi-supervised training of deep neural networks. In: Proceedings of automatic speech recognition and understanding workshop (ASRU), pp 267\u2013272","DOI":"10.1109\/ASRU.2013.6707741"},{"key":"5917_CR63","doi-asserted-by":"crossref","unstructured":"Vincent P, Larochelle H, Bengio Y, Manzagol P (2008) Extracting and composing robust features with denoising autoencoders. In: Proceedings of int. conf. on machine learning (ICML), pp 1096\u20131103","DOI":"10.1145\/1390156.1390294"},{"key":"5917_CR64","doi-asserted-by":"publisher","unstructured":"Wei W, van Vuuren S (1998) Improved neural network training of inter-word context. In: Proceedings of ICASSP. https:\/\/doi.org\/10.1109\/ICASSP.1998.674476 , pp 1520\u20136149","DOI":"10.1109\/ICASSP.1998.674476"},{"key":"5917_CR65","doi-asserted-by":"crossref","unstructured":"Wiesler S, Golik P, Schluter R, Ney H (2015) Investigations on sequence training of neural networks. In: Proceedings of ICASSP, pp 4565\u20134569","DOI":"10.1109\/ICASSP.2015.7178835"},{"issue":"12","key":"5917_CR66","doi-asserted-by":"publisher","first-page":"1713","DOI":"10.1109\/TASLP.2014.2346313","volume":"22","author":"S Xue","year":"2014","unstructured":"Xue S, Abdel-Hamid O, Jiang H, Dai L, Liu Q (2014) Fast adaptation of deep neural network based on discriminant codes for speech recognition. IEEE Trans Audio Speech Lang Process 22(12):1713\u20131725","journal-title":"IEEE Trans Audio Speech Lang Process"},{"key":"5917_CR67","doi-asserted-by":"publisher","first-page":"68","DOI":"10.1007\/978-3-319-10581-9_9","volume":"8679","author":"Z Yang","year":"2014","unstructured":"Yang Z, Zhong A, Carass A, Ying SH, Prince JL (2014) Deep learning for cerebellar ataxia classification and functional score regression. Lect Notes Comput Sci 8679:68\u201376","journal-title":"Lect Notes Comput Sci"},{"key":"5917_CR68","doi-asserted-by":"crossref","unstructured":"Yao K, You D, Seide F, Su H, Deng L, Gong Y (2012) Adaptation of context-dependent deep neural networks for automatic speech recognition. In: Proceedings of spoken language technology (SLT), pp 366\u2013369","DOI":"10.1109\/SLT.2012.6424251"},{"key":"5917_CR69","volume-title":"The HTK Book (for version 3.4)","author":"S Young","year":"2006","unstructured":"Young S, Evermann G, Gales M, Hain T, Kershaw D, Liu X, Moore G, Odell J, Ollason D, Povey D, Valtchev V, Woodland P (2006) The HTK Book (for version 3.4). Cambridge University Engineering Department, Cambridge"},{"key":"5917_CR70","unstructured":"Yu D, Deng L, Dahl GE (2010) Roles of pretraining and fine-tuning in context-dependent DNN-HMMs for real-world speech recognition. In: Proceedings of NIPS workshop on deep learning and unsupervised feature learning"},{"key":"5917_CR71","doi-asserted-by":"crossref","DOI":"10.1007\/978-1-4471-5779-3","volume-title":"Automatic speech recognition: a deep learning approach","author":"D Yu","year":"2015","unstructured":"Yu D, Deng L (2015) Automatic speech recognition: a deep learning approach. Springer, London"},{"key":"5917_CR72","doi-asserted-by":"crossref","unstructured":"Yu D, Seide G, Li G, Deng L (2012) Exploiting sparseness in deep neural networks for large vocabulary speech recognition. In: Proceedings of ICASSP, pp 4409\u20134412","DOI":"10.1109\/ICASSP.2012.6288897"},{"key":"5917_CR73","doi-asserted-by":"crossref","unstructured":"Zhang T (2004) Solving large scale linear prediction problems using stochastic gradient descent algorithms. In: Proceedings of int. conf. on machine learning (ICML), pp 919\u2013926","DOI":"10.1145\/1015330.1015332"},{"key":"5917_CR74","doi-asserted-by":"crossref","unstructured":"Zhang C, Woodland PC (2014) Standalone training of context-dependent deep neural network acoustic models. In: Proceedings of ICASSP, pp 5597\u20135601","DOI":"10.1109\/ICASSP.2014.6854674"},{"key":"5917_CR75","doi-asserted-by":"crossref","unstructured":"Zhao R, Li J, Gong Y (2014) Variable-component deep neural network for robust speech recognition. In: Proceedings of Interspeech","DOI":"10.1109\/SLT.2014.7078632"},{"issue":"4","key":"5917_CR76","doi-asserted-by":"publisher","first-page":"631","DOI":"10.1109\/TASLP.2015.2392944","volume":"23","author":"P Zhou","year":"2015","unstructured":"Zhou P, Jiang H, Dai L, Hu Y, Liu Q (2015) State-clustering based multiple deep neural networks modeling approach for speech recognition. IEEE Trans Audio Speech Lang Process 23(4):631\u2013642","journal-title":"IEEE Trans Audio Speech Lang Process"}],"container-title":["Multimedia Tools and Applications"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s11042-018-5917-5\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11042-018-5917-5.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11042-018-5917-5.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,9,1]],"date-time":"2023-09-01T20:23:02Z","timestamp":1693599782000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s11042-018-5917-5"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018,3,27]]},"references-count":76,"journal-issue":{"issue":"20","published-print":{"date-parts":[[2018,10]]}},"alternative-id":["5917"],"URL":"https:\/\/doi.org\/10.1007\/s11042-018-5917-5","relation":{},"ISSN":["1380-7501","1573-7721"],"issn-type":[{"value":"1380-7501","type":"print"},{"value":"1573-7721","type":"electronic"}],"subject":[],"published":{"date-parts":[[2018,3,27]]},"assertion":[{"value":"31 July 2017","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"22 February 2018","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"18 March 2018","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"27 March 2018","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Compliance with Ethical Standards"}},{"value":"The authors declare that they have no conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interests"}}]}}