{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,29]],"date-time":"2026-04-29T03:48:22Z","timestamp":1777434502433,"version":"3.51.4"},"reference-count":56,"publisher":"Springer Science and Business Media LLC","issue":"S1","license":[{"start":{"date-parts":[[2016,6,22]],"date-time":"2016-06-22T00:00:00Z","timestamp":1466553600000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Neural Comput &amp; Applic"],"published-print":{"date-parts":[[2017,12]]},"DOI":"10.1007\/s00521-016-2438-x","type":"journal-article","created":{"date-parts":[[2016,6,23]],"date-time":"2016-06-23T10:54:47Z","timestamp":1466679287000},"page":"1177-1196","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":12,"title":["Toward growing modular deep neural networks for continuous speech recognition"],"prefix":"10.1007","volume":"28","author":[{"given":"Zohreh","family":"Ansari","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Seyyed Ali","family":"Seyyedsalehi","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2016,6,22]]},"reference":[{"issue":"3","key":"2438_CR1","doi-asserted-by":"crossref","first-page":"75","DOI":"10.1109\/MSP.2009.932166","volume":"26","author":"J Baker","year":"2009","unstructured":"Baker J, Deng L, Glass J, Khudanpur S, Lee C, Morgan N et al (2009) Research developments and directions in speech recognition and understanding, Part 1. IEEE Signal Process Mag 26(3):75\u201380","journal-title":"IEEE Signal Process Mag"},{"issue":"4","key":"2438_CR2","doi-asserted-by":"crossref","first-page":"78","DOI":"10.1109\/MSP.2009.932707","volume":"26","author":"J Baker","year":"2009","unstructured":"Baker J, Deng L, Khudanpur S, Lee C, Glass J, Morgan N et al (2009) Updated MINDS report on speech recognition and understanding, Part 2. IEEE Signal Process Mag 26(4):78\u201385","journal-title":"IEEE Signal Process Mag"},{"issue":"1","key":"2438_CR3","doi-asserted-by":"crossref","first-page":"30","DOI":"10.1109\/TASL.2011.2134090","volume":"20","author":"G Dahl","year":"2012","unstructured":"Dahl G, Yu D, Deng L, Acero A (2012) Context-dependent pre-trained deep neural networks for large vocabulary speech recognition. IEEE Trans Audio Speech Lang Process 20(1):30\u201342","journal-title":"IEEE Trans Audio Speech Lang Process"},{"key":"2438_CR4","doi-asserted-by":"crossref","unstructured":"Kapadia S, Valtchev V, Young S J (1993) MMI training for continuous phoneme recognition on the TIMIT database. In: Proceedings of ICASSP","DOI":"10.1109\/ICASSP.1993.319349"},{"issue":"3","key":"2438_CR5","doi-asserted-by":"crossref","first-page":"257","DOI":"10.1109\/89.568732","volume":"5","author":"B-H Juang","year":"1997","unstructured":"Juang B-H, Hou W, Lee C-H (1997) Minimum classification error rate methods for speech recognition. IEEE Trans Audio Speech Lang Process 5(3):257\u2013265","journal-title":"IEEE Trans Audio Speech Lang Process"},{"issue":"1","key":"2438_CR6","doi-asserted-by":"crossref","first-page":"203","DOI":"10.1109\/TASL.2006.876778","volume":"15","author":"E McDermott","year":"2007","unstructured":"McDermott E, Hazen TJ, Le Roux J, Nakamura A, Katagiri S (2007) Discriminative training for large-vocabulary speech recognition using minimum classification error. IEEE Trans Audio Speech Lang Process 15(1):203\u2013223","journal-title":"IEEE Trans Audio Speech Lang Process"},{"issue":"3","key":"2438_CR7","doi-asserted-by":"crossref","first-page":"328","DOI":"10.1109\/29.21701","volume":"37","author":"A Waibel","year":"1989","unstructured":"Waibel A, Hanazawa T, Hinton G, Shikano K, Lang KJ (1989) Phoneme recognition using time-delay neural networks. IEEE Trans Audio Speech Lang Process 37(3):328\u2013339","journal-title":"IEEE Trans Audio Speech Lang Process"},{"key":"2438_CR8","doi-asserted-by":"crossref","unstructured":"Morgan N, Bourlard H (1990) Continuous speech recognition using multilayer perceptrons with hidden Markov models. In: Proceedings of ICASSP, pp 413\u2013416","DOI":"10.1109\/ICASSP.1990.115720"},{"key":"2438_CR9","unstructured":"Seyyedsalehi SA (1995) Continuous Persian speech recognition using functional model of human brain in speech perception. PhD dissertation. Technical Faculty. Tarbiyat Modarres University (in Persian)"},{"issue":"6","key":"2438_CR10","doi-asserted-by":"crossref","first-page":"893","DOI":"10.1109\/72.286885","volume":"4","author":"H Bourlard","year":"1993","unstructured":"Bourlard H, Morgan N (1993) Continuous speech recognition by connectionist statistical methods. IEEE Trans Neural Netw 4(6):893\u2013909","journal-title":"IEEE Trans Neural Netw"},{"key":"2438_CR11","unstructured":"Bourlard HA, Morgan N (1994) Connectionist speech recognition: a hybrid approach. Springer 247"},{"issue":"1","key":"2438_CR12","doi-asserted-by":"crossref","first-page":"91","DOI":"10.1016\/S0925-2312(00)00308-8","volume":"37","author":"E Trentin","year":"2001","unstructured":"Trentin E, Gori M (2001) A survey of hybrid ANN\/HMM models for automatic speech recognition. Neurocomputing 37(1):91\u2013126","journal-title":"Neurocomputing"},{"key":"2438_CR13","doi-asserted-by":"crossref","unstructured":"Seide F, Li G, Yu D (2011) Conversational speech transcription using context-dependent deep neural networks. In: Proceedings of INTERSPEECH, pp 437\u2013440","DOI":"10.21437\/Interspeech.2011-169"},{"key":"2438_CR14","doi-asserted-by":"crossref","unstructured":"Jaitly N, Nguyen, P, Senior A.W, Vanhoucke V (2012) Application of pretrained deep neural networks to large vocabulary speech recognition. In: INTERSPEECH","DOI":"10.21437\/Interspeech.2012-10"},{"issue":"1","key":"2438_CR15","doi-asserted-by":"crossref","first-page":"14","DOI":"10.1109\/TASL.2011.2109382","volume":"20","author":"AR Mohamed","year":"2012","unstructured":"Mohamed AR, Dahl GE, Hinton G (2012) Acoustic modeling using deep belief networks. IEEE Trans Audio Speech Lang Process 20(1):14\u201322","journal-title":"IEEE Trans Audio Speech Lang Process"},{"issue":"2","key":"2438_CR16","doi-asserted-by":"crossref","first-page":"388","DOI":"10.1109\/TASL.2012.2227738","volume":"21","author":"D Yu","year":"2013","unstructured":"Yu D, Deng L, Seide F (2013) The deep tensor neural network with applications to large vocabulary speech recognition. IEEE Trans Audio Speech Lang Process 21(2):388\u2013396","journal-title":"IEEE Trans Audio Speech Lang Process"},{"issue":"6","key":"2438_CR17","doi-asserted-by":"crossref","first-page":"82","DOI":"10.1109\/MSP.2012.2205597","volume":"29","author":"G Hinton","year":"2012","unstructured":"Hinton G, Deng L, Yu D, Dahl GE, Mohamed AR, Jaitly N et al (2012) Deep neural networks for acoustic modeling in speech recognition: the shared views of four research groups. IEEE Signal Process Mag 29(6):82\u201397","journal-title":"IEEE Signal Process Mag"},{"key":"2438_CR18","doi-asserted-by":"crossref","unstructured":"Deng L, Yu D, Platt J (2012) Scalable stacking and learning for building deep architectures. In: Proceedings of ICASSP, pp 2133\u20132136","DOI":"10.1109\/ICASSP.2012.6288333"},{"key":"2438_CR19","doi-asserted-by":"crossref","unstructured":"Abdel-Hamid O, Mohamed A.R, Jiang H, Penn G (2012) Applying convolutional neural networks concepts to hybrid NN-HMM model for speech recognition. In: Proceedings of ICASSP, pp 4277\u20134280","DOI":"10.1109\/ICASSP.2012.6288864"},{"key":"2438_CR20","doi-asserted-by":"crossref","unstructured":"Andrew G, Bilmes J (2012) Sequential deep belief networks. In: Proceedings of ICASSP, pp 4265\u20134268","DOI":"10.1109\/ICASSP.2012.6288861"},{"issue":"7","key":"2438_CR21","doi-asserted-by":"crossref","first-page":"1527","DOI":"10.1162\/neco.2006.18.7.1527","volume":"18","author":"G Hinton","year":"2006","unstructured":"Hinton G, Osindero S, Teh Y (2006) A fast learning algorithm for deep belief nets. Neural Comput 18(7):1527\u20131554","journal-title":"Neural Comput"},{"key":"2438_CR22","doi-asserted-by":"crossref","unstructured":"Bengio Y, Lamblin P, Popovici D, Larochelle H (2007) Greedy layer-wise training of deep networks. In: Proceedings of NIPS, pp 153\u2013160","DOI":"10.7551\/mitpress\/7503.003.0024"},{"key":"2438_CR23","doi-asserted-by":"crossref","unstructured":"Abdel-Hamid O, Deng L, Yu D (2013) Exploring convolutional neural network structures and optimization techniques for speech recogntiion. In: Proceedings of INTERSPEECH","DOI":"10.21437\/Interspeech.2013-744"},{"issue":"11","key":"2438_CR24","doi-asserted-by":"crossref","first-page":"2267","DOI":"10.1109\/TASL.2013.2284378","volume":"21","author":"T Sainath","year":"2013","unstructured":"Sainath T, Kingsbury B, Soltau H, Ramabhadran B (2013) Optimization techniques to improve training speed of deep neural networks for large speech tasks. IEEE Trans Audio Speech Lang Process 21(11):2267\u20132276","journal-title":"IEEE Trans Audio Speech Lang Process"},{"key":"2438_CR25","doi-asserted-by":"crossref","unstructured":"Mohamed AR, Yu D, DengL (2010) Investigation of full-sequence training of deep belief networks for speech recognition. In: INTERSPEECH, pp 2846\u20132849","DOI":"10.21437\/Interspeech.2010-304"},{"issue":"5","key":"2438_CR26","doi-asserted-by":"crossref","first-page":"14","DOI":"10.1109\/MSP.2008.926652","volume":"25","author":"X He","year":"2008","unstructured":"He X, Deng L, Chou W (2008) Discriminative learning in sequential pattern recognition. IEEE Signal Process Mag 25(5):14\u201336","journal-title":"IEEE Signal Process Mag"},{"key":"2438_CR27","doi-asserted-by":"crossref","unstructured":"Kingsbury B (2009) Lattice-based optimization of sequence classification criteria for neural-network acoustic modeling. In: Proceedings of ICASSP, pp 3761\u20133764","DOI":"10.1109\/ICASSP.2009.4960445"},{"key":"2438_CR28","doi-asserted-by":"crossref","unstructured":"Prabhavalkar R, Fosler-Lussier E (2010, March) Backpropagation training for multilayer conditional random field based phone recognition. In: Proceedings of ICASSP, pp 5534\u20135537","DOI":"10.1109\/ICASSP.2010.5495222"},{"key":"2438_CR29","unstructured":"Lewandowski N, Droppo J, Seltzer M, Yu D (2014) Phone sequence modeling with recurrent neural networks. In: Proceedings of ICASSP"},{"key":"2438_CR30","unstructured":"Trmal I (2011) Spatio-temporal structure of feature vectors in neural network adaptation. PhD dissertation, Faculty of applied sciences, University of West Bohemia"},{"issue":"10","key":"2438_CR31","doi-asserted-by":"crossref","first-page":"827","DOI":"10.1016\/j.specom.2006.11.005","volume":"49","author":"R Gemello","year":"2007","unstructured":"Gemello R, Mana F, Scanzio S, Laface P, De Mori R (2007) Linear hidden transformations for adaptation of hybrid ANN\/HMM models. Speech Commun 49(10):827\u2013835","journal-title":"Speech Commun"},{"key":"2438_CR32","doi-asserted-by":"crossref","unstructured":"Li B, Sim, KC (2010) Comparison of discriminative input and output transformations for speaker adaptation in the hybrid NN\/HMM systems. In: INTERSPEECH, pp 526\u2013529","DOI":"10.21437\/Interspeech.2010-214"},{"issue":"12","key":"2438_CR33","doi-asserted-by":"crossref","first-page":"1713","DOI":"10.1109\/TASLP.2014.2346313","volume":"22","author":"X Shaofei","year":"2012","unstructured":"Shaofei X, Abdel-Hamid O, Jiang H, Dai L, Liu Q (2012) Fast adaptation of deep neural networks based on discriminant codes for speech recognition. IEEE Trans Audio Speech Lang Process 22(12):1713\u20131725","journal-title":"IEEE Trans Audio Speech Lang Process"},{"key":"2438_CR34","unstructured":"Karimi K (2003) Implementing the speaker features for quality improvement of the speech recognition models. MS thesis. Faculty of biomedical engineering. Amirkabir University of Technology"},{"key":"2438_CR35","doi-asserted-by":"crossref","unstructured":"Pan J, Liu C, Wang Z, Hu Y, Jiang H (2012) Investigation of deep neural networks (DNN) for large vocabulary continuous speech recognition: why DNN surpasses GMMS in acoustic modeling. In: ISCSLP, pp 301\u2013305","DOI":"10.1109\/ISCSLP.2012.6423452"},{"key":"2438_CR36","doi-asserted-by":"crossref","unstructured":"Seide F, Li G, Chen X, Yu D (2011) Feature engineering in context-dependent deep neural networks for conversational speech transcription. In: IEEE workshop on proceedings of ASRU, pp 24\u201329","DOI":"10.1109\/ASRU.2011.6163899"},{"issue":"1","key":"2438_CR37","doi-asserted-by":"crossref","first-page":"45","DOI":"10.1007\/s00521-007-0151-5","volume":"18","author":"I Nejadgholi","year":"2009","unstructured":"Nejadgholi I, Seyyedsalehi SA (2009) Nonlinear normalization of input patterns to speaker variability in speech recognition neural networks. Neural Comput Appl 18(1):45\u201355","journal-title":"Neural Comput Appl"},{"key":"2438_CR38","unstructured":"Seyyedsalehi SA (2003) Designing a neural network based speech recognition system for Persian LVCSR task. Technical Report. Research Center of Intelligent Signal Processing (RCISP) (in Persian)"},{"key":"2438_CR39","doi-asserted-by":"crossref","unstructured":"Sainath TN, Kingsbury B, Ramabhadran B, Fousek P, Novak P, Mohamed AR (2011) Making deep belief networks effective for large vocabulary continuous speech recognition. In: IEEE workshop on proceedings of automatic speech recognition and understanding (ASRU), pp 30\u201335","DOI":"10.1109\/ASRU.2011.6163900"},{"key":"2438_CR40","doi-asserted-by":"crossref","first-page":"989","DOI":"10.1016\/S0893-6080(99)00049-0","volume":"12","author":"E Koerner","year":"1999","unstructured":"Koerner E, Gewaltig M, Koerner U, Richter A, Rodemann T (1999) A model of computation in neocortical architecture. Neural Netw 12:989\u20131005","journal-title":"Neural Netw"},{"key":"2438_CR41","doi-asserted-by":"crossref","first-page":"791","DOI":"10.1016\/S0893-6080(96)00126-8","volume":"10","author":"E Koerner","year":"1997","unstructured":"Koerner E, Tsujino H, Masutani T (1997) A cortical type modular neural network for hypothetical reasoning. Neural Netw 10:791\u2013814","journal-title":"Neural Netw"},{"issue":"1","key":"2438_CR42","doi-asserted-by":"crossref","first-page":"23","DOI":"10.1016\/0893-6080(90)90044-L","volume":"3","author":"KJ Lang","year":"1990","unstructured":"Lang KJ, Waibel AH, Hinton GE (1990) A time-delay neural network architecture for isolated word recognition. Neural Netw 3(1):23\u201343","journal-title":"Neural Netw"},{"key":"2438_CR43","unstructured":"Chen B, Zhu Q, Morgan N (2005) Tonotopic multilayer perceptron a neural network for learning long term temporal features for speech recognition. In: Proceedings of ICASSP, pp 945\u2013948"},{"issue":"5","key":"2438_CR44","doi-asserted-by":"crossref","first-page":"935","DOI":"10.1007\/s00521-011-0563-0","volume":"21","author":"H Behbood","year":"2012","unstructured":"Behbood H, Seyyedsalehi SA, Tohidypour HR, Najafi M, Gharibzadeh sh (2012) A novel neural-based model for acoustic-articulatory inversion mapping. Neural Comput Appl 21(5):935\u2013943","journal-title":"Neural Comput Appl"},{"issue":"2","key":"2438_CR45","doi-asserted-by":"crossref","first-page":"191","DOI":"10.1007\/s11063-013-9322-9","volume":"40","author":"SZ Seyyedsalehi","year":"2014","unstructured":"Seyyedsalehi SZ, Seyyedsalehi SA (2014) Simultaneous learning of nonlinear manifolds based on the bottleneck neural network. Neural Process Lett 40(2):191\u2013209","journal-title":"Neural Process Lett"},{"key":"2438_CR46","doi-asserted-by":"crossref","unstructured":"Ansary L, Seyyedsalehi SA (2004) Modeling phones coarticulation effects in a neural network based speaker recognition system. In: Proceedings of Interspeech","DOI":"10.21437\/Interspeech.2004-621"},{"key":"2438_CR47","unstructured":"Bijankhan M, Sheikhzadegan J, Roohani MR, Samareh Y, Lucas C, Tebyani M (1994) FARSDAT-the speech database of Farsi spoken language. In: Proceedings of the Australian conference on speech science and technology, vol 2, pp 826\u2013830"},{"issue":"1","key":"2438_CR48","first-page":"17","volume":"20","author":"M Ghayoomi","year":"2010","unstructured":"Ghayoomi M, Momtazi S, Bijankhan M (2010) A study of corpus development for Persian. Int J Asian Lang Process 20(1):17\u201333","journal-title":"Int J Asian Lang Process"},{"key":"2438_CR49","unstructured":"Rahiminejad M (2002) Development and enhancement of current feature extraction methods in speech recognition systems. MS Thesis. Faculty of Biomedical Engineering. Amirkabir University of Technology (in Persian)"},{"issue":"2","key":"2438_CR50","doi-asserted-by":"crossref","first-page":"225","DOI":"10.1109\/TASL.2010.2045943","volume":"19","author":"J Pinto","year":"2011","unstructured":"Pinto J, Garimella S, Hermansky H, Bourlard H (2011) Analysis of MLP-based hierarchical phoneme posterior probability estimator. IEEE Trans Audio Speech Lang Process 19(2):225\u2013241","journal-title":"IEEE Trans Audio Speech Lang Process"},{"issue":"3","key":"2438_CR51","doi-asserted-by":"crossref","first-page":"459","DOI":"10.1007\/s00521-009-0328-1","volume":"19","author":"B Makki","year":"2010","unstructured":"Makki B, Noori Hosseini M, Seyyedsalehi SA (2010) An evolving neural network to perform dynamic principal component analysis. Neural Comput Appl 19(3):459\u2013463","journal-title":"Neural Comput Appl"},{"issue":"3","key":"2438_CR52","doi-asserted-by":"crossref","first-page":"437","DOI":"10.1007\/s00521-009-0275-x","volume":"19","author":"B Makki","year":"2010","unstructured":"Makki B, Noori Hosseini M, Seyyedsalehi SA, Sadati N (2010) Unaligned training for voice conversion based on a local nonlinear principal component analysis approach. Neural Comput Appl 19(3):437\u2013444","journal-title":"Neural Comput Appl"},{"issue":"2","key":"2438_CR53","doi-asserted-by":"crossref","first-page":"256","DOI":"10.1016\/j.specom.2011.09.001","volume":"54","author":"HR Tohidypour","year":"2012","unstructured":"Tohidypour HR, Seyyedsalehi SA, Behbood H, Roshandel H (2012) A new representation for speech frame recognition based on redundant wavelet filter banks. Speech Commun 54(2):256\u2013271","journal-title":"Speech Commun"},{"key":"2438_CR54","first-page":"485","volume-title":"Nevisa, a persian continuous speech recognition system. Advances in Computer Science and Engineering","author":"H Sameti","year":"2009","unstructured":"Sameti H, Veisi H, Bahrani M, Babaali B, Hosseinzadeh Kh (2009) Nevisa, a persian continuous speech recognition system. Advances in Computer Science and Engineering. Springer, Berlin, pp 485\u2013492"},{"key":"2438_CR55","unstructured":"Karami Sh (2000) Implementing information existing in transition regions of phoneme borders to improve capability of speaker independent neural network based speech recognition systems. MS Thesis. Faculty of biomedical engineering. Amirkabir University of Technology (in Persian)"},{"key":"2438_CR56","unstructured":"HTK (v.3.4), Hidden Markov Model Toolkit: http:\/\/htk.eng.cam.ac.uk"}],"container-title":["Neural Computing and Applications"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s00521-016-2438-x\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s00521-016-2438-x.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s00521-016-2438-x","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s00521-016-2438-x.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,8,19]],"date-time":"2023-08-19T00:11:50Z","timestamp":1692403910000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s00521-016-2438-x"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2016,6,22]]},"references-count":56,"journal-issue":{"issue":"S1","published-print":{"date-parts":[[2017,12]]}},"alternative-id":["2438"],"URL":"https:\/\/doi.org\/10.1007\/s00521-016-2438-x","relation":{},"ISSN":["0941-0643","1433-3058"],"issn-type":[{"value":"0941-0643","type":"print"},{"value":"1433-3058","type":"electronic"}],"subject":[],"published":{"date-parts":[[2016,6,22]]}}}