{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,7,25]],"date-time":"2026-07-25T16:14:59Z","timestamp":1784996099302,"version":"3.55.0"},"reference-count":232,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","license":[{"start":{"date-parts":[[2019,1,1]],"date-time":"2019-01-01T00:00:00Z","timestamp":1546300800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/OAPA.html"}],"funder":[{"name":"University of Sharjah through the Competitive Research Project \u201cEmotion Recognition in each of Stressful and Emotional Talking Environments Using Artificial Models\u201d","award":["1602040348-P"],"award-info":[{"award-number":["1602040348-P"]}]},{"name":"Applied Science Private University, Amman, Jordan"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Access"],"published-print":{"date-parts":[[2019]]},"DOI":"10.1109\/access.2019.2896880","type":"journal-article","created":{"date-parts":[[2019,2,1]],"date-time":"2019-02-01T21:08:39Z","timestamp":1549055319000},"page":"19143-19165","source":"Crossref","is-referenced-by-count":993,"title":["Speech Recognition Using Deep Neural Networks: A Systematic Review"],"prefix":"10.1109","volume":"7","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-1570-0897","authenticated-orcid":false,"given":"Ali Bou","family":"Nassif","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7856-9342","authenticated-orcid":false,"given":"Ismail","family":"Shahin","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Imtinan","family":"Attili","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Mohammad","family":"Azzeh","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Khaled","family":"Shaalan","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"263","reference":[{"key":"ref170","first-page":"1910","article-title":"Learning small-size DNN with output-distribution-based criteria","author":"li","year":"2014","journal-title":"Proc Annu Conf Int Speech Commun Assoc (Interspeech)"},{"key":"ref172","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2014.6853582"},{"key":"ref171","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2014.6854479"},{"key":"ref174","doi-asserted-by":"publisher","DOI":"10.1109\/ChinaSIP.2014.6889193"},{"key":"ref173","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2014.6855086"},{"key":"ref176","first-page":"2335","article-title":"Relation classification via convolutional deep neural network","author":"zeng","year":"2011","journal-title":"Proc COLING"},{"key":"ref175","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2014.6854661"},{"key":"ref178","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2014.6854926"},{"key":"ref177","first-page":"616","article-title":"Robust speech recognition with speech enhanced deep neural networks","author":"du","year":"2014","journal-title":"Proc 15th Annu Conf Int Speech Commun Assoc (INTERSPEECH)"},{"key":"ref168","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2014.6854673"},{"key":"ref169","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2014.6854051"},{"key":"ref39","author":"alpaydin","year":"2015","journal-title":"Introduction to Machine Learning"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1007\/s00701-017-3385-8"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1007\/s10772-012-9170-4"},{"key":"ref32","first-page":"1123","article-title":"Improving automatic emotion recognition from speech via gender differentiation","author":"vogt","year":"2006","journal-title":"Proc Lang Resources Eval Conf"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2008.4517932"},{"key":"ref30","first-page":"1","article-title":"Automatic dialect and accent recognition and its application to speech recognition","author":"biadsy","year":"2011"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1007\/978-94-017-3217-8_6"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/NGMAST.2012.39"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1145\/2347736.2347755"},{"key":"ref34","first-page":"1","author":"schapire","year":"2008","journal-title":"Machine Learning Theory"},{"key":"ref181","first-page":"3007","article-title":"Speaker adaptation of DNN-based ASR with I-vectors: Does it actually adapt models to speakers?","author":"rouvier","year":"2014","journal-title":"Proc Annu Conf Int Speech Commun Assoc (Interspeech)"},{"key":"ref180","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2014.6854828"},{"key":"ref185","first-page":"2189","article-title":"Towards speaker adaptive training of deep neural network acoustic models","volume":"20","author":"miao","year":"2014","journal-title":"Proc INTERSPEECH"},{"key":"ref184","doi-asserted-by":"publisher","DOI":"10.1109\/ICOSP.2014.7015050"},{"key":"ref183","doi-asserted-by":"publisher","DOI":"10.1109\/ISCSLP.2014.6936615"},{"key":"ref182","first-page":"223","article-title":"Speech emotion recognition using deep neural network and extreme learning machine","author":"han","year":"2014","journal-title":"Proc 15th Annu Int Conf"},{"key":"ref189","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2015.7178782"},{"key":"ref188","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2014.2364452"},{"key":"ref187","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2015.7178801"},{"key":"ref186","first-page":"135","article-title":"Vocal tract length normalisation approaches to DNN-based children&#x2019;s and adults&#x2019; speech recognition","author":"serizel","year":"2014","journal-title":"Proc of the IEEE Workshop on Spoken Language Technology (SLT)"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1016\/S0167-6393(00)00094-7"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-60087-6_31"},{"key":"ref179","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2014.6854681"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/MSP.2008.917992"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1515\/jisys-2014-0118"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1016\/j.engappai.2013.03.013"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1007\/s10772-013-9188-2"},{"key":"ref24","first-page":"5","article-title":"Emotion recognition in speech signal: Experimental study, development, and application","author":"petrushin","year":"2000","journal-title":"Proc Int Conf Spoken Lang Process (ICSLP)"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1007\/s10772-011-9089-1"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1016\/S0167-6393(02)00070-5"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1016\/j.neunet.2005.03.006"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1109\/TNN.1998.712192"},{"key":"ref51","doi-asserted-by":"crossref","first-page":"504","DOI":"10.1126\/science.1127647","article-title":"Reducing the dimensionality of data with neural networks","volume":"313","author":"hinton","year":"2006","journal-title":"Science"},{"key":"ref154","doi-asserted-by":"publisher","DOI":"10.1109\/ICOSP.2014.7015061"},{"key":"ref153","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2014.6853860"},{"key":"ref156","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2014.6854363"},{"key":"ref155","doi-asserted-by":"crossref","first-page":"293","DOI":"10.21437\/Odyssey.2014-44","article-title":"Deep Neural Networks for extracting Baum-Welch statistics for speaker recognition","author":"kenny","year":"2014","journal-title":"IEEE Odyssey Speaker and Language Recognition Workshop"},{"key":"ref150","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2014.6854622"},{"key":"ref152","doi-asserted-by":"publisher","DOI":"10.1109\/LSP.2014.2325781"},{"key":"ref151","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2014.6853592"},{"key":"ref146","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2014.6853887"},{"key":"ref147","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2014.2329237"},{"key":"ref148","first-page":"2180","article-title":"Adaptation of deep neural network acoustic models using factorised I-vectors","author":"karanasou","year":"2014","journal-title":"Proc Annu Conf Int Speech Commun Assoc (Interspeech)"},{"key":"ref149","doi-asserted-by":"publisher","DOI":"10.1109\/LSP.2013.2291240"},{"key":"ref59","doi-asserted-by":"publisher","DOI":"10.1017\/atsip.2013.9"},{"key":"ref58","article-title":"Design and learning of output representations for speech recognition","author":"deng","year":"2013","journal-title":"Proc NIPS Workshop Learn Output Representations"},{"key":"ref57","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2014.2339736"},{"key":"ref56","first-page":"3207","volume":"22","author":"cire?an","year":"2010","journal-title":"Deep Big Simple Neural Nets Excel on Handwritten Digit Recognition"},{"key":"ref55","first-page":"1","article-title":"Deep neural networks segment neuronal membranes in electron microscopy images","author":"cire?an","year":"2012","journal-title":"Proc NIPS"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2012.6288864"},{"key":"ref53","doi-asserted-by":"crossref","first-page":"436","DOI":"10.1038\/nature14539","article-title":"Deep learning","volume":"521","author":"lecun","year":"2015","journal-title":"Nature"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1109\/MSP.2012.2205597"},{"key":"ref40","year":"2017","journal-title":"Supervised Learning"},{"key":"ref167","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2014.6854823"},{"key":"ref166","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2014.2372314"},{"key":"ref165","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2014.6853591"},{"key":"ref164","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2014.6853589"},{"key":"ref163","doi-asserted-by":"publisher","DOI":"10.1109\/ChinaSIP.2014.6889204"},{"key":"ref162","doi-asserted-by":"crossref","first-page":"1713","DOI":"10.1109\/TASLP.2014.2346313","article-title":"Fast adaptation of deep neural network based on discriminant codes for speech recognition","volume":"22","author":"xue","year":"2014","journal-title":"IEEE\/ACM Trans Audio Speech Lang Process"},{"key":"ref161","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2014.6854662"},{"key":"ref160","first-page":"2685","article-title":"Experiments on deep learning for speech denoising","author":"liu","year":"2014","journal-title":"Proc Annu Conf Int Speech Commun Assoc (Interspeech)"},{"key":"ref4","first-page":"2186","article-title":"A survey on speech recognition","author":"singh","year":"2013","journal-title":"Int J Adv Res Comput Eng Technol"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1080\/02564602.2015.1010611"},{"key":"ref6","first-page":"1","article-title":"Speech recognition using deep learning algorithms","author":"zhang","year":"2013"},{"key":"ref5","first-page":"181","article-title":"Speech recognition by machine: A review","volume":"6","author":"anusuya","year":"2009","journal-title":"Int J Comput Sci Inf Secur"},{"key":"ref8","first-page":"1051","article-title":"Guidelines for performing Systematic Literature reviews in software engineering version 2.3","volume":"45","author":"kitchenham","year":"2007","journal-title":"Engineering"},{"key":"ref159","first-page":"1915","article-title":"Ensemble deep learning for speech recognition","author":"deng","year":"2014","journal-title":"Proc Annu Conf Int Speech Commun Assoc (Interspeech)"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1561\/2200000006"},{"key":"ref7","article-title":"Novel cascaded Gaussian mixture model-deep neural network classifier for speaker identification in emotional talking environments","author":"shahin","year":"0","journal-title":"Neural Comput Appl"},{"key":"ref157","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2014.6854824"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/TASL.2011.2116010"},{"key":"ref158","first-page":"2670","article-title":"Dynamic noise aware training for speech enhancement based on deep neural networks","author":"xu","year":"2014","journal-title":"Proc Annu Conf Int Speech Commun Assoc"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1016\/S1364-6613(99)01331-5"},{"key":"ref45","first-page":"10","article-title":"Semi-Supervised Learning Literature Survey Contents","author":"zhu","year":"2008"},{"key":"ref48","first-page":"1","article-title":"Unsupervised learning","author":"dayan","year":"2009","journal-title":"The MIT Encyclopedia of the Cognitive Sciences"},{"key":"ref47","first-page":"342","article-title":"Kernel methods for deep learning","volume":"22","author":"cho","year":"2009","journal-title":"Proc Adv Neural Inf Process Syst (NIPS)"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1109\/ISAP.2015.7325580"},{"key":"ref41","author":"krugman","year":"2012","journal-title":"International economics Theory and policy"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1109\/I2MTC.2015.7151313"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-61068-4_5"},{"key":"ref73","first-page":"1","article-title":"Roles of pre-training and fine-tuning in context-dependent DBN-HMMs for real-world speech recognition","author":"yu","year":"2010","journal-title":"Proc NIPS Workshop on Deep Learning and Unsupervised Feature Learning"},{"key":"ref72","first-page":"469","article-title":"Phone recognition with the mean-covariance restricted Boltzmann machine","author":"dahl","year":"2010","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref71","doi-asserted-by":"crossref","first-page":"2846","DOI":"10.21437\/Interspeech.2010-304","article-title":"Investigation of full-sequence training of deep belief networks for speech recognition","author":"mohamed","year":"2010","journal-title":"Proc INTERSPEECH"},{"key":"ref70","doi-asserted-by":"crossref","first-page":"1692","DOI":"10.21437\/Interspeech.2010-487","article-title":"Binary coding of speech spectrograms using a deep auto-encoder","author":"deng","year":"2010","journal-title":"Proc INTERSPEECH"},{"key":"ref76","first-page":"437","article-title":"Conversational speech transcription using context-dependent deep neural networks","author":"seide","year":"2011","journal-title":"Proc Annu Conf Int Speech Commun Assoc (Interspeech)"},{"key":"ref77","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2011.5947494"},{"key":"ref74","first-page":"1","article-title":"A deep neural network for acoustic-articulatory speech inversion","author":"uria","year":"2011","journal-title":"Proc NIPS Workshop on Deep Learning and Unsupervised Feature Learning"},{"key":"ref75","doi-asserted-by":"crossref","first-page":"2281","DOI":"10.21437\/Interspeech.2011-606","article-title":"Accelerated parallelizable neural network learning algorithm for speech recognition","author":"yu","year":"2011","journal-title":"Proc INTERSPEECH"},{"key":"ref78","first-page":"2285","article-title":"Deep convex network: A scalable architecture for speech pattern classification","author":"deng","year":"2011","journal-title":"Proc Annu Conf Int Speech Commun Assoc (Interspeech)"},{"key":"ref79","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2011.5947651"},{"key":"ref60","doi-asserted-by":"publisher","DOI":"10.1109\/72.279181"},{"key":"ref62","first-page":"1033","article-title":"Learning recurrent neural networks with Hessian-free optimization","author":"martens","year":"2011","journal-title":"Proc 28th Int Conf Mach Learn (ICML)"},{"key":"ref61","first-page":"735","article-title":"Deep learning via Hessian-free optimization","volume":"951","author":"martens","year":"2010","journal-title":"Proc 27th Int Conf Mach Learn"},{"key":"ref63","doi-asserted-by":"publisher","DOI":"10.1145\/1143844.1143891"},{"key":"ref64","first-page":"1","article-title":"Deep neural network adaptation for children&#x2019;s and adults&#x2019; speech recognition","author":"serizel","year":"2014","journal-title":"Proc 1st Italian Comput Linguistics Conf"},{"key":"ref65","first-page":"2","article-title":"Application of pretrained deep neural networks to large vocabulary speech recognition","author":"jaitly","year":"2012","journal-title":"Proc INTERSPEECH"},{"key":"ref66","first-page":"1","article-title":"Deep belief networks for phone recognition","volume":"4","author":"mohamed","year":"2009","journal-title":"Scholarpedia"},{"key":"ref67","first-page":"376","article-title":"From speech to letters&#x2014;Using a novel neural network architecture for grapheme based ASR","author":"eyben","year":"2010","journal-title":"Proc IEEE Workshop Autom Speech Recognition Understanding (ASRU)"},{"key":"ref68","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2009.4960445"},{"key":"ref69","doi-asserted-by":"publisher","DOI":"10.1109\/JPROC.2013.2248112"},{"key":"ref197","first-page":"3576","article-title":"Integration of DNN based Speech Enhancement and ASR","author":"astudillo","year":"2015","journal-title":"Proc Annu Conf Int Speech Commun Assoc (Interspeech)"},{"key":"ref198","doi-asserted-by":"publisher","DOI":"10.1109\/ASRU.2015.7404784"},{"key":"ref199","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2015.7177932"},{"key":"ref193","doi-asserted-by":"publisher","DOI":"10.1109\/LSP.2015.2420092"},{"key":"ref194","doi-asserted-by":"publisher","DOI":"10.1186\/s13636-015-0069-2"},{"key":"ref195","first-page":"3630","article-title":"fMLLR based feature-space speaker adaptation of DNN acoustic models","volume":"1","author":"parthasarathi","year":"2015","journal-title":"Proc Annu Conf Int Speech Commun Assoc (Interspeech)"},{"key":"ref196","doi-asserted-by":"publisher","DOI":"10.1186\/s13636-015-0058-5"},{"key":"ref95","doi-asserted-by":"publisher","DOI":"10.1109\/ICoSP.2012.6491550"},{"key":"ref94","first-page":"2594","article-title":"Integrating deep neural networks into structural classification approach based on weighted finite-state transducers","author":"kubo","year":"2012","journal-title":"Proc Int Speech Commun Assoc (Interspeech)"},{"key":"ref190","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2015.7178844"},{"key":"ref93","doi-asserted-by":"publisher","DOI":"10.1109\/SLT.2012.6424210"},{"key":"ref191","doi-asserted-by":"publisher","DOI":"10.1016\/j.csl.2016.06.007"},{"key":"ref92","first-page":"1","article-title":"Factorized deep neural networks for adaptive speech recognition","author":"yu","year":"2012","journal-title":"Proc Int Workshop Stat Mach Learn Speech Process"},{"key":"ref192","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2015.7178838"},{"key":"ref91","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2012.6288897"},{"key":"ref90","first-page":"1","article-title":"Conversational speech transcription using context-dependent deep neural networks","author":"yu","year":"2012","journal-title":"Proc 29th Int Conf Mach Learn"},{"key":"ref98","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2013.6638948"},{"key":"ref99","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2013.6638952"},{"key":"ref96","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2012.6288333"},{"key":"ref97","doi-asserted-by":"publisher","DOI":"10.1109\/SLT.2012.6424224"},{"key":"ref82","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2011.5947401"},{"key":"ref81","first-page":"237","article-title":"Improved bottleneck features using pretrained deep neural networks","author":"yu","year":"2011","journal-title":"Proc Annu Conf Int Speech Commun Assoc (Interspeech)"},{"key":"ref84","doi-asserted-by":"publisher","DOI":"10.1109\/ASRU.2011.6163900"},{"key":"ref83","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2011.5947700"},{"key":"ref80","doi-asserted-by":"publisher","DOI":"10.1109\/ASRU.2011.6163899"},{"key":"ref89","doi-asserted-by":"publisher","DOI":"10.1109\/TASL.2011.2134090"},{"key":"ref85","first-page":"7","article-title":"Speech recognition with segmental conditional random fields: Final report from the 2010 JHU summer workshop","author":"zweig","year":"2010"},{"key":"ref86","doi-asserted-by":"publisher","DOI":"10.1109\/TASL.2011.2109382"},{"key":"ref87","doi-asserted-by":"publisher","DOI":"10.1109\/SLT.2012.6424251"},{"key":"ref88","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2012.6288837"},{"key":"ref200","first-page":"1745","article-title":"Multi-resolution stacking for speech separation based on boosted DNN","volume":"1","author":"zhang","year":"2015","journal-title":"Proc Annu Conf Int Speech Commun Assoc (Interspeech)"},{"key":"ref101","doi-asserted-by":"crossref","first-page":"662","DOI":"10.21437\/Interspeech.2013-189","article-title":"Accurate and compact large vocabulary speech recognition on mobile devices","author":"lei","year":"2013","journal-title":"Proc INTERSPEECH"},{"key":"ref100","first-page":"104","article-title":"A scalable approach to using DNN-derived features in GMM-HMM based acoustic modeling for LVCSR","author":"yan","year":"2013","journal-title":"Proc Annu Conf Int Speech Commun Assoc (Interspeech)"},{"key":"ref209","doi-asserted-by":"publisher","DOI":"10.1109\/ASRU.2015.7404786"},{"key":"ref203","first-page":"1078","article-title":"Convolutional deep maxout networks for phone recognition","author":"t\u00f3th","year":"2014","journal-title":"Proc 15th Annu Conf Int Speech Commun Assoc (INTERSPEECH)"},{"key":"ref204","first-page":"1","article-title":"Real-time dereverberation for deep neural network speech recognition coherence based spectral enhancement","author":"schwarz","year":"2015","journal-title":"Proc DAGA"},{"key":"ref201","doi-asserted-by":"publisher","DOI":"10.1186\/s13636-014-0047-0"},{"key":"ref202","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2015.7178828"},{"key":"ref207","doi-asserted-by":"publisher","DOI":"10.1186\/s13634-015-0238-6"},{"key":"ref208","first-page":"92","article-title":"Time delay deep neural network-based universal background models for speaker recognition","author":"snyder","year":"2016","journal-title":"Proc IEEE Workshop Autom Speech Recognition Understanding (ASRU)"},{"key":"ref205","first-page":"482","article-title":"Robust ASR using neural network based speech enhancement and feature simulation","author":"sivasankaran","year":"2015","journal-title":"Proc IEEE Workshop Autom Speech Recog and Understanding"},{"key":"ref206","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2015.7178798"},{"key":"ref211","doi-asserted-by":"publisher","DOI":"10.1186\/s13636-016-0085-x"},{"key":"ref210","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2016.7472767"},{"key":"ref212","first-page":"2173","article-title":"Toward a better understanding of deep neural network based acoustic modelling: An empirical investigation","author":"wang","year":"2016","journal-title":"Proc AAAI Conf Artif Intell"},{"key":"ref213","doi-asserted-by":"publisher","DOI":"10.1186\/s13636-016-0088-7"},{"key":"ref214","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2017.2698723"},{"key":"ref215","first-page":"1","article-title":"A hybrid DSP\/deep learning approach to real-time full-band speech enhancement","author":"valin","year":"2017","journal-title":"Proc IEEE 20th Int Workshop Multimedia Signal Process (MMSP)"},{"key":"ref216","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2017.7953084"},{"key":"ref217","doi-asserted-by":"publisher","DOI":"10.1145\/3146347.3146351"},{"key":"ref218","doi-asserted-by":"publisher","DOI":"10.1109\/JSTSP.2017.2756439"},{"key":"ref219","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2017.7952121"},{"key":"ref220","first-page":"1323","article-title":"Deep learning-based telephony speech recognition in the wild","author":"han","year":"2017","journal-title":"Proc Annu Conf Int Speech Commun Assoc (Interspeech)"},{"key":"ref222","doi-asserted-by":"publisher","DOI":"10.1016\/j.neunet.2017.02.013"},{"key":"ref221","doi-asserted-by":"crossref","first-page":"999","DOI":"10.21437\/Interspeech.2017-620","article-title":"Deep neural network embeddings for text-independent speaker verification","author":"snyder","year":"2017","journal-title":"Proc INTERSPEECH"},{"key":"ref229","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2017.7953077"},{"key":"ref228","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2017.7952154"},{"key":"ref227","doi-asserted-by":"publisher","DOI":"10.1109\/HSCMA.2017.7895577"},{"key":"ref226","author":"xu","year":"2017","journal-title":"Multi-objective learning and mask-based post-processing for deep neural network based speech enhancement"},{"key":"ref225","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2017.2672401"},{"key":"ref224","first-page":"3632","article-title":"Improving mask learning based speech enhancement system with restorationlayers and residual connection","author":"chen","year":"2017","journal-title":"Proc Annu Conf Int Speech Commun Assoc (Interspeech)"},{"key":"ref223","doi-asserted-by":"publisher","DOI":"10.1016\/j.specom.2017.03.003"},{"key":"ref127","doi-asserted-by":"publisher","DOI":"10.1109\/ASRU.2013.6707758"},{"key":"ref126","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2013.6639201"},{"key":"ref125","doi-asserted-by":"crossref","first-page":"3771","DOI":"10.21437\/Interspeech.2013-596","article-title":"Investigation of recurrent-neural-network architectures and learning methods for spoken language understanding","volume":"2","author":"mesnil","year":"2013","journal-title":"Proc INTERSPEECH"},{"key":"ref124","doi-asserted-by":"publisher","DOI":"10.1109\/ASRU.2013.6707719"},{"key":"ref129","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2013.6638949"},{"key":"ref128","doi-asserted-by":"publisher","DOI":"10.1109\/ASRU.2013.6707746"},{"key":"ref130","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2013.6639348"},{"key":"ref133","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2013.6638312"},{"key":"ref134","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2013.6639103"},{"key":"ref131","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2013.6639084"},{"key":"ref132","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2013.6639102"},{"key":"ref232","article-title":"Feature selection based transfer subspace learning for speech emotion recognition","author":"song","year":"0","journal-title":"IEEE Trans Affect Comput"},{"key":"ref230","doi-asserted-by":"publisher","DOI":"10.1016\/j.specom.2017.11.003"},{"key":"ref231","doi-asserted-by":"publisher","DOI":"10.1587\/transfun.E101.A.585"},{"key":"ref136","first-page":"6","article-title":"Rectifier nonlinearities improve neural network acoustic models","volume":"28","author":"maas","year":"2013","journal-title":"Proc 30th Int Conf Mach Learn"},{"key":"ref135","first-page":"1248","article-title":"Rapid and effective speaker adaptation of convolutional neural network based models for speech recognition","author":"abdel-hamid","year":"2016","journal-title":"Proc INTERSPEECH"},{"key":"ref138","first-page":"2345","article-title":"Sequence-discriminative training of deep neural networks","volume":"1","author":"vesel\u00fd","year":"2013","journal-title":"Proc Annu Conf Int Speech Commun Assoc (Interspeech)"},{"key":"ref137","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2013-552"},{"key":"ref139","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2013.6639212"},{"key":"ref140","doi-asserted-by":"publisher","DOI":"10.1109\/ASRU.2013.6707705"},{"key":"ref141","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2013-203"},{"key":"ref142","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2013.6638947"},{"key":"ref143","doi-asserted-by":"publisher","DOI":"10.1109\/TASL.2012.2227738"},{"key":"ref2","doi-asserted-by":"crossref","DOI":"10.1016\/bs.host.2018.05.001","article-title":"Deep learning for natural language processing","author":"xie","year":"2018","journal-title":"Handbook of Statistics"},{"key":"ref144","doi-asserted-by":"publisher","DOI":"10.1109\/TASL.2013.2250961"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2018.2881096"},{"key":"ref145","first-page":"1895","article-title":"A comparative analytic study on the Gaussian mixture and context dependent deep neural network hidden Markov models","author":"huang","year":"2014","journal-title":"Proc 15th Annu Conf Int Speech Commun Assoc (INTERSPEECH)"},{"key":"ref109","doi-asserted-by":"publisher","DOI":"10.1109\/ASRU.2013.6707763"},{"key":"ref108","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2013.6638346"},{"key":"ref107","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2013.6639347"},{"key":"ref106","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2013.6639081"},{"key":"ref105","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2013.6639092"},{"key":"ref104","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2013.6639140"},{"key":"ref103","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2013.6639100"},{"key":"ref102","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2013.6638963"},{"key":"ref111","first-page":"467","article-title":"Deep neural network approach for the dialog state tracking challenge","author":"henderson","year":"2013","journal-title":"Proc SIGDIAL Conf"},{"key":"ref112","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2013.6638959"},{"key":"ref110","doi-asserted-by":"publisher","DOI":"10.1109\/ASRU.2013.6707745"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/MSP.2012.2205597"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2013.6639344"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2013.6639345"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/JPROC.2012.2237151"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2014.2304637"},{"key":"ref15","first-page":"4072","article-title":"An overview of automatic speaker recognition technology","volume":"4","author":"reynolds","year":"2002","journal-title":"Proc IEEE Int Conf Acoust Speech Signal Process"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1016\/0167-6393(91)90054-W"},{"key":"ref118","doi-asserted-by":"publisher","DOI":"10.1109\/ACII.2013.58"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/89.905995"},{"key":"ref117","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2013.6638967"},{"key":"ref18","first-page":"149","article-title":"Application of Automatic Speaker Recognition techniques to pathological voice assessment (dysphonia)","author":"fredouille","year":"2005","journal-title":"Proc Eur Conf Speech Commun Technol (Eurospeech)"},{"key":"ref19","first-page":"1","article-title":"Affective Computing","author":"picard","year":"1995"},{"key":"ref119","doi-asserted-by":"publisher","DOI":"10.1109\/ASRU.2013.6707742"},{"key":"ref114","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2013.6638951"},{"key":"ref113","doi-asserted-by":"publisher","DOI":"10.1109\/ASRU.2013.6707748"},{"key":"ref116","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2013.6639211"},{"key":"ref115","first-page":"3366","article-title":"Exploring convolutional neural network structures and optimization techniques for speech recognition","author":"abdel-hamid","year":"2013","journal-title":"Proc 14th Annu Conf Int Speech Commun Assoc (INTERSPEECH)"},{"key":"ref120","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2013.6639038"},{"key":"ref121","doi-asserted-by":"publisher","DOI":"10.1109\/ASRU.2013.6707749"},{"key":"ref122","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2013.6639346"},{"key":"ref123","doi-asserted-by":"publisher","DOI":"10.1109\/ASRU.2013.6707743"}],"container-title":["IEEE Access"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/6287639\/8600701\/08632885.pdf?arnumber=8632885","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,7,14]],"date-time":"2024-07-14T14:07:11Z","timestamp":1720966031000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/8632885\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019]]},"references-count":232,"URL":"https:\/\/doi.org\/10.1109\/access.2019.2896880","relation":{},"ISSN":["2169-3536"],"issn-type":[{"value":"2169-3536","type":"electronic"}],"subject":[],"published":{"date-parts":[[2019]]}}}