{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,27]],"date-time":"2025-03-27T09:15:59Z","timestamp":1743066959454,"version":"3.40.3"},"publisher-location":"Berlin, Heidelberg","reference-count":51,"publisher":"Springer Berlin Heidelberg","isbn-type":[{"type":"print","value":"9783642453571"},{"type":"electronic","value":"9783642453588"}],"license":[{"start":{"date-parts":[[2014,1,1]],"date-time":"2014-01-01T00:00:00Z","timestamp":1388534400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2014,1,1]],"date-time":"2014-01-01T00:00:00Z","timestamp":1388534400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2014]]},"DOI":"10.1007\/978-3-642-45358-8_13","type":"book-chapter","created":{"date-parts":[[2014,4,22]],"date-time":"2014-04-22T10:16:03Z","timestamp":1398161763000},"page":"409-459","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":3,"title":["Automatic Speech Recognition"],"prefix":"10.1007","author":[{"given":"Hagen","family":"Soltau","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"George","family":"Saon","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Lidia","family":"Mangu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Hong-Kwang","family":"Kuo","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Brian","family":"Kingsbury","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Stephen","family":"Chu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Fadi","family":"Biadsy","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2014,3,25]]},"reference":[{"doi-asserted-by":"crossref","unstructured":"Kneser R, Ney H (1995) Improved backing-off for m-gram language modeling. In: International conference on acoustics, speech, and signal processing. ICASSP-95, Detroit, vol\u00a0I, pp\u00a0181\u2013184","key":"13_CR1","DOI":"10.1109\/ICASSP.1995.479394"},{"key":"13_CR2","first-page":"1637","volume-title":"Recent progress in Arabic broadcast news transcription at BBN","author":"M Afify","year":"2005","unstructured":"Afify M, Nguyen L, Xiang B, Abdou S, Makhoul J (2005) Recent progress in Arabic broadcast news transcription at BBN. In: Proceedings of the Interspeech, Lisbon, pp\u00a01637\u20131640"},{"key":"13_CR3","first-page":"1137","volume":"3","author":"Y Bengio","year":"2003","unstructured":"Bengio Y, Ducharme R, Vincent P, Jauvin C (2003) A neural probabilistic language model. J\u00a0Mach Learn Res 3:1137\u20131155","journal-title":"J Mach Learn Res"},{"unstructured":"Biadsy F (2011) Automatic dialect and accent recognition and its application to speech recognition. PhD. thesis, Columbia University","key":"13_CR4"},{"doi-asserted-by":"crossref","unstructured":"Biadsy F, Habash N, Hirschberg J (2009) Improving the Arabic pronunciation dictionary for phone and word recognition with linguistically-based pronunciation rules. In: Proceedings of NAACL\/HLT 2009, Colorado","key":"13_CR5","DOI":"10.3115\/1620754.1620812"},{"key":"13_CR6","volume-title":"Dialect and accent recognition using phonetic-segmentation supervectors","author":"F Biadsy","year":"2011","unstructured":"Biadsy F, Hirschberg J, Ellis D (2011) Dialect and accent recognition using phonetic-segmentation supervectors. In: Interspeech, Florence"},{"unstructured":"Buckwalter T (2004) LDC2004L02: Buckwalter Arabic morphological analyzer version 2.0. Linguistic Data Consortium, Philadelphia","key":"13_CR7"},{"issue":"5","key":"13_CR8","doi-asserted-by":"publisher","first-page":"308","DOI":"10.1109\/LSP.2006.870086","volume":"13","author":"W Campbell","year":"2006","unstructured":"Campbell W, Sturim D, Reynolds D (2006) Support vector machines using GMM supervectors for speaker verification. IEEE Signal Process Lett 13(5):308\u2013311","journal-title":"IEEE Signal Process Lett"},{"key":"13_CR9","volume-title":"SVM based speaker verification using a GMM supervector kernel and NAP variability compensation","author":"W Campbell","year":"2006","unstructured":"Campbell W, Sturim D, Reynolds D, Solomonoff A (2006) SVM based speaker verification using a GMM supervector kernel and NAP variability compensation. In: Proceedings of the ICASSP, France"},{"doi-asserted-by":"crossref","unstructured":"Chelba C, Jelinek F (1998) Exploiting syntactic structure for language modeling. In: Proceedings of the 36th annual meeting of the Association for Computational Linguistics and 17th international conference on computational linguistics, Montreal, pp\u00a0225\u2013231","key":"13_CR10","DOI":"10.3115\/980845.980882"},{"key":"13_CR11","volume-title":"Shrinking exponential language models","author":"SF Chen","year":"2009","unstructured":"Chen SF (2009) Shrinking exponential language models. In: Proceedings of the NAACL-HLT, Boulder"},{"key":"13_CR12","first-page":"1037","volume-title":"Enhanced word classing for model M","author":"S Chen","year":"2010","unstructured":"Chen S, Chu S (2010) Enhanced word classing for model M. In: Proceedings of the Interspeech, Makuhari, pp\u00a01037\u20131040"},{"unstructured":"Chen SF, Goodman JT (1998) An empirical study of smoothing techniques for language modeling. Technical report TR-10-98, Harvard University","key":"13_CR13"},{"key":"13_CR14","first-page":"4374","volume-title":"The 2009 IBM GALE Mandarin broadcast transcription system","author":"S Chu","year":"2010","unstructured":"Chu S, Povey D, Kuo HK, Mangu L, Zhang S, Shi Q, Qin Y (2010) The 2009 IBM GALE Mandarin broadcast transcription system. In: Proceedings of the ICASSP, Dallas, pp\u00a04374\u20134377"},{"doi-asserted-by":"crossref","unstructured":"Collins M, Roark B, Saraclar M (2005) Discriminative syntactic language modeling for speech recognition. In: Proceedings of the 43rd annual meeting of the Association for Computational Linguistics, Ann Arbor, pp\u00a0507\u2013514","key":"13_CR15","DOI":"10.3115\/1219840.1219903"},{"key":"13_CR16","doi-asserted-by":"publisher","first-page":"357","DOI":"10.1109\/TASSP.1980.1163420","volume":"28","author":"SB Davis","year":"1980","unstructured":"Davis SB, Mermelstein P (1980) Comparison of parametric representations for monosyllabic word recognition in continuously spoken sentences. IEEE Trans Acoust Speech Signal Process 28:357\u2013366","journal-title":"IEEE Trans Acoust Speech Signal Process"},{"key":"13_CR17","doi-asserted-by":"publisher","first-page":"195","DOI":"10.1007\/s10994-005-0916-y","volume":"60","author":"A Emami","year":"2005","unstructured":"Emami A, Jelinek F (2005) A neural syntactic language model. Mach Learn 60:195\u2013227","journal-title":"Mach Learn"},{"doi-asserted-by":"crossref","unstructured":"Emami A, Mangu L (2007) Empirical study of neural network language models for Arabic speech recognition. In: Proceedings of the ASRU 2007, Kyoto, pp\u00a0147\u2013152","key":"13_CR18","DOI":"10.1109\/ASRU.2007.4430100"},{"key":"13_CR19","volume-title":"Integrating dynamic speech modalities into context decision trees","author":"C F\u00fcgen","year":"2000","unstructured":"F\u00fcgen C, Rogina I (2000) Integrating dynamic speech modalities into context decision trees. In: Proceedings of the ICASSP, Istanbul"},{"issue":"3","key":"13_CR20","doi-asserted-by":"publisher","first-page":"272","DOI":"10.1109\/89.759034","volume":"7","author":"MJF Gales","year":"1999","unstructured":"Gales MJF (1999) Semi-tied covariance matrices for hidden Markov models. IEEE Trans Speech Audio Process 7(3):272\u2013281","journal-title":"IEEE Trans Speech Audio Process"},{"key":"13_CR21","first-page":"249","volume-title":"Understanding the difficulty of training deep feedforward neural networks","author":"X Glorot","year":"2010","unstructured":"Glorot X, Bengio Y (2010) Understanding the difficulty of training deep feedforward neural networks. In: Proceedings of the AISTATS, Sardinia, pp\u00a0249\u2013256"},{"doi-asserted-by":"crossref","unstructured":"Habash N, Rambow O (2005) Arabic tokenization, part-of-speech tagging and morphological disambiguation in one fell swoop. In: Proceedings of the 43rd annual meeting of the Association for Computational Linguistics (ACL\u201905), Ann Arbor. Association for Computational Linguistics, pp 573\u2013580. http:\/\/www.aclweb.org\/anthology\/P\/P05\/P05-1071","key":"13_CR22","DOI":"10.3115\/1219840.1219911"},{"doi-asserted-by":"crossref","unstructured":"Habash N, Rambow O (2007) Arabic diacritization through full morphological tagging. In: NAACL07, Rochester","key":"13_CR23","DOI":"10.3115\/1614108.1614122"},{"key":"13_CR24","first-page":"3761","volume-title":"Lattice-based optimization of sequence classification criteria for neural-network acoustic modeling","author":"B Kingsbury","year":"2009","unstructured":"Kingsbury B (2009) Lattice-based optimization of sequence classification criteria for neural-network acoustic modeling. In: Proceedings of the ICASSP, Taipei, pp\u00a03761\u20133764"},{"key":"13_CR25","first-page":"4378","volume-title":"The IBM 2009 GALE Arabic speech transcription system","author":"B Kingsbury","year":"2011","unstructured":"Kingsbury B, Soltau H, Saon G, Chu S, Kuo HK, Mangu L, Ravuri S, Morgan N, Janin A (2011) The IBM 2009 GALE Arabic speech transcription system. In: Proceedings of the ICASSP, Prague, pp\u00a04378\u20134381"},{"key":"13_CR26","volume-title":"Cross-dialectal acoustic data sharing for Arabic speech recognition","author":"K Kirchhoff","year":"2004","unstructured":"Kirchhoff K, Vergyri D (2004) Cross-dialectal acoustic data sharing for Arabic speech recognition. In: ICASSP, Montreal"},{"key":"13_CR27","first-page":"344","volume-title":"Novel approaches to Arabic speech recognition: report from the 2002 Johns-Hopkins summer workshop","author":"K Kirchhoff","year":"2003","unstructured":"Kirchhoff K, Bilmes J, Das S, Duta N, Egan M, Ji G, He F, Henderson J, Liu D, Noamany M, Schone P, Schwartz R, Vergyri D (2003) Novel approaches to Arabic speech recognition: report from the 2002 Johns-Hopkins summer workshop. In: Proceedings of the ICASSP, Hong\u00a0Kong, pp\u00a0344\u2013347"},{"issue":"4","key":"13_CR28","doi-asserted-by":"publisher","first-page":"589","DOI":"10.1016\/j.csl.2005.10.001","volume":"20","author":"K Kirchhoff","year":"2006","unstructured":"Kirchhoff K, Vergyri D, Bilmes J, Duh K, Stolcke A (2006) Morphology-based language modeling for conversational Arabic speech recognition. Comput Speech Lang 20(4):589\u2013608","journal-title":"Comput Speech Lang"},{"doi-asserted-by":"crossref","unstructured":"Kuo HKJ, Mangu L, Emami A, Zitouni I, Lee YS (2009) Syntactic features for Arabic speech recognition. In: Proceedings of the ASRU 2009, Merano","key":"13_CR29","DOI":"10.1109\/ASRU.2009.5373470"},{"doi-asserted-by":"crossref","unstructured":"Kuo HKJ, Mangu L, Emami A, Zitouni I (2010) Morphological and syntactic features for Arabic speech recognition. In: Proceedings of the ICASSP 2010, Dallas","key":"13_CR30","DOI":"10.1109\/ICASSP.2010.5495010"},{"key":"13_CR31","volume-title":"Minimum Bayes risk discriminative language models for Arabic speech recognition","author":"H Kuo","year":"2011","unstructured":"Kuo H, Mangu L, Arisoy E, Saon G (2011) Minimum Bayes risk discriminative language models for Arabic speech recognition. In: Proceedings of the of IEEE ASRU, Waikoloa"},{"key":"13_CR32","first-page":"1137","volume-title":"The Penn Arabic Treebank: building a large-scale annotated Arabic corpus","author":"M Maamouri","year":"2004","unstructured":"Maamouri M, Bies A, Buckwalter T, Mekki W (2004) The Penn Arabic Treebank: building a large-scale annotated Arabic corpus. In: Proceedings of NEMLAR conference on Arabic language resources and tools, Cairo, pp\u00a01137\u20131155"},{"key":"13_CR33","first-page":"1093","volume-title":"Arabic broadcast news transcription using a one million word vocalized vocabulary","author":"A Messaoudi","year":"2006","unstructured":"Messaoudi A, Gauvain JL, Lamel L (2006) Arabic broadcast news transcription using a one million word vocalized vocabulary. In: Proceedings of the ICASSP, Toulouse, pp\u00a01093\u20131096"},{"key":"13_CR34","first-page":"105","volume-title":"Minimum phone error and I-smoothing for improved discriminative training","author":"D Povey","year":"2002","unstructured":"Povey D, Woodland PC (2002) Minimum phone error and I-smoothing for improved discriminative training. In: Proceedings of the ICASSP, Orlando, vol\u00a0I, pp\u00a0105\u2013108"},{"key":"13_CR35","first-page":"4057","volume-title":"Boosted MMI for model and feature space discriminative training","author":"D Povey","year":"2008","unstructured":"Povey D, Kanevsky D, Kingsbury B, Ramabhadran B, Saon G, Visweswariah K (2008) Boosted MMI for model and feature space discriminative training. In: Proceedings of the ICASSP, Las Vegas, pp\u00a04057\u20134060"},{"key":"13_CR36","first-page":"4330","volume-title":"Subspace Gaussian mixture models for speech recognition","author":"D Povey","year":"2010","unstructured":"Povey D, Burget L, Agarwal M, Akyazi P, Feng K, Ghoshal A, Glembek O, Goel NK, Karafiat M, Rastrow A, Rose RC, Schwarz P, Thomas S (2010) Subspace Gaussian mixture models for speech recognition. In: Proceedings of the ICASSP, Dallas, pp\u00a04330\u20134333"},{"key":"13_CR37","volume-title":"A unified approach of incorporating general features in decsion tree based acoustic modeling","author":"W Reichl","year":"1999","unstructured":"Reichl W, Chou W (1999) A unified approach of incorporating general features in decsion tree based acoustic modeling. In: Proceedings of the ICASSP, Phoenix"},{"key":"13_CR38","doi-asserted-by":"publisher","first-page":"19","DOI":"10.1006\/dspr.1999.0361","volume":"10","author":"D Reynolds","year":"2000","unstructured":"Reynolds D, Quatieri T, Dunn R (2000) Speaker verification using adapted Gaussian mixture models. Digit Signal Process 10:19\u201341","journal-title":"Digit Signal Process"},{"doi-asserted-by":"crossref","unstructured":"Sainath T, Kingsbury B, Ramabhadran B (2010) Auto-encode bottleneck features using deep belief networks. In: Proceedings of the ICASSP 2012, Kyoto","key":"13_CR39","DOI":"10.1109\/ICASSP.2012.6288833"},{"key":"13_CR40","first-page":"5056","volume-title":"Bayesian sensing hidden Markov models for speech recognition","author":"G Saon","year":"2011","unstructured":"Saon G, Chien JT (2011) Bayesian sensing hidden Markov models for speech recognition. In: Proceedings of ICASSP, Prague, pp\u00a05056\u20135059"},{"key":"13_CR41","first-page":"5316","volume-title":"Discriminative training for Bayesian sensing hidden Markov models","author":"G Saon","year":"2011","unstructured":"Saon G, Chien JT (2011) Discriminative training for Bayesian sensing hidden Markov models. In: Proceedings of ICASSP, Prague, pp\u00a05316\u20135319"},{"key":"13_CR42","volume-title":"Some properties of Bayesian sensing hidden Markov models","author":"G Saon","year":"2011","unstructured":"Saon G, Chien JT (2011) Some properties of Bayesian sensing hidden Markov models. In: Proceedings of IEEE ASRU, Waikoloa"},{"key":"13_CR43","first-page":"4378","volume-title":"The IBM 2008 GALE Arabic speech transcription system","author":"G Saon","year":"2010","unstructured":"Saon G, Soltau H, Chaudhari U, Chu S, Kingsbury B, Kuo HK, Mangu L, Povey D (2010) The IBM 2008 GALE Arabic speech transcription system. In: Proceedings of the ICASSP, Dallas, pp\u00a04378\u20134381"},{"doi-asserted-by":"crossref","unstructured":"Schwenk H (2007) Continuous space language models. Comput Speech Lang 21(3). doi:http:\/\/dx.doi.org\/10.1016\/j.csl.2006.09.003","key":"13_CR44","DOI":"10.1016\/j.csl.2006.09.003"},{"key":"13_CR45","volume-title":"Phone dependent modeling of hyperarticulated effects","author":"H Soltau","year":"2000","unstructured":"Soltau H, Waibel A (2000) Phone dependent modeling of hyperarticulated effects. In: Proceedings of the ICSLP, Beijing"},{"issue":"5","key":"13_CR46","doi-asserted-by":"publisher","first-page":"884","DOI":"10.1109\/TASL.2009.2022966","volume":"17","author":"H Soltau","year":"2009","unstructured":"Soltau H, Saon G, Kingsbury B, Kuo HKJ, Mangu L, Povey D, Emami A (2009) Advances in Arabic speech transcription at IBM under the DARPA GALE program. IEEE Trans Audio Speech Lang Process 17(5):884\u2013894","journal-title":"IEEE Trans Audio Speech Lang Process"},{"key":"13_CR47","volume-title":"The IBM Attila speech recognition toolkit","author":"H Soltau","year":"2010","unstructured":"Soltau H, Saon G, Kingsbury B (2010) The IBM Attila speech recognition toolkit. In: Proceedings of the IEEE workshop on spoken language technology, Berkeley"},{"key":"13_CR48","first-page":"270","volume-title":"Entropy-based pruning of backoff language models","author":"A Stolcke","year":"1998","unstructured":"Stolcke A (1998) Entropy-based pruning of backoff language models. In: Proceedings of the DARPA broadcast news transcription and understanding workshop, Lansdowne, pp\u00a0270\u2013274"},{"key":"13_CR49","first-page":"901","volume-title":"SRILM \u2013 an extensible language modeling toolkit","author":"A Stolcke","year":"2002","unstructured":"Stolcke A (2002) SRILM \u2013 an extensible language modeling toolkit. In: Proceedings of the ICSLP, Denver, pp 901\u2013904"},{"key":"13_CR50","volume-title":"The SRI March 2000 Hub-5 conversational speech transcription system","author":"A Stolcke","year":"2000","unstructured":"Stolcke A, Bratt H, Butzberger J, Franco H, Gadde VRR, Plauche M, Richey C, Shriberg\u00a0E, Sonmez K, Weng F, Zheng J (2000) The SRI March 2000 Hub-5 conversational speech transcription system. In: Proceedings of the NIST speech transcription workshop, College Park"},{"key":"13_CR51","volume-title":"Development of a conversational telephone speech recognizer for Levantine Arabic","author":"D Vergyri","year":"2005","unstructured":"Vergyri D, Kirchhoff K, Gadde R, Stolcke A, Zheng J (2005) Development of a conversational telephone speech recognizer for Levantine Arabic. In: Interspeech, Lisbon"}],"container-title":["Theory and Applications of Natural Language Processing","Natural Language Processing of Semitic Languages"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-642-45358-8_13","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,2,20]],"date-time":"2023-02-20T02:31:40Z","timestamp":1676860300000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-642-45358-8_13"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2014]]},"ISBN":["9783642453571","9783642453588"],"references-count":51,"URL":"https:\/\/doi.org\/10.1007\/978-3-642-45358-8_13","relation":{},"ISSN":["2192-032X","2192-0338"],"issn-type":[{"type":"print","value":"2192-032X"},{"type":"electronic","value":"2192-0338"}],"subject":[],"published":{"date-parts":[[2014]]},"assertion":[{"value":"25 March 2014","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}}]}}