{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2023,5,7]],"date-time":"2023-05-07T22:40:21Z","timestamp":1683499221265},"reference-count":52,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2006,8,1]],"date-time":"2006-08-01T00:00:00Z","timestamp":1154390400000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["Appl Intell"],"published-print":{"date-parts":[[2006,8]]},"DOI":"10.1007\/s10489-006-8865-0","type":"journal-article","created":{"date-parts":[[2006,7,1]],"date-time":"2006-07-01T10:19:32Z","timestamp":1151749172000},"page":"37-57","source":"Crossref","is-referenced-by-count":0,"title":["Discriminative training via minimization of risk estimates based on Parzen smoothing"],"prefix":"10.1007","volume":"25","author":[{"given":"Erik","family":"McDermott","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Shigeru","family":"Katagiri","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","reference":[{"key":"8865_CR1","unstructured":"Duda RO, Hart PE (1973) PatternClassification and Scene Analysis. Wiley Interscience Publications"},{"key":"8865_CR2","first-page":"1065","volume":"33","author":"E Parzen","year":"1962","unstructured":"Parzen E (1962) On Estimation of a Probability Density Function and Mode. ams 33:1065\u20131076","journal-title":"ams"},{"key":"8865_CR3","doi-asserted-by":"crossref","unstructured":"McLachlan GJ (1992) Discriminant Analysis and Statistical Pattern Recognition. Wiley","DOI":"10.1002\/0471725293"},{"issue":"3","key":"8865_CR4","doi-asserted-by":"crossref","first-page":"299","DOI":"10.1109\/PGEC.1967.264666","volume":"EC-16","author":"S Amari","year":"1967","unstructured":"Amari S (1967) A Theory of Adaptive Pattern Classifiers. IEEE, Trans. Electr Comp EC-16(3):299\u2013307","journal-title":"IEEE, Trans. Electr Comp"},{"key":"8865_CR5","doi-asserted-by":"crossref","unstructured":"Devroye L, Gyorfi L, Lugosi G (1996) A Probabilistic Theory of Pattern Recognition. Springer Verlag","DOI":"10.1007\/978-1-4612-0711-5"},{"key":"8865_CR6","unstructured":"Katagiri S, Lee C-H, Juang B-H (1990) A Generalized Probabilistic Descent Method, In: Proceedings Acoustical Society of Japan, ser. Fall meeting, pp 141\u2013142"},{"key":"8865_CR7","doi-asserted-by":"crossref","unstructured":"Katagiri S, Lee C-H, Juang B-H (1991) New discriminative training algorithms based on the generalized descent method, In: Proceedings IEEE Worshop on Neural Networks for Signal Processing, pp 299\u2013308","DOI":"10.1109\/NNSP.1991.239512"},{"issue":"11","key":"8865_CR8","doi-asserted-by":"crossref","first-page":"2345","DOI":"10.1109\/5.726793","volume":"86","author":"S Katagiri","year":"1998","unstructured":"Katagiri S, Juang B-H, Lee C-H (1998) Pattern recognition using a family of design algorithms based upon the Generalized Probabilistic Descent method. Proceedings of the IEEE, 86 (11):2345\u20132373","journal-title":"Proceedings of the IEEE"},{"issue":"12","key":"8865_CR9","doi-asserted-by":"crossref","first-page":"3043","DOI":"10.1109\/78.175747","volume":"40","author":"B-H Juang","year":"1992","unstructured":"Juang B-H, Katagiri S (1992) Discriminative Learning for Minimum Error Classification. IEEE Trans, on Acoustics, Speech, and Signal Processing 40(12):3043\u20133054","journal-title":"IEEE Trans, on Acoustics, Speech, and Signal Processing"},{"key":"8865_CR10","unstructured":"McDermott E (1997) Discriminative Training for Speech Recognition. Ph.D. dissertation, Waseda University, School of Engineering"},{"key":"8865_CR11","doi-asserted-by":"crossref","first-page":"652","DOI":"10.1109\/ICASSP.1993.319394","volume":"2","author":"W Chou","year":"1993","unstructured":"Chou W, Lee C-H, Juang B-H (1993) Minimum error rate training based on N-best string models. In: Proceedings IEEE ICASSP 2, pp 652\u2013655","journal-title":"Proceedings IEEE ICASSP"},{"key":"8865_CR12","doi-asserted-by":"crossref","first-page":"351","DOI":"10.1006\/csla.1994.1018","volume":"8","author":"E McDermott","year":"1994","unstructured":"McDermott E, Katagiri S (1994) Prototype Based Discriminative Training for Various Speech Units. Computer Speech and Language 8:351\u2013368","journal-title":"Computer Speech and Language"},{"key":"8865_CR13","first-page":"585","volume":"2","author":"P Clarkson","year":"1999","unstructured":"Clarkson P, Moreno P (1999) On the use of support vector machines for phonetic classification. In: Proceedings IEEE ICASSP 2, pp 585\u2013588","journal-title":"Proceedings IEEE ICASSP"},{"key":"8865_CR14","unstructured":"Cortes C, Haffner P, Mohri M (2002) Rational kernels, In: Proceedings of NIPS"},{"key":"8865_CR15","unstructured":"Jaakkola T, Diekhaus M, Haussler D (1999) Using the fisher kernel method to detect remote protein homologies. In: Proceedings of 7th Conference on Intelligent Systems for Molecular Biology, pp 149\u2013158,"},{"key":"8865_CR16","first-page":"3739","volume":"6","author":"E McDermott","year":"2000","unstructured":"McDermott E, Biem A, Tenpaku S, Katagiri S (2000) Discriminative training for large vocabulary telephone-based name recognition. In: Proceedings IEEE ICASSP 6, pp 3739\u20133742","journal-title":"Proceedings IEEE ICASSP"},{"key":"8865_CR17","first-page":"937","volume":"1","author":"E McDermott","year":"2004","unstructured":"McDermott E, Hazen TJ (2004) Minimum Classification Error training of landmark models for real-time continuous speech recognition. In: Proceedings IEEE ICASSP 1, pp 937\u2013940","journal-title":"Proceedings IEEE ICASSP"},{"key":"8865_CR18","doi-asserted-by":"crossref","unstructured":"Biem A (2001) Minimum Classification Error Training of Hidden Markov Models for Handwriting Recognition. In: Proceedings IEEE ICASSP, vol. 3","DOI":"10.1109\/ICASSP.2001.941223"},{"key":"8865_CR19","doi-asserted-by":"crossref","unstructured":"Gao S, Wu W, Lee C-H, Chua T-S (2003) A Maximal Figure-of-Merit Learning Approach to Text Categorization. In: Proceedings of ACM SIGIR Conference on Research and Development in Information Retrieval, pp 174\u2013181","DOI":"10.1145\/860435.860469"},{"key":"8865_CR20","doi-asserted-by":"crossref","first-page":"400","DOI":"10.1214\/aoms\/1177729586","volume":"22","author":"H Robbins","year":"1951","unstructured":"Robbins H, Monroe H (1951) A stochastic approximation method. Annals of Mathematical Statistics 22:400\u2013407","journal-title":"Annals of Mathematical Statistics"},{"key":"8865_CR21","doi-asserted-by":"crossref","unstructured":"Vapnik VN (1995) The Nature of Statistical Learning Theory. Springer-Verlag","DOI":"10.1007\/978-1-4757-2440-0"},{"key":"8865_CR22","unstructured":"Vapnik VN (1998) Statistical learning theory. Wiley"},{"key":"8865_CR23","first-page":"3490","volume":"6","author":"JPE Gokcay","year":"2000","unstructured":"Gokcay JPE (2000) A New Clustering Evaluation Function Using Renyi\u2019s Information Potential. In: Proceedings ICASSP 2000 6, pp 3490\u20133493","journal-title":"Proceedings ICASSP 2000"},{"key":"8865_CR24","first-page":"49","volume":"1","author":"L Bahl","year":"1986","unstructured":"Bahl L, Brown P, de Souza PV, Mercer KL (1986) Maximum Mutual Information Estimation of hidden Markov parameters for speech recognition. In: Proceedings IEEE ICASSP 1, pp 49\u201352","journal-title":"Proceedings IEEE ICASSP"},{"key":"8865_CR25","doi-asserted-by":"crossref","unstructured":"Brown P (1987) The acoustic-modeling problem in automatic speech recognition. Ph.D. dissertation, Carnegie Mellon University, Department of Computer Science","DOI":"10.21236\/ADA188529"},{"issue":"9","key":"8865_CR26","doi-asserted-by":"crossref","first-page":"1432","DOI":"10.1109\/29.90371","volume":"36","author":"DNA Nadas","year":"1988","unstructured":"Nadas DNA, Picheny M (1988) On a model-robust training method for speech recognition. IEEE Transactions on Signal Processing 36(9):1432\u20131435","journal-title":"IEEE Transactions on Signal Processing"},{"key":"8865_CR27","first-page":"20","volume":"1","author":"PS Gopalakrishnan","year":"1988","unstructured":"Gopalakrishnan PS, Kanevsky D, Nadas A, Nahamoo D, Picheny M (1988) Decoder selection based on cross-entropies. In: Proceedings IEEE ICASSP 1:20\u201323","journal-title":"In: Proceedings IEEE ICASSP"},{"issue":"1","key":"8865_CR28","doi-asserted-by":"crossref","first-page":"107","DOI":"10.1109\/18.61108","volume":"37","author":"ANPS Gopalakrishnan","year":"1991","unstructured":"Gopalakrishnan ANPS, Kanevsky D, Nahamoo D (1991) An inequality for rational functions with applications to some statistical estimation problems. IEEE Transactions on Information Theory 37(1):107\u2013113","journal-title":"IEEE Transactions on Information Theory"},{"key":"8865_CR29","unstructured":"Normandin Y, (1991) Hidden Markov models, maximum mutual information estimation, and the speech recognition problem. Ph.D. dissertation, McGill University, Montreal, Department of Electrical Engineering"},{"key":"8865_CR30","first-page":"605","volume":"2","author":"V Valtchev","year":"1996","unstructured":"Valtchev V, Odell JJ, Woodland PC, Young SJ (1996) Lattice-based discriminative training for large vocabulary speech recognition. In: International Conference on Spoken Language Processing, vol. 2, pp 605\u2013609","journal-title":"International Conference on Spoken Language Processing"},{"key":"8865_CR31","doi-asserted-by":"crossref","first-page":"25","DOI":"10.1006\/csla.2001.0182","volume":"16","author":"P Woodland","year":"2002","unstructured":"Woodland P, Povey D (2002) Large scale discriminative training of hidden Markov models for speech recognition, Computer Speech and Language, vol. 16, pp 25\u201347","journal-title":"Computer Speech and Language"},{"key":"8865_CR32","doi-asserted-by":"crossref","unstructured":"Schluter R, Macherey W, Kanthak S, Ney H, Welling L (1998) Comparison of Discriminative Training Criteria. In: Proceedings ICASSP \u201998, pp 493\u2013496","DOI":"10.1109\/ICASSP.1998.674475"},{"key":"8865_CR33","first-page":"473","volume":"1","author":"W Chou","year":"1992","unstructured":"Chou W, Juang B-H, Lee C-H (1992) Segmental GPD training of HMM based speech recognizer. In: Proceedings IEEE ICASSP 1, pp 473\u2013476","journal-title":"Proceedings IEEE ICASSP"},{"key":"8865_CR34","unstructured":"Fahlman SE (1988) An empirical study of learning speed in back-propagation networks, Canergie Mellon University, Tech. Rep"},{"key":"8865_CR35","doi-asserted-by":"crossref","unstructured":"Le Roux J, McDermott E (2005) Optimization methods for discriminative training, pp 3341\u20133344","DOI":"10.21437\/Interspeech.2005-858"},{"key":"8865_CR36","doi-asserted-by":"crossref","unstructured":"Kanevsky D (1995) A generalization of the Baum algorithm to functions on non-linear manifolds, In: Proceedings ICASSP \u201995, pp 473\u2013476","DOI":"10.1109\/ICASSP.1995.479631"},{"key":"8865_CR37","doi-asserted-by":"crossref","unstructured":"Macherey W, Haferkamp L, Schlueter R, Ney H (2005) Investigations on Error Minimizing Training Criteria for Discriminative Training in Automatic Speech Recognition","DOI":"10.21437\/Interspeech.2005-693"},{"key":"8865_CR38","first-page":"107","volume":"18","author":"E McDermott","year":"2004","unstructured":"McDermott E, Katagiri S (2004) A derivation of Minimum Classification Error from the theoretical classification risk using, Parzen estimation, 18:107\u2013122","journal-title":"Parzen estimation"},{"key":"8865_CR39","first-page":"713","volume":"2","author":"E McDermott","year":"2003","unstructured":"McDermott E, Katagiri S (2003) A new formalization of minimum classification error using a parzen estimate of classification chance. In: Proceedings IEEE ICASSP, vol 2, pp 713\u2013716","journal-title":"Proceedings IEEE ICASSP"},{"key":"8865_CR40","unstructured":"McDermott E (2000) Discriminative prototype-based methods for speech recognition, In: Handbook for Neural Networks in Speech Processing. Morwood, MA. USA, Artech House, pp 159\u2013216"},{"key":"8865_CR41","unstructured":"Kawahara T, Nanjo H, Shinozaki T, Furui S (2003) Benchmark test for speech recognition using the corpus of spontaneous Japanese, In: Proceedings of the Spontaneous Speech Processing & Recognition Workshop, Tokyo, pp 135\u2013138"},{"issue":"11","key":"8865_CR42","doi-asserted-by":"crossref","first-page":"1641","DOI":"10.1109\/29.46546","volume":"37","author":"K-F Lee","year":"1989","unstructured":"Lee K-F, Hon H-W (1989) Speaker-independent phone recognition using hidden markov models. IEEE Trans. on Acoustics. Speech. and Signal Processing 37(11):1641\u20131648","journal-title":"IEEE Trans. on Acoustics. Speech. and Signal Processing"},{"key":"8865_CR43","doi-asserted-by":"crossref","unstructured":"Silverman BW (1986) Density Estimation for Statistics and Data Analysis. CRC Press","DOI":"10.1007\/978-1-4899-3324-9"},{"key":"8865_CR44","unstructured":"Devroye L, Gyorfi L (1985) Nonparametric density estimation: The L1 view. Wiley"},{"key":"8865_CR45","unstructured":"Parzen E (1992) Modern Probability Theory and its Applications. Wiley Classics"},{"issue":"4","key":"8865_CR46","doi-asserted-by":"crossref","first-page":"893","DOI":"10.1162\/neco.1993.5.6.893","volume":"5","author":"V Vapnik","year":"1993","unstructured":"Vapnik V, Bottou L (1993) Local algorithms for pattern recognition and dependencies estimation. Neural Computation 5(4):893\u2013909","journal-title":"Neural Computation"},{"key":"8865_CR47","unstructured":"Biem A (1997) Discriminative Feature Extraction Applied to Speech Recognition. Ph.D. dissertation. Universit\u2019e de Paris VI"},{"key":"8865_CR48","unstructured":"Katagiri S, Lee C-H, Juang B-H (1991) Discriminative Multilayer Feed-Forward Networks. In: Proc. IEEE Worshop on Neural Networks for Signal Processing, pp 309\u2013318"},{"key":"8865_CR49","doi-asserted-by":"crossref","first-page":"978","DOI":"10.1126\/science.247.4945.978","volume":"247","author":"T Poggio","year":"1990","unstructured":"Poggio T, Girosi F (1990) Regularization Algorithms for Learning that are Equivalent to Multi-Layer Networks. Science 247:978\u2013982","journal-title":"Science"},{"key":"8865_CR50","doi-asserted-by":"crossref","unstructured":"Bishop CM (1995) Neural Networks for Pattern Recognition. Oxford University Press","DOI":"10.1201\/9781420050646.ptb6"},{"key":"8865_CR51","doi-asserted-by":"crossref","unstructured":"Specht D (1990) Probabilistic Neural Networks. Neural Networks (3):109\u2013118","DOI":"10.1016\/0893-6080(90)90049-Q"},{"issue":"6","key":"8865_CR52","first-page":"379","volume":"13","author":"D Rainton","year":"1992","unstructured":"Rainton D, Sagayama S (1992) Minimum Error Classification Training of HMMs\u2014Implementation Details and Experimental Results. jasj 13(6):379\u2013387","journal-title":"jasj"}],"container-title":["Applied Intelligence"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10489-006-8865-0.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s10489-006-8865-0\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10489-006-8865-0","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,5,7]],"date-time":"2023-05-07T22:01:14Z","timestamp":1683496874000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s10489-006-8865-0"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2006,8]]},"references-count":52,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2006,8]]}},"alternative-id":["8865"],"URL":"https:\/\/doi.org\/10.1007\/s10489-006-8865-0","relation":{},"ISSN":["0924-669X","1573-7497"],"issn-type":[{"value":"0924-669X","type":"print"},{"value":"1573-7497","type":"electronic"}],"subject":[],"published":{"date-parts":[[2006,8]]}}}