{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2020,9,1]],"date-time":"2020-09-01T12:03:53Z","timestamp":1598961833290},"publisher-location":"Berlin, Heidelberg","reference-count":200,"publisher":"Springer Berlin Heidelberg","isbn-type":[{"value":"9783540005292","type":"print"},{"value":"9783540364344","type":"electronic"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2003]]},"DOI":"10.1007\/3-540-36434-x_4","type":"book-chapter","created":{"date-parts":[[2007,11,6]],"date-time":"2007-11-06T10:26:57Z","timestamp":1194344817000},"page":"118-183","source":"Crossref","is-referenced-by-count":118,"title":["An Introduction to Boosting and Leveraging"],"prefix":"10.1007","author":[{"given":"Ron","family":"Meir","sequence":"first","affiliation":[]},{"given":"Gunnar","family":"R\u00e4tsch","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2003,1,30]]},"reference":[{"key":"4_CR1","unstructured":"S. Abney, R. E. Schapire, and Y. Singer. Boosting applied to tagging and pp attachment. In Proc. of the Joint SIGDAT Conference on Empirical Methods in Natural Language Processing and Very Large Corpora, 1999."},{"issue":"6","key":"4_CR2","doi-asserted-by":"publisher","first-page":"716","DOI":"10.1109\/TAC.1974.1100705","volume":"19","author":"H. Akaike","year":"1974","unstructured":"H. Akaike. A new look at the statistical model identification. IEEE Trans. Automat. Control, 19(6):716\u2013723, 1974.","journal-title":"IEEE Trans. Automat. Control"},{"key":"4_CR3","doi-asserted-by":"publisher","first-page":"113","DOI":"10.1162\/15324430152733133","volume":"1","author":"E. L. Allwein","year":"2000","unstructured":"E. L. Allwein, R. E. Schapire, and Y. Singer. Reducing multiclass to binary: A unifying approach for margin classifiers. 
Journal of Machine Learning Research, 1:113\u2013141, 2000.","journal-title":"Journal of Machine Learning Research"},{"key":"4_CR4","unstructured":"M. Anthony and P. L. Bartlett. Neural Network Learning: Theoretical Foundations. Cambridge University Press, 1999.","DOI":"10.1017\/CBO9780511624216","doi-asserted-by":"crossref"},{"key":"4_CR5","doi-asserted-by":"publisher","first-page":"73","DOI":"10.1162\/153244303768966111","volume":"3","author":"A. Antos","year":"2002","unstructured":"A. Antos, B. K\u00e9gl, T. Linder, and G. Lugosi. Data-dependent margin-based generalization bounds for classification. JMLR, 3:73\u201398, 2002.","journal-title":"JMLR"},{"key":"4_CR6","unstructured":"J. A. Aslam. Improving algorithms for boosting. In Proc. COLT, San Francisco, 2000. Morgan Kaufmann."},{"key":"4_CR7","unstructured":"F. Audrino and P. B\u00fchlmann. Volatility estimation with functional gradient descent for very high-dimensional financial time series. Journal of Computational Finance., 2002. To appear. See http:\/\/www.stat.ethz.ch\/~buhlmann\/bibliog.html .","DOI":"10.21314\/JCF.2003.107","doi-asserted-by":"crossref"},{"key":"4_CR8","unstructured":"J. P. Barnes. Capacity control in boosting using a p-convex hull. Master\u2019s thesis, Australian National University, 1999. supervised by R. C. Williamson."},{"key":"4_CR9","doi-asserted-by":"publisher","first-page":"85","DOI":"10.1023\/A:1013999503812","volume":"48","author":"P. Bartlett","year":"2002","unstructured":"P. Bartlett, S. Boucheron, and G. Lugosi. Model selection and error estimation. Machine Learning, 48:85\u2013113, 2002.","journal-title":"Machine Learning"},{"key":"4_CR10","unstructured":"P. L. Bartlett, O. Bousquet, and S. Mendelson. Localized rademacher averages. In Proceedings COLT\u201902, volume 2375 of LNAI, pages 44\u201358, Sydney, 2002. Springer.","DOI":"10.1007\/3-540-45435-7_4","doi-asserted-by":"crossref"},{"key":"4_CR11","unstructured":"P. L. Bartlett and S. Mendelson. 
Rademacher and gaussian complexities: Risk bounds and structural results. Journal of Machine Learning Research, 2002. to appear 10\/02."},{"key":"4_CR12","doi-asserted-by":"publisher","first-page":"105","DOI":"10.1023\/A:1007515423169","volume":"36","author":"E. Bauer","year":"1999","unstructured":"E. Bauer and R. Kohavi. An empirical comparison of voting classification algorithm: Bagging, boosting and variants. Machine Learning, 36:105\u2013142, 1999.","journal-title":"Machine Learning"},{"key":"4_CR13","first-page":"27","volume":"4","author":"H. H. Bauschke","year":"1997","unstructured":"H. H. Bauschke and J. M. Borwein. Legendre functions and the method of random Bregman projections. Journal of Convex Analysis, 4:27\u201367, 1997.","journal-title":"Journal of Convex Analysis"},{"key":"4_CR14","unstructured":"S. Ben-David, P. Long, and Y. Mansour. Agnostic boosting. In Proceedings of the Fourteenth Annual Conference on Computational Learning Theory, pages 507\u2013516, 2001.","DOI":"10.1007\/3-540-44581-1_33","doi-asserted-by":"crossref"},{"key":"4_CR15","doi-asserted-by":"publisher","first-page":"27","DOI":"10.1080\/10556789408805554","volume":"3","author":"K. P. Bennett","year":"1993","unstructured":"K. P. Bennett and O. L. Mangasarian. Multicategory separation via linear programming. Optimization Methods and Software, 3:27\u201339, 1993.","journal-title":"Optimization Methods and Software"},{"key":"4_CR16","unstructured":"K. P. Bennett, A. Demiriz, and R. Maclin. Exploiting unlabeled data in ensemble methods. In Proc. ICML, 2002.","DOI":"10.1145\/775047.775090","doi-asserted-by":"crossref"},{"key":"4_CR17","doi-asserted-by":"publisher","first-page":"23","DOI":"10.1080\/10556789208805504","volume":"1","author":"K. P. Bennett","year":"1992","unstructured":"K. P. Bennett and O. L. Mangasarian. Robust linear programming discrimination of two linearly inseparable sets. 
Optimization Methods and Software, 1:23\u201334, 1992.","journal-title":"Optimization Methods and Software"},{"key":"4_CR18","series-title":"Lect Notes Comput Sci","doi-asserted-by":"crossref","first-page":"343","DOI":"10.1007\/BFb0020178","volume-title":"A boosting algorithm for regression","author":"A. Bertoni","year":"1997","unstructured":"A. Bertoni, P. Campadelli, and M. Parodi. A boosting algorithm for regression. In W. Gerstner, A. Germond, M. Hasler, and J.-D. Nicoud, editors, Proceedings ICANN\u201997, Int. Conf. on Artificial Neural Networks, volume V of LNCS, pages 343\u2013348, Berlin, 1997. Springer."},{"key":"4_CR19","author":"D. P. Bertsekas","year":"1995","unstructured":"D. P. Bertsekas. Nonlinear Programming. Athena Scientific, Belmont, MA, 1995.","volume-title":"Nonlinear Programming"},{"key":"4_CR20","unstructured":"C. M. Bishop. Neural Networks for Pattern Recognition. Oxford University Press, 1995.","DOI":"10.1201\/9781420050646.ptb6","doi-asserted-by":"crossref"},{"key":"4_CR21","doi-asserted-by":"publisher","first-page":"377","DOI":"10.1016\/0020-0190(87)90114-1","volume":"24","author":"A. Blumer","year":"1987","unstructured":"A. Blumer, A. Ehrenfeucht, D. Haussler, and M. Warmuth. Occam\u2019s razor. Information Processing Letters, 24:377\u2013380, 1987.","journal-title":"Information Processing Letters"},{"key":"4_CR22","unstructured":"B. E. Boser, I. M. Guyon, and V. N. Vapnik. A training algorithm for optimal margin classifiers. In D. Haussler, editor, Proceedings of the 5th Annual ACM Workshop on Computational Learning Theory, pages 144\u2013152, 1992.","DOI":"10.1145\/130385.130401","doi-asserted-by":"crossref"},{"key":"4_CR23","unstructured":"P. S. Bradley and O. L. Mangasarian. Feature selection via concave minimization and support vector machines. In Proc. 15th International Conf. on Machine Learning, pages 82\u201390. 
Morgan Kaufmann, San Francisco, CA, 1998."},{"key":"4_CR24","doi-asserted-by":"publisher","first-page":"200","DOI":"10.1016\/0041-5553(67)90040-7","volume":"7","author":"L. M. Bregman","year":"1967","unstructured":"L. M. Bregman. The relaxation method for finding the common point of convex sets and its application to the solution of problems in convex programming. USSR Computational Math. and Math. Physics, 7:200\u2013127, 1967.","journal-title":"USSR Computational Math. and Math. Physics"},{"issue":"2","key":"4_CR25","first-page":"123","volume":"26","author":"L. Breiman","year":"1996","unstructured":"L. Breiman. Bagging predictors. Machine Learning, 26(2):123\u2013140, 1996.","journal-title":"Machine Learning"},{"key":"4_CR26","unstructured":"L. Breiman. Bias, variance, and arcing classifiers. Technical Report 460, Statistics Department, University of California, July 1997."},{"issue":"7","key":"4_CR27","doi-asserted-by":"publisher","first-page":"1493","DOI":"10.1162\/089976699300016106","volume":"11","author":"L. Breiman","year":"1999","unstructured":"L. Breiman. Prediction games and arcing algorithms. Neural Computation, 11(7):1493\u20131518, 1999. Also Technical Report 504, Statistics Department, University of California Berkeley.","journal-title":"Neural Computation"},{"key":"4_CR28","unstructured":"L. Breiman. Some infinity theory for predictor ensembles. Technical Report 577, Berkeley, August 2000."},{"key":"4_CR29","unstructured":"L. Breiman, J. Friedman, J. Olshen, and C. Stone. Classification and Regression Trees. Wadsworth, 1984."},{"key":"4_CR30","unstructured":"N. Bshouty and D. Gavinsky. On boosting with polynomially bounded distributions. JMLR, pages 107\u2013111, 2002. Accepted."},{"key":"4_CR31","unstructured":"P. Buhlmann and B. Yu. Boosting with the l2 loss: Regression and classification. J. Amer. Statist. Assoc., 2002. revised, also Technical Report 605, Stat Dept, UC Berkeley August, 2001."},{"key":"4_CR32","unstructured":"C. Campbell and K. P. 
Bennett. A linear programming approach to novelty detection. In T. K. Leen, T. G. Dietterich, and V. Tresp, editors, Advances in Neural Information Processing Systems, volume 13, pages 395\u2013401. MIT Press, 2001."},{"key":"4_CR33","unstructured":"J. Carmichael. Non-intrusive appliance load monitoring system. Epri journal, Electric Power Research Institute, 1990."},{"key":"4_CR34","unstructured":"Y. Censor and S. A. Zenios. Parallel Optimization: Theory, Algorithms and Application. Numerical Mathematics and Scientific Computation. Oxford University Press, 1997."},{"issue":"4","key":"4_CR35","doi-asserted-by":"publisher","first-page":"1215","DOI":"10.1109\/18.335953","volume":"40","author":"N. Cesa-Bianchi","year":"1994","unstructured":"N. Cesa-Bianchi, A. Krogh, and M. Warmuth. Bounds on approximate steepest descent for likelihood maximization in exponential families. IEEE Transaction on Information Theory, 40(4):1215\u20131220, July 1994.","journal-title":"IEEE Transaction on Information Theory"},{"issue":"1","key":"4_CR36","doi-asserted-by":"publisher","first-page":"131","DOI":"10.1023\/A:1012450327387","volume":"46","author":"O. Chapelle","year":"2002","unstructured":"O. Chapelle, V. Vapnik, O. Bousquet, and S. Mukherjee. Choosing multiple parameters for support vector machines. Machine Learning, 46(1):131\u2013159, 2002.","journal-title":"Machine Learning"},{"key":"4_CR37","unstructured":"S. Chen, D. Donoho, and M. Saunders. Atomic decomposition by basis pursuit. Technical Report 479, Department of Statistics, Stanford University, 1995."},{"key":"4_CR38","unstructured":"W. W. Cohen, R. E. Schapire, and Y. Singer. Learning to order things. In Michael I. Jordan, Michael J. Kearns, and Sara A. Solla, editors, Advances in Neural Information Processing Systems, volume 10. 
The MIT Press, 1998.","DOI":"10.1613\/jair.587","doi-asserted-by":"crossref"},{"issue":"1\u20133","key":"4_CR39","doi-asserted-by":"publisher","first-page":"253","DOI":"10.1023\/A:1013912006537","volume":"48","author":"M. Collins","year":"2002","unstructured":"M. Collins, R. E. Schapire, and Y. Singer. Logistic Regression, AdaBoost and Bregman distances. Machine Learning, 48(1\u20133):253\u2013285, 2002. Special Issue on New Methods for Model Selection and Model Combination.","journal-title":"Machine Learning"},{"key":"4_CR40","unstructured":"R. Cominetti and J.-P. Dussault. A stable exponential penalty algorithm with superlinear convergence. J.O.T.A., 83(2), Nov 1994.","DOI":"10.1007\/BF02190058","doi-asserted-by":"crossref"},{"key":"4_CR41","first-page":"273","volume":"20","author":"C. Cortes","year":"1995","unstructured":"C. Cortes and V. N. Vapnik. Support vector networks. Machine Learning, 20:273\u2013297, 1995.","journal-title":"Machine Learning"},{"issue":"1","key":"4_CR42","doi-asserted-by":"publisher","first-page":"21","DOI":"10.1109\/TIT.1967.1053964","volume":"13","author":"T. M. Cover","year":"1967","unstructured":"T. M. Cover and P. E. Hart. Nearest neighbor pattern classifications. IEEE transaction on information theory, 13(1):21\u201327, 1967.","journal-title":"IEEE transaction on information theory"},{"issue":"4","key":"4_CR43","doi-asserted-by":"publisher","first-page":"1676","DOI":"10.1214\/aos\/1176347872","volume":"18","author":"D. D. Cox","year":"1990","unstructured":"D. D. Cox and F. O\u2019sullivan. Asymptotic analysis of penalized likelihood and related estimates. The Annals of Statistics, 18(4):1676\u20131695, 1990.","journal-title":"The Annals of Statistics"},{"key":"4_CR44","unstructured":"K. Crammer and Y. Singer. On the learnability and design of output codes for multiclass problems. In N. Cesa-Bianchi and S. Goldberg, editors, Proc. Colt, pages 35\u201346, San Francisco, 2000. Morgan Kaufmann."},{"key":"4_CR45","author":"N. 
Cristianini","year":"2000","unstructured":"N. Cristianini and J. Shawe-Taylor. An Introduction to Support Vector Machines. Cambridge University Press, Cambridge, UK, 2000.","volume-title":"An Introduction to Support Vector Machines"},{"issue":"4","key":"4_CR46","doi-asserted-by":"publisher","first-page":"380","DOI":"10.1109\/34.588021","volume":"19","author":"S. Della Pietra","year":"1997","unstructured":"S. Della Pietra, V. Della Pietra, and J. Lafferty. Inducing features of random fields. IEEE Transactions on Pattern Analysis and Machine Intelligence, 19(4):380\u2013393, April 1997.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"4_CR47","unstructured":"S. Della Pietra, V. Della Pietra, and J. Lafferty. Duality and auxiliary functions for Bregman distances. Technical Report CMU-CS-01-109, School of Computer Science, Carnegie Mellon University, 2001."},{"key":"4_CR48","doi-asserted-by":"publisher","first-page":"225","DOI":"10.1023\/A:1012470815092","volume":"46","author":"A. Demiriz","year":"2002","unstructured":"A. Demiriz, K. P. Bennett, and J. Shawe-Taylor. Linear programming boosting via column generation. Journal of Machine Learning Research, 46:225\u2013254, 2002.","journal-title":"Journal of Machine Learning Research"},{"key":"4_CR49","unstructured":"M. Dettling and P. B\u00fchlmann. How to use boosting for tumor classification with gene expression data. Preprint. See http:\/\/www.stat.ethz.ch\/~dettling\/boosting , 2002.","DOI":"10.1093\/bioinformatics\/btf867","doi-asserted-by":"crossref"},{"key":"4_CR50","author":"L. Devroye","year":"1996","unstructured":"L. Devroye, L. Gy\u00f6rfi, and G. Lugosi. A Probabilistic Theory of Pattern Recognition. Number 31 in Applications of Mathematics. Springer, New York, 1996.","volume-title":"Applications of Mathematics"},{"issue":"2","key":"4_CR51","doi-asserted-by":"publisher","first-page":"139","DOI":"10.1023\/A:1007597902467","volume":"40","author":"T. G. 
Dietterich","year":"1999","unstructured":"T. G. Dietterich. An experimental comparison of three methods for constructing ensembles of decision trees: Bagging, boosting, and randomization. Machine Learning, 40(2):139\u2013157, 1999.","journal-title":"Machine Learning"},{"key":"4_CR52","doi-asserted-by":"crossref","first-page":"263","DOI":"10.1613\/jair.105","volume":"2","author":"T. G. Dietterich","year":"1995","unstructured":"T. G. Dietterich and G. Bakiri. Solving multiclass learning problems via error-correcting output codes. Journal of Artificial Intelligence Research, 2:263\u2013286, 1995.","journal-title":"Journal of Artificial Intelligence Research"},{"key":"4_CR53","unstructured":"C. Domingo and O. Watanabe. A modification of AdaBoost. In Proc. COLT, San Francisco, 2000. Morgan Kaufmann."},{"key":"4_CR54","unstructured":"H. Drucker, C. Cortes, L. D. Jackel, Y. LeCun, and V. Vapnik. Boosting and other ensemble methods. Neural Computation, 6, 1994.","DOI":"10.1162\/neco.1994.6.6.1289","doi-asserted-by":"crossref"},{"key":"4_CR55","doi-asserted-by":"publisher","first-page":"705","DOI":"10.1142\/S0218001493000352","volume":"7","author":"H. Drucker","year":"1993","unstructured":"H. Drucker, R. E. Schapire, and P. Y. Simard. Boosting performance in neural networks. International Journal of Pattern Recognition and Artificial Intelligence, 7:705\u2013719, 1993.","journal-title":"International Journal of Pattern Recognition and Artificial Intelligence"},{"key":"4_CR56","unstructured":"N. Duffy and D. P. Helmbold. A geometric approach to leveraging weak learners. In P. Fischer and H. U. Simon, editors, Computational Learning Theory: 4th European Conference (EuroCOLT\u2019 99), pages 18\u201333, March 1999. Long version to appear in TCS.","DOI":"10.1007\/3-540-49097-3_3","doi-asserted-by":"crossref"},{"key":"4_CR57","unstructured":"N. Duffy and D. P. Helmbold. Boosting methods for regression. 
Technical report, Department of Computer Science, University of Santa Cruz, 2000."},{"key":"4_CR58","unstructured":"N. Duffy and D. P. Helmbold. Leveraging for regression. In Proc. COLT, pages 208\u2013219, San Francisco, 2000. Morgan Kaufmann."},{"key":"4_CR59","unstructured":"N. Duffy and D. P. Helmbold. Potential boosters? In S. A. Solla, T. K. Leen, and K.-R. M\u00fcller, editors, Advances in Neural Information Processing Systems, volume 12, pages 258\u2013264. MIT Press, 2000."},{"key":"4_CR60","unstructured":"G. Escudero, L. M\u00e0rquez, and G. Rigau. Boosting applied to word sense disambiguation. In LNAI 1810: Proceedings of the 12th European Conference on Machine Learning, ECML, pages 129\u2013141, Barcelona, Spain, 2000.","DOI":"10.1007\/3-540-45164-1_14","doi-asserted-by":"crossref"},{"key":"4_CR61","unstructured":"W. Feller. An Introduction to Probability Theory and its Applications. Wiley, Chichester, third edition, 1968."},{"key":"4_CR62","unstructured":"D. H. Fisher, Jr., editor. Improving regressors using boosting techniques, 1997."},{"key":"4_CR63","unstructured":"M. Frean and T. Downs. A simple cost function for boosting. Technical report, Dep. of Computer Science and Electrical Engineering, University of Queensland, 1998."},{"issue":"2","key":"4_CR64","doi-asserted-by":"publisher","first-page":"256","DOI":"10.1006\/inco.1995.1136","volume":"121","author":"Y. Freund","year":"1995","unstructured":"Y. Freund. Boosting a weak learning algorithm by majority. Information and Computation, 121(2):256\u2013285, September 1995.","journal-title":"Information and Computation"},{"issue":"3","key":"4_CR65","doi-asserted-by":"publisher","first-page":"293","DOI":"10.1023\/A:1010852229904","volume":"43","author":"Y. Freund","year":"2001","unstructured":"Y. Freund. An adaptive version of the boost by majority algorithm. Machine Learning, 43(3):293\u2013318, 2001.","journal-title":"Machine Learning"},{"key":"4_CR66","unstructured":"Y. Freund, R. Iyer, R. E. 
Schapire, and Y. Singer. An efficient boosting algorithm for combining preferences. In Proc. ICML, 1998."},{"key":"4_CR67","author":"Y. Freund","year":"1994","unstructured":"Y. Freund and R. E. Schapire. A decision-theoretic generalization of on-line learning and an application to boosting. In EuroCOLT: European Conference on Computational Learning Theory. LNCS, 1994.","series-title":"Lect Notes Comput Sci","volume-title":"A decision-theoretic generalization of on-line learning and an application to boosting"},{"key":"4_CR68","unstructured":"Y. Freund and R. E. Schapire. Experiments with a new boosting algorithm. In Proc. 13th International Conference on Machine Learning, pages 148\u2013146. Morgan Kaufmann, 1996."},{"key":"4_CR69","unstructured":"Y. Freund and R. E. Schapire. Game theory, on-line prediction and boosting. In Proc. COLT, pages 325\u2013332, New York, NY, 1996. ACM Press.","DOI":"10.1145\/238061.238163","doi-asserted-by":"crossref"},{"issue":"1","key":"4_CR70","doi-asserted-by":"publisher","first-page":"119","DOI":"10.1006\/jcss.1997.1504","volume":"55","author":"Y. Freund","year":"1997","unstructured":"Y. Freund and R. E. Schapire. A decision-theoretic generalization of on-line learning and an application to boosting. Journal of Computer and System Sciences, 55(1):119\u2013139, 1997.","journal-title":"Journal of Computer and System Sciences"},{"key":"4_CR71","doi-asserted-by":"publisher","first-page":"79","DOI":"10.1006\/game.1999.0738","volume":"29","author":"Y. Freund","year":"1999","unstructured":"Y. Freund and R. E. Schapire. Adaptive game playing using multiplicative weights. Games and Economic Behavior, 29:79\u2013103, 1999.","journal-title":"Games and Economic Behavior"},{"issue":"5","key":"4_CR72","first-page":"771","volume":"14","author":"Y. Freund","year":"1999","unstructured":"Y. Freund and R. E. Schapire. A short introduction to boosting. Journal of Japanese Society for Artificial Intelligence, 14(5):771\u2013780, September 1999. 
Appeared in Japanese, translation by Naoki Abe.","journal-title":"Journal of Japanese Society for Artificial Intelligence"},{"key":"4_CR73","unstructured":"J. Friedman. Stochastic gradient boosting. Technical report, Stanford University, March 1999."},{"key":"4_CR74","doi-asserted-by":"publisher","first-page":"337","DOI":"10.1214\/aos\/1016218223","volume":"2","author":"J. Friedman","year":"2000","unstructured":"J. Friedman, T. Hastie, and R. J. Tibshirani. Additive logistic regression: a statistical view of boosting. Annals of Statistics, 2:337\u2013374, 2000. with discussion pp.375-407, also Technical Report at Department of Statistics, Sequoia Hall, Stanford University.","journal-title":"Annals of Statistics"},{"key":"4_CR75","doi-asserted-by":"publisher","first-page":"55","DOI":"10.1023\/A:1009778005914","volume":"I","author":"J. H. Friedman","year":"1997","unstructured":"J. H. Friedman. On bias, variance, 0\/1-loss, and the curse of dimensionality. In Data Mining and Knowledge Discovery, volume I, pages 55\u201377. Kluwer Academic Publishers, 1997.","journal-title":"Data Mining and Knowledge Discovery"},{"key":"4_CR76","unstructured":"J. H. Friedman. Greedy function approximation. Technical report, Department of Statistics, Stanford University, February 1999."},{"key":"4_CR77","author":"K. R. Frisch","year":"1955","unstructured":"K. R. Frisch. The logarithmic potential method of convex programming. Memorandum, University Institute of Economics, Oslo, May 13 1955.","volume-title":"The logarithmic potential method of convex programming"},{"key":"4_CR78","unstructured":"T. Graepel, R. Herbrich, B. Sch\u00f6lkopf, A. J. Smola, P. L. Bartlett, K.-R. M\u00fcller, K. Obermayer, and R. C. Williamson. Classification on proximity data with LP-machines. In D. Willshaw and A. Murray, editors, Proceedings of ICANN\u201999, volume 1, pages 304\u2013309. IEE Press, 1999.","DOI":"10.1049\/cp:19991126","doi-asserted-by":"crossref"},{"key":"4_CR79","author":"Y. 
Grandvalet","year":"2001","unstructured":"Y. Grandvalet. Bagging can stabilize without reducing variance. In ICANN\u201901, Lecture Notes in Computer Science. Springer, 2001.","series-title":"Lect Notes Comput Sci","volume-title":"ICANN\u201901"},{"key":"4_CR80","unstructured":"Y. Grandvalet, F. D\u2019alch\u00e9-Buc, and C. Ambroise. Boosting mixture models for semi-supervised tasks. In Proc. ICANN, Vienna, Austria, 2001.","DOI":"10.1007\/3-540-44668-0_7","doi-asserted-by":"crossref"},{"key":"4_CR81","unstructured":"A. J. Grove and D. Schuurmans. Boosting in the limit: Maximizing the margin of learned ensembles. In Proceedings of the Fifteenth National Conference on Artificial Intelligence, 1998."},{"key":"4_CR82","unstructured":"V. Guruswami and A. Sahai. Multiclass learning, boosting, and error-correcting codes. In Proc. of the twelfth annual conference on Computational learning theory, pages 145\u2013155, New York, USA, 1999. ACM Press.","DOI":"10.1145\/307400.307429","doi-asserted-by":"crossref"},{"key":"4_CR83","unstructured":"W. Hart. Non-intrusive appliance load monitoring. Proceedings of the IEEE, 80(12), 1992.","DOI":"10.1109\/5.192069","doi-asserted-by":"crossref"},{"key":"4_CR84","doi-asserted-by":"publisher","first-page":"131","DOI":"10.1023\/A:1007597902467","volume":"34","author":"M. Haruno","year":"1999","unstructured":"M. Haruno, S. Shirai, and Y. Ooyama. Using decision trees to construct a practical parser. Machine Learning, 34:131\u2013149, 1999.","journal-title":"Machine Learning"},{"key":"4_CR85","author":"T. Hastie","year":"2001","unstructured":"T. Hastie, R. Tibshirani, and J. Friedman. The Elements of Statistical Learning: data mining, inference and prediction. Springer series in statistics. Springer, New York, N.Y., 2001.","volume-title":"The Elements of Statistical Learning: data mining, inference and prediction","DOI":"10.1007\/978-0-387-21606-5","doi-asserted-by":"crossref"},{"key":"4_CR86","author":"T. J. 
Hastie","year":"1990","unstructured":"T. J. Hastie and R. J. Tibshirani. Generalized Additive Models, volume 43 of Monographs on Statistics and Applied Probability. Chapman & Hall, London, 1990.","volume-title":"Monographs on Statistics and Applied Probability"},{"key":"4_CR87","doi-asserted-by":"publisher","first-page":"78","DOI":"10.1016\/0890-5401(92)90010-D","volume":"100","author":"D. Haussler","year":"1992","unstructured":"D. Haussler. Decision Theoretic Generalizations of the PAC Model for Neural Net and Other Learning Applications. Information and Computation, 100:78\u2013150, 1992.","journal-title":"Information and Computation"},{"key":"4_CR88","unstructured":"S. S. Haykin. Neural Networks: A Comprehensive Foundation. Prentice-Hall, second edition, 1998."},{"issue":"6","key":"4_CR89","doi-asserted-by":"publisher","first-page":"1291","DOI":"10.1109\/72.809075","volume":"10","author":"D. P. Helmbold","year":"1999","unstructured":"D. P. Helmbold, K. Kivinen, and M. K. Warmuth. Relative loss bounds for single neurons. IEEE Transactions on Neural Networks, 10(6):1291\u20131304, 1999.","journal-title":"IEEE Transactions on Neural Networks"},{"key":"4_CR90","unstructured":"R. Herbrich. Learning Linear Classifiers: Theory and Algorithms, volume 7 of Adaptive Computation and Machine Learning. MIT Press, 2002.","DOI":"10.7551\/mitpress\/4170.001.0001","doi-asserted-by":"crossref"},{"key":"4_CR91","unstructured":"R. Herbrich, T. Graepel, and J. Shawe-Taylor. Sparsity vs. large margins for linear classifiers. In Proc. COLT, pages 304\u2013308, San Francisco, 2000. Morgan Kaufmann."},{"key":"4_CR92","doi-asserted-by":"publisher","first-page":"175","DOI":"10.1162\/153244303765208368","volume":"3","author":"R. Herbrich","year":"2002","unstructured":"R. Herbrich and R. Williamson. Algorithmic luckiness. JMLR, 3:175\u2013212, 2002.","journal-title":"JMLR"},{"key":"4_CR93","doi-asserted-by":"crossref","first-page":"380","DOI":"10.1137\/1035089","volume":"3","author":"R. 
Hettich","year":"1993","unstructured":"R. Hettich and K. O. Kortanek. Semi-infinite programming: Theory, methods and applications. SIAM Review, 3:380\u2013429, September 1993.","journal-title":"SIAM Review"},{"key":"4_CR94","unstructured":"F. J. Huang, Z.-H. Zhou, H.-J. Zhang, and T. Chen. Pose invariant face recognition. In Proceedings of the 4th IEEE International Conference on Automatic Face and Gesture Recognition, pages 245\u2013250, Grenoble, France, 2000."},{"key":"4_CR95","unstructured":"R. D. Iyer, D. D. Lewis, R. E. Schapire, Y. Singer, and A. Singhal. Boosting for document routing. In A. Agah, J. Callan, and E. Rundensteiner, editors, Proceedings of CIKM-00, 9th ACM International Conference on Information and Knowledge Management, pages 70\u201377, McLean, US, 2000. ACM Press, New York, US.","DOI":"10.1145\/354756.354794","doi-asserted-by":"crossref"},{"key":"4_CR96","unstructured":"W. James and C. Stein. Estimation with quadratic loss. In Proceedings of the Fourth Berkeley Symposium on Mathematics, Statistics and Probability, volume 1, pages 361\u2013380, Berkeley, 1960. University of California Press."},{"key":"4_CR97","unstructured":"W. Jiang. Some theoretical aspects of boosting in the presence of noisy data. In Proceedings of the Eighteenth International Conference on Machine Learning, 2001."},{"key":"4_CR98","doi-asserted-by":"publisher","first-page":"93","DOI":"10.1016\/0304-3975(78)90006-3","volume":"6","author":"D. S. Johnson","year":"1978","unstructured":"D. S. Johnson and F. P. Preparata. The densest hemisphere problem. Theoretical Computer Science, 6:93\u2013107, 1978.","journal-title":"Theoretical Computer Science"},{"issue":"2","key":"4_CR99","doi-asserted-by":"publisher","first-page":"181","DOI":"10.1162\/neco.1994.6.2.181","volume":"6","author":"M. I. Jordan","year":"1994","unstructured":"M. I. Jordan and R. A. Jacobs. Hierarchical mixtures of experts and the em algorithm. 
Neural Computation, 6(2):181\u2013214, 1994.","journal-title":"Neural Computation"},{"key":"4_CR100","unstructured":"M. Kearns and Y. Mansour. On the boosting ability of top-down decision tree learning algorithms. In Proc. 28th ACM Symposium on the Theory of Computing, pages 459\u2013468. ACM Press, 1996.","DOI":"10.1145\/237814.237994","doi-asserted-by":"crossref"},{"issue":"1","key":"4_CR101","doi-asserted-by":"publisher","first-page":"67","DOI":"10.1145\/174644.174647","volume":"41","author":"M. Kearns","year":"1994","unstructured":"M. Kearns and L. Valiant. Cryptographic limitations on learning Boolean formulae and finite automata. Journal of the ACM, 41(1):67\u201395, January 1994.","journal-title":"Journal of the ACM"},{"key":"4_CR102","unstructured":"M. J. Kearns and U. V. Vazirani. An Introduction to Computational Learning Theory. MIT Press, 1994.","DOI":"10.7551\/mitpress\/3897.001.0001","doi-asserted-by":"crossref"},{"key":"4_CR103","doi-asserted-by":"publisher","first-page":"82","DOI":"10.1016\/0022-247X(71)90184-3","volume":"33","author":"G. S. Kimeldorf","year":"1971","unstructured":"G. S. Kimeldorf and G. Wahba. Some results on Tchebycheffian spline functions. J. Math. Anal. Applic., 33:82\u201395, 1971.","journal-title":"J. Math. Anal. Applic."},{"key":"4_CR104","unstructured":"J. Kivinen and M. Warmuth. Boosting as entropy projection. In Proc. 12th Annu. Conference on Comput. Learning Theory, pages 134\u2013144. ACM Press, New York, NY, 1999.","DOI":"10.1145\/307400.307424","doi-asserted-by":"crossref"},{"issue":"1\u20132","key":"4_CR105","doi-asserted-by":"crossref","first-page":"325","DOI":"10.1016\/S0004-3702(97)00039-8","volume":"97","author":"J. Kivinen","year":"1997","unstructured":"J. Kivinen, M. Warmuth, and P. Auer. The perceptron algorithm vs. winnow: Linear vs. logarithmic mistake bounds when few input variables are relevant. 
Special issue of Artificial Intelligence, 97(1\u20132):325\u2013343, 1997.","journal-title":"Special issue of Artificial Intelligence"},{"issue":"1","key":"4_CR106","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1006\/inco.1996.2612","volume":"132","author":"J. Kivinen","year":"1997","unstructured":"J. Kivinen and M. K. Warmuth. Additive versus exponentiated gradient updates for linear prediction. Information and Computation, 132(1):1\u201364, 1997.","journal-title":"Information and Computation"},{"key":"4_CR107","doi-asserted-by":"publisher","first-page":"239","DOI":"10.1007\/s002459900090","volume":"38","author":"K. C. Kiwiel","year":"1998","unstructured":"K. C. Kiwiel. Relaxation methods for strictly convex regularizations of piecewise linear programs. Applied Mathematics and Optimization, 38:239\u2013259, 1998.","journal-title":"Applied Mathematics and Optimization"},{"key":"4_CR108","unstructured":"V. Koltchinskii and D. Panchenko. Empirical margin distributions and bounding the generalization error of combined classifiers. Ann. Statis., 30(1), 2002.","DOI":"10.1214\/aos\/1015362183","doi-asserted-by":"crossref"},{"key":"4_CR109","unstructured":"A. Krieger, A. Wyner, and C. Long. Boosting noisy data. In Proceedings, 18th ICML. Morgan Kaufmann, 2001."},{"key":"4_CR110","unstructured":"J. Lafferty. Additive models, boosting, and inference for generalized divergences. In Proc. 12th Annu. Conf. on Comput. Learning Theory, pages 125\u2013133, New York, NY, 1999. ACM Press.","DOI":"10.1145\/307400.307422","doi-asserted-by":"crossref"},{"key":"4_CR111","unstructured":"G. Lebanon and J. Lafferty. Boosting and maximum likelihood for exponential models. In Advances in Neural information processing systems, volume 14, 2002. to appear. Longer version also NeuroCOLT Technical Report NC-TR-2001-098."},{"key":"4_CR112","unstructured":"Y. A. LeCun, L. D. Jackel, L. Bottou, A. Brunot, C. Cortes, J. S. Denker, H. Drucker, I. Guyon, U. A. M\u00fcller, E. 
S\u00e4ckinger, P. Y. Simard, and V. N. Vapnik. Comparison of learning algorithms for handwritten digit recognition. In F. Fogelman-Souli\u00e9 and P. Gallinari, editors, Proceedings ICANN\u201995-International Conference on Artificial Neural Networks, volume II, pages 53\u201360, Nanterre, France, 1995. EC2."},{"key":"4_CR113","doi-asserted-by":"publisher","first-page":"861","DOI":"10.1016\/S0893-6080(05)80131-5","volume":"6","author":"M. Leshno","year":"1993","unstructured":"M. Leshno, V. Lin, A. Pinkus, and S. Schocken. Multilayer Feedforward Networks with a Nonpolynomial Activation Function Can Approximate any Function. Neural Networks, 6:861\u2013867, 1993.","journal-title":"Neural Networks"},{"key":"4_CR114","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1007\/BF01277953","volume":"5","author":"N. Littlestone","year":"1995","unstructured":"N. Littlestone, P. M. Long, and M. K. Warmuth. On-line learning of linear functions. Journal of Computational Complexity, 5:1\u201323, 1995. Earlier version is Technical Report CRL-91-29 at UC Santa Cruz.","journal-title":"Journal of Computational Complexity"},{"key":"4_CR115","unstructured":"D. G. Luenberger. Linear and Nonlinear Programming. Addison-Wesley Publishing Co., Reading, second edition, May 1984. Reprinted with corrections in May, 1989."},{"key":"4_CR116","unstructured":"G\u00e1bor Lugosi and Nicolas Vayatis. A consistent strategy for boosting algorithms. In Proceedings of the Annual Conference on Computational Learning Theory, volume 2375 of LNAI, pages 303\u2013318, Sydney, February 2002. Springer.","DOI":"10.1007\/3-540-45435-7_21","doi-asserted-by":"crossref"},{"issue":"1","key":"4_CR117","doi-asserted-by":"publisher","first-page":"7","DOI":"10.1007\/BF00939948","volume":"72","author":"Z.-Q. Luo","year":"1992","unstructured":"Z.-Q. Luo and P. Tseng. On the convergence of coordinate descent method for convex differentiable minimization. 
Journal of Optimization Theory and Applications, 72(1):7\u201335, 1992.","journal-title":"Journal of Optimization Theory and Applications"},{"issue":"12","key":"4_CR118","doi-asserted-by":"publisher","first-page":"3397","DOI":"10.1109\/78.258082","volume":"41","author":"S. Mallat","year":"1993","unstructured":"S. Mallat and Z. Zhang. Matching Pursuits with time-frequency dictionaries. IEEE Transactions on Signal Processing, 41(12):3397\u20133415, December 1993.","journal-title":"IEEE Transactions on Signal Processing"},{"key":"4_CR119","doi-asserted-by":"publisher","first-page":"444","DOI":"10.1287\/opre.13.3.444","volume":"13","author":"O. L. Mangasarian","year":"1965","unstructured":"O. L. Mangasarian. Linear and nonlinear separation of patterns by linear programming. Operations Research, 13:444\u2013452, 1965.","journal-title":"Operations Research"},{"issue":"1","key":"4_CR120","doi-asserted-by":"publisher","first-page":"15","DOI":"10.1016\/S0167-6377(98)00049-2","volume":"24","author":"O. L. Mangasarian","year":"1999","unstructured":"O. L. Mangasarian. Arbitrary-norm separating plane. Operation Research Letters, 24(1):15\u201323, 1999.","journal-title":"Operation Research Letters"},{"key":"4_CR121","unstructured":"S. Mannor and R. Meir. Geometric bounds for generlization in boosting. In Proceedings of the Fourteenth Annual Conference on Computational Learning Theory, pages 461\u2013472, 2001.","DOI":"10.1007\/3-540-44581-1_30","doi-asserted-by":"crossref"},{"issue":"1\u20133","key":"4_CR122","doi-asserted-by":"crossref","first-page":"219","DOI":"10.1023\/A:1013959922467","volume":"48","author":"S. Mannor","year":"2002","unstructured":"S. Mannor and R. Meir. On the existence of weak learners and applications to boosting. Machine Learning, 48(1\u20133):219\u2013251, 2002.","journal-title":"Machine Learning"},{"key":"4_CR123","unstructured":"S. Mannor, R. Meir, and T. Zhang. The consistency of greedy algorithms for classification. 
In Procedings COLT\u201902, volume 2375 of LNAI, pages 319\u2013333, Sydney, 2002. Springer.","DOI":"10.1007\/3-540-45435-7_22","doi-asserted-by":"crossref"},{"key":"4_CR124","unstructured":"L. Mason. Margins and Combined Classifiers. PhD thesis, Australian National University, September 1999."},{"key":"4_CR125","unstructured":"L. Mason, P. L. Bartlett, and J. Baxter. Improved generalization through explicit optimization of margins. Technical report, Department of Systems Engineering, Australian National University, 1998."},{"key":"4_CR126","author":"L. Mason","year":"1999","unstructured":"L. Mason, J. Baxter, P. L. Bartlett, and M. Frean. Functional gradient techniques for combining hypotheses. In A. J. Smola, P. L. Bartlett, B. Sch\u00f6lkopf, and C. Schuurmans, editors, Advances in Large Margin Classifiers. MIT Press, Cambridge, MA, 1999.","volume-title":"Advances in Large Margin Classifiers"},{"key":"4_CR127","author":"L. Mason","first-page":"221","year":"2000","unstructured":"L. Mason, J. Baxter, P. L. Bartlett, and M. Frean. Functional gradient techniques for combining hypotheses. In A. J. Smola, P. L. Bartlett, B. Sch\u00f6lkopf, and D. Schuurmans, editors, Advances in Large Margin Classifiers, pages 221\u2013247. MIT Press, Cambridge, MA, 2000.","volume-title":"Advances in Large Margin Classifiers"},{"key":"4_CR128","unstructured":"J. Matou\u0161ek. Geometric Discrepancy: An Illustrated Guide. Springer Verlag, 1999.","DOI":"10.1007\/978-3-642-03942-3","doi-asserted-by":"crossref"},{"key":"4_CR129","unstructured":"R. Meir, R. El-Yaniv, and Shai Ben-David. Localized boosting. In Proc. COLT, pages 190\u2013199, San Francisco, 2000. Morgan Kaufmann."},{"key":"4_CR130","unstructured":"R. Meir and T. Zhang. Data-dependent bounds for bayesian mixture models. unpublished manuscript, 2002."},{"key":"4_CR131","doi-asserted-by":"publisher","first-page":"415","DOI":"10.1098\/rsta.1909.0016","volume":"A 209","author":"J. Mercer","year":"1909","unstructured":"J. Mercer. 
Functions of positive and negative type and their connection with the theory of integral equations. Philos. Trans. Roy. Soc. London, A 209:415\u2013446, 1909.","journal-title":"Philos. Trans. Roy. Soc. London"},{"key":"4_CR132","series-title":"Lect Notes Comput Sci","doi-asserted-by":"crossref","first-page":"32","DOI":"10.1007\/3-540-48219-9_4","volume-title":"Tuning cost-sensitive boosting and its application to melanoma diagnosis","author":"S. Merler","year":"2001","unstructured":"S. Merler, C. Furlanello, B. Larcher, and A. Sboner. Tuning cost-sensitive boosting and its application to melanoma diagnosis. In J. Kittler and F. Roli, editors, Proceedings of the 2nd Internationa Workshop on Multiple Classifier Systems MCS2001, volume 2096 of LNCS, pages 32\u201342. Springer, 2001."},{"key":"4_CR133","unstructured":"J. Moody. The effective number of parameters: An analysis of generalization and regularization in non-linear learning systems. In S. J. Hanson J. Moody and R. P. Lippman, editors, Advances in Neural information processings systems, volume 4, pages 847\u2013854, San Mateo, CA, 1992. Morgan Kaufman."},{"issue":"2","key":"4_CR134","doi-asserted-by":"publisher","first-page":"181","DOI":"10.1109\/72.914517","volume":"12","author":"K.-R. M\u00fcller","year":"2001","unstructured":"K.-R. M\u00fcller, S. Mika, G. R\u00e4tsch, K. Tsuda, and B. Sch\u00f6lkopf. An introduction to kernel-based learning algorithms. IEEE Transactions on Neural Networks, 12(2):181\u2013201, 2001.","journal-title":"IEEE Transactions on Neural Networks"},{"key":"4_CR135","doi-asserted-by":"publisher","first-page":"865","DOI":"10.1109\/72.329683","volume":"5","author":"N. Murata","year":"1994","unstructured":"N. Murata, S. Amari, and S. Yoshizawa. Network information criterion-determining the number of hidden units for an artificial neural network model. 
IEEE Transactions on Neural Networks, 5:865\u2013872, 1994.","journal-title":"IEEE Transactions on Neural Networks"},{"key":"4_CR136","author":"S. Nash","year":"1996","unstructured":"S. Nash and A. Sofer. Linear and Nonlinear Programming. McGraw-Hill, New York, NY, 1996.","volume-title":"Linear and Nonlinear Programming"},{"key":"4_CR137","unstructured":"Richard Nock and Patrice Lefaucheur. A robust boosting algorithm. In Proc. 13th European Conference on Machine Learning, volume LNAI 2430, Helsinki, 2002. Springer Verlag.","DOI":"10.1007\/3-540-36755-1_27","doi-asserted-by":"crossref"},{"key":"4_CR138","unstructured":"T. Onoda, G. R\u00e4tsch, and K.-R. M\u00fcller. An asymptotic analysis of AdaBoost in the binary classification case. In L. Niklasson, M. Bod\u00e9n, and T. Ziemke, editors, Proc. of the Int. Conf. on Artificial Neural Networks (ICANN\u201998), pages 195\u2013200, March 1998.","DOI":"10.1007\/978-1-4471-1599-1_26","doi-asserted-by":"crossref"},{"key":"4_CR139","unstructured":"T. Onoda, G. R\u00e4tsch, and K.-R. M\u00fcller. A non-intrusive monitoring system for household electric appliances with inverters. In H. Bothe and R. Rojas, editors, Proc. of NC\u20192000, Berlin, 2000. ICSC Academic Press Canada\/Switzerland."},{"key":"4_CR140","unstructured":"J. O\u2019sullivan, J. Langford, R. Caruana, and A. Blum. Featureboost: A metalearning algorithm that improves model robustness. In Proceedings, 17th ICML. Morgan Kaufmann, 2000."},{"key":"4_CR141","unstructured":"N. Oza and S. Russell. Experimental comparisons of online and batch versions of bagging and boosting. In Proc. KDD-01, 2001.","DOI":"10.1145\/502512.502565","doi-asserted-by":"crossref"},{"key":"4_CR142","unstructured":"R. El-Yaniv P. Derbeko and R. Meir. Variance optimized bagging. In Proc. 
13th European Conference on Machine Learning, 2002.","DOI":"10.1007\/3-540-36755-1_6","doi-asserted-by":"crossref"},{"key":"4_CR143","doi-asserted-by":"publisher","first-page":"978","DOI":"10.1126\/science.247.4945.978","volume":"247","author":"T. Poggio","year":"1990","unstructured":"T. Poggio and F. Girosi. Regularization algorithms for learning that are equivalent to multilayer networks. Science, 247:978\u2013982, 1990.","journal-title":"Science"},{"key":"4_CR144","unstructured":"J. R. Quinlan. C4.5: Programs for Machine Learning. Morgan Kaufmann, 1992."},{"key":"4_CR145","series-title":"Lect Notes Comput Sci","doi-asserted-by":"crossref","first-page":"143","DOI":"10.1007\/3-540-61863-5_42","volume-title":"Boosting first-order learning","author":"J. R. Quinlan","year":"1996","unstructured":"J. R. Quinlan. Boosting first-order learning. Lecture Notes in Computer Science, 1160:143, 1996."},{"key":"4_CR146","unstructured":"G. R\u00e4tsch. Ensemble learning methods for classification. Master\u2019s thesis, Dep. of Computer Science, University of Potsdam, April 1998. In German."},{"key":"4_CR147","unstructured":"G. R\u00e4tsch. Robust Boosting via Convex Optimization. PhD thesis, University of Potsdam, Computer Science Dept., August-Bebel-Str. 89, 14482 Potsdam, Germany, October 2001."},{"key":"4_CR148","unstructured":"G. R\u00e4tsch. Robustes boosting durch konvexe optimierung. In D. Wagner et al., editor, Ausgezeichnete Informatikdissertationen 2001, volume D-2 of GI-Edition-Lecture Notes in Informatics (LNI), pages 125\u2013136. Bonner K\u00f6llen, 2002."},{"issue":"1\u20133","key":"4_CR149","first-page":"193","volume":"48","author":"G. R\u00e4tsch","year":"2002","unstructured":"G. R\u00e4tsch, A. Demiriz, and K. Bennett. Sparse regression ensembles in infinite and finite hypothesis spaces. Machine Learning, 48(1\u20133):193\u2013221, 2002. Special Issue on New Methods for Model Selection and Model Combination. 
Also NeuroCOLT2 Technical Report NC-TR-2000-085.","journal-title":"Machine Learning"},{"key":"4_CR150","unstructured":"G. R\u00e4tsch, S. Mika, B. Sch\u00f6lkopf, and K.-R. M\u00fcller. Constructing boosting algorithms from SVMs: an application to one-class classification. IEEE PAMI, 24(9), September 2002. In press. Earlier version is GMD TechReport No. 119, 2000.","DOI":"10.1109\/TPAMI.2002.1033211","doi-asserted-by":"crossref"},{"key":"4_CR151","unstructured":"G. R\u00e4tsch, S. Mika, and M. K. Warmuth. On the convergence of leveraging. NeuroCOLT2 Technical Report 98, Royal Holloway College, London, August 2001. A short version appeared in NIPS 14, MIT Press, 2002."},{"key":"4_CR152","unstructured":"G. R\u00e4tsch, S. Mika, and M. K. Warmuth. On the convergence of leveraging. In T. G. Dietterich, S. Becker, and Z. Ghahramani, editors, Advances in Neural information processings systems, volume 14, 2002. In press. Longer version also NeuroCOLT Technical Report NC-TR-2001-098."},{"issue":"3","key":"4_CR153","doi-asserted-by":"publisher","first-page":"287","DOI":"10.1023\/A:1007618119488","volume":"42","author":"G. R\u00e4tsch","year":"2001","unstructured":"G. R\u00e4tsch, T. Onoda, and K.-R. M\u00fcller. Soft margins for AdaBoost. Machine Learning, 42(3):287\u2013320, March 2001. also NeuroCOLT Technical Report NCTR-1998-021.","journal-title":"Machine Learning"},{"key":"4_CR154","author":"G. R\u00e4tsch","first-page":"207","year":"2000","unstructured":"G. R\u00e4tsch, B. Sch\u00f6lkopf, A. J. Smola, S. Mika, T. Onoda, and K.-R. M\u00fcller. Robust ensemble learning. In A. J. Smola, P. L. Bartlett, B. Sch\u00f6lkopf, and D. Schuurmans, editors, Advances in Large Margin Classifiers, pages 207\u2013219. MIT Press, Cambridge, MA, 2000.","volume-title":"Advances in Large Margin Classifiers"},{"key":"4_CR155","unstructured":"G. R\u00e4tsch, A. J. Smola, and S. Mika. Adapting codes and embeddings for polychotomies. In NIPS, volume 15. MIT Press, 2003. 
accepted."},{"key":"4_CR156","unstructured":"G. R\u00e4tsch, M. Warmuth, S. Mika, T. Onoda, S. Lemm, and K.-R. M\u00fcller. Barrier boosting. In Proc. COLT, pages 170\u2013179, San Francisco, 2000. Morgan Kaufmann."},{"key":"4_CR157","unstructured":"G. R\u00e4tsch and M. K. Warmuth. Maximizing the margin with boosting. In Proc. COLT, volume 2375 of LNAI, pages 319\u2013333, Sydney, 2002. Springer."},{"key":"4_CR158","unstructured":"G. Ridgeway, D. Madigan, and T. Richardson. Boosting methodology for regression problems. In D. Heckerman and J. Whittaker, editors, Proceedings of Artificial Intelligence and Statistics\u2019 99, pages 152\u2013161, 1999."},{"key":"4_CR159","doi-asserted-by":"publisher","first-page":"465","DOI":"10.1016\/0005-1098(78)90005-5","volume":"14","author":"J. Rissanen","year":"1978","unstructured":"J. Rissanen. Modeling by shortest data description. Automatica, 14:465\u2013471, 1978.","journal-title":"Automatica"},{"key":"4_CR160","author":"C. P. Robert","year":"1994","unstructured":"C. P. Robert. The Bayesian Choice: A Decision Theoretic Motivation. Springer Verlag, New York, 1994.","volume-title":"The Bayesian Choice: A Decision Theoretic Motivation","DOI":"10.1007\/978-1-4757-4314-2","doi-asserted-by":"crossref"},{"key":"4_CR161","unstructured":"M. Rochery, R. Schapire, M. Rahim, N. Gupta, G. Riccardi, S. Bangalore, H. Alshawi, and S. Douglas. Combining prior knowledge and boosting for call classification in spoken language dialogue. In International Conference on Accoustics, Speech and Signal Processing, 2002.","DOI":"10.1109\/ICASSP.2002.5743646","doi-asserted-by":"crossref"},{"key":"4_CR162","author":"R. T. Rockafellar","year":"1970","unstructured":"R. T. Rockafellar. Convex Analysis. Princeton Landmarks in Mathemathics. Princeton University Press, New Jersey, 1970.","volume-title":"Convex Analysis","DOI":"10.1515\/9781400873173","doi-asserted-by":"crossref"},{"issue":"2","key":"4_CR163","first-page":"197","volume":"5","author":"R. E. 
Schapire","year":"1990","unstructured":"R. E. Schapire. The strength of weak learnability. Machine Learning, 5(2):197\u2013227, 1990.","journal-title":"Machine Learning"},{"key":"4_CR164","unstructured":"R. E. Schapire. Using output codes to boost multiclass learning problems. In Machine Learning: Proceedings of the 14th International Conference, pages 313\u2013321, 1997."},{"key":"4_CR165","unstructured":"R. E. Schapire. A brief introduction to boosting. In Proceedings of the Sixteenth International Joint Conference on Artificial Intelligence, 1999."},{"key":"4_CR166","unstructured":"R. E. Schapire. The boosting approach to machine learning: An overview. In Workshop on Nonlinear Estimation and Classification. MSRI, 2002.","DOI":"10.1007\/978-0-387-21579-2_9","doi-asserted-by":"crossref"},{"issue":"5","key":"4_CR167","doi-asserted-by":"publisher","first-page":"1651","DOI":"10.1214\/aos\/1024691352","volume":"26","author":"R. E. Schapire","year":"1998","unstructured":"R. E. Schapire, Y. Freund, P. L. Bartlett, and W. S. Lee. Boosting the margin: A new explanation for the effectiveness of voting methods. The Annals of Statistics, 26(5):1651\u20131686, October 1998.","journal-title":"The Annals of Statistics"},{"issue":"3","key":"4_CR168","doi-asserted-by":"publisher","first-page":"297","DOI":"10.1023\/A:1007614523901","volume":"37","author":"R. E. Schapire","year":"1999","unstructured":"R. E. Schapire and Y. Singer. Improved boosting algorithms using confidence-rated predictions. Machine Learning, 37(3):297\u2013336, December 1999. also Proceedings of the 14th Workshop on Computational Learning Theory 1998, pages 80-91.","journal-title":"Machine Learning"},{"issue":"2\/3","key":"4_CR169","doi-asserted-by":"publisher","first-page":"135","DOI":"10.1023\/A:1007649029923","volume":"39","author":"R. E. Schapire","year":"2000","unstructured":"R. E. Schapire and Y. Singer. Boostexter: A boosting-based system for text categorization. 
Machine Learning, 39(2\/3):135\u2013168, 2000.","journal-title":"Machine Learning"},{"key":"4_CR170","unstructured":"R. E. Schapire, Y. Singer, and A. Singhal. Boosting and rocchio applied to text filtering. In Proc. 21st Annual International Conference on Research and Development in Information Retrieval, 1998.","DOI":"10.1145\/290941.290996","doi-asserted-by":"crossref"},{"key":"4_CR171","unstructured":"R. E. Schapire, P. Stone, D. McAllester, M. L. Littman, and J. A. Csirik. Modeling auction price uncertainty using boosting-based conditional density estimations noise. In Proceedings of the Proceedings of the Nineteenth International Conference on Machine Learning, 2002."},{"key":"4_CR172","unstructured":"B. Sch\u00f6lkopf, R. Herbrich, and A. J. Smola. A generalized representer theorem. In D. P. Helmbold and R. C. Williamson, editors, COLT\/EuroCOLT, volume 2111 of LNAI, pages 416\u2013426. Springer, 2001.","DOI":"10.1007\/3-540-44581-1_27","doi-asserted-by":"crossref"},{"key":"4_CR173","author":"B. Sch\u00f6lkopf","year":"1999","unstructured":"B. Sch\u00f6lkopf, J. Platt, J. Shawe-Taylor, A. J. Smola, and R. C. Williamson. Estimating the support of a high-dimensional distribution. TR 87, Microsoft Research, Redmond, WA, 1999.","series-title":"TR","volume-title":"Estimating the support of a high-dimensional distribution"},{"key":"4_CR174","doi-asserted-by":"publisher","first-page":"1207","DOI":"10.1162\/089976600300015565","volume":"12","author":"B. Sch\u00f6lkopf","year":"2000","unstructured":"B. Sch\u00f6lkopf, A. Smola, R. C. Williamson, and P. L. Bartlett. New support vector algorithms. Neural Computation, 12:1207\u20131245, 2000. also NeuroCOLT Technical Report NC-TR-1998-031.","journal-title":"Neural Computation"},{"key":"4_CR175","author":"B. Sch\u00f6lkopf","year":"2002","unstructured":"B. Sch\u00f6lkopf and A. J. Smola. Learning with Kernels. 
MIT Press, Cambridge, MA, 2002.","volume-title":"Learning with Kernels"},{"issue":"8","key":"4_CR176","doi-asserted-by":"publisher","first-page":"1869","DOI":"10.1162\/089976600300015178","volume":"12","author":"H. Schwenk","year":"2000","unstructured":"H. Schwenk and Y. Bengio. Boosting neural networks. Neural Computation, 12(8):1869\u20131887, 2000.","journal-title":"Neural Computation"},{"key":"4_CR177","unstructured":"R. A. Servedio. PAC analogoues of perceptron and winnow via boosting the margin. In Proc. COLT, pages 148\u2013157, San Francisco, 2000. Morgan Kaufmann."},{"key":"4_CR178","unstructured":"R. A. Servedio. Smooth boosting and learning with malicious noise. In Proceedings of the Fourteenth Annual Conference on Computational Learning Theory, pages 473\u2013489, 2001.","DOI":"10.1007\/3-540-44581-1_31","doi-asserted-by":"crossref"},{"issue":"5","key":"4_CR179","doi-asserted-by":"publisher","first-page":"1926","DOI":"10.1109\/18.705570","volume":"44","author":"J. Shawe-Taylor","year":"1998","unstructured":"J. Shawe-Taylor, P. L. Bartlett, R. C. Williamson, and M. Anthony. Structural risk minimization over data-dependent hierarchies. IEEE Trans. Inf. Theory, 44(5):1926\u20131940, September 1998.","journal-title":"IEEE Trans. Inf. Theory"},{"key":"4_CR180","unstructured":"J. Shawe-Taylor and N. Cristianini. Further results on the margin distribution. In Proceedings of the twelfth Conference on Computational Learning Theory, pages 278\u2013285, 1999.","DOI":"10.1145\/307400.307470","doi-asserted-by":"crossref"},{"key":"4_CR181","unstructured":"J. Shawe-Taylor and N. Cristianini. On the genralization of soft margin algorithms. Technical Report NC-TR-2000-082, NeuroCOLT2, June 2001."},{"key":"4_CR182","unstructured":"J. Shawe-Taylor and G. Karakoulas. Towards a strategy for boosting regressors. In A. J. Smola, P. L. Bartlett, B. Sch\u00f6lkopf, and D. Schuurmans, editors, Advances in Large Margin Classifiers, pages 247\u2013258, Cambridge, MA, 2000. 
MIT Press."},{"key":"4_CR183","unstructured":"Y. Singer. Leveraged vector machines. In S. A. Solla, T. K. Leen, and K.-R. M\u00fcller, editors, Advances in Neural Information Processing Systems, volume 12, pages 610\u2013616. MIT Press, 2000."},{"key":"4_CR184","unstructured":"D. Tax and R. Duin. Data domain description by support vectors. In M. Verleysen, editor, Proc. ESANN, pages 251\u2013256, Brussels, 1999. D. Facto Press."},{"key":"4_CR185","unstructured":"F. Thollard, M. Sebban, and P. Ezequel. Boosting density function estimators. In Proc. 13th European Conference on Machine Learning, volume LNAI 2430, pages 431\u2013443, Helsinki, 2002. Springer Verlag.","DOI":"10.1007\/3-540-36755-1_36","doi-asserted-by":"crossref"},{"key":"4_CR186","author":"A. N. Tikhonov","year":"1977","unstructured":"A. N. Tikhonov and V. Y. Arsenin. Solutions of Ill-posed Problems. W. H. Winston, Washington, D.C., 1977.","volume-title":"Solutions of Ill-posed Problems"},{"issue":"1","key":"4_CR187","doi-asserted-by":"publisher","first-page":"70","DOI":"10.1109\/72.977272","volume":"13","author":"K. Tsuda","year":"2002","unstructured":"K. Tsuda, M. Sugiyama, and K.-R. M\u00fcller. Subspace information criterion for non-quadratic regularizers-model selection for sparse regressors. IEEE Transactions on Neural Networks, 13(1):70\u201380, 2002.","journal-title":"IEEE Transactions on Neural Networks"},{"issue":"11","key":"4_CR188","doi-asserted-by":"publisher","first-page":"1134","DOI":"10.1145\/1968.1972","volume":"27","author":"L. G. Valiant","year":"1984","unstructured":"L. G. Valiant. A theory of the learnable. Communications of the ACM, 27(11):1134\u20131142, November 1984.","journal-title":"Communications of the ACM"},{"key":"4_CR189","author":"A. W. Vaart van der","year":"1996","unstructured":"A. W. van der Vaart and J. A. Wellner. Weak Convergence and Empirical Processes. 
Springer Verlag, New York, 1996.","volume-title":"Weak Convergence and Empirical Processes","DOI":"10.1007\/978-1-4757-2545-2","doi-asserted-by":"crossref"},{"key":"4_CR190","author":"V. N. Vapnik","year":"1995","unstructured":"V. N. Vapnik. The nature of statistical learning theory. Springer Verlag, New York, 1995.","volume-title":"The nature of statistical learning theory","DOI":"10.1007\/978-1-4757-2440-0","doi-asserted-by":"crossref"},{"key":"4_CR191","author":"V. N. Vapnik","year":"1998","unstructured":"V. N. Vapnik. Statistical Learning Theory. Wiley, New York, 1998.","volume-title":"Statistical Learning Theory"},{"issue":"2","key":"4_CR192","doi-asserted-by":"publisher","first-page":"264","DOI":"10.1137\/1116025","volume":"16","author":"V. N. Vapnik","year":"1971","unstructured":"V. N. Vapnik and A. Y. Chervonenkis. On the uniform convergence of relative frequencies of events to their probabilities. Theory of Probab. and its Applications, 16(2):264\u2013280, 1971.","journal-title":"Theory of Probab. and its Applications"},{"key":"4_CR193","doi-asserted-by":"publisher","first-page":"295","DOI":"10.1007\/BF01448847","volume":"100","author":"J. Neumann von","year":"1928","unstructured":"J. von Neumann. Zur Theorie der Gesellschaftsspiele. Math. Ann., 100:295\u2013320, 1928.","journal-title":"Math. Ann."},{"key":"4_CR194","unstructured":"M. A. Walker, O. Rambow, and M. Rogati. Spot: A trainable sentence planner. In Proc. 2nd Annual Meeting of the North American Chapter of the Assiciation for Computational Linguistics, 2001.","DOI":"10.3115\/1073336.1073339","doi-asserted-by":"crossref"},{"key":"4_CR195","unstructured":"R. Zemel and T. Pitassi. A gradient-based boosting algorithm for regression problems. In T. K. Leen, T. G. Dietterich, and V. Tresp, editors, Advances in Neural Information Processing Systems, volume 13, pages 696\u2013702. MIT Press, 2001."},{"key":"4_CR196","author":"T. Zhang","year":"2001","unstructured":"T. Zhang. 
Statistical behavior and consistency of classification methods based on convex risk minimization. Technical Report RC22155, IBM Research, Yorktown Heights, NY, 2001.","series-title":"Technical Report","volume-title":"Statistical behavior and consistency of classification methods based on convex risk minimization"},{"key":"4_CR197","unstructured":"T. Zhang. A general greedy approximation algorithm with applications. In Advances in Neural Information Processing Systems, volume 14. MIT Press, 2002."},{"key":"4_CR198","doi-asserted-by":"publisher","first-page":"91","DOI":"10.1023\/A:1012498226479","volume":"46","author":"T. Zhang","year":"2002","unstructured":"T. Zhang. On the dual formulation of regularized linear systems with convex risks. Machine Learning, 46:91\u2013129, 2002.","journal-title":"Machine Learning"},{"key":"4_CR199","unstructured":"T. Zhang. Sequential greedy approximation for certain convex optimization problems. Technical report, IBM T.J. Watson Research Center, 2002."},{"issue":"1","key":"4_CR200","doi-asserted-by":"publisher","first-page":"25","DOI":"10.1016\/S0933-3657(01)00094-X","volume":"24","author":"Z.-H. Zhou","year":"2002","unstructured":"Z.-H. Zhou, Y. Jiang, Y.-B. Yang, and S.-F. Chen. Lung cancer cell identification based on artificial neural network ensembles. 
Artificial Intelligence in Medicine, 24(1):25\u201336, 2002.","journal-title":"Artificial Intelligence in Medicine"}],"container-title":["Advanced Lectures on Machine Learning","Lecture Notes in Computer Science"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/3-540-36434-X_4","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,5,4]],"date-time":"2019-05-04T05:56:41Z","timestamp":1556949401000},"score":1.0,"subtitle":[],"short-title":[],"issued":{"date-parts":[[2003]]},"ISBN":["9783540005292","9783540364344"],"references-count":200,"URL":"http:\/\/dx.doi.org\/10.1007\/3-540-36434-x_4","relation":{"cites":[]},"ISSN":["0302-9743"],"issn-type":[{"value":"0302-9743","type":"print"}]}}