{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,2]],"date-time":"2026-04-02T03:00:53Z","timestamp":1775098853595,"version":"3.50.1"},"reference-count":51,"publisher":"Springer Science and Business Media LLC","issue":"1-3","license":[{"start":{"date-parts":[[2002,7,1]],"date-time":"2002-07-01T00:00:00Z","timestamp":1025481600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2002,7,1]],"date-time":"2002-07-01T00:00:00Z","timestamp":1025481600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Machine Learning"],"published-print":{"date-parts":[[2002,7]]},"DOI":"10.1023\/a:1013999503812","type":"journal-article","created":{"date-parts":[[2002,12,28]],"date-time":"2002-12-28T13:55:48Z","timestamp":1041083748000},"page":"85-113","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":224,"title":["Model Selection and Error Estimation"],"prefix":"10.1007","volume":"48","author":[{"given":"Peter L.","family":"Bartlett","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"St\u00e9phane","family":"Boucheron","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"G\u00e1bor","family":"Lugosi","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","reference":[{"key":"393392_CR1","doi-asserted-by":"crossref","first-page":"716","DOI":"10.1109\/TAC.1974.1100705","volume":"19","author":"H. Akaike","year":"1974","unstructured":"Akaike, H. (1974). A new look at the statistical model identification. IEEE Transactions on Automatic Control, 19, 716\u2013723.","journal-title":"IEEE Transactions on Automatic Control"},{"key":"393392_CR2","unstructured":"Barron, A. R. (1985). Logically smooth density estimation. Technical Report TR 56, Department of Statistics, Stanford University."},{"key":"393392_CR3","series-title":"NATO ASI Series","doi-asserted-by":"crossref","first-page":"561","DOI":"10.1007\/978-94-011-3222-0_42","volume-title":"Nonparametric functional estimation and related topics","author":"A. R. Barron","year":"1991","unstructured":"Barron, A. R. (1991). Complexity regularization with application to artificial neural networks. In G. Roussas, (Ed.), Nonparametric functional estimation and related topics (pp. 561\u2013576). NATO ASI Series, Dordrecht: Kluwer Academic Publishers."},{"key":"393392_CR4","doi-asserted-by":"crossref","first-page":"301","DOI":"10.1007\/s004400050210","volume":"113","author":"A. R. Barron","year":"1999","unstructured":"Barron, A. R., Birg\u00e9, L., & Massart, P. (1999). Risk bounds for model selection via penalization. Probability Theory and Related Fields, 113, 301\u2013413.","journal-title":"Probability Theory and Related Fields"},{"key":"393392_CR5","doi-asserted-by":"crossref","first-page":"1034","DOI":"10.1109\/18.86996","volume":"37","author":"A. R. Barron","year":"1991","unstructured":"Barron, A. R., & Cover, T. M. (1991). Minimum complexity density estimation. IEEE Transactions on Information Theory, 37, 1034\u20131054.","journal-title":"IEEE Transactions on Information Theory"},{"issue":"2","key":"393392_CR6","doi-asserted-by":"crossref","first-page":"525","DOI":"10.1109\/18.661502","volume":"44","author":"P. L. Bartlett","year":"1998","unstructured":"Bartlett, P. L. (1998). The sample complexity of pattern classification with neural networks: The size of the weights is more important than the size of the network. IEEE Transactions on Information Theory, 44:2, 525\u2013536.","journal-title":"IEEE Transactions on Information Theory"},{"key":"393392_CR7","first-page":"43","volume-title":"Advances in Kernel methods: Support vector learning","author":"P. L. Bartlett","year":"1999","unstructured":"Bartlett, P. L., & Shawe-Taylor, J. (1999). Generalization performance of support vector machines and other pattern classifiers. In B. Sch\u00f6lkopf, C. J. C. Burges, & A. J., Smola. (Eds.), Advances in Kernel methods: Support vector learning (pp. 43\u201354). Cambridge: MIT Press."},{"key":"393392_CR8","doi-asserted-by":"crossref","first-page":"55","DOI":"10.1007\/978-1-4612-1880-7_4","volume-title":"Festschrift for Lucien Le Cam: Research papers in probability and statistics","author":"L. Birg\u00e9","year":"1997","unstructured":"Birg\u00e9, L., & Massart, P. (1997). From model selection to adaptive estimation. In E. Torgersen, D. Pollard, & G. Yang, (Eds.), Festschrift for Lucien Le Cam: Research papers in probability and statistics (pp. 55\u201387). New York: Springer."},{"key":"393392_CR9","doi-asserted-by":"crossref","first-page":"329","DOI":"10.2307\/3318720","volume":"4","author":"L. Birg\u00e9","year":"1998","unstructured":"Birg\u00e9, L., & Massart, P. (1998). Minimum contrast estimators on sieves: Exponential bounds and rates of convergence. Bernoulli, 4, 329\u2013375.","journal-title":"Bernoulli"},{"key":"393392_CR10","doi-asserted-by":"crossref","first-page":"277","DOI":"10.1002\/(SICI)1098-2418(200005)16:3<277::AID-RSA4>3.0.CO;2-1","volume":"16","author":"S. Boucheron","year":"2000","unstructured":"Boucheron, S., Lugosi, G., & Massart, P. (2000). A sharp concentration inequality with applications in random combinatorics and learning. Random Structures and Algorithms, 16, 277\u2013292.","journal-title":"Random Structures and Algorithms"},{"key":"393392_CR11","doi-asserted-by":"crossref","first-page":"545","DOI":"10.1109\/9.489275","volume":"41","author":"K. L. Buescher","year":"1996","unstructured":"Buescher, K. L., & Kumar, P. R. (1996a). Learning by canonical smooth estimation, Part I: Simultaneous estimation. IEEE Transactions on Automatic Control, 41, 545\u2013556.","journal-title":"IEEE Transactions on Automatic Control"},{"key":"393392_CR12","doi-asserted-by":"crossref","first-page":"557","DOI":"10.1109\/9.489276","volume":"41","author":"K. L. Buescher","year":"1996","unstructured":"Buescher, K. L., & Kumar, P. R. (1996b). Learning by canonical smooth estimation, Part II: Learning and choice of model complexity. IEEE Transactions on Automatic Control, 41, 557\u2013569.","journal-title":"IEEE Transactions on Automatic Control"},{"key":"393392_CR13","volume-title":"An introduction to support vector machines","author":"N. Cristianini","year":"2000","unstructured":"Cristianini, N., & Shawe-Taylor, J. (2000). An introduction to support vector machines. Cambridge, UK: Cambridge University Press."},{"key":"393392_CR14","doi-asserted-by":"crossref","DOI":"10.1007\/978-1-4612-0711-5","volume-title":"A probabilistic theory of pattern recognition","author":"L. Devroye","year":"1996","unstructured":"Devroye, L., Gy\u00f6rfi, L., & Lugosi, G. (1996). A probabilistic theory of pattern recognition. New York: Springer-Verlag."},{"key":"393392_CR15","doi-asserted-by":"crossref","unstructured":"Freund, Y. (1998). Self bounding learning algorithms. In Proceedings of the Eleventh Annual Conference on Computational Learning Theory (pp. 247-258).","DOI":"10.1145\/279943.279993"},{"key":"393392_CR16","doi-asserted-by":"crossref","DOI":"10.1002\/9780470316719","volume-title":"Nonlinear statistical models","author":"A. R. Gallant","year":"1987","unstructured":"Gallant, A. R. (1987). Nonlinear statistical models. New York: John Wiley."},{"key":"393392_CR17","doi-asserted-by":"crossref","first-page":"401","DOI":"10.1214\/aos\/1176345782","volume":"10","author":"S. Geman","year":"1982","unstructured":"Geman, S., & Hwang, C. R. (1982). Nonparametric maximum likelihood estimation by the method of sieves. Annals of Statistics, 10, 401\u2013414.","journal-title":"Annals of Statistics"},{"key":"393392_CR18","doi-asserted-by":"crossref","first-page":"929","DOI":"10.1214\/aop\/1176993138","volume":"12","author":"E. Gin\u00e9","year":"1984","unstructured":"Gin\u00e9, E., & Zinn, J. (1984). Some limit theorems for empirical processes. Annals of Probability, 12, 929\u2013989.","journal-title":"Annals of Probability"},{"key":"393392_CR19","doi-asserted-by":"crossref","first-page":"13","DOI":"10.1080\/01621459.1963.10500830","volume":"58","author":"W. Hoeffding","year":"1963","unstructured":"Hoeffding, W. (1963). Probability inequalities for sums of bounded random variables. Journal of the American Statistical Association, 58, 13\u201330.","journal-title":"Journal of the American Statistical Association"},{"key":"393392_CR20","first-page":"21","volume-title":"Proceedings of the Eighth Annual ACM Workshop on Computational Learning Theory","author":"M. Kearns","year":"1995","unstructured":"Kearns, M., Mansour, Y., Ng, A. Y., & Ron, D. (1995). An experimental and theoretical comparison of model selection methods. In Proceedings of the Eighth Annual ACM Workshop on Computational Learning Theory (pp. 21\u201330). New York: Association for Computing Machinery."},{"issue":"5","key":"393392_CR21","doi-asserted-by":"crossref","first-page":"1902","DOI":"10.1109\/18.930926","volume":"47","author":"V. Koltchinskii","year":"2001","unstructured":"Koltchinskii,V. (2001). Rademacher penalties and structural risk minimization. IEEE Transactions on Information Theory, 47:5, 1902\u20131914.","journal-title":"IEEE Transactions on Information Theory"},{"key":"393392_CR22","series-title":"Prog. Probab.","first-page":"443","volume-title":"High dimensional probability II. 2nd international conference","author":"V. Koltchinskii","year":"2000","unstructured":"Koltchinskii, V., & Panchenko, D. (2000). Rademacher processes and bounding the risk of function learning. In Gin\u00e9, Evarist et al. (eds.), High dimensional probability II. 2nd international conference, Boston: Birkh\u00e4user. Prog. Probab., 47, 443\u2013457."},{"key":"393392_CR23","doi-asserted-by":"crossref","first-page":"247","DOI":"10.1109\/72.661120","volume":"9","author":"A. Krzy\u00ffzak","year":"1998","unstructured":"Krzy\u00ffzak, A., & Linder, T. (1998). Radial basis function networks and complexity regularization in function learning. IEEE Transactions on Neural Networks, 9, 247\u2013256.","journal-title":"IEEE Transactions on Neural Networks"},{"key":"393392_CR24","unstructured":"Lozano, F. (2000). Model selection using Rademacher penalization. In Proceedings of the Second ICSC Symposia on Neural Computation (NC2000), ICSC Adademic Press."},{"key":"393392_CR25","doi-asserted-by":"crossref","unstructured":"Lugosi, G., & Nobel, A. (1999). Adaptive model selection using empirical complexities. Annals of Statistics, 27:6.","DOI":"10.1214\/aos\/1017939241"},{"key":"393392_CR26","doi-asserted-by":"crossref","first-page":"677","DOI":"10.1109\/18.382014","volume":"41","author":"G. Lugosi","year":"1995","unstructured":"Lugosi, G., & Zeger, K. (1995). Nonparametric estimation via empirical risk minimization. IEEE Transactions on Information Theory, 41, 677\u2013678.","journal-title":"IEEE Transactions on Information Theory"},{"key":"393392_CR27","doi-asserted-by":"crossref","first-page":"48","DOI":"10.1109\/18.481777","volume":"42","author":"G. Lugosi","year":"1996","unstructured":"Lugosi, G., & Zeger, K. (1996). Concept learning using complexity regularization. IEEE Transactions on Information Theory, 42, 48\u201354.","journal-title":"IEEE Transactions on Information Theory"},{"key":"393392_CR28","first-page":"661","volume":"15","author":"C. L. Mallows","year":"1997","unstructured":"Mallows, C. L. (1997). Some comments on cp. IEEE Technometrics, 15, 661\u2013675.","journal-title":"IEEE Technometrics"},{"issue":"3","key":"393392_CR29","doi-asserted-by":"crossref","first-page":"243","DOI":"10.1023\/A:1007697429651","volume":"38","author":"L. Mason","year":"2000","unstructured":"Mason, L., Bartlett, P. L., & Baxter, J. (2000). Improved generalization through explicit optimization of margins. Machine Learning, 38:3, 243\u2013255.","journal-title":"Machine Learning"},{"issue":"6","key":"393392_CR30","first-page":"245","volume":"IX","author":"P. Massart","year":"2000","unstructured":"Massart, P. (2000). Some applications of concentration inequalities to statistics. Annales de la facult\u00e9 des sciences de l'Universit\u00e9de Toulouse, Math\u00e9matiques, s\u00e9rie 6, IX, 245\u2013303.","journal-title":"Annales de la facult\u00e9 des sciences de l'Universit\u00e9de Toulouse, Math\u00e9matiques"},{"key":"393392_CR31","doi-asserted-by":"crossref","first-page":"148","DOI":"10.1017\/CBO9781107359949.008","volume-title":"Surveys in Combinatorics 1989","author":"C. McDiarmid","year":"1989","unstructured":"McDiarmid, C. (1989). On the method of bounded differences. In Surveys in Combinatorics 1989 (pp. 148\u2013188). Cambridge: Cambridge University Press."},{"key":"393392_CR32","volume-title":"Leda: A platform for combinatorial and geometric computing","author":"K. Mehlhorn","year":"2000","unstructured":"Mehlhorn, K., & Naher, S. (2000). Leda: A platform for combinatorial and geometric computing. Cambridge: Cambridge University Press."},{"key":"393392_CR33","first-page":"122","volume-title":"Proceedings of the Tenth Annual ACM Workshop on Computational Learning Theory","author":"R. Meir","year":"1997","unstructured":"Meir, R. (1997). Performance bounds for nonlinear time series prediction. In Proceedings of the Tenth Annual ACM Workshop on Computational Learning Theory (pp. 122\u2013129). New York: Association for Computing Machinery."},{"key":"393392_CR34","doi-asserted-by":"crossref","first-page":"2133","DOI":"10.1109\/18.556602","volume":"42","author":"D. S. Modha","year":"1996","unstructured":"Modha, D. S., & Masry, E. (1996). Minimum complexity regression estimation with weakly dependent observations. IEEE Transactions on Information Theory, 42, 2133\u20132145.","journal-title":"IEEE Transactions on Information Theory"},{"key":"393392_CR35","doi-asserted-by":"crossref","first-page":"416","DOI":"10.1214\/aos\/1176346150","volume":"11","author":"J. Rissanen","year":"1983","unstructured":"Rissanen, J. (1983). A universal prior for integers and estimation by minimum description length. Annals of Statistics, 11, 416\u2013431.","journal-title":"Annals of Statistics"},{"issue":"5","key":"393392_CR36","first-page":"1651","volume":"26","author":"R. E. Schapire","year":"1998","unstructured":"Schapire, R. E., Freund, Y., Bartlett, P. L., & Lee, W. S. (1998). Boosting the margin: A new explanation for the effectiveness of voting methods. Annals of Statistics, 26:5, 1651\u20131686.","journal-title":"Annals of Statistics"},{"key":"393392_CR37","doi-asserted-by":"crossref","first-page":"461","DOI":"10.1214\/aos\/1176344136","volume":"6","author":"G. Schwarz","year":"1978","unstructured":"Schwarz, G. (1978). Estimating the dimension of a model. Annals of Statistics, 6, 461\u2013464.","journal-title":"Annals of Statistics"},{"issue":"5","key":"393392_CR38","doi-asserted-by":"crossref","first-page":"1926","DOI":"10.1109\/18.705570","volume":"44","author":"J. Shawe-Taylor","year":"1998","unstructured":"Shawe-Taylor, J., Bartlett, P. L., Williamson, R. C., & Anthony, M. (1998). Structural risk minimization over data-dependent hierarchies. IEEE Transactions on Information Theory, 44:5, 1926\u20131940.","journal-title":"IEEE Transactions on Information Theory"},{"key":"393392_CR39","doi-asserted-by":"crossref","first-page":"580","DOI":"10.1214\/aos\/1176325486","volume":"22","author":"X. Shen","year":"1994","unstructured":"Shen, X., & Wong, W. H. (1994). Convergence rate of sieve estimates. Annals of Statistics, 22, 580\u2013615.","journal-title":"Annals of Statistics"},{"key":"393392_CR40","doi-asserted-by":"crossref","first-page":"197","DOI":"10.4064\/sm-58-2-197-208","volume":"63","author":"S. J. Szarek","year":"1976","unstructured":"Szarek, S. J. (1976). On the best constants in the Khintchine inequality. Studia Mathematica, 63, 197\u2013208.","journal-title":"Studia Mathematica"},{"key":"393392_CR41","doi-asserted-by":"crossref","first-page":"73","DOI":"10.1007\/BF02699376","volume":"81","author":"M. Talagrand","year":"1995","unstructured":"Talagrand, M. (1995). Concentration of measure and isoperimetric inequalities in product spaces. Inst. Hautes Etudes Sci. Publ. Math., 81, 73\u2013205.","journal-title":"Inst. Hautes Etudes Sci. Publ. Math."},{"key":"393392_CR42","volume-title":"Estimation of dependencies based on empirical data","author":"V. N. Vapnik","year":"1982","unstructured":"Vapnik, V. N. (1982). Estimation of dependencies based on empirical data. New York: Springer-Verlag."},{"key":"393392_CR43","doi-asserted-by":"crossref","DOI":"10.1007\/978-1-4757-2440-0","volume-title":"The nature of statistical learning theory","author":"V. N. Vapnik","year":"1995","unstructured":"Vapnik, V. N. (1995). The nature of statistical learning theory. New York: Springer-Verlag."},{"key":"393392_CR44","volume-title":"Statistical learning theory","author":"V. N. Vapnik","year":"1998","unstructured":"Vapnik, V. N. (1998). Statistical learning theory. New York: Wiley."},{"key":"393392_CR45","doi-asserted-by":"crossref","first-page":"264","DOI":"10.1137\/1116025","volume":"16","author":"V. N. Vapnik","year":"1971","unstructured":"Vapnik, V. N., & Chervonenkis, A. Ya. (1971). On the uniform convergence of relative frequencies of events to their probabilities. Theory of Probability and its Applications, 16, 264\u2013280.","journal-title":"Theory of Probability and its Applications"},{"key":"393392_CR46","volume-title":"Theory of pattern recognition","author":"V. N. Vapnik","year":"1974","unstructured":"Vapnik, V. N., & Chervonenkis, A. Ya. (1974). Theory of pattern recognition. Moscow: Nauka. (in Russian); German translation (1979): Theorie der Zeichenerkennung. Berlin: Akademie Verlag."},{"issue":"5","key":"393392_CR47","doi-asserted-by":"crossref","first-page":"851","DOI":"10.1162\/neco.1994.6.5.851","volume":"6","author":"V. N. Vapnik","year":"1994","unstructured":"Vapnik, V. N., Levin, E., & Le Cun, Y. (1994). Measuring the VC-dimension of a learning machine. Neural Computation, 6:5, 851\u2013876.","journal-title":"Neural Computation"},{"key":"393392_CR48","doi-asserted-by":"crossref","DOI":"10.1007\/978-1-4757-2545-2","volume-title":"Weak convergence and empirical processes","author":"A. W. van der Vaart","year":"1996","unstructured":"van der Vaart, A. W., & Wellner, J. A. (1996). Weak convergence and empirical processes. New York: Springer-Verlag."},{"key":"393392_CR49","unstructured":"Williamson, R. C., Shawe-Taylor, J., Sch\u00f6lkopf, B., & Smola, A. J. (1999). Sample based generalization bounds. NeuroCOLT Technical Report NC-TR-99-055."},{"key":"393392_CR50","doi-asserted-by":"crossref","first-page":"95","DOI":"10.1109\/18.650993","volume":"44","author":"Y. Yang","year":"1998","unstructured":"Yang, Y., & Barron, A. R. (1998). An asymptotic property of model selection criteria. IEEE Transactions on Information Theory, 44, 95\u2013116.","journal-title":"IEEE Transactions on Information Theory"},{"key":"393392_CR51","doi-asserted-by":"crossref","first-page":"1564","DOI":"10.1214\/aos\/1017939142","volume":"27","author":"Y. Yang","year":"1999","unstructured":"Yang, Y., & Barron, A. R. (1999). Information-theoretic determination of minimax rates of convergence. Annals of Statistics, 27, 1564\u20131599.","journal-title":"Annals of Statistics"}],"container-title":["Machine Learning"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1023\/A:1013999503812.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1023\/A:1013999503812\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1023\/A:1013999503812.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,7,10]],"date-time":"2025-07-10T11:43:03Z","timestamp":1752147783000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1023\/A:1013999503812"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2002,7]]},"references-count":51,"journal-issue":{"issue":"1-3","published-print":{"date-parts":[[2002,7]]}},"alternative-id":["393392"],"URL":"https:\/\/doi.org\/10.1023\/a:1013999503812","relation":{},"ISSN":["0885-6125","1573-0565"],"issn-type":[{"value":"0885-6125","type":"print"},{"value":"1573-0565","type":"electronic"}],"subject":[],"published":{"date-parts":[[2002,7]]},"assertion":[{"value":"This content has been made available to all.","name":"free","label":"Free to read"}]}}