{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,2,21]],"date-time":"2025-02-21T07:31:28Z","timestamp":1740123088506,"version":"3.37.3"},"reference-count":49,"publisher":"Springer Science and Business Media LLC","issue":"8","license":[{"start":{"date-parts":[[2021,9,2]],"date-time":"2021-09-02T00:00:00Z","timestamp":1630540800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2021,9,2]],"date-time":"2021-09-02T00:00:00Z","timestamp":1630540800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/100000181","name":"Air Force Office of Scientific Research","doi-asserted-by":"publisher","award":["F4FGA08272G001"],"award-info":[{"award-number":["F4FGA08272G001"]}],"id":[{"id":"10.13039\/100000181","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Mach Learn"],"published-print":{"date-parts":[[2023,8]]},"DOI":"10.1007\/s10994-021-06036-0","type":"journal-article","created":{"date-parts":[[2021,9,3]],"date-time":"2021-09-03T20:11:14Z","timestamp":1630699874000},"page":"2933-2951","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":4,"title":["Diametrical Risk Minimization: theory and computations"],"prefix":"10.1007","volume":"112","author":[{"given":"Matthew D.","family":"Norton","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Johannes O.","family":"Royset","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2021,9,2]]},"reference":[{"key":"6036_CR1","unstructured":"Bartlett, P., Foster, D., & Telgarsky, M. (2017). Spectrally-normalized margin bounds for neural networks. In Advances in neural information processing systems (pp. 6240\u20136249)."},{"issue":"4","key":"6036_CR2","doi-asserted-by":"publisher","first-page":"769","DOI":"10.1287\/moor.23.4.769","volume":"23","author":"A Ben-Tal","year":"1998","unstructured":"Ben-Tal, A., & Nemirovski, A. (1998). Robust convex optimization. Mathematics of Operations Research, 23(4), 769\u2013805.","journal-title":"Mathematics of Operations Research"},{"issue":"3","key":"6036_CR3","doi-asserted-by":"publisher","first-page":"931","DOI":"10.1016\/j.ejor.2017.03.051","volume":"270","author":"D Bertsimas","year":"2018","unstructured":"Bertsimas, D., & Copenhaver, M. (2018). Characterization of the equivalence of robustification and regularization in linear and matrix regression. European Journal of Operational Research, 270(3), 931\u2013942.","journal-title":"European Journal of Operational Research"},{"issue":"1","key":"6036_CR4","doi-asserted-by":"publisher","first-page":"217","DOI":"10.1007\/s10107-017-1174-z","volume":"171","author":"D Bertsimas","year":"2018","unstructured":"Bertsimas, D., Gupta, V., & Kallus, N. (2018). Robust sample average approximation. Mathematical Programming, 171(1), 217\u2013282.","journal-title":"Mathematical Programming"},{"key":"6036_CR5","volume-title":"Concentration inequalities: A nonasymptotic theory of independence","author":"S Boucheron","year":"2016","unstructured":"Boucheron, S., Lugosi, G., & Massart, P. (2016). Concentration inequalities: A nonasymptotic theory of independence. Oxford: Oxford University Press."},{"key":"6036_CR6","first-page":"499","volume":"2","author":"O Bousquet","year":"2002","unstructured":"Bousquet, O., & Elisseeff, A. (2002). Stability and generalization. Journal of Machine Learning Research, 2, 499\u2013526.","journal-title":"Journal of Machine Learning Research"},{"key":"6036_CR7","unstructured":"Carmon, Y., Raghunathan, A., Schmidt, L., Liang, P., & Duchi, J.\u00a0C. (2019). Unlabeled data improves adversarial robustness. arXiv:1905.13736."},{"key":"6036_CR8","unstructured":"Charles, Z., & Papailiopoulos, D. (2018). Stability and generalization of learning algorithms that converge to global optima. In International conference on machine learning (pp. 744\u2013753)."},{"issue":"12","key":"6036_CR9","doi-asserted-by":"publisher","first-page":"124018","DOI":"10.1088\/1742-5468\/ab39d9","volume":"2019","author":"P Chaudhari","year":"2017","unstructured":"Chaudhari, P., Choromanska, A., Soatto, S., LeCun, Y., Baldassi, C., Borgs, C., et al. (2017). Entropy-sgd: Biasing gradient descent into wide valleys. Journal of Statistical Mechanics: Theory and Experiment, 2019(12), 124018.","journal-title":"Journal of Statistical Mechanics: Theory and Experiment"},{"key":"6036_CR10","unstructured":"Cohen, J.\u00a0M., Rosenfeld, E., & Kolter, J.\u00a0Z. (2019). Certified adversarial robustness via randomized smoothing. In Proceedings of the 36th international conference on machine learning (ICML)."},{"key":"6036_CR11","doi-asserted-by":"crossref","unstructured":"Dong, Z., Yao, Z., Gholami, A., Keutzer, K., & Mahoney, M. W. (2019). HAWQ: Hessian aware quantization of neural networks with mixed-precision. In Proceedings of the international conference on computer vision (ICCV).","DOI":"10.1109\/ICCV.2019.00038"},{"key":"6036_CR12","unstructured":"Duchi, J., Glynn, P., & Namkoong, H. (2018). Statistics of robust optimization: A generalized empirical likelihood approach. arXiv:1610.03425."},{"key":"6036_CR13","unstructured":"Gong, C., Ren, T., Ye, M., & Liu, Q. (2020). Maxup: A simple way to improve generalization of neural network training. arXiv:2002.09024."},{"key":"6036_CR14","unstructured":"Gouk, H., Frank, E., Pfahringer, B., & Cree, M. (2018). Regularisation of neural networks by enforcing Lipschitz continuity. arXiv:1804.04368."},{"key":"6036_CR15","unstructured":"Hardt, M., Recht, B., & Singer, Y. (2016). Train faster, generalize better: Stability of stochastic gradient descent. In International conference on machine learning (pp. 1225\u20131234)."},{"issue":"1","key":"6036_CR16","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1162\/neco.1997.9.1.1","volume":"9","author":"S Hochreiter","year":"1997","unstructured":"Hochreiter, S., & Schmidhuber, J. (1997). Flat minima. Neural Computation, 9(1), 1\u201342.","journal-title":"Neural Computation"},{"key":"6036_CR17","unstructured":"Keskar, N.\u00a0S., Mudigere, D., Nocedal, J., Smelyanskiy, M., & Tang, P.\u00a0T.\u00a0P. (2016). On large-batch training for deep learning: Generalization gap and sharp minima. arXiv:1609.04836."},{"key":"6036_CR18","unstructured":"Lewis, A. (2002). Robust regularization. Technical report, School of ORIE, Cornell University, Ithaca, NY."},{"issue":"5","key":"6036_CR19","doi-asserted-by":"publisher","first-page":"3080","DOI":"10.1137\/08073682X","volume":"48","author":"A Lewis","year":"2010","unstructured":"Lewis, A., & Pang, C. (2010). Lipschitz behavior of the robust regularization. SIAM Journal of Control and Optimization, 48(5), 3080\u20133104.","journal-title":"SIAM Journal of Control and Optimization"},{"key":"6036_CR20","unstructured":"Li, H., Xu, Z., Taylor, G., Studer, C., & Goldstein, T. (2018). Visualizing the loss landscape of neural nets. In Advances in neural information processing systems (pp. 6389\u20136399)."},{"key":"6036_CR21","doi-asserted-by":"crossref","unstructured":"Liao, F., Liang, M., Dong, Y., Pang, T., Hu, X., & Zhu, J. (2018). Defense against adversarial attacks using high-level representation guided denoiser. In Proceedings of the IEEE conference on computer vision and pattern recognition (pp. 1778\u20131787).","DOI":"10.1109\/CVPR.2018.00191"},{"key":"6036_CR22","doi-asserted-by":"publisher","first-page":"69","DOI":"10.1007\/s10107-018-1278-0","volume":"178","author":"H Liu","year":"2019","unstructured":"Liu, H., Wang, X., Yao, T., Li, R., & Ye, Y. (2019). Sample average approximation with sparsity-inducing penalty for high-dimensional stochastic programming. Mathematical Programming, 178, 69\u2013108.","journal-title":"Mathematical Programming"},{"issue":"1\u20132","key":"6036_CR23","doi-asserted-by":"publisher","first-page":"177","DOI":"10.1007\/s10107-003-0442-2","volume":"97","author":"Z-Q Luo","year":"2003","unstructured":"Luo, Z.-Q. (2003). Applications of convex optimization in signal processing and digital communication. Mathematical Programming, 97(1\u20132), 177\u2013207.","journal-title":"Mathematical Programming"},{"issue":"4","key":"6036_CR24","doi-asserted-by":"publisher","first-page":"1140","DOI":"10.1137\/S1052623403421498","volume":"14","author":"Z-Q Luo","year":"2004","unstructured":"Luo, Z.-Q., Sturm, J., & Zhang, S. (2004). Multivariate nonnegative quadratic mappings. SIAM Journal of Optimization, 14(4), 1140\u20131162.","journal-title":"SIAM Journal of Optimization"},{"key":"6036_CR25","unstructured":"Madry, A., Makelov, A., Schmidt, L., Tsipras, D., & Vladu, A. (2018). Towards deep learning models resistant to adversarial attacks. In Proceedings of the international conference on learning representations (ICLR)."},{"issue":"2","key":"6036_CR26","doi-asserted-by":"publisher","first-page":"418","DOI":"10.1287\/opre.2013.1252","volume":"62","author":"H Men","year":"2014","unstructured":"Men, H., Freund, R., Nguyen, N., Saa-Seoane, J., & Peraire, J. (2014). Fabrication-adaptive optimization with an application to photonic crystal design. Operations Research, 62(2), 418\u2013434.","journal-title":"Operations Research"},{"key":"6036_CR27","unstructured":"Nguyen, Q., & Hein, M. (2017). The loss surface of deep and wide neural networks. In Proceedings of the 34th international conference on machine learning (Vol. 70, pp. 2603\u20132612). JMLR.org."},{"key":"6036_CR28","unstructured":"Oberman, A.\u00a0M., & Calder, J. (2018). Lipschitz regularized deep neural networks converge and generalize. arXiv:1808.09540."},{"key":"6036_CR29","unstructured":"Oliveira, R., & Thompson, P. (2017). Sample average approximation with heavier tails i: non-asymptotic bounds with weak assumptions and stochastic constraints. arXiv:1705.00822."},{"key":"6036_CR30","unstructured":"Oymak, S., Fabian, Z., Li, M., & Soltanolkotabi, M. (2019). Generalization guarantees for neural networks via harnessing the low-rank structure of the Jacobian. arXiv:1906.05392."},{"key":"6036_CR31","doi-asserted-by":"publisher","first-page":"223","DOI":"10.1016\/j.laa.2003.10.013","volume":"391","author":"M Pinar","year":"2004","unstructured":"Pinar, M., & Arikan, O. (2004). On robust solutions to linear least squares problems affected by data uncertainty and implementation errors with application to stochastic signal modeling. Linear Algebra and its Applications, 391, 223\u2013243.","journal-title":"Linear Algebra and its Applications"},{"key":"6036_CR32","unstructured":"Qian, H., & Wegman, M.\u00a0N. (2018). L2-nonexpansive neural networks. arXiv:1802.07896."},{"key":"6036_CR33","volume-title":"Variational analysis, volume 317 of Grundlehren der Mathematischen Wissenschaft","author":"RT Rockafellar","year":"1998","unstructured":"Rockafellar, R. T., & Wets, R.J.-B. (1998). Variational analysis, volume 317 of Grundlehren der Mathematischen Wissenschaft (3rd printing-2009 edition). Berlin: Springer.","edition":"3rd printing-20"},{"issue":"2","key":"6036_CR34","doi-asserted-by":"publisher","first-page":"265","DOI":"10.1007\/s10589-012-9528-1","volume":"55","author":"JO Royset","year":"2013","unstructured":"Royset, J. O. (2013). On sample size control in sample average approximations for solving smooth stochastic programs. Computational Optimization and Applications, 55(2), 265\u2013309.","journal-title":"Computational Optimization and Applications"},{"key":"6036_CR37","doi-asserted-by":"publisher","first-page":"762","DOI":"10.1287\/opre.2013.1163","volume":"61","author":"JO Royset","year":"2013","unstructured":"Royset, J. O., & Szechtman, R. (2013). Optimal budget allocation for sample average approximation. Operations Research, 61, 762\u2013776.","journal-title":"Operations Research"},{"issue":"2","key":"6036_CR35","doi-asserted-by":"publisher","first-page":"1118","DOI":"10.1137\/16M1060704","volume":"27","author":"JO Royset","year":"2017","unstructured":"Royset, J. O., & Wets, R.J.-B. (2017). Variational theory for optimization under stochastic ambiguity. SIAM Journal of Optimization, 27(2), 1118\u20131149.","journal-title":"SIAM Journal of Optimization"},{"key":"6036_CR36","volume-title":"An optimization primer. Springer series in operations research and financial engineering","author":"JO Royset","year":"2021","unstructured":"Royset, J. O., & Wets, R.J.-B. (2021). An optimization primer. Springer series in operations research and financial engineering. Berlin: Springer."},{"key":"6036_CR38","unstructured":"Sagun, L., Bottou, L., & LeCun, Y. (2016). Eigenvalues of the hessian in deep learning: Singularity and beyond. arXiv:1611.07476."},{"key":"6036_CR39","first-page":"2635","volume":"11","author":"S Shalev-Shwartz","year":"2010","unstructured":"Shalev-Shwartz, S., Shamir, O., Srebro, N., & Sridharan, K. (2010). Learnability, stability and uniform convergence. Journal of Machine Learning Research, 11, 2635\u20132670.","journal-title":"Journal of Machine Learning Research"},{"key":"6036_CR40","doi-asserted-by":"publisher","DOI":"10.1137\/1.9780898718751","volume-title":"Lectures on stochastic programming: Modeling and theory","author":"A Shapiro","year":"2009","unstructured":"Shapiro, A., Dentcheva, D., & Ruszczynski, A. (2009). Lectures on stochastic programming: Modeling and theory. Philipadia: SIAM."},{"key":"6036_CR41","doi-asserted-by":"publisher","first-page":"816","DOI":"10.1016\/j.ejor.2007.03.048","volume":"191","author":"E Stinstra","year":"2008","unstructured":"Stinstra, E., & den Hertog, D. (2008). Robust optimization using computer experiments. European Journal of Operations Research, 191, 816\u2013837.","journal-title":"European Journal of Operations Research"},{"key":"6036_CR42","doi-asserted-by":"crossref","unstructured":"Tsai, Y.-L., Hsu, C.-Y., Yu, C.-M., & Chen, P.-Y. (2021). Formalizing generalization and robustness of neural networks to weight perturbations. arXiv:2103.02200.","DOI":"10.1109\/ICASSP39728.2021.9414325"},{"issue":"6","key":"6036_CR43","doi-asserted-by":"publisher","first-page":"1358","DOI":"10.1287\/opre.2014.1314","volume":"62","author":"W Wiesemann","year":"2014","unstructured":"Wiesemann, W., Kuhn, D., & Sim, M. (2014). Distributionally robust convex optimization. Operations Research, 62(6), 1358\u20131376.","journal-title":"Operations Research"},{"key":"6036_CR44","unstructured":"Wong, E., Rice, L., & Kolter, J. Z. (2020). Fast is better than free: Revisiting adversarial training. In Proceedings of the international conference on learning representations (ICLR)."},{"key":"6036_CR45","unstructured":"Wu, D., Xia, S.-T., & Wang, Y. (2020). Adversarial weight perturbation helps robust generalization. Advances in Neural Information Processing Systems, 33."},{"key":"6036_CR46","unstructured":"Zhang, C., Bengio, S., Hardt, M., Recht, B., & Vinyals, O. (2016). Understanding deep learning requires rethinking generalization. arXiv:1611.03530."},{"key":"6036_CR47","unstructured":"Zhang, H., Yu, Y., Jiao, J., Xing, E., Ghaoui, L.\u00a0E., & Jordan, M. (2019). Theoretically principled trade-off between robustness and accuracy. In Proceedings of the international conference on machine learning (ICML) (pp. 7472\u20137482)."},{"issue":"3","key":"6036_CR48","doi-asserted-by":"publisher","first-page":"1855","DOI":"10.1137\/15M1038529","volume":"26","author":"J Zhang","year":"2016","unstructured":"Zhang, J., Xu, H., & Zhang, L. (2016). Quantitative stability analysis for distributionally robust optimization with moment constraints. SIAM Journal of Optimization, 26(3), 1855\u20131882.","journal-title":"SIAM Journal of Optimization"},{"key":"6036_CR49","unstructured":"Zheng, T., Chen, C., & Ren, K. (2018). Is pgd-adversarial training necessary? Alternative training via a soft-quantization network with noisy-natural samples only. arXiv:1810.05665."}],"container-title":["Machine Learning"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10994-021-06036-0.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10994-021-06036-0\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10994-021-06036-0.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,8,3]],"date-time":"2023-08-03T21:02:59Z","timestamp":1691096579000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10994-021-06036-0"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,9,2]]},"references-count":49,"journal-issue":{"issue":"8","published-print":{"date-parts":[[2023,8]]}},"alternative-id":["6036"],"URL":"https:\/\/doi.org\/10.1007\/s10994-021-06036-0","relation":{},"ISSN":["0885-6125","1573-0565"],"issn-type":[{"type":"print","value":"0885-6125"},{"type":"electronic","value":"1573-0565"}],"subject":[],"published":{"date-parts":[[2021,9,2]]},"assertion":[{"value":"21 July 2020","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"1 July 2021","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"7 July 2021","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"2 September 2021","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"This content has been made available to all.","name":"free","label":"Free to read"}]}}