{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,22]],"date-time":"2026-04-22T19:35:36Z","timestamp":1776886536658,"version":"3.51.2"},"reference-count":85,"publisher":"Springer Science and Business Media LLC","issue":"1-2","license":[{"start":{"date-parts":[[2025,2,5]],"date-time":"2025-02-05T00:00:00Z","timestamp":1738713600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,2,5]],"date-time":"2025-02-05T00:00:00Z","timestamp":1738713600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/100000086","name":"Directorate for Mathematical and Physical Sciences","doi-asserted-by":"publisher","award":["2134168"],"award-info":[{"award-number":["2134168"]}],"id":[{"id":"10.13039\/100000086","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100004351","name":"Cisco Systems","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100004351","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100004316","name":"International Business Machines Corporation","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100004316","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000148","name":"Division of Electrical, Communications and Cyber Systems","doi-asserted-by":"publisher","award":["2047177"],"award-info":[{"award-number":["2047177"]}],"id":[{"id":"10.13039\/100000148","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Math. Program."],"published-print":{"date-parts":[[2025,11]]},"DOI":"10.1007\/s10107-025-02194-4","type":"journal-article","created":{"date-parts":[[2025,2,5]],"date-time":"2025-02-05T01:46:17Z","timestamp":1738719977000},"page":"539-589","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":7,"title":["On penalty-based bilevel gradient descent method"],"prefix":"10.1007","volume":"214","author":[{"given":"Han","family":"Shen","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Quan","family":"Xiao","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Tianyi","family":"Chen","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2025,2,5]]},"reference":[{"key":"2194_CR1","unstructured":"Arbel, M., Mairal, J.: Non-convex bilevel games with critical point selection maps. In: Proceedings of Advances in Neural Information Processing Systems (2022)"},{"issue":"1","key":"2194_CR2","doi-asserted-by":"publisher","first-page":"183","DOI":"10.1137\/080716542","volume":"2","author":"A Beck","year":"2009","unstructured":"Beck, A., Teboulle, M.: A fast iterative shrinkage-thresholding algorithm for linear inverse problems. SIAM J. Imaging Sci. 2(1), 183\u2013202 (2009)","journal-title":"SIAM J. Imaging Sci."},{"key":"2194_CR3","unstructured":"Bolte, J., Pauwels, E., Vaiter, S.: Automatic differentiation of nonsmooth iterative algorithms. In: Proceedings of Advances in Neural Information Processing Systems (2022)"},{"key":"2194_CR4","doi-asserted-by":"crossref","unstructured":"Chen, L., Jose, S.T., Nikoloska, I., Park, S., Chen, T., Simeone, O.: Learning with limited samples: meta-learning and applications to communication systems. Found. Trends Signal Process. 17(2), 79\u2013208 (2023a)","DOI":"10.1561\/2000000115"},{"key":"2194_CR5","unstructured":"Chen, L., Xu, J., Zhang, J.: On finding small hyper-gradients in bilevel optimization: hardness results and improved analysis. In: Proceedings of Conference on Learning Theory (2024)"},{"key":"2194_CR6","unstructured":"Chen, T., Sun, Y., Yin, W.: Tighter analysis of alternating stochastic gradient method for stochastic nested problems. In: Proceedings of Advances in Neural Information Processing Systems (2021)"},{"key":"2194_CR7","unstructured":"Chen, T., Sun, Y., Xiao, Q., Yin, W.: A single-timescale method for stochastic bilevel optimization. In: Proceedings of International Conference on Artificial Intelligence and Statistics (2022)"},{"key":"2194_CR8","unstructured":"Chen, X., Huang, M., Ma, S., Balasubramanian, K.: Decentralized stochastic bilevel optimization with improved per-iteration complexity. In: Proceedings of International Conference on Machine Learning (2023b)"},{"key":"2194_CR9","unstructured":"Cheng, C., Xie, T., Jiang, N., Agarwal, A.: Adversarially trained actor critic for offline reinforcement learning. In: Proceedings of International Conference on Machine Learning (2022)"},{"key":"2194_CR10","doi-asserted-by":"crossref","unstructured":"Clarke, F.: Optimization and Nonsmooth Analysis. SIAM (1990)","DOI":"10.1137\/1.9781611971309"},{"issue":"1","key":"2194_CR11","doi-asserted-by":"publisher","first-page":"235","DOI":"10.1007\/s10479-007-0176-2","volume":"153","author":"B Colson","year":"2007","unstructured":"Colson, B., Marcotte, P., Savard, G.: An overview of bilevel optimization. Ann. Oper. Res. 153(1), 235\u2013256 (2007)","journal-title":"Ann. Oper. Res."},{"issue":"2\u20133","key":"2194_CR12","doi-asserted-by":"publisher","first-page":"121","DOI":"10.1561\/2000000111","volume":"15","author":"C Crockett","year":"2022","unstructured":"Crockett, C., Fessler, J.: Bilevel methods for image reconstruction. Found. Trends Signal Process. 15(2\u20133), 121\u2013289 (2022)","journal-title":"Found. Trends Signal Process."},{"key":"2194_CR13","unstructured":"Dagr\u00e9ou, M., Ablin, P., Vaiter, S., Moreau, T.: A framework for bilevel optimization that enables stochastic and global variance reduction algorithms. In: Proceedings of Advances in Neural Information Processing Systems (2022)"},{"issue":"2","key":"2194_CR14","doi-asserted-by":"publisher","first-page":"561","DOI":"10.1007\/s10208-021-09516-w","volume":"22","author":"D Davis","year":"2022","unstructured":"Davis, D., Drusvyatskiy, D.: Proximal methods avoid active strict saddles of weakly convex functions. Found. Comput. Math. 22(2), 561\u2013606 (2022)","journal-title":"Found. Comput. Math."},{"key":"2194_CR15","doi-asserted-by":"publisher","first-page":"37","DOI":"10.1007\/s10107-010-0342-1","volume":"131","author":"S Dempe","year":"2012","unstructured":"Dempe, S., Dutta, J.: Is bilevel programming a special case of a mathematical program with complementarity constraints? Math. Program. 131, 37\u201348 (2012)","journal-title":"Math. Program."},{"issue":"3","key":"2194_CR16","doi-asserted-by":"publisher","first-page":"1202","DOI":"10.1016\/j.na.2011.05.097","volume":"75","author":"S Dempe","year":"2012","unstructured":"Dempe, S., Zemkoho, A.: On the Karush-Kuhn-Tucker reformulation of the bilevel optimization problem. Nonlinear Anal. Theory Methods Appl. 75(3), 1202\u20131218 (2012)","journal-title":"Nonlinear Anal. Theory Methods Appl."},{"key":"2194_CR17","doi-asserted-by":"crossref","unstructured":"Dempe, S., Kalashnikov, V., Kalashnykova, N.: Optimality conditions for bilevel programming problems. In: Optimization with Multivalued Mappings: Theory, Applications, and Algorithms, pp. 3\u201328 (2006)","DOI":"10.1007\/0-387-34221-4_1"},{"key":"2194_CR18","doi-asserted-by":"publisher","DOI":"10.1007\/978-0-387-87821-8","volume-title":"Implicit Functions and Solution Mappings","author":"AL Dontchev","year":"2009","unstructured":"Dontchev, A.L., Rockafellar, R.T.: Implicit Functions and Solution Mappings, vol. 543. Springer (2009)"},{"issue":"3","key":"2194_CR19","doi-asserted-by":"publisher","first-page":"919","DOI":"10.1287\/moor.2017.0889","volume":"43","author":"D Drusvyatskiy","year":"2018","unstructured":"Drusvyatskiy, D., Lewis, A.: Error bounds, quadratic growth, and linear convergence of proximal methods. Math. Oper. Res. 43(3), 919\u2013948 (2018)","journal-title":"Math. Oper. Res."},{"issue":"1","key":"2194_CR20","doi-asserted-by":"publisher","first-page":"503","DOI":"10.1007\/s10107-018-1311-3","volume":"178","author":"D Drusvyatskiy","year":"2019","unstructured":"Drusvyatskiy, D., Paquette, C.: Efficiency of minimizing compositions of convex functions and smooth maps. Math. Program. 178(1), 503\u2013558 (2019)","journal-title":"Math. Program."},{"key":"2194_CR21","doi-asserted-by":"publisher","first-page":"47","DOI":"10.1007\/BF01585928","volume":"70","author":"JE Falk","year":"1995","unstructured":"Falk, J.E., Liu, J.: On bilevel programming, part I: general nonlinear cases. Math. Program. 70, 47\u201372 (1995)","journal-title":"Math. Program."},{"key":"2194_CR22","doi-asserted-by":"crossref","unstructured":"Fiacco, A.: Optimal value continuity and differential stability bounds under the mangasarian-fromovitz constraint qualification. In: Mathematical Programming with Data Perturbations II, 2nd edn, pp. 65\u201390. CRC Press (2020)","DOI":"10.1201\/9781003071884-5"},{"key":"2194_CR23","unstructured":"Finn, C., Abbeel, P., Levine, S.: Model-agnostic meta-learning for fast adaptation of deep networks. In: Proceedings of International Conference on Machine Learning (2022)"},{"key":"2194_CR24","unstructured":"Franceschi, L., Donini, M., Frasconi, P., Pontil, M.: Forward and reverse gradient-based hyperparameter optimization. In: Proceedings of International Conference on Machine Learning (2017)"},{"key":"2194_CR25","unstructured":"Franceschi, L., Frasconi, P., Salzo, S., Grazzi, R., Pontil, M.: Bilevel programming for hyperparameter optimization and meta-learning. In: Proceedings of International Conference on Machine Learning (2018)"},{"key":"2194_CR26","unstructured":"Gao, L., Ye, J., Yin, H., Zeng, S., Zhang, J.: Value function based difference-of-convex algorithm for bilevel hyperparameter selection problems. In: Proceedings of International Conference on Machine Learning (2022)"},{"key":"2194_CR27","unstructured":"Ghadimi, S., Wang, M.: Approximation methods for bilevel programming. arXiv preprint arXiv:1802.02246 (2018)"},{"issue":"1","key":"2194_CR28","doi-asserted-by":"publisher","first-page":"267","DOI":"10.1007\/s10107-014-0846-1","volume":"155","author":"S Ghadimi","year":"2016","unstructured":"Ghadimi, S., Lan, G., Zhang, H.: Mini-batch stochastic approximation methods for nonconvex stochastic composite optimization. Math. Program. 155(1), 267\u2013305 (2016)","journal-title":"Math. Program."},{"key":"2194_CR29","unstructured":"Giovannelli, T., Kent, G., Vicente, L.: Inexact bilevel stochastic gradient methods for constrained and unconstrained lower-level problems. arXiv preprint arXiv:2110.00604 (2022)"},{"key":"2194_CR30","unstructured":"Giovannelli, T., Kent, G., Vicente, L.: Bilevel optimization with a multi-objective lower-level problem: risk-neutral and risk-averse formulations. In: Optimization Methods and Software, pp. 1\u201323 (2022)"},{"key":"2194_CR31","unstructured":"Gong, C., Liu, X., Liu, Q.: Automatic and harmless regularization with constrained and lexicographic optimization: a dynamic barrier approach. In: Proceedings of Advances in Neural Information Processing Systems (2021)"},{"key":"2194_CR32","unstructured":"Grazzi, R., Franceschi, L., Pontil, M., Salzo, S.: On the iteration complexity of hypergradient computation. In: Proceedings of International Conference on Machine Learning, pp. 3748\u20133758 (2020)"},{"key":"2194_CR33","doi-asserted-by":"crossref","unstructured":"Hong, M., Wai, H.T., Wang, Z., Yang, Z.: A two-timescale framework for bilevel optimization: complexity analysis and application to actor-critic. SIAM J. Optim. 33(1) (2023)","DOI":"10.1137\/20M1387341"},{"key":"2194_CR34","doi-asserted-by":"crossref","unstructured":"Hu, J., Ji, X., Pang, J.S.: Model selection via bilevel optimization. In: IEEE International Joint Conference on Neural Network, pp. 1922\u20131929 (2006)","DOI":"10.1109\/IJCNN.2006.246935"},{"key":"2194_CR35","unstructured":"Hu, Q., Zhong, Y., Yang, T.: Multi-block min-max bilevel optimization with applications in multi-task deep auc maximization. In: Proceedings of Advances in Neural Information Processing Systems (2022)"},{"key":"2194_CR36","unstructured":"Huang, F., Li, J., Gao, S., Huang, H.: Enhanced bilevel optimization via Bregman distance. In: Proceedings of Advances in Neural Information Processing Systems (2022)"},{"key":"2194_CR37","unstructured":"Ji, K., Yang, J., Liang, Y.: Bilevel optimization: convergence analysis and enhanced design. In: Proceedings of International Conference on Machine Learning (2021)"},{"key":"2194_CR38","unstructured":"Ji, K., Liu, M., Liang, Y., Ying, L.: Will bilevel optimizers benefit from loops. In: Proceedings of Advances in Neural Information Processing Systems (2022)"},{"key":"2194_CR39","unstructured":"Jiang, H., Chen, Z., Shi, Y., Dai, B., Zhao, T.: Learning to defend by learning to attack. In: Proceedings of International Conference on Artificial Intelligence and Statistics (2021)"},{"key":"2194_CR40","unstructured":"Jin, C., Ge, R., Netrapalli, P., Kakade, S.M., Jordan, M.I.: How to escape saddle points efficiently. In: Proceedings of International Conference on Machine Learning, pp 1724\u20131732 (2017)"},{"key":"2194_CR41","doi-asserted-by":"crossref","unstructured":"Karimi, H., Nutini, J., Schmidt, M.: Linear convergence of gradient and proximal-gradient methods under the Polyak-Lojasiewicz condition. In: Proceedings of Joint European Conference on Machine Learning and Knowledge Discovery in Databases (2016)","DOI":"10.1007\/978-3-319-46128-1_50"},{"key":"2194_CR42","unstructured":"Khanduri., P., Zeng, S., Hong, M., Wai, H.T., Wang, Z., Yang, Z.: A near-optimal algorithm for stochastic bilevel optimization via double-momentum. In: Proceedings of Advances in Neural Information Processing Systems (2021)"},{"key":"2194_CR43","unstructured":"Lee, J.D., Simchowitz, M., Jordan, M.I., Recht, B.: Gradient descent only converges to minimizers. In: Proceedings of Conference on Learning Theory, pp. 1246\u20131257 (2016)"},{"key":"2194_CR44","doi-asserted-by":"crossref","unstructured":"Li, J., Gu, B., Huang, H.: A fully single loop algorithm for bilevel optimization without hessian inverse. In: Proceedings of AAAI Conference on Artificial Intelligence (2022)","DOI":"10.1609\/aaai.v36i7.20706"},{"key":"2194_CR45","doi-asserted-by":"publisher","first-page":"85","DOI":"10.1016\/j.acha.2021.12.009","volume":"59","author":"C Liu","year":"2022","unstructured":"Liu, C., Zhu, L., Belkin, M.: Loss landscapes and optimization in over-parameterized non-linear systems and neural networks. Appl. Comput. Harmon. Anal. 59, 85\u2013116 (2022)","journal-title":"Appl. Comput. Harmon. Anal."},{"key":"2194_CR46","unstructured":"Liu, R., Mu, P., Yuan, X., Zeng, S., Zhang, J.: A generic first-order algorithmic framework for bi-level programming beyond lower-level singleton. In: Proceedings of International Conference on Machine Learning (2020)"},{"issue":"12","key":"2194_CR47","doi-asserted-by":"publisher","first-page":"10045","DOI":"10.1109\/TPAMI.2021.3132674","volume":"44","author":"R Liu","year":"2021","unstructured":"Liu, R., Gao, J., Zhang, J., Meng, D., Lin, Z.: Investigating bilevel optimization for learning and vision from a unified perspective: a survey and beyond. IEEE Trans. Pattern Anal. Mach. Intell. 44(12), 10045\u201310067 (2021)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"2194_CR48","unstructured":"Liu, R., Liu, X., Yuan, X., Zeng, S., Zhang, J.: A value-function-based interior-point method for non-convex bi-level optimization. In: Proceedings of International Conference on Machine Learning (2021b)"},{"key":"2194_CR49","unstructured":"Liu, R., Liu, Y., Zeng, S., Zhang, J.: Towards gradient-based bilevel optimization with non-convex followers and beyond. In: Proceedings of Advances in Neural Information Processing Systems (2021c)"},{"issue":"1","key":"2194_CR50","doi-asserted-by":"publisher","first-page":"38","DOI":"10.1109\/TPAMI.2022.3140249","volume":"45","author":"R Liu","year":"2022","unstructured":"Liu, R., Mu, P., Yuan, X., Zeng, S., Zhang, J.: A general descent aggregation framework for gradient-based bi-level optimization. IEEE Trans. Pattern Anal. Mach. Intell. 45(1), 38\u201357 (2022)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"2194_CR51","doi-asserted-by":"crossref","unstructured":"Lu, S., Cui, X., Squillante, M., Kingsbury, B., Horesh, L.: Decentralized bilevel optimization for personalized client learning. In: Proceedings of IEEE International Conference on Acoustics, Speech and Signal Processing (2022)","DOI":"10.1109\/ICASSP43922.2022.9746612"},{"key":"2194_CR52","unstructured":"Lu, Z., Mei, S.: First-order penalty methods for bilevel optimization. arXiv preprint arXiv:2301.01716 (2023)"},{"key":"2194_CR53","doi-asserted-by":"publisher","DOI":"10.1017\/CBO9780511983658","volume-title":"Mathematical Programs with Equilibrium Constraints","author":"Z Luo","year":"1996","unstructured":"Luo, Z., Pang, J., Ralph, D.: Mathematical Programs with Equilibrium Constraints. Cambridge University Press (1996)"},{"key":"2194_CR54","unstructured":"Maclaurin, D., Duvenaud, D., Adams, R.: Gradient-based hyperparameter optimization through reversible learning. In: Proceedings of International Conference on Machine Learning (2015)"},{"key":"2194_CR55","unstructured":"Mehra, A., Hamm, J.: Penalty method for inversion-free deep bilevel optimization. In: Asian Conference on Machine Learning (2021)"},{"key":"2194_CR56","unstructured":"Mei, J., Xiao, C., Szepesvari, C., Schuurmans, D.: On the global convergence rates of softmax policy gradient methods. In: Proceedings of International Conference on Machine Learning (2020)"},{"issue":"1","key":"2194_CR57","doi-asserted-by":"publisher","first-page":"125","DOI":"10.1007\/s10107-012-0629-5","volume":"140","author":"Y Nesterov","year":"2013","unstructured":"Nesterov, Y.: Gradient methods for minimizing composite functions. Math. Program. 140(1), 125\u2013161 (2013)","journal-title":"Math. Program."},{"issue":"1","key":"2194_CR58","doi-asserted-by":"publisher","first-page":"177","DOI":"10.1007\/s10107-006-0706-8","volume":"108","author":"Y Nesterov","year":"2006","unstructured":"Nesterov, Y., Polyak, B.: Cubic regularization of newton method and its global performance. Math. Program. 108(1), 177\u2013205 (2006)","journal-title":"Math. Program."},{"key":"2194_CR59","unstructured":"Nichol, A., Achiam, J., Schulman, J.: On first-order meta-learning algorithms. arXiv preprint arXiv:1803.02999 (2018)"},{"key":"2194_CR60","unstructured":"Nouiehed, M., Sanjabi, M., Huang, T., Lee, J., Razaviyayn, M.: Solving a class of non-convex min-max games using iterative first order methods. In: Proceedings of Advances in Neural Information Processing Systems (2019)"},{"key":"2194_CR61","unstructured":"Pedregosa, F.: Hyperparameter optimization with approximate gradient. In: Proceedings of International Conference on Machine Learning (2016)"},{"key":"2194_CR62","unstructured":"Rajeswaran, A., Finn, C., Kakade, S., Levine, S.: Meta-learning with implicit gradients. In: Proceedings of Advances in Neural Information Processing Systems (2019)"},{"issue":"2","key":"2194_CR63","doi-asserted-by":"publisher","first-page":"640","DOI":"10.1137\/16M105592X","volume":"27","author":"S Sabach","year":"2017","unstructured":"Sabach, S., Shtern, S.: A first order method for solving convex bilevel optimization problems. SIAM J. Optim. 27(2), 640\u2013660 (2017)","journal-title":"SIAM J. Optim."},{"key":"2194_CR64","unstructured":"Shaban, A., Cheng, C., Hatch, N., Boots, B.: Truncated back-propagation for bilevel optimization. In: Proceedings of International Conference on Artificial Intelligence and Statistics (2019)"},{"key":"2194_CR65","unstructured":"Shen, H., Chen, T.: A single-timescale analysis for stochastic approximation with multiple coupled sequences. In: Proceedings of Advances in Neural Information Processing Systems (2022)"},{"key":"2194_CR66","unstructured":"Shen, H., Chen, T.: On penalty-based bilevel gradient descent method. In: Proceedings of International Conference on Machine Learning (2023)"},{"key":"2194_CR67","unstructured":"Shen, H., Yang, Z., Chen, T.: Principled penalty-based methods for bilevel reinforcement learning and RLHF. In: Proceedings of International Conference on Machine Learning (2024)"},{"key":"2194_CR68","unstructured":"Sow, D., Ji, K., Liang, Y.: On the convergence theory for hessian-free bilevel algorithms. In: Proceedings of Advances in Neural Information Processing Systems (2022)"},{"key":"2194_CR69","volume-title":"The Theory of Market Economy","author":"H Stackelberg","year":"1952","unstructured":"Stackelberg, H.: The Theory of Market Economy. Oxford University Press (1952)"},{"key":"2194_CR70","unstructured":"Tarzanagh, D., Li, M., Thrampoulidis, C., Oymak, S.: Fednest: federated bilevel, minimax, and compositional optimization. In: Proceedings of International Conference on Machine Learning (2022)"},{"issue":"3","key":"2194_CR71","doi-asserted-by":"publisher","first-page":"291","DOI":"10.1007\/BF01096458","volume":"5","author":"L Vicente","year":"1994","unstructured":"Vicente, L., Calamai, P.: Bilevel and multilevel programming: a bibliography review. J. Global Optim. 5(3), 291\u2013306 (1994)","journal-title":"J. Global Optim."},{"issue":"2","key":"2194_CR72","doi-asserted-by":"publisher","first-page":"379","DOI":"10.1007\/BF02191670","volume":"81","author":"L Vicente","year":"1994","unstructured":"Vicente, L., Savard, G., J\u00fadice, J.: Descent approaches for quadratic bilevel programming. J. Optim. Theory Appl. 81(2), 379\u2013399 (1994)","journal-title":"J. Optim. Theory Appl."},{"key":"2194_CR73","unstructured":"Vicol, P., Lorraine, J., Pedregosa, F., Duvenaud, D., Grosse, R.: On implicit bias in overparameterized bilevel optimization. In: Proceedings of International Conference on Machine Learning (2022)"},{"key":"2194_CR74","doi-asserted-by":"publisher","first-page":"419","DOI":"10.1007\/s10107-016-1017-3","volume":"161","author":"M Wang","year":"2017","unstructured":"Wang, M., Fang, E., Liu, H.: Stochastic compositional gradient descent: algorithms for minimizing compositions of expected-value functions. Math. Program. 161, 419\u2013449 (2017)","journal-title":"Math. Program."},{"key":"2194_CR75","unstructured":"Xiao, Q., Lu, S., Chen, T.: A generalized alternating method for bilevel learning under the polyak-\u0142ojasiewicz condition. In: Proceedings of Advances in Neural Information Processing Systems (2023a)"},{"key":"2194_CR76","unstructured":"Xiao, Q., Shen, H., Yin, W., Chen, T.: Alternating implicit projected sgd and its efficient variants for equality-constrained bilevel optimization. In: Proceedings of International Conference on Artificial Intelligence and Statistics (2023b)"},{"key":"2194_CR77","unstructured":"Yang, J., Ji, K., Liang, Y.: Provably faster algorithms for bilevel optimization. arXiv preprint arXiv:2106.04692 (2021)"},{"key":"2194_CR78","unstructured":"Yang S, Zhang X, Wang M (2022) Decentralized gossip-based stochastic bilevel optimization over communication networks. In: Proceedings of Advances in Neural Information Processing Systems"},{"key":"2194_CR79","doi-asserted-by":"crossref","unstructured":"Ye, J.: Constraint qualifications and optimality conditions in bilevel optimization. Bilevel Optimization: Advances and Next Challenges, pp. 227\u2013251 (2020)","DOI":"10.1007\/978-3-030-52119-6_8"},{"key":"2194_CR80","doi-asserted-by":"crossref","unstructured":"Ye, J., Zhu, D., Zhu, Q.: Exact penalization and necessary optimality conditions for generalized bilevel programming problems. SIAM J. Optim. 7(2) (1997)","DOI":"10.1137\/S1052623493257344"},{"issue":"2","key":"2194_CR81","doi-asserted-by":"publisher","first-page":"1583","DOI":"10.1007\/s10107-022-01888-3","volume":"198","author":"J Ye","year":"2023","unstructured":"Ye, J., Yuan, X., Zeng, S., Zhang, J.: Difference of convex algorithms for bilevel programs with applications in hyperparameter selection. Math. Program. 198(2), 1583\u20131616 (2023)","journal-title":"Math. Program."},{"issue":"1","key":"2194_CR82","doi-asserted-by":"publisher","first-page":"9","DOI":"10.1080\/02331939508844060","volume":"33","author":"JJ Ye","year":"1995","unstructured":"Ye, J.J., Zhu, D.: Optimality conditions for bilevel programming problems. Optimization 33(1), 9\u201327 (1995)","journal-title":"Optimization"},{"issue":"4","key":"2194_CR83","doi-asserted-by":"publisher","first-page":"1885","DOI":"10.1137\/080725088","volume":"20","author":"JJ Ye","year":"2010","unstructured":"Ye, J.J., Zhu, D.: New necessary optimality conditions for bilevel programs by combining the MPEC and value function approaches. SIAM J. Optim. 20(4), 1885\u20131905 (2010)","journal-title":"SIAM J. Optim."},{"key":"2194_CR84","unstructured":"Ye, M., Liu, B., Wright, S., Stone, P., Liu, Q.: Bome! bilevel optimization made easy: a simple first-order approach. In: Proceedings of Advances in Neural Information Processing Systems (2022)"},{"key":"2194_CR85","unstructured":"Zhou, X., Pi, R., Zhang, W., Lin, Y., Chen, Z., Zhang, T.: Probabilistic bilevel coreset selection. In: Proceedings of International Conference on Machine Learning (2022)"}],"container-title":["Mathematical Programming"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10107-025-02194-4.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10107-025-02194-4\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10107-025-02194-4.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,1]],"date-time":"2025-12-01T07:45:52Z","timestamp":1764575152000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10107-025-02194-4"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,2,5]]},"references-count":85,"journal-issue":{"issue":"1-2","published-print":{"date-parts":[[2025,11]]}},"alternative-id":["2194"],"URL":"https:\/\/doi.org\/10.1007\/s10107-025-02194-4","relation":{},"ISSN":["0025-5610","1436-4646"],"issn-type":[{"value":"0025-5610","type":"print"},{"value":"1436-4646","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,2,5]]},"assertion":[{"value":"29 August 2023","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"21 December 2024","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"5 February 2025","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}