{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,8]],"date-time":"2026-05-08T15:48:17Z","timestamp":1778255297182,"version":"3.51.4"},"reference-count":62,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"1","license":[{"start":{"date-parts":[[2024,4,16]],"date-time":"2024-04-16T00:00:00Z","timestamp":1713225600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2024,4,16]],"date-time":"2024-04-16T00:00:00Z","timestamp":1713225600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,4,16]],"date-time":"2024-04-16T00:00:00Z","timestamp":1713225600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Signal Process. Mag."],"published-print":{"date-parts":[[2024,4,16]]},"DOI":"10.1109\/msp.2024.3358284","type":"journal-article","created":{"date-parts":[[2024,4,16]],"date-time":"2024-04-16T17:31:58Z","timestamp":1713288718000},"page":"38-59","source":"Crossref","is-referenced-by-count":32,"title":["An Introduction to Bilevel Optimization: Foundations and applications in signal processing and machine learning"],"prefix":"10.1109","volume":"41","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-5147-9838","authenticated-orcid":false,"given":"Yihua","family":"Zhang","sequence":"first","affiliation":[{"name":"Department of Computer Science and Engineering, Michigan State University, East Lansing, MI, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3055-2917","authenticated-orcid":false,"given":"Prashant","family":"Khanduri","sequence":"additional","affiliation":[{"name":"Department of Computer Science, Wayne State University, Detroit, MI, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1136-055X","authenticated-orcid":false,"given":"Ioannis","family":"Tsaknakis","sequence":"additional","affiliation":[{"name":"Department of Electrical and Computer Engineering, University of Minnesota, MN, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0000-0127-8707","authenticated-orcid":false,"given":"Yuguang","family":"Yao","sequence":"additional","affiliation":[{"name":"Department of Computer Science and Engineering, Michigan State University, East Lansing, MI, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1263-9365","authenticated-orcid":false,"given":"Mingyi","family":"Hong","sequence":"additional","affiliation":[{"name":"Department of Electrical and Computer Engineering, University of Minnesota, MN, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2817-6991","authenticated-orcid":false,"given":"Sijia","family":"Liu","sequence":"additional","affiliation":[{"name":"Department of Computer Science and Engineering, Michigan State University, East Lansing, MI, USA"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-52119-6_20"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.2307\/2550609"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1007\/s10479-007-0176-2"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/tevc.2017.2712906"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1016\/j.ejor.2023.01.008"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1007\/s10957-023-02238-9"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/tsp.2022.3143372"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/tsp.2020.3043879"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1561\/2000000111"},{"key":"ref10","first-page":"26,693","article-title":"Revisiting and advancing fast adversarial training through the lens of bi-level optimization","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Zhang","year":"2022"},{"key":"ref11","article-title":"Adversarial training should be cast as a non-zero-sum game","author":"Robey","year":"2023"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.11838"},{"key":"ref13","first-page":"12,080","article-title":"MetaPoison: Practical general-purpose clean-label data poisoning","volume-title":"Proc. 34th Int. Conf. Neural Inf. Process. Syst.","author":"Huang","year":"2020"},{"key":"ref14","first-page":"1126","article-title":"Model-agnostic meta-learning for fast adaptation of deep networks","volume-title":"Proc. 34th Int. Conf. Mach. Learn.","author":"Finn","year":"2017"},{"key":"ref15","article-title":"Sign-MAML: Efficient model-agnostic meta-learning by signSGD","volume-title":"Proc. 5th Workshop Meta-Learn. Conf. Neural Inf. Process. Syst.","author":"Fan","year":"2021"},{"key":"ref16","first-page":"1","article-title":"Meta-learning with implicit gradients","volume-title":"Proc. 33rd Int. Conf. Neural Inf. Process. Syst.","author":"Rajeswaran","year":"2019"},{"key":"ref17","first-page":"1","article-title":"Coresets via bilevel optimization for continual learning and streaming","volume-title":"Proc. 34th Conf. Neural Inf. Process. Syst.","author":"Borsos","year":"2020"},{"key":"ref18","first-page":"1","article-title":"Advancing model pruning via bi-level optimization","volume-title":"Proc. 36th Conf. Neural Inf. Process. Syst.","author":"Zhang","year":"2022"},{"key":"ref19","article-title":"Gradient-based bi-level optimization for deep learning: A survey","author":"Chen","year":"2022"},{"key":"ref20","article-title":"Learning sample reweighting for accuracy and adversarial robustness","author":"Holtz","year":"2022"},{"key":"ref21","first-page":"6305","article-title":"A generic first-order algorithmic framework for bi-level programming beyond lower-level singleton","volume-title":"Proc. 37th Int. Conf. Mach. Learn.","author":"Liu","year":"2020"},{"key":"ref22","first-page":"1723","article-title":"Truncated back-propagation for bilevel optimization","volume-title":"Proc. 22nd Int. Conf. Artif. Intell. Statist.","author":"Shaban","year":"2019"},{"key":"ref23","first-page":"1568","article-title":"Bilevel programming for hyperparameter optimization and meta-learning","volume-title":"Proc. 35th Int. Conf. Mach. Learn.","author":"Franceschi","year":"2018"},{"key":"ref24","first-page":"1165","article-title":"Forward and reverse gradient-based hyperparameter optimization","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Franceschi","year":"2017"},{"key":"ref25","article-title":"Darts: Differentiable architecture search","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Liu","year":"2018"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/tpami.2021.3132674"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4614-5981-1"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1002\/wics.13"},{"key":"ref29","first-page":"18,098","article-title":"WoodFisher: Efficient second-order approximation for neural network compression","volume-title":"Proc. 34th Int. Conf. Neural Inf. Process. Syst.","author":"Singh","year":"2020"},{"key":"ref30","article-title":"Approximation methods for bilevel programming","author":"Ghadimi","year":"2018"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1137\/20m1387341"},{"key":"ref32","first-page":"16,291","article-title":"Linearly constrained bilevel optimization: A smoothed implicit gradient approach","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Khanduri","year":"2023"},{"key":"ref33","article-title":"A constrained optimization approach to bilevel optimization with multiple inner minima","author":"Sow","year":"2022"},{"key":"ref34","article-title":"On penalty-based bilevel gradient descent method","author":"Shen","year":"2023"},{"key":"ref35","first-page":"6882","article-title":"A value-function-based interior-point method for non-convex bi-level optimization","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Liu","year":"2021"},{"key":"ref36","first-page":"17,248","article-title":"Bome Bilevel optimization made easy: A simple first-order approach","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Liu","year":"2022"},{"key":"ref37","first-page":"987","article-title":"Alternating projected SGD for equality-constrained bilevel optimization","volume-title":"Proc. 26th Int. Conf. Artif. Intel. Statist.","author":"Xiao","year":"2023"},{"key":"ref38","first-page":"2466","article-title":"A single-timescale method for stochastic bilevel optimization","volume-title":"Proc. Int. Conf. Artif. Intell. Statist.","author":"Chen","year":"2022"},{"key":"ref39","first-page":"1","article-title":"Amortized implicit differentiation for stochastic bilevel optimization","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Arbel","year":"2022"},{"key":"ref40","first-page":"4882","article-title":"Bilevel optimization: Convergence analysis and enhanced design","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Ji","year":"2021"},{"key":"ref41","first-page":"30,271","article-title":"A near-optimal algorithm for stochastic bilevel optimization via double-momentum","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Khanduri","year":"2021"},{"key":"ref42","article-title":"Tighter analysis of alternating stochastic gradient method for stochastic nested problems","author":"Chen","year":"2021"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1137\/120880811"},{"key":"ref44","first-page":"15,236","article-title":"Momentum-based variance reduction in non-convex SGD","volume-title":"Proc. 33rd Int. Conf. Neural Inf. Process. Syst.","author":"Cutkosky","year":"2019"},{"key":"ref45","first-page":"1","article-title":"Provably faster algorithms for bilevel optimization","volume-title":"Proc. 35th Int. Conf. Neural Inf. Process. Syst.","author":"Yang","year":"2021"},{"key":"ref46","first-page":"1","article-title":"A framework for bilevel optimization that enables stochastic and global variance reduction algorithms","volume-title":"Proc. 36th Int. Conf. Neural Inf. Process. Syst.","author":"Dagrou","year":"2022"},{"key":"ref47","first-page":"2206","article-title":"Reliable evaluation of adversarial robustness with an ensemble of diverse parameter-free attacks","volume-title":"Proc. 37th Int. Conf. Mach. Learn.","author":"Croce","year":"2020"},{"key":"ref48","first-page":"18,083","article-title":"A fully first-order method for stochastic bilevel optimization","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Kwon","year":"2023"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1109\/tsp.2023.3234462"},{"key":"ref50","article-title":"Randomized stochastic variance-reduced methods for multi-task stochastic bilevel optimization","author":"Guo","year":"2021"},{"key":"ref51","first-page":"1","article-title":"SPIDER: Near-optimal non-convex optimization via stochastic path-integrated differential estimator","volume-title":"Proc. 32nd Int. Conf. Neural Inf. Process. Syst.","author":"Fang","year":"2018"},{"key":"ref52","first-page":"1","article-title":"SAGA: A fast incremental gradient method with support for non-strongly convex composite objectives","author":"Defazio","year":"2014","journal-title":"Proc. Int. Conf. Neural Inf. Process. Syst."},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.1109\/tccn.2018.2881442"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.1109\/tcomm.2006.884849"},{"key":"ref55","first-page":"1","article-title":"The lottery ticket hypothesis: Finding sparse, trainable neural networks","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Frankle","year":"2018"},{"key":"ref56","article-title":"Invariant risk minimization","author":"Arjovsky","year":"2019"},{"key":"ref57","first-page":"145","article-title":"Invariant risk minimization games","volume-title":"Proc. 37th Int. Conf. Mach. Learn.","author":"Ahuja","year":"2020"},{"key":"ref58","article-title":"What is missing in IRM training and evaluation? Challenges and solutions","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Zhang","year":"2023"},{"key":"ref59","doi-asserted-by":"publisher","DOI":"10.1016\/j.neunet.2019.01.012"},{"key":"ref60","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.1706.06083"},{"key":"ref61","first-page":"1","article-title":"Fast is better than free: Revisiting adversarial training","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Wong","year":"2020"},{"key":"ref62","first-page":"16,048","article-title":"Understanding and improving fast adversarial training","volume-title":"Proc. 34th Conf. Neural Inf. Process. Syst.","author":"Andriushchenko","year":"2020"}],"container-title":["IEEE Signal Processing Magazine"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/79\/10501955\/10502023.pdf?arnumber=10502023","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,5,9]],"date-time":"2024-05-09T17:44:52Z","timestamp":1715276692000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10502023\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,4,16]]},"references-count":62,"journal-issue":{"issue":"1"},"URL":"https:\/\/doi.org\/10.1109\/msp.2024.3358284","relation":{},"ISSN":["1053-5888","1558-0792"],"issn-type":[{"value":"1053-5888","type":"print"},{"value":"1558-0792","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,4,16]]}}}