{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,14]],"date-time":"2026-05-14T19:01:40Z","timestamp":1778785300466,"version":"3.51.4"},"reference-count":232,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"12","license":[{"start":{"date-parts":[[2022,12,1]],"date-time":"2022-12-01T00:00:00Z","timestamp":1669852800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2022,12,1]],"date-time":"2022-12-01T00:00:00Z","timestamp":1669852800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2022,12,1]],"date-time":"2022-12-01T00:00:00Z","timestamp":1669852800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"name":"National Key R&amp;D Program of China","award":["2020YFB1313503"],"award-info":[{"award-number":["2020YFB1313503"]}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61922019"],"award-info":[{"award-number":["61922019"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100012226","name":"Fundamental Research Funds for the Central Universities","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100012226","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["11971220"],"award-info":[{"award-number":["11971220"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Shenzhen Science and Technology Program","award":["RCYX20200714114700072"],"award-info":[{"award-number":["RCYX20200714114700072"]}]},{"DOI":"10.13039\/501100006469","name":"Fundo para o Desenvolvimento das Ci\u00eancias e da Tecnologia","doi-asserted-by":"publisher","award":["061\/2020\/A2"],"award-info":[{"award-number":["061\/2020\/A2"]}],"id":[{"id":"10.13039\/501100006469","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61731018"],"award-info":[{"award-number":["61731018"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Tianyuan Fund for Mathematics","award":["12026606"],"award-info":[{"award-number":["12026606"]}]},{"name":"PKU-Baidu Fund Project","award":["2020BD006"],"award-info":[{"award-number":["2020BD006"]}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Pattern Anal. Mach. Intell."],"published-print":{"date-parts":[[2022,12,1]]},"DOI":"10.1109\/tpami.2021.3132674","type":"journal-article","created":{"date-parts":[[2021,12,6]],"date-time":"2021-12-06T21:01:33Z","timestamp":1638824493000},"page":"10045-10067","source":"Crossref","is-referenced-by-count":119,"title":["Investigating Bi-Level Optimization for Learning and Vision From a Unified Perspective: A Survey and Beyond"],"prefix":"10.1109","volume":"44","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-9554-0565","authenticated-orcid":false,"given":"Risheng","family":"Liu","sequence":"first","affiliation":[{"name":"DUT-RU International School of Information Science &amp; Engineering, Dalian University of Technology, Dalian, Liaoning, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0023-1269","authenticated-orcid":false,"given":"Jiaxin","family":"Gao","sequence":"additional","affiliation":[{"name":"DUT-RU International School of Information Science &amp; Engineering, Dalian University of Technology, Dalian, Liaoning, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6691-5612","authenticated-orcid":false,"given":"Jin","family":"Zhang","sequence":"additional","affiliation":[{"name":"Department of Mathematics, Southern University of Science and Technology, Shenzhen, Guangdong, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1294-8283","authenticated-orcid":false,"given":"Deyu","family":"Meng","sequence":"additional","affiliation":[{"name":"School of Mathematics and Statistics and Ministry of Education Key Lab of Intelligent Networks and Network Security, Xi&#x0027;an Jiaotong University, Xi&#x0027;an, Shaanxi, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1493-7569","authenticated-orcid":false,"given":"Zhouchen","family":"Lin","sequence":"additional","affiliation":[{"name":"Key Laboratory of Machine Perception (Ministry of Education), School of Artificial Intelligence, Peking University, Beijing, China"}]}],"member":"263","reference":[{"key":"ref170","first-page":"424","article-title":"Provable guarantees for gradient-based meta-learning","author":"balcan","year":"2019","journal-title":"Proc 36th Int Conf Mach Learn"},{"key":"ref172","first-page":"1997","article-title":"Neural architecture search: A survey","volume":"20","author":"elsken","year":"2019"},{"key":"ref171","article-title":"Meta-learning with adaptive layerwise metric and subspace","author":"lee","year":"2017","journal-title":"Proc Int Conf Mach Learn"},{"key":"ref174","first-page":"58","article-title":"Towards automatically-tuned neural networks","author":"mendoza","year":"2016","journal-title":"Proc Workshop Autom Mach Learn"},{"key":"ref173","article-title":"Probabilistic neural architecture search","author":"casale","year":"2019"},{"key":"ref176","article-title":"Auto-meta: Automated gradient based meta learner search","author":"kim","year":"2018"},{"key":"ref175","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i04.6084"},{"key":"ref178","article-title":"Boosting adversarial training with hypersphere embedding","author":"pang","year":"2020"},{"key":"ref177","doi-asserted-by":"publisher","DOI":"10.1109\/3DV.2019.00035"},{"key":"ref168","first-page":"3988","article-title":"Learning to learn by gradient descent by gradient descent","author":"andrychowicz","year":"2016","journal-title":"Proc 30th Int Conf Neural Inf Process Syst"},{"key":"ref169","article-title":"Meta-learning with warped gradient descent","author":"flennerhag","year":"2020"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1016\/j.ins.2013.09.021"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/9.58565"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1142\/S1469026820500200"},{"key":"ref32","first-page":"6305","article-title":"A generic first-order algorithmic framework for bi-level programming beyond lower-level singleton","author":"liu","year":"2020","journal-title":"Proc Int Conf Mach Learn"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-52119-6_10"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1007\/BF02191670"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1080\/02331934.2010.536231"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1016\/0167-6377(94)90086-8"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1142\/S0219622007002459"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1007\/s10898-011-9762-6"},{"key":"ref181","article-title":"Meta-gradient reinforcement learning","author":"xu","year":"2018"},{"key":"ref180","doi-asserted-by":"publisher","DOI":"10.1109\/EI247390.2019.9061889"},{"key":"ref185","doi-asserted-by":"crossref","first-page":"307","DOI":"10.1109\/LSP.2018.2889277","article-title":"Learning bilevel layer priors for single image rain streaks removal","volume":"26","author":"pan","year":"2019","journal-title":"IEEE Signal Process Lett"},{"key":"ref184","article-title":"Provable representation learning for imitation learning via bi-level optimization","author":"arora","year":"2020"},{"key":"ref183","article-title":"Learning to reinforcement learn","author":"wang","year":"2016"},{"key":"ref182","first-page":"617","article-title":"Model-based reinforcement learning via meta-policy optimization","author":"clavera","year":"2018","journal-title":"Proc Conf Robot Learn"},{"key":"ref189","article-title":"A generic descent aggregation framework for gradient-based bi-level optimization","author":"liu","year":"2021"},{"key":"ref188","doi-asserted-by":"publisher","DOI":"10.1109\/ICIP.2012.6467410"},{"key":"ref187","first-page":"4202","article-title":"Bilevel distance metric learning for robust image recognition","author":"xu","year":"2018","journal-title":"Proc 32nd Int Conf Neural Inf Process Syst"},{"key":"ref186","article-title":"A novel bilevel paradigm for image-to-image translation","author":"ma","year":"2019"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1137\/0913069"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1007\/s10589-015-9795-8"},{"key":"ref179","first-page":"2672","article-title":"Generative adversarial nets","author":"goodfellow","year":"2014","journal-title":"Proc 27th Int Conf Neural Inf Process Syst"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1007\/s11228-016-0371-x"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i05.6226"},{"key":"ref22","first-page":"8353","article-title":"Provably global convergence of actor-critic: A case for linear quadratic regulator with ergodic cost","author":"yang","year":"2019","journal-title":"Proc 33rd Int Conf Neural Inf Process Syst"},{"key":"ref21","article-title":"Connecting generative adversarial networks and actor-critic methods","author":"pfau","year":"2016"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/ICME46284.2020.9102782"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-18461-6_52"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1080\/10556780802102586"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2020\/101"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1007\/s10107-010-0342-1"},{"key":"ref51","article-title":"Forward and reverse gradient-based hyperparameter optimization","author":"franceschi","year":"2017"},{"key":"ref154","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01226"},{"key":"ref153","doi-asserted-by":"publisher","DOI":"10.23919\/ACC50511.2021.9483429"},{"key":"ref156","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-540-68860-0_2"},{"key":"ref155","article-title":"SOSELETO: A unified approach to transfer learning and training with noisy labels","author":"litany","year":"2018","journal-title":"Proc Int Conf Learn Representations"},{"key":"ref150","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP39728.2021.9414206"},{"key":"ref152","article-title":"Consistency analysis of bilevel data-driven learning in inverse problems","author":"chada","year":"2020"},{"key":"ref151","first-page":"14879","article-title":"Coresets via bilevel optimization for continual learning and streaming","author":"borsos","year":"2020","journal-title":"Proc Int Conf Neural Inf Process"},{"key":"ref146","article-title":"Bilevel integrative optimization for ill-posed inverse problems","author":"liu","year":"2019"},{"key":"ref147","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01042"},{"key":"ref148","doi-asserted-by":"publisher","DOI":"10.1109\/TRO.2020.2992987"},{"key":"ref149","first-page":"1690","article-title":"Learning data augmentation with online bilevel optimization for image classification","author":"mounsaveng","year":"2020","journal-title":"Proc IEEE Winter Conf Appl Comput Vis"},{"key":"ref59","article-title":"PC-DARTS: Partial channel connections for memory-efficient architecture search","author":"xu","year":"2019","journal-title":"Proc Int Conf Learn Representations"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.1109\/TGRS.2020.2976694"},{"key":"ref57","article-title":"How to train your MAML","author":"antoniou","year":"2019"},{"key":"ref56","first-page":"748","article-title":"Learning to learn without gradient descent by gradient descent","author":"chen","year":"2017","journal-title":"Proc 34th Int Conf Mach Learn"},{"key":"ref55","article-title":"Meta-SGD: Learning to learn quickly for few-shot learning","author":"li","year":"2017"},{"key":"ref54","article-title":"Multi-step model-agnostic meta-learning: Convergence and improved algorithms","author":"ji","year":"2020"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.1109\/ICMEW53276.2021.9455948"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-52119-6_6"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1016\/j.cam.2018.03.004"},{"key":"ref167","article-title":"Towards interpretable multi-task learning using bilevel programming","author":"alesiani","year":"2020"},{"key":"ref166","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.11838"},{"key":"ref165","article-title":"An overview of multi-task learning in deep neural networks","author":"ruder","year":"2017"},{"key":"ref164","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4615-5529-2_1"},{"key":"ref163","article-title":"Stochastic hyperparameter optimization through hypernetworks","author":"lorraine","year":"2018"},{"key":"ref162","article-title":"Bilevel approaches for learning of variational imaging models","volume":"18","author":"calatroni","year":"2017","journal-title":"Variational methods in Imaging and geometric control"},{"key":"ref161","first-page":"1147","article-title":"Cross-validation optimization for large scale structured classification kernel methods","volume":"9","author":"seeger","year":"2008","journal-title":"J Mach Learn Res"},{"key":"ref160","doi-asserted-by":"publisher","DOI":"10.1023\/A:1012450327387"},{"key":"ref4","article-title":"Bilevel programming problems","author":"dempe","year":"2015","journal-title":"Energy Systems"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.2307\/2581394"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-52119-6_5"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/TEVC.2017.2712906"},{"key":"ref8","first-page":"377","article-title":"Efficient multiple hyperparameter learning for log-linear models","author":"foo","year":"2008","journal-title":"Proc 20th Int Conf Neural Inf Process Syst"},{"key":"ref159","article-title":"Automatic differentiation of algorithms for machine learning","author":"baydin","year":"2014"},{"key":"ref7","first-page":"318","article-title":"Generic methods for optimization-based modeling","author":"domke","year":"2012","journal-title":"Proc Artif Intell Stat"},{"key":"ref49","author":"kunapuli","year":"2008","journal-title":"A Bilevel Optimization Approach to Machine Learning"},{"key":"ref157","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2014.11.025"},{"key":"ref9","article-title":"Self-tuning networks: Bilevel optimization of hyperparameters using structured best-response functions","author":"mackay","year":"2019","journal-title":"Proc Int Conf Learn Representations"},{"key":"ref158","first-page":"115","article-title":"Making a science of model search: Hyperparameter optimization in hundreds of dimensions for vision architectures","author":"bergstra","year":"2013","journal-title":"Proc 30th Int Conf Int Conf Mach Learn"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1016\/S0098-1354(03)00092-9"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.3390\/mca25040066"},{"key":"ref48","author":"aussel","year":"2018","journal-title":"Generalized Nash Equilibrium Problems Bilevel Programming and MPEC"},{"key":"ref47","first-page":"1","article-title":"Theoretical and numerical comparison of the Karush&#x2013;Kuhn&#x2013;Tucker and value function reformulations in bilevel optimization","volume":"78","author":"zemkoho","year":"2020","journal-title":"Comput Optim Appl"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1007\/s10107-012-0535-x"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1023\/A:1017571111854"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1109\/CEC.2018.8477763"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1016\/j.swevo.2018.06.004"},{"key":"ref73","article-title":"Far-HO: A bilevel programming package for hyperparameter optimization and meta-learning","author":"franceschi","year":"2018"},{"key":"ref72","first-page":"1540","article-title":"Optimizing millions of hyperparameters by implicit differentiation","author":"lorraine","year":"2020","journal-title":"Proc Int Conf Artif Intell Stat"},{"key":"ref71","first-page":"2113","article-title":"Gradient-based hyperparameter optimization through reversible learning","author":"maclaurin","year":"2015","journal-title":"Proc 32nd Int Conf Int Conf Mach Learn"},{"key":"ref70","doi-asserted-by":"publisher","DOI":"10.1007\/s13675-019-00115-7"},{"key":"ref76","first-page":"136","article-title":"OptNet: Differentiable optimization as a layer in neural networks","author":"amos","year":"2017","journal-title":"Proc 34th Int Conf Mach Learn"},{"key":"ref77","first-page":"21725","article-title":"Delta-STN: Efficient bilevel optimization for neural networks using structured response Jacobians","author":"bae","year":"2020","journal-title":"Proc Annu Conf Neural Inf Process Syst"},{"key":"ref74","article-title":"Hyperparameter optimization with approximate gradient","author":"pedregosa","year":"2016"},{"key":"ref75","article-title":"UFO-BLO: Unbiased first-order bilevel optimization","author":"likhosherstov","year":"2020"},{"key":"ref78","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00755"},{"key":"ref79","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00459"},{"key":"ref60","article-title":"GL-GAN: Adaptive global and local bilevel optimization model of image generation","author":"liu","year":"2020"},{"key":"ref62","article-title":"A two-timescale framework for bilevel optimization: Complexity analysis and application to actor-critic","author":"hong","year":"2020"},{"key":"ref61","first-page":"9779","article-title":"On the global optimality of model-agnostic meta-learning: Reinforcement learning and supervised learning","author":"wang","year":"2020","journal-title":"Proc Ind Eng Manage Sci"},{"key":"ref63","doi-asserted-by":"publisher","DOI":"10.1007\/BF02098169"},{"key":"ref64","doi-asserted-by":"publisher","DOI":"10.2307\/2583177"},{"key":"ref65","doi-asserted-by":"publisher","DOI":"10.1016\/j.sbspro.2013.05.039"},{"key":"ref66","doi-asserted-by":"publisher","DOI":"10.1007\/s11831-017-9216-5"},{"key":"ref67","doi-asserted-by":"publisher","DOI":"10.1007\/978-0-387-75714-8_6"},{"key":"ref68","article-title":"On differentiating parameterized Argmin and Argmax problems with application to bi-level optimization","author":"gould","year":"2016"},{"key":"ref69","doi-asserted-by":"publisher","DOI":"10.1137\/140968045"},{"key":"ref197","doi-asserted-by":"publisher","DOI":"10.1007\/BF00121269"},{"key":"ref198","doi-asserted-by":"publisher","DOI":"10.1016\/j.ejor.2014.02.057"},{"key":"ref199","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-52119-6_9"},{"key":"ref193","doi-asserted-by":"publisher","DOI":"10.1007\/s10957-011-9941-0"},{"key":"ref194","doi-asserted-by":"publisher","DOI":"10.1007\/s10589-020-00178-y"},{"key":"ref195","first-page":"6882","article-title":"A value-function-based interior-point method for non-convex bi-level optimization","author":"liu","year":"2021","journal-title":"Proc Int Conf Mach Learn"},{"key":"ref196","doi-asserted-by":"publisher","DOI":"10.1137\/120864015"},{"key":"ref95","article-title":"Es-MAML: Simple hessian-free meta learning","author":"song","year":"2020"},{"key":"ref190","first-page":"632","article-title":"Deep bilevel learning","author":"jenni","year":"2018","journal-title":"Proc Eur Conf Comput Vis"},{"key":"ref94","first-page":"1534","article-title":"Efficient meta learning via minibatch proximal update","author":"zhou","year":"2019","journal-title":"Proc 33rd Int Conf Neural Inf Process Syst"},{"key":"ref191","first-page":"3748","article-title":"On the iteration complexity of hypergradient computation","author":"grazzi","year":"2020","journal-title":"Proc Int Conf Mach Learn"},{"key":"ref93","article-title":"Reptile: A scalable metalearning algorithm","author":"nichol","year":"2018"},{"key":"ref192","doi-asserted-by":"publisher","DOI":"10.1080\/02331930701617551"},{"key":"ref92","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i05.6466"},{"key":"ref91","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00620"},{"key":"ref90","article-title":"Alpha MAML: Adaptive model-agnostic meta-learning","author":"behl","year":"2019"},{"key":"ref98","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00357"},{"key":"ref99","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i07.6887"},{"key":"ref96","first-page":"1566","article-title":"Learning-to-learn stochastic gradient descent with biased regularization","author":"denevi","year":"2019","journal-title":"Proc 36th Int Conf Mach Learn"},{"key":"ref97","article-title":"Gradient-based meta-learning with learned layerwise metric and subspace","author":"lee","year":"2018"},{"key":"ref82","article-title":"Meta-learning with latent embedding optimization","author":"rusu","year":"2018","journal-title":"Proc Int Conf Learn Representations"},{"key":"ref81","article-title":"Learning to optimize","author":"li","year":"2016"},{"key":"ref84","first-page":"113","article-title":"Meta-learning with implicit gradients","author":"rajeswaran","year":"2019","journal-title":"Proc Annu Conf Neural Inf Process Syst"},{"key":"ref83","first-page":"7693","article-title":"Fast context adaptation via meta-learning","author":"zintgraf","year":"2019","journal-title":"Proc Int Conf Mach Learn"},{"key":"ref80","article-title":"A simple neural attentive meta-learner","author":"mishra","year":"2018"},{"key":"ref89","article-title":"On first-order meta-learning algorithms","author":"nichol","year":"2018"},{"key":"ref85","article-title":"Meta-learning with differentiable closed-form solvers","author":"bertinetto","year":"2019"},{"key":"ref86","first-page":"3309","article-title":"Meta-curvature","author":"park","year":"2019","journal-title":"Proc Annu Conf Neural Inf Process Syst"},{"key":"ref87","article-title":"Optimization as a model for few-shot learning","author":"ravi","year":"2016","journal-title":"Proc Int Conf Learn Representations"},{"key":"ref88","first-page":"1126","article-title":"Model-agnostic meta-learning for fast adaptation of deep networks","author":"finn","year":"2017","journal-title":"Proc 34th Int Conf Mach Learn"},{"key":"ref200","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1007\/s40304-017-0103-z","article-title":"A proposal on machine learning via dynamical systems","volume":"5","author":"weinan","year":"2017","journal-title":"Commun Math Stat"},{"key":"ref101","first-page":"8366","article-title":"Transfer learning with neural automl","author":"wong","year":"2018","journal-title":"Proc 32nd Int Conf Neural Inf Process Syst"},{"key":"ref100","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP40776.2020.9053112"},{"key":"ref209","article-title":"Rapid learning or feature reuse? Towards understanding the effectiveness of MAML","author":"raghu","year":"2020","journal-title":"Proc Int Conf Learn Representations"},{"key":"ref203","doi-asserted-by":"publisher","DOI":"10.1145\/1330017.1330018"},{"key":"ref204","first-page":"2214","article-title":"The reversible residual network: Backpropagation without storing activations","author":"gomez","year":"2017","journal-title":"Proc Annu Conf Neural Inf Process Syst"},{"key":"ref201","first-page":"5181","article-title":"Beyond finite layer neural networks: Bridging deep architectures and numerical differential equations","author":"lu","year":"2018","journal-title":"Proc 35th Int Conf Mach Learn"},{"key":"ref202","first-page":"5595","article-title":"Automatic differentiation in machine learning: A survey","volume":"18","author":"baydin","year":"2017","journal-title":"J Mach Learn Res"},{"key":"ref207","first-page":"2952","article-title":"Scalable gradient-based tuning of continuous regularization hyperparameters","author":"luketina","year":"2016","journal-title":"Proc 33rd Int Conf Int Conf Mach Learn"},{"key":"ref208","article-title":"Online learning rate adaptation with hypergradient descent","author":"baydin","year":"2017"},{"key":"ref205","doi-asserted-by":"publisher","DOI":"10.1080\/10556788.2015.1025400"},{"key":"ref206","article-title":"Forward-mode automatic differentiation in julia","author":"revels","year":"2016"},{"key":"ref211","volume":"317","author":"rockafellar","year":"2009","journal-title":"Variational Analysis"},{"key":"ref210","article-title":"Convergence of meta-learning with task-specific adaptation over partial parameters","author":"ji","year":"2020"},{"key":"ref212","article-title":"An explicit descent method for bilevel convex optimization","volume":"14","author":"solodov","year":"2007","journal-title":"J Convex Anal"},{"key":"ref213","doi-asserted-by":"publisher","DOI":"10.1137\/16M105592X"},{"key":"ref214","first-page":"1082","article-title":"On the convergence theory of gradient-based model-agnostic meta-learning algorithms","author":"fallah","year":"2020","journal-title":"Proc SIATATS Conf"},{"key":"ref215","doi-asserted-by":"publisher","DOI":"10.1080\/02331934.2012.696641"},{"key":"ref216","doi-asserted-by":"publisher","DOI":"10.1090\/fic\/055\/10"},{"key":"ref217","first-page":"57","article-title":"On global search for pessimistic solution in bilevel problems","volume":"18","author":"malyshev","year":"2013","journal-title":"Int J Biomed Soft Comput Hum Sci Off J Biomed Fuzzy Syst Assoc"},{"key":"ref218","doi-asserted-by":"publisher","DOI":"10.1016\/j.ins.2016.08.033"},{"key":"ref219","doi-asserted-by":"publisher","DOI":"10.3390\/en14082095"},{"key":"ref220","doi-asserted-by":"publisher","DOI":"10.1287\/ijoc.2019.0927"},{"key":"ref222","doi-asserted-by":"publisher","DOI":"10.1007\/s10107-010-0342-1"},{"key":"ref221","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-52119-6_14"},{"key":"ref229","article-title":"BiAdam: Fast adaptive bilevel optimization methods","author":"huang","year":"2021"},{"key":"ref228","doi-asserted-by":"publisher","DOI":"10.1080\/10556788.2019.1619729"},{"key":"ref227","article-title":"Approximation methods for bilevel programming","author":"ghadimi","year":"2018"},{"key":"ref226","doi-asserted-by":"publisher","DOI":"10.1016\/j.omega.2017.06.007"},{"key":"ref225","doi-asserted-by":"publisher","DOI":"10.1007\/s10107-020-01582-2"},{"key":"ref224","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-52119-6_8"},{"key":"ref223","doi-asserted-by":"publisher","DOI":"10.1007\/BF01585934"},{"key":"ref127","first-page":"11044","article-title":"Generative adversarial imitation learning with neural network parameterization: Global optimality and convergence rate","author":"zhang","year":"2020","journal-title":"Proc 37th Int Conf Mach Learn"},{"key":"ref126","first-page":"4147","article-title":"Learner-aware teaching: Inverse reinforcement learning with preferences and constraints","author":"tschiatschek","year":"2019","journal-title":"Proc Int Conf Neural Inf Process"},{"key":"ref125","article-title":"Research on distributed renewable energy transaction decision-making based on multi-agent bilevel cooperative reinforcement learning","author":"chen","year":"2019","journal-title":"Proc Int Conf Electricity Distribution"},{"key":"ref124","first-page":"1881","article-title":"K-beam minimax: Efficient optimization for deep adversarial learning","author":"hamm","year":"2018","journal-title":"Proc 35th Int Conf Mach Learn"},{"key":"ref129","article-title":"GraphNas: Graph neural architecture search with reinforcement learning","author":"gao","year":"2019"},{"key":"ref128","first-page":"1566","article-title":"Hierarchical cooperative multi-agent reinforcement learning with skill discovery","author":"yang","year":"2020","journal-title":"Proc Auton Agents Multi-Agent Syst"},{"key":"ref130","doi-asserted-by":"publisher","DOI":"10.1109\/TSG.2019.2936142"},{"key":"ref133","article-title":"Generative adversarial imitation from observation","author":"torabi","year":"2018"},{"key":"ref134","first-page":"4061","article-title":"Taming MAML: Efficient unbiased meta-reinforcement learning","author":"liu","year":"2019","journal-title":"Proc 36th Int Conf Mach Learn"},{"key":"ref131","first-page":"4565","article-title":"Generative adversarial imitation learning","author":"ho","year":"2016","journal-title":"Proc Int Conf Neural Inf Process"},{"key":"ref132","first-page":"3812","article-title":"InfoGAIL: Interpretable imitation learning from visual demonstrations","author":"li","year":"2017","journal-title":"Proc Int Conf Neural Inf Process"},{"key":"ref232","first-page":"4055","article-title":"Image transformer","author":"parmar","year":"2018","journal-title":"Proc Int Conf Mach Learn"},{"key":"ref230","first-page":"5142","article-title":"Towards understanding knowledge distillation","author":"phuong","year":"2019","journal-title":"Proc 36 Int Conf Mach Learn"},{"key":"ref231","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2020.2992393"},{"key":"ref136","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2020.3043125"},{"key":"ref135","doi-asserted-by":"publisher","DOI":"10.1137\/120882706"},{"key":"ref138","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2016.2623487"},{"key":"ref137","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2020.3026114"},{"key":"ref139","first-page":"1187","article-title":"Learning end-to-end video classification with rank-pooling","author":"fernando","year":"2016","journal-title":"Proc 33rd Int Conf Int Conf Mach Learn"},{"key":"ref140","article-title":"Spectral inference networks: Unifying deep and spectral learning","author":"pfau","year":"2019","journal-title":"Proc Int Conf Learn Representations"},{"key":"ref141","article-title":"Bilevel parameter optimization for nonlocal image denoising models","author":"d\u2019elia","year":"2019"},{"key":"ref142","first-page":"111","article-title":"Learning intrinsic rewards as a bi-level optimization problem","author":"stadie","year":"2020","journal-title":"Proc Conf Uncertainty of Artificial Intelligence"},{"key":"ref143","article-title":"Learning deformable image registration from optimization: perspective, modules, bilevel training and beyond","author":"liu","year":"2021"},{"key":"ref2","author":"von stackelberg","year":"1952","journal-title":"The Theory of the Market Economy"},{"key":"ref144","article-title":"Bilevel continual learning","author":"pham","year":"2020"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-52119-6_20"},{"key":"ref145","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2020.3004733"},{"key":"ref109","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01210"},{"key":"ref108","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i04.6143"},{"key":"ref107","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01238"},{"key":"ref106","first-page":"493","article-title":"Asap: Architecture search, anneal and prune","author":"noy","year":"2020","journal-title":"Proc 23rd Int Conf Artif Intell Stat"},{"key":"ref105","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00138"},{"key":"ref104","article-title":"Efficient neural architecture search via parameter sharing","author":"pham","year":"2018"},{"key":"ref103","article-title":"SNAS: Stochastic neural architecture search","author":"xie","year":"2019"},{"key":"ref102","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01188"},{"key":"ref111","first-page":"6638","article-title":"DetNAS: Backbone search for object detection","author":"chen","year":"2019","journal-title":"Proc Int Conf Neural Inf Process"},{"key":"ref112","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00675"},{"key":"ref110","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00017"},{"key":"ref10","first-page":"1","article-title":"On lp-hyperparameter learning via bilevel nonsmooth optimization","volume":"22","author":"okuno","year":"2021"},{"key":"ref11","article-title":"A bridge between hyperparameter optimization and learning-to-learn","author":"franceschi","year":"2017"},{"key":"ref12","first-page":"1568","article-title":"Bilevel programming for hyperparameter optimization and meta-learning","author":"franceschi","year":"2018","journal-title":"Proc Int Conf Mach Learn"},{"key":"ref13","first-page":"1723","article-title":"Truncated back-propagation for bilevel optimization","author":"shaban","year":"2019","journal-title":"Proc 22nd Int Conf Artif Intell Stat"},{"key":"ref14","article-title":"DARTS: Differentiable architecture search","author":"liu","year":"2019","journal-title":"Proc Int Conf Learn Representations"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58555-6_8"},{"key":"ref16","article-title":"Towards fast adaptation of neural architectures with meta learning","author":"lian","year":"2019","journal-title":"Proc Int Conf Learn Representations"},{"key":"ref118","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01201"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2019.01.013"},{"key":"ref117","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01142"},{"key":"ref18","article-title":"Learning to defense by learning to attack","author":"jiang","year":"2018"},{"key":"ref119","article-title":"Differentiable neural input search for recommender systems","author":"cheng","year":"2020"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2021.3099829"},{"key":"ref114","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00418"},{"key":"ref113","first-page":"874","article-title":"Data: Differentiable architecture approximation","author":"chang","year":"2019","journal-title":"Proc Int Conf Neural Inf Process"},{"key":"ref116","doi-asserted-by":"publisher","DOI":"10.1145\/3394486.3403122"},{"key":"ref115","article-title":"Auto-GNN: Neural architecture search of graph neural networks","author":"zhou","year":"2019"},{"key":"ref120","article-title":"Unrolled generative adversarial networks","author":"metz","year":"2020"},{"key":"ref121","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i05.6490"},{"key":"ref122","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00572"},{"key":"ref123","first-page":"4880","article-title":"What is local optimality in nonconvex-nonconcave minimax optimization?","author":"jin","year":"2020","journal-title":"Proc Int Conf Mach Learn"}],"container-title":["IEEE Transactions on Pattern Analysis and Machine Intelligence"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/34\/9940445\/09638340.pdf?arnumber=9638340","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,12,5]],"date-time":"2022-12-05T22:38:15Z","timestamp":1670279895000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9638340\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,12,1]]},"references-count":232,"journal-issue":{"issue":"12"},"URL":"https:\/\/doi.org\/10.1109\/tpami.2021.3132674","relation":{},"ISSN":["0162-8828","2160-9292","1939-3539"],"issn-type":[{"value":"0162-8828","type":"print"},{"value":"2160-9292","type":"electronic"},{"value":"1939-3539","type":"electronic"}],"subject":[],"published":{"date-parts":[[2022,12,1]]}}}