{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,27]],"date-time":"2025-11-27T19:12:43Z","timestamp":1764270763907,"version":"3.46.0"},"reference-count":69,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"8","license":[{"start":{"date-parts":[[2025,8,1]],"date-time":"2025-08-01T00:00:00Z","timestamp":1754006400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2025,8,1]],"date-time":"2025-08-01T00:00:00Z","timestamp":1754006400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,8,1]],"date-time":"2025-08-01T00:00:00Z","timestamp":1754006400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62325101","62031001","62405014","92367204"],"award-info":[{"award-number":["62325101","62031001","62405014","92367204"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"name":"National Key Laboratory of Unmanned Aerial Vehicle Technology in NPU","award":["WR202403"],"award-info":[{"award-number":["WR202403"]}]},{"DOI":"10.13039\/501100012226","name":"Fundamental Research Funds for the Central Universities","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100012226","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Artif. Intell."],"published-print":{"date-parts":[[2025,8]]},"DOI":"10.1109\/tai.2025.3540799","type":"journal-article","created":{"date-parts":[[2025,2,11]],"date-time":"2025-02-11T13:33:20Z","timestamp":1739280800000},"page":"2148-2162","source":"Crossref","is-referenced-by-count":1,"title":["High-Dimensional Hyperparameter Optimization via Adjoint Differentiation"],"prefix":"10.1109","volume":"6","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-6185-5369","authenticated-orcid":false,"given":"Hongkun","family":"Dou","sequence":"first","affiliation":[{"name":"School of Astronautics, Beihang University, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0504-2555","authenticated-orcid":false,"given":"Hongjue","family":"Li","sequence":"additional","affiliation":[{"name":"School of Astronautics, Beihang University, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-3201-0832","authenticated-orcid":false,"given":"Jinyang","family":"Du","sequence":"additional","affiliation":[{"name":"School of Astronautics, Beihang University, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2351-4461","authenticated-orcid":false,"given":"Leyuan","family":"Fang","sequence":"additional","affiliation":[{"name":"College of Electrical and Information Engineering, Hunan University, Changsha, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8000-7736","authenticated-orcid":false,"given":"Qing","family":"Gao","sequence":"additional","affiliation":[{"name":"School of Automation Science and Electrical Engineering, Beihang University, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2871-8922","authenticated-orcid":false,"given":"Yue","family":"Deng","sequence":"additional","affiliation":[{"name":"Institute of Artificial Intelligence, Beihang University, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5224-9834","authenticated-orcid":false,"given":"Wen","family":"Yao","sequence":"additional","affiliation":[{"name":"Defense Innovation Institute, Chinese Academy of Military Science, Beijing, China"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-05318-5_1"},{"article-title":"Hyperparameter optimization: A spectral approach","year":"2017","author":"Hazan","key":"ref2"},{"key":"ref3","first-page":"3460","article-title":"Speeding up automatic hyperparameter optimization of deep neural networks by extrapolation of learning curves","volume-title":"Proc. 24th Int. Joint Conf. Artif. Intell.","author":"Domhan","year":"2015"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.5555\/2188385.2188395"},{"key":"ref5","first-page":"2951","article-title":"Practical Bayesian optimization of machine learning algorithms","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"25","author":"Snoek","year":"2012"},{"issue":"1","key":"ref6","first-page":"3098","article-title":"Tuning hyperparameters without grad students: Scalable and robust bayesian optimisation with dragonfly","volume":"21","author":"Kandasamy","year":"2020","journal-title":"J. Mach. Learn. Res."},{"key":"ref7","first-page":"1919","article-title":"Meta-weight-net: Learning an explicit mapping for sample weighting","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"32","author":"Shu","year":"2019"},{"key":"ref8","first-page":"3163","article-title":"AutoBalance: Optimized loss functions for imbalanced data","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"34","author":"Li","year":"2021"},{"key":"ref9","first-page":"1540","article-title":"Optimizing millions of hyperparameters by implicit differentiation","volume-title":"Proc. Int. Conf. Artif. Intell. Statist.","author":"Lorraine","year":"2020"},{"article-title":"GABO: Graph augmentations with bi-level optimization","year":"2021","author":"Chung","key":"ref10"},{"key":"ref11","first-page":"1","article-title":"Improving the accuracy of learning example weights for imbalance classification","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Liu","year":"2021"},{"article-title":"Auxiliary learning by implicit differentiation","year":"2020","author":"Navon","key":"ref12"},{"key":"ref13","first-page":"9983","article-title":"Optimizing data usage via differentiable rewards","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Wang","year":"2020"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19797-0_4"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1007\/s10479-007-0176-2"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2021.3132674"},{"key":"ref17","first-page":"1165","article-title":"Forward and reverse gradient-based hyperparameter optimization","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Franceschi","year":"2017"},{"key":"ref18","first-page":"1568","article-title":"Bilevel programming for hyperparameter optimization and meta-learning","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Franceschi","year":"2018"},{"key":"ref19","first-page":"3988","article-title":"Learning to learn by gradient descent by gradient descent","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"29","author":"Andrychowicz","year":"2016"},{"key":"ref20","first-page":"2952","article-title":"Scalable gradient-based tuning of continuous regularization hyperparameters","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Luketina","year":"2016"},{"key":"ref21","first-page":"737","article-title":"Hyperparameter optimization with approximate gradient","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Pedregosa","year":"2016"},{"article-title":"Bi-level physics-informed neural networks for PDE constrained optimization using Broyden\u2019s hypergradients","year":"2022","author":"Hao","key":"ref22"},{"article-title":"Online learning rate adaptation with hypergradient descent","year":"2017","author":"Baydin","key":"ref23"},{"article-title":"Understanding short-horizon bias in stochastic meta-optimization","year":"2018","author":"Wu","key":"ref24"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4614-5981-1"},{"key":"ref26","first-page":"2171","article-title":"Scalable Bayesian optimization using deep neural networks","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Snoek","year":"2015"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-93025-1_4"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00660"},{"article-title":"Population based training of neural networks","year":"2017","author":"Jaderberg","key":"ref29"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1145\/2834892.2834896"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/ICIP.2017.8297018"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/TNSM.2020.2980289"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/TEVC.2021.3060833"},{"key":"ref34","first-page":"2113","article-title":"Gradient-based hyperparameter optimization through reversible learning","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Maclaurin","year":"2015"},{"key":"ref35","first-page":"21725","article-title":"Delta-STN: Efficient bilevel optimization for neural networks using structured response Jacobians","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"33","author":"Bae","year":"2020"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1007\/3-540-49430-8_6"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1162\/089976600300015187"},{"key":"ref38","first-page":"1","article-title":"Automatic differentiation in machine learning: A survey","volume":"18","author":"Baydin","year":"2018","journal-title":"J. Mach. Learn. Res."},{"key":"ref39","first-page":"318","article-title":"Generic methods for optimization-based modeling","volume-title":"Proc Artif. Intell. Statist.","author":"Domke","year":"2012"},{"key":"ref40","first-page":"377","article-title":"Efficient multiple hyperparameter learning for log-linear models","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"20","author":"Foo","year":"2007"},{"key":"ref41","first-page":"113","article-title":"Meta-learning with implicit gradients","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"32","author":"Rajeswaran","year":"2019"},{"article-title":"DrMAD: distilling reverse-mode automatic differentiation for optimizing hyperparameters of deep neural networks","year":"2016","author":"Fu","key":"ref42"},{"article-title":"Self-tuning networks: Bilevel optimization of hyperparameters using structured best-response functions","year":"2019","author":"MacKay","key":"ref43"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1109\/ICPR48806.2021.9412010"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2024.127909"},{"article-title":"Principled penalty-based methods for bilevel reinforcement learning and RLHF","year":"2024","author":"Shen","key":"ref46"},{"key":"ref47","first-page":"54167","article-title":"Task-aware world model learning with meta weighting via bi-level optimization","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"36","author":"Yuan","year":"2024"},{"article-title":"Online hyperparameter meta-learning with hypergradient distillation","year":"2021","author":"Lee","key":"ref48"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1137\/S1064827501380630"},{"key":"ref50","first-page":"6572","article-title":"Neural ordinary differential equations","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"31","author":"Chen","year":"2018"},{"volume-title":"Numerical Methods for Initial Value Problems in Ordinary Differential Equations","year":"2014","author":"Fatunla","key":"ref51"},{"key":"ref52","first-page":"8459","article-title":"Learning to simulate complex physics with graph networks","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Sanchez-Gonzalez","year":"2020"},{"article-title":"Hamiltonian graph networks with ODE integrators","year":"2019","author":"Sanchez-Gonzalez","key":"ref53"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.1109\/29.7552"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00591"},{"key":"ref56","first-page":"5570","article-title":"Learning iterative reasoning through energy minimization","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Du","year":"2022"},{"key":"ref57","doi-asserted-by":"publisher","DOI":"10.1007\/BF01128757"},{"key":"ref58","first-page":"24430","article-title":"Adaptive inertia: Disentangling the effects of adaptive learning rate and momentum","volume-title":"Int. Conf. Mach. Learn.","author":"Xie","year":"2022"},{"key":"ref59","first-page":"18970","article-title":"Label-imbalanced and group-sensitive classification under overparameterization","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"34","author":"Kini","year":"2021"},{"key":"ref60","first-page":"1567","article-title":"Learning imbalanced datasets with label-distribution-aware margin loss","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"32","author":"Cao","year":"2019"},{"article-title":"Learning multiple layers of features from tiny images","year":"2009","author":"Krizhevsky","key":"ref61"},{"key":"ref62","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2009.5206848"},{"article-title":"Long-tail learning via logit adjustment","year":"2020","author":"Menon","key":"ref63"},{"key":"ref64","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"ref65","doi-asserted-by":"publisher","DOI":"10.4135\/9781071810118"},{"key":"ref66","first-page":"10789","article-title":"Searching to exploit memorization effect in learning with noisy labels","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Yao","year":"2020"},{"key":"ref67","article-title":"Self-paced learning for latent variable models","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"23","author":"Kumar","year":"2010"},{"key":"ref68","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2012.6248092"},{"key":"ref69","doi-asserted-by":"publisher","DOI":"10.1109\/ICCVW.2013.77"}],"container-title":["IEEE Transactions on Artificial Intelligence"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/9078688\/11106307\/10880096.pdf?arnumber=10880096","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,11,27]],"date-time":"2025-11-27T19:01:13Z","timestamp":1764270073000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10880096\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,8]]},"references-count":69,"journal-issue":{"issue":"8"},"URL":"https:\/\/doi.org\/10.1109\/tai.2025.3540799","relation":{},"ISSN":["2691-4581"],"issn-type":[{"type":"electronic","value":"2691-4581"}],"subject":[],"published":{"date-parts":[[2025,8]]}}}