{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,4]],"date-time":"2026-05-04T03:29:43Z","timestamp":1777865383519,"version":"3.51.4"},"reference-count":69,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,10,19]],"date-time":"2025-10-19T00:00:00Z","timestamp":1760832000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,10,19]],"date-time":"2025-10-19T00:00:00Z","timestamp":1760832000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100004377","name":"PolyU","doi-asserted-by":"publisher","award":["15224823"],"award-info":[{"award-number":["15224823"]}],"id":[{"id":"10.13039\/501100004377","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"NSFC","doi-asserted-by":"publisher","award":["62302246"],"award-info":[{"award-number":["62302246"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,10,19]]},"DOI":"10.1109\/iccv51701.2025.00252","type":"proceedings-article","created":{"date-parts":[[2026,4,29]],"date-time":"2026-04-29T19:45:49Z","timestamp":1777491949000},"page":"2620-2630","source":"Crossref","is-referenced-by-count":0,"title":["ULTHO: Ultra-Lightweight Yet Efficient Hyperparameter Optimization in Deep Reinforcement Learning"],"prefix":"10.1109","author":[{"given":"Mingqi","family":"Yuan","sequence":"first","affiliation":[{"name":"The Hong Kong Polytechnic University,Department of Computing,Hong Kong SAR,China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Bo","family":"Li","sequence":"additional","affiliation":[{"name":"The Hong Kong Polytechnic University,Department of Computing,Hong Kong SAR,China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xin","family":"Jin","sequence":"additional","affiliation":[{"name":"Ningbo Institute of Digital Twin, Eastern Institute of Technology,Ningbo,China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Wenjun","family":"Zeng","sequence":"additional","affiliation":[{"name":"Ningbo Institute of Digital Twin, Eastern Institute of Technology,Ningbo,China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref1","article-title":"Deep reinforcement learning at the edge of the statistical precipice","volume":"34","author":"Agarwal","year":"2021","journal-title":"Advances in neural information processing systems"},{"key":"ref2","first-page":"421","article-title":"Atari-5: Distilling the arcade learning environment down to five games","volume-title":"International Conference on Machine Learning","author":"Aitchison","year":"2023"},{"key":"ref3","article-title":"What matters for onpolicy deep actor-critic methods? a large-scale study","volume-title":"International Conference on Learning Representations","author":"Andrychowicz","year":"2021"},{"key":"ref4","first-page":"397","article-title":"Using confidence bounds for exploitationexploration trade-offs","author":"Auer","year":"2002","journal-title":"Journal of Machine Learning Research"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1023\/A:1013689704352"},{"key":"ref6","article-title":"Training a helpful and harmless assistant with reinforcement learning from human feedback","author":"Bai","year":"2022","journal-title":"arXiv preprint"},{"issue":"2","key":"ref7","first-page":"2","article-title":"A hierarchical two-tier approach to hyper-parameter optimization in reinforcement learning","volume":"19","author":"Cruz Barsce","year":"2020","journal-title":"SADIO Electronic Journal of Informatics and Operations Research"},{"key":"ref8","article-title":"Arlbench: Flexible and efficient benchmarking for hyperparameter optimization in reinforcement learning","author":"Becktepe","journal-title":"Seventeenth European Workshop on Reinforcement Learning"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1613\/jair.3912"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1512\/iumj.1957.6.56038"},{"issue":"2","key":"ref11","article-title":"Random search for hyper-parameter optimization","volume":"13","author":"Bergstra","year":"2012","journal-title":"Journal of machine learning research"},{"key":"ref12","article-title":"Deepseek 11m: Scaling opensource language models with longtermism","author":"Bi","year":"2024","journal-title":"arXiv preprint"},{"key":"ref13","first-page":"22234","article-title":"Evograd: Efficient gradient-based meta-learning and hyperparameter optimization","volume":"34","author":"Bohdal","year":"2021","journal-title":"Advances in neural information processing systems"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2021\/305"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.52202\/075280-3209"},{"key":"ref16","first-page":"2048","article-title":"Leveraging procedural generation to benchmark reinforcement learning","volume-title":"International conference on machine learning","author":"Cobbe","year":"2020"},{"key":"ref17","first-page":"2016","author":"Coumans","journal-title":"Pybullet"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2019.2956703"},{"key":"ref19","first-page":"9104","article-title":"Hyperparameters in reinforcement learning and how to tune them","volume-title":"International Conference on Machine Learning","author":"Eimer","year":"2023"},{"key":"ref20","article-title":"Implementation matters in deep rl: A case study on ppo and trpo","volume-title":"International Conference on Learning Representations","author":"Engstrom","year":"2020"},{"key":"ref21","article-title":"Bohb: Robust and efficient hyperparameter optimization at scale","volume-title":"International conference on machine learning","author":"Falkner","year":"2018"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1038\/s41586-022-05172-4"},{"key":"ref23","article-title":"Bootstrapped meta-learning","volume-title":"International Conference on Learning Representations","author":"Flennerhag","year":"2022"},{"key":"ref24","article-title":"Sample-efficient automated deep reinforcement learning","volume-title":"International Conference on Learning Representations","author":"Franke","year":"2021"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1038\/s41586-024-08032-5"},{"key":"ref26","first-page":"1861","article-title":"Soft actor-critic: Off-policy maximum entropy deep reinforcement learning with a stochastic actor","volume-title":"International conference on machine learning","author":"Haarnoja","year":"2018"},{"key":"ref27","first-page":"2681","article-title":"Provably efficient maximum entropy exploration","volume-title":"Proceedings of the International Conference on Machine Learning","author":"Hazan","year":"2019"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.11694"},{"key":"ref29","article-title":"Revisiting design choices in proximal policy optimization","author":"Hsu","year":"2020","journal-title":"arXiv preprint"},{"key":"ref30","article-title":"The 37im- plementation details of proximal policy optimization","author":"Huang","year":"2022","journal-title":"The ICLR Blog Track 2023"},{"issue":"274","key":"ref31","first-page":"1","article-title":"Cleanrl: High-quality single-file implementations of deep reinforcement learning algorithms","volume":"23","author":"Huang","year":"2022","journal-title":"Journal of Machine Learning Research"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/TCAD.2021.3077193"},{"key":"ref33","article-title":"Population based training of neural networks","author":"Jaderberg","year":"2017","journal-title":"arXiv preprint"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1126\/science.aau6249"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.02474"},{"key":"ref36","article-title":"Hyp-rl: Hyperparameter optimization by reinforcement learning","author":"Jomaa","journal-title":"arXiv preprint"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1016\/S0004-3702(98)00023-X"},{"key":"ref38","volume-title":"Pytorch implementations of reinforcement learning algorithms","author":"Kostrikov","year":"2018"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.2174\/1566524018666180611074856"},{"issue":"54","key":"ref40","first-page":"1","article-title":"Smac3: A versatile bayesian optimization package for hyperparameter optimization","volume":"23","author":"Lindauer","year":"2022","journal-title":"Journal of Machine Learning Research"},{"key":"ref41","article-title":"Deepseek-v2: A strong, economical, and efficient mixture-of-experts language model","author":"Liu","year":"2024","journal-title":"arXiv preprint"},{"key":"ref42","first-page":"2113","article-title":"Gradient-based hyperparameter optimization through reversible learning","volume-title":"International conference on machine learning","author":"Maclaurin","year":"2015"},{"issue":"7964","key":"ref43","doi-asserted-by":"crossref","first-page":"257","DOI":"10.1038\/s41586-023-06004-9","article-title":"Faster sorting algorithms discovered using deep reinforcement learning","volume":"618","author":"Daniel","year":"2023","journal-title":"Nature"},{"key":"ref44","article-title":"Craftax: A lightningfast benchmark for open-ended reinforcement learning","volume-title":"International Conference on Machine Learning. PMLR","author":"Matthews","year":"2024"},{"key":"ref45","first-page":"10798","article-title":"Gradient-based hyperparameter optimization over long horizons","volume":"34","author":"Micaelli","year":"2021","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1038\/nature14236"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.52202\/068431-2011"},{"key":"ref48","first-page":"17200","article-title":"Provably efficient online hyperparameter optimization with population-based bandits","volume":"33","author":"Parker-Holder","year":"2020","journal-title":"Advances in neural information processing systems"},{"key":"ref49","first-page":"15513","article-title":"Tuning mixed input hyperparameters on the fly for efficient population based autorl","volume":"34","author":"Parker-Holder","year":"2021","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref50","article-title":"Fast efficient hyperparameter tuning for policy gradient methods","author":"Paul","year":"2019","journal-title":"Advances in Neural Information Processing Systems, 32"},{"issue":"268","key":"ref51","first-page":"1","article-title":"Stablebaselines3: Reliable reinforcement learning implementations","volume":"22","author":"Raffin","year":"2021","journal-title":"Journal of Machine Learning Research"},{"key":"ref52","first-page":"5402","article-title":"Automatic data augmentation for generalization in reinforcement learning","volume":"34","author":"Raileanu","year":"2021","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref53","article-title":"High-dimensional continuous control using generalized advantage estimation","volume-title":"Proceedings of the International Conference on Learning Representations","author":"Schulman","year":"2015"},{"key":"ref54","article-title":"Proximal policy optimization algorithms","author":"Schulman","year":"2017","journal-title":"arXiv preprint"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.13140\/RG.2.2.18893.74727"},{"key":"ref56","article-title":"Practical bayesian optimization of machine learning algorithms","author":"Snoek","year":"2012","journal-title":"Advances in neural information processing systems, 25"},{"key":"ref57","article-title":"Deepmind control suite","author":"Tassa","year":"2018","journal-title":"arXiv preprint"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.1109\/ICCMA63715.2024.10843882"},{"key":"ref59","doi-asserted-by":"publisher","DOI":"10.1007\/s12530-020-09345-2"},{"key":"ref60","doi-asserted-by":"publisher","DOI":"10.1038\/s41586-019-1724-z"},{"key":"ref61","first-page":"14","article-title":"Bayesian generational population-based training","volume-title":"International conference on automated machine learning","author":"Wan","year":"2022"},{"key":"ref62","doi-asserted-by":"publisher","DOI":"10.11989\/JEST.1674-862X.80904120"},{"key":"ref63","first-page":"2402","article-title":"Metagradient reinforcement learning","volume-title":"Proceedings of the 32nd International Conference on Neural Information Processing Systems","author":"Xu","year":"2018"},{"key":"ref64","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2020.07.061"},{"key":"ref65","first-page":"40531","article-title":"Automatic intrinsic reward shaping for exploration in deep reinforcement learning","volume-title":"International Conference on Machine Learning","author":"Yuan","year":"2023"},{"key":"ref66","article-title":"Adaptive data exploitation in deep reinforcement learning","author":"Yuan","year":"2025","journal-title":"arXiv preprint"},{"key":"ref67","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v39i28.35378"},{"key":"ref68","first-page":"20913","article-title":"A self-tuning actor-critic algorithm","volume":"33","author":"Zahavy","year":"2020","journal-title":"Advances in neural information processing systems"},{"key":"ref69","first-page":"4015","article-title":"On the importance of hyperparameter optimization for model-based reinforcement learning","volume-title":"International Conference on Artificial Intelligence and Statistics","author":"Zhang","year":"2021"}],"event":{"name":"2025 IEEE\/CVF International Conference on Computer Vision (ICCV)","location":"Honolulu, HI, USA","start":{"date-parts":[[2025,10,19]]},"end":{"date-parts":[[2025,10,25]]}},"container-title":["2025 IEEE\/CVF International Conference on Computer Vision (ICCV)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11443115\/11443287\/11445661.pdf?arnumber=11445661","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,4,30]],"date-time":"2026-04-30T06:23:47Z","timestamp":1777530227000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11445661\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,19]]},"references-count":69,"URL":"https:\/\/doi.org\/10.1109\/iccv51701.2025.00252","relation":{},"subject":[],"published":{"date-parts":[[2025,10,19]]}}}