{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,17]],"date-time":"2026-01-17T20:00:06Z","timestamp":1768680006769,"version":"3.49.0"},"publisher-location":"Cham","reference-count":81,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031919787","type":"print"},{"value":"9783031919794","type":"electronic"}],"license":[{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-91979-4_8","type":"book-chapter","created":{"date-parts":[[2025,5,31]],"date-time":"2025-05-31T19:07:07Z","timestamp":1748718427000},"page":"75-96","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Improving Hyperparameter Optimization with\u00a0Checkpointed Model Weights"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-7277-8466","authenticated-orcid":false,"given":"Nikhil","family":"Mehta","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1255-6554","authenticated-orcid":false,"given":"Jonathan","family":"Lorraine","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0009-0003-3803-9760","authenticated-orcid":false,"given":"Steve","family":"Masson","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8934-0981","authenticated-orcid":false,"given":"Ramanathan","family":"Arunachalam","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0009-0009-3950-5456","authenticated-orcid":false,"given":"Zaid Pervaiz","family":"Bhat","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0009-0005-4580-7937","authenticated-orcid":false,"given":"James","family":"Lucas","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5608-9089","authenticated-orcid":false,"given":"Arun George","family":"Zachariah","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,5,12]]},"reference":[{"key":"8_CR1","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"437","DOI":"10.1007\/978-3-642-35289-8_26","volume-title":"Neural Networks: Tricks of the Trade","author":"Y Bengio","year":"2012","unstructured":"Bengio, Y.: Practical recommendations for gradient-based training of deep architectures. In: Montavon, G., Orr, G.B., M\u00fcller, K.-R. (eds.) Neural Networks: Tricks of the Trade. LNCS, vol. 7700, pp. 437\u2013478. Springer, Heidelberg (2012). https:\/\/doi.org\/10.1007\/978-3-642-35289-8_26"},{"key":"8_CR2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-05318-5","volume-title":"Automated Machine Learning: Methods, Systems, Challenges","author":"F Hutter","year":"2019","unstructured":"Hutter, F., Kotthoff, L., Vanschoren, J.: Automated Machine Learning: Methods, Systems, Challenges. Springer, Cham (2019). https:\/\/doi.org\/10.1007\/978-3-030-05318-5"},{"key":"8_CR3","unstructured":"Yu, T., Zhu, H.: Hyper-parameter optimization: a review of algorithms and applications. arXiv preprint arXiv:2003.05689 (2020)"},{"key":"8_CR4","unstructured":"Bergstra, J., Bengio, Y.: Random search for hyper-parameter optimization. J. Mach. Learn. Res. 13(2) (2012)"},{"key":"8_CR5","series-title":"The Springer Series on Challenges in Machine Learning","doi-asserted-by":"publisher","first-page":"3","DOI":"10.1007\/978-3-030-05318-5_1","volume-title":"Automated Machine Learning","author":"M Feurer","year":"2019","unstructured":"Feurer, M., Hutter, F.: Hyperparameter optimization. In: Hutter, F., Kotthoff, L., Vanschoren, J. (eds.) Automated Machine Learning. TSSCML, pp. 3\u201333. Springer, Cham (2019). https:\/\/doi.org\/10.1007\/978-3-030-05318-5_1"},{"key":"8_CR6","unstructured":"Snoek, J., Larochelle, H., Adams, R.P.: Practical Bayesian optimization of machine learning algorithms. In: Advances in Neural Information Processing Systems, vol. 25 (2012)"},{"key":"8_CR7","doi-asserted-by":"crossref","unstructured":"Thornton, C., Hutter, F., Hoos, H.H., Leyton-Brown, K.: Auto-WEKA: combined selection and hyperparameter optimization of classification algorithms. In: Proceedings of the 19th ACM SIGKDD International Conference on Knowledge Discovery and Data Mining, pp. 847\u2013855 (2013)","DOI":"10.1145\/2487575.2487629"},{"key":"8_CR8","series-title":"Springer Series in Statistics","doi-asserted-by":"publisher","DOI":"10.1007\/978-0-387-84858-7","volume-title":"The Elements of Statistical Learning","author":"T Hastie","year":"2009","unstructured":"Hastie, T., Tibshirani, R., Friedman, J.: The Elements of Statistical Learning. SSS, Springer, New York (2009). https:\/\/doi.org\/10.1007\/978-0-387-84858-7"},{"issue":"185","key":"8_CR9","first-page":"1","volume":"18","author":"L Li","year":"2018","unstructured":"Li, L., Jamieson, K., DeSalvo, G., Rostamizadeh, A., Talwalkar, A.: Hyperband: a novel bandit-based approach to hyperparameter optimization. J. Mach. Learn. Res. 18(185), 1\u201352 (2018)","journal-title":"J. Mach. Learn. Res."},{"key":"8_CR10","unstructured":"Wolf, T., et\u00a0al.: Transformers: state-of-the-art natural language processing. In: Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing: System Demonstrations, pp. 38\u201345 (2020)"},{"key":"8_CR11","unstructured":"Arango, S.P., Ferreira, F., Kadra, A., Hutter, F., Grabocka, J.: Quick-tune: quickly learning which pretrained model to finetune and how (2024)"},{"key":"8_CR12","unstructured":"You, K., Liu, Y., Wang, J., Long, M.: LogME: practical assessment of pre-trained models for transfer learning. In: Meila, M., Zhang, T. (eds.) Proceedings of the 38th International Conference on Machine Learning. Proceedings of Machine Learning Research, vol. 139, pp. 12133\u201312143. PMLR, 18\u201324 July 2021"},{"key":"8_CR13","unstructured":"Nguyen, C., Hassner, T., Seeger, M., Archambeau, C.: LEEP: a new measure to evaluate transferability of learned representations. In: International Conference on Machine Learning, pp. 7294\u20137305. PMLR (2020)"},{"key":"8_CR14","unstructured":"Chen, Y., et al.: Towards learning universal hyperparameter optimizers with transformers (2022)"},{"key":"8_CR15","unstructured":"Wistuba, M., Kadra, A., Grabocka, J.: Supervising the multi-fidelity race of hyperparameter configurations (2023)"},{"key":"8_CR16","unstructured":"Kingma, D.P., Ba, J.: Adam: a method for stochastic optimization. arXiv preprint arXiv:1412.6980 (2014)"},{"issue":"2","key":"8_CR17","doi-asserted-by":"publisher","DOI":"10.1115\/1.4052221","volume":"144","author":"L Wang","year":"2022","unstructured":"Wang, L., Yerramilli, S., Iyer, A., Apley, D., Zhu, P., Chen, W.: Scalable gaussian processes for data-driven design using big data with categorical factors. J. Mech. Des. 144(2), 021703 (2022)","journal-title":"J. Mech. Des."},{"key":"8_CR18","unstructured":"Antoran, J.: Scalable Bayesian inference in the era of deep learning: from Gaussian processes to deep neural networks. arXiv preprint arXiv:2404.19157 (2024)"},{"key":"8_CR19","unstructured":"Swersky, K., Snoek, J., Adams, R.P.: Multi-task Bayesian optimization. In: Advances in Neural Information Processing Systems, vol. 26 (2013)"},{"key":"8_CR20","unstructured":"Lim, D., Maron, H., Law, M.T., Lorraine, J., Lucas, J.: Graph metanetworks for processing diverse neural architectures (2023)"},{"key":"8_CR21","unstructured":"Unterthiner, T., Keysers, D., Gelly, S., Bousquet, O., Tolstikhin, I.: Predicting neural network accuracy from weights (2020)"},{"key":"8_CR22","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. CoRR, abs\/1512.03385 (2015)","DOI":"10.1109\/CVPR.2016.90"},{"key":"8_CR23","unstructured":"Dosovitskiy, A., et al.: An image is worth 16x16 words: transformers for image recognition at scale. CoRR, abs\/2010.11929 (2020)"},{"issue":"11","key":"8_CR24","doi-asserted-by":"publisher","first-page":"2278","DOI":"10.1109\/5.726791","volume":"86","author":"Y Lecun","year":"1998","unstructured":"Lecun, Y., Bottou, L., Bengio, Y., Haffner, P.: Gradient-based learning applied to document recognition. Proc. IEEE 86(11), 2278\u20132324 (1998)","journal-title":"Proc. IEEE"},{"key":"8_CR25","unstructured":"Zaheer, M., Kottur, S., Ravanbakhsh, S., Poczos, B., Salakhutdinov, R.R., Smola, A.J.: Deep sets. In: Advances in Neural Information Processing Systems, vol. 30 (2017)"},{"key":"8_CR26","doi-asserted-by":"crossref","unstructured":"Deng, J., Dong, W., Socher, R., Li, L.-J., Li, K., Fei-Fei, L.: ImageNet: a large-scale hierarchical image database. In: 2009 IEEE Conference on Computer Vision and Pattern Recognition, pp. 248\u2013255 (2009)","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"8_CR27","doi-asserted-by":"crossref","unstructured":"Kendall, M.G.: A new measure of rank correlation. Biometrika 30(1\/2), 81\u201393 (1938)","DOI":"10.1093\/biomet\/30.1-2.81"},{"key":"8_CR28","unstructured":"Zhou, A., et al.: Permutation equivariant neural functionals (2023)"},{"key":"8_CR29","unstructured":"Van\u00a0Rossum, G., Drake, F.L.\u00a0Jr.: Python reference manual. Centrum voor Wiskunde en Informatica Amsterdam (1995)"},{"key":"8_CR30","doi-asserted-by":"crossref","unstructured":"Oliphant, T.E.: Python for scientific computing. Comput. Sci. Eng. (2007)","DOI":"10.1109\/MCSE.2007.58"},{"key":"8_CR31","unstructured":"Paszke, A., et al.: Automatic differentiation in PyTorch. Openreview (2017)"},{"key":"8_CR32","unstructured":"Fey, M., Lenssen, J.E.: Fast graph representation learning with Pytorch geometric. arXiv preprint arXiv:1903.02428 (2019)"},{"key":"8_CR33","unstructured":"Gardner, J., Pleiss, G., Weinberger, K.Q., Bindel, D., Wilson, A.G.: GPyTorch: blackbox matrix-matrix Gaussian process inference with GPU acceleration. In: Advances in Neural Information Processing Systems, vol. 31 (2018)"},{"key":"8_CR34","doi-asserted-by":"crossref","unstructured":"Hunter, J.D.: Matplotlib: a 2D graphics environment. Comput. Sci. Eng. (2007)","DOI":"10.1109\/MCSE.2007.55"},{"key":"8_CR35","doi-asserted-by":"crossref","unstructured":"Feurer, M., Hutter, F.: Hyperparameter optimization. In: Automated Machine Learning: Methods, Systems, Challenges, pp. 3\u201333 (2019)","DOI":"10.1007\/978-3-030-05318-5_1"},{"issue":"2","key":"8_CR36","doi-asserted-by":"publisher","DOI":"10.1002\/widm.1484","volume":"13","author":"B Bischl","year":"2023","unstructured":"Bischl, B., et al.: Hyperparameter optimization: foundations, algorithms, best practices, and open challenges. Wiley Interdisc. Rev. Data Mining Knowl. Discov. 13(2), e1484 (2023)","journal-title":"Wiley Interdisc. Rev. Data Mining Knowl. Discov."},{"key":"8_CR37","doi-asserted-by":"publisher","DOI":"10.1016\/j.knosys.2020.106622","volume":"212","author":"X He","year":"2021","unstructured":"He, X., Zhao, K., Chu, X.: AutoML: a survey of the state-of-the-art. Knowl.-Based Syst. 212, 106622 (2021)","journal-title":"Knowl.-Based Syst."},{"key":"8_CR38","unstructured":"Lorraine, J., Anderson, N., Lee, C., De\u00a0Laroussilhe, Q., Hassen, M.: Task selection for AutoML system evaluation. arXiv preprint arXiv:2208.12754 (2022)"},{"key":"8_CR39","unstructured":"Vicol, P.A.: On bilevel optimization without full unrolls: methods and applications. Ph.D. thesis, University of Toronto (Canada) (2023)"},{"key":"8_CR40","unstructured":"Maclaurin, D., Duvenaud, D., Adams, R.: Gradient-based hyperparameter optimization through reversible learning. In: International Conference on Machine Learning, pp. 2113\u20132122. PMLR (2015)"},{"key":"8_CR41","unstructured":"Raghu, A., Lorraine, J., Kornblith, S., McDermott, M., Duvenaud, D.K.: Meta-learning to improve pre-training. In: Advances in Neural Information Processing Systems, vol. 34, pp. 23231\u201323244 (2021)"},{"key":"8_CR42","unstructured":"Franceschi, L., Donini, M., Frasconi, P., Pontil, M.: Forward and reverse gradient-based hyperparameter optimization. In: International Conference on Machine Learning, pp. 1165\u20131173. PMLR (2017)"},{"key":"8_CR43","unstructured":"Lorraine, J., Vicol, P., Duvenaud, D.: Optimizing millions of hyperparameters by implicit differentiation. In: International Conference on Artificial Intelligence and Statistics, pp. 1540\u20131552. PMLR (2020)"},{"key":"8_CR44","unstructured":"Lorraine, J.: Scalable nested optimization for deep learning. Ph.D. thesis, University of Toronto (Canada) (2024)"},{"key":"8_CR45","unstructured":"Lorraine, J., Duvenaud, D.: Stochastic hyperparameter optimization through hypernetworks. arXiv preprint arXiv:1802.09419 (2018)"},{"key":"8_CR46","unstructured":"Mackay, M., Vicol, P., Lorraine, J., Duvenaud, D., Grosse, R.: Self-tuning networks: bilevel optimization of hyperparameters using structured best-response functions. In: International Conference on Learning Representations (2018)"},{"key":"8_CR47","unstructured":"Bae, J., Grosse, R.B.: Delta-STN: efficient bilevel optimization for neural networks using structured response Jacobians. In: Advances in Neural Information Processing Systems, vol. 33, pp. 21725\u201321737 (2020)"},{"key":"8_CR48","unstructured":"Bae, J., et al.: Multi-rate VAE: train once, get the full rate-distortion curve. In: The Eleventh International Conference on Learning Representations (2022)"},{"key":"8_CR49","doi-asserted-by":"crossref","unstructured":"Elsken, T., Metzen, J.H., Hutter, F.: Neural architecture search: a survey. J. Mach. Learn. Res. 20(55), 1\u201321 (2019)","DOI":"10.1007\/978-3-030-05318-5_11"},{"key":"8_CR50","unstructured":"Adam, G., Lorraine, J.: Understanding neural architecture search techniques. arXiv preprint arXiv:1904.00438 (2019)"},{"key":"8_CR51","unstructured":"White, C., et al.: Neural architecture search: insights from 1000 papers (2023)"},{"key":"8_CR52","doi-asserted-by":"crossref","unstructured":"Amos, B.: Tutorial on amortized optimization for learning to optimize over continuous domains. arXiv e-prints, arXiv\u20132202 (2022)","DOI":"10.1561\/9781638282099"},{"issue":"9","key":"8_CR53","first-page":"5149","volume":"44","author":"T Hospedales","year":"2021","unstructured":"Hospedales, T., Antoniou, A., Micaelli, P., Storkey, A.: Meta-learning in neural networks: a survey. IEEE Trans. Pattern Anal. Mach. Intell. 44(9), 5149\u20135169 (2021)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"8_CR54","doi-asserted-by":"crossref","unstructured":"Lorraine, J., et al.: ATT3D: amortized text-to-3D object synthesis. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 17946\u201317956 (2023)","DOI":"10.1109\/ICCV51070.2023.01645"},{"key":"8_CR55","doi-asserted-by":"crossref","unstructured":"Xie, K., et al.: LATTE3D: large-scale amortized text-to-enhanced3D synthesis. arXiv preprint arXiv:2403.15385 (2024)","DOI":"10.1007\/978-3-031-72980-5_18"},{"key":"8_CR56","unstructured":"Bunne, C., Krause, A., Cuturi, M.: Supervised training of conditional Monge maps. In: Advances in Neural Information Processing Systems, vol. 35, pp. 6859\u20136872 (2022)"},{"key":"8_CR57","unstructured":"Zhang, M.R., Desai, N., Bae, J., Lorraine, J., Ba, J.: Using large language models for hyperparameter optimization. In: NeurIPS 2023 Foundation Models for Decision Making Workshop (2023)"},{"key":"8_CR58","unstructured":"Jamieson, K., Talwalkar, A.: Non-stochastic best arm identification and hyperparameter optimization. In: Artificial Intelligence and Statistics, pp. 240\u2013248. PMLR (2016)"},{"key":"8_CR59","unstructured":"Falkner, S., Klein, A., Hutter, F.: BOHB: robust and efficient hyperparameter optimization at scale. In: Dy, J., Krause, A. (eds.) Proceedings of the 35th International Conference on Machine Learning. Proceedings of Machine Learning Research, vol.\u00a080, pp. 1437\u20131446. PMLR, 10\u201315 July 2018"},{"key":"8_CR60","unstructured":"Poloczek, M., Wang, J., Frazier, P.: Multi-information source optimization. In: Advances in Neural Information Processing Systems, vol. 30 (2017)"},{"key":"8_CR61","unstructured":"Kandasamy, K., Dasarathy, G., Oliva, J.B., Schneider, J., P\u00f3czos, B.: Gaussian process bandit optimisation with multi-fidelity evaluations. In: Advances in Neural Information Processing Systems, vol. 29 (2016)"},{"key":"8_CR62","unstructured":"Kandasamy, K., Dasarathy, G., Schneider, J., P\u00f3czos, B.: Multi-fidelity Bayesian optimisation with continuous approximations. In: International Conference on Machine Learning, pp. 1799\u20131808. PMLR (2017)"},{"key":"8_CR63","unstructured":"Takeno, S., et al.: Multi-fidelity Bayesian optimization with max-value entropy search and its parallelization. In: International Conference on Machine Learning (2020)"},{"key":"8_CR64","unstructured":"Krause, A., Ong, C.: Contextual Gaussian process bandit optimization. In: Advances in Neural Information Processing Systems, vol. 24 (2011)"},{"key":"8_CR65","unstructured":"Bardenet, R., Brendel, M., K\u00e9gl, B., Sebag, M.: Collaborative hyperparameter tuning. In: International Conference on Machine Learning, pp. 199\u2013207. PMLR (2013)"},{"key":"8_CR66","doi-asserted-by":"crossref","unstructured":"Poloczek, M., Wang, J., Frazier, P.I.: Warm starting Bayesian optimization. In: 2016 Winter simulation conference (WSC), pp. 770\u2013781. IEEE (2016)","DOI":"10.1109\/WSC.2016.7822140"},{"key":"8_CR67","unstructured":"Wistuba, M., Grabocka, J.: Few-shot Bayesian optimization with deep Kernel surrogates. arXiv preprint arXiv:2101.07667 (2021)"},{"key":"8_CR68","unstructured":"Feurer, M., Letham, B., Bakshy, E.: Scalable meta-learning for Bayesian optimization using ranking-weighted gaussian process ensembles. In: AutoML Workshop at ICML, vol.\u00a07, p.\u00a05 (2018)"},{"key":"8_CR69","unstructured":"Yogatama, D., Mann, G.: Efficient transfer learning method for automatic hyperparameter tuning. In: Artificial Intelligence and Statistics, pp. 1077\u20131085. PMLR (2014)"},{"key":"8_CR70","unstructured":"Perrone, V., Jenatton, R., Seeger, M.W., Archambeau, C.: Scalable hyperparameter transfer learning. In: Advances in Neural Information Processing Systems, vol. 31 (2018)"},{"key":"8_CR71","unstructured":"Rothfuss, J., Fortuin, V., Josifoski, M., Krause, A.: PACOH: Bayes-optimal meta-learning with PAC-guarantees. In: International Conference on Machine Learning, pp. 9116\u20139126. PMLR (2021)"},{"key":"8_CR72","unstructured":"Volpp, M., et al.: Meta-learning acquisition functions for transfer learning in Bayesian optimization. arXiv preprint arXiv:1904.02642 (2019)"},{"key":"8_CR73","doi-asserted-by":"crossref","unstructured":"Feurer, M., Springenberg, J., Hutter, F.: Initializing Bayesian hyperparameter optimization via meta-learning. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol.\u00a029 (2015)","DOI":"10.1609\/aaai.v29i1.9354"},{"key":"8_CR74","doi-asserted-by":"crossref","unstructured":"Awad, N., Mallik, N., Hutter, F.: DEHB: evolutionary hyperband for scalable, robust and efficient hyperparameter optimization (2021)","DOI":"10.24963\/ijcai.2021\/296"},{"key":"8_CR75","series-title":"Communications in Computer and Information Science","doi-asserted-by":"publisher","first-page":"274","DOI":"10.1007\/978-3-030-92307-5_32","volume-title":"Neural Information Processing","author":"J Cai","year":"2021","unstructured":"Cai, J., Ou, Y., Li, X., Wang, H.: ST-NAS: efficient optimization of\u00a0joint neural architecture and\u00a0hyperparameter. In: Mantoro, T., Lee, M., Ayu, M.A., Wong, K.W., Hidayanto, A.N. (eds.) ICONIP 2021. CCIS, vol. 1516, pp. 274\u2013281. Springer, Cham (2021). https:\/\/doi.org\/10.1007\/978-3-030-92307-5_32"},{"key":"8_CR76","doi-asserted-by":"crossref","unstructured":"Hecht-Nielsen, R.: On the algebraic structure of feedforward network weight spaces. In: Advanced Neural Computers, pp. 129\u2013135. Elsevier (1990)","DOI":"10.1016\/B978-0-444-88400-8.50019-4"},{"key":"8_CR77","unstructured":"Peebles, W., Radosavovic, I., Brooks, T., Efros, A.A., Malik, J.: Learning to learn with generative models of neural network checkpoints. arXiv preprint arXiv:2209.12892 (2022)"},{"key":"8_CR78","unstructured":"Navon, A., Shamsian, A., Achituve, I., Fetaya, E., Chechik, G., Maron, H.: Equivariant architectures for learning in deep weight spaces. In: International Conference on Machine Learning, pp. 25790\u201325816. PMLR (2023)"},{"key":"8_CR79","series-title":"Adaptation Learning and Optimization","doi-asserted-by":"publisher","first-page":"131","DOI":"10.1007\/978-3-642-10701-6_6","volume-title":"Computational Intelligence in Expensive Optimization Problems","author":"D Ginsbourger","year":"2010","unstructured":"Ginsbourger, D., Le Riche, R., Carraro, L.: Kriging is well-suited to parallelize optimization. In: Tenne, Y., Goh, C.-K. (eds.) Computational Intelligence in Expensive Optimization Problems. ALO, vol. 2, pp. 131\u2013162. Springer, Heidelberg (2010). https:\/\/doi.org\/10.1007\/978-3-642-10701-6_6"},{"key":"8_CR80","unstructured":"Jaderberg, M., et\u00a0al.: Population based training of neural networks. arXiv preprint arXiv:1711.09846 (2017)"},{"key":"8_CR81","unstructured":"Paszke, A., et\u00a0al.: PyTorch ImageNet training example. https:\/\/github.com\/pytorch\/examples\/blob\/main\/imagenet\/main.py (2021)"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2024 Workshops"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-91979-4_8","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,5,31]],"date-time":"2025-05-31T19:07:30Z","timestamp":1748718450000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-91979-4_8"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025]]},"ISBN":["9783031919787","9783031919794"],"references-count":81,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-91979-4_8","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025]]},"assertion":[{"value":"12 May 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"The approach presented in this paper facilitates machine learning research and applications by making it easier to find performant hyperparameters. Designing more efficient HPO can help lower the cost of model training (e.g., time, compute, and environmental impact), making machine learning experiments easier for those in other disciplines. Overall, the benefits and risks are likely similar to those of other automated machine learning (AutoML) research.","order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethics Statement"}},{"value":"NVIDIA funded this work. Jonathan Lorraine received funding from student scholarships at the University of Toronto and the Vector Institute, which do not directly support this work.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Disclosure of Funding"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Milan","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 September 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4 October 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2024.ecva.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}