{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,9,29]],"date-time":"2025-09-29T16:40:12Z","timestamp":1759164012265,"version":"3.44.0"},"publisher-location":"New York, NY, USA","reference-count":59,"publisher":"ACM","funder":[{"name":"European Research Council (ERC)","award":["101125586"],"award-info":[{"award-number":["101125586"]}]},{"name":"Agence Nationale de la Recherche (ANR)","award":["ANR-23-CE23-0035"],"award-info":[{"award-number":["ANR-23-CE23-0035"]}]},{"name":"St Andrews Global Doctoral Scholarship programme"},{"name":"European Cooperation in Science and Technology (COST)","award":["CA22137"],"award-info":[{"award-number":["CA22137"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,8,27]]},"DOI":"10.1145\/3729878.3746703","type":"proceedings-article","created":{"date-parts":[[2025,8,19]],"date-time":"2025-08-19T13:47:17Z","timestamp":1755611237000},"page":"190-201","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Multi-parameter Control for the (1+(\u03bb, \u03bb))-GA on OneMax via Deep Reinforcement Learning"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0004-7707-2069","authenticated-orcid":false,"given":"Tai","family":"Nguyen","sequence":"first","affiliation":[{"name":"University of St Andrews, St Andrews, United Kingdom, Sorbonne Universit\u00e9, CNRS, LIP6 Paris, France"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0000-0749-9519","authenticated-orcid":false,"given":"Phong","family":"Le","sequence":"additional","affiliation":[{"name":"University of St Andrews, St Andrews, United Kingdom"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4981-3227","authenticated-orcid":false,"given":"Carola","family":"Doerr","sequence":"additional","affiliation":[{"name":"Sorbonne Universit\u00e9, CNRS, LIP6, Paris, France"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2693-6953","authenticated-orcid":false,"given":"Nguyen","family":"Dang","sequence":"additional","affiliation":[{"name":"University of St Andrews, St Andrews, United Kingdom"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2025,8,27]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1613\/jair.1.13922"},{"key":"e_1_3_2_1_2_1","volume-title":"A Systematic Literature Review of Adaptive Parameter Control Methods for Evolutionary Algorithms. Comput. Surveys 49","author":"Aleti Aldeida","year":"2016","unstructured":"Aldeida Aleti and Irene Moser. 2016. A Systematic Literature Review of Adaptive Parameter Control Methods for Evolutionary Algorithms. Comput. Surveys 49 (2016), 56:1--56:35."},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1007\/S00453-022-00957-5"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1007\/S00453-023-01098-Z"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1145\/3299904.3340317"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1007\/S00453-021-00907-7"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1145\/1570256.1570342"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.3103\/S0146411621070208"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.3233\/FAIA200122"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1145\/3512290.3528846"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1145\/3071178.3071297"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1145\/3594805.3607127"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1145\/2959100.2959190"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1145\/3321707.3321725"},{"key":"e_1_3_2_1_15_1","unstructured":"Luc Devroye. 1972. The compound random search. Ph.D. dissertation Purdue Univ. West Lafayette IN."},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1145\/2739480.2754684"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1007\/s00453-017-0354-9"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"crossref","unstructured":"Benjamin Doerr and Carola Doerr. 2020. Theory of parameter control for discrete black-box optimization: Provable performance gains through dynamic parameter choices. Theory of Evolutionary Computation: Recent Developments in Discrete Optimization (2020) 271--321.","DOI":"10.1007\/978-3-030-29414-4_6"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.tcs.2014.11.028"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1007\/S00453-021-00854-3"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1007\/s00224-012-9438-8"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1145\/3205455.3205560"},{"key":"e_1_3_2_1_23_1","volume-title":"d.]. Deep reinforcement learning in large discrete action spaces. arXiv","author":"Dulac-Arnold G","year":"2015","unstructured":"G Dulac-Arnold, R Evans, H van Hasselt, P Sunehag, T Lillicrap, J Hunt, T Mann, T Weber, T Degris, and B Coppin. [n. d.]. Deep reinforcement learning in large discrete action spaces. arXiv 2015. arXiv preprint arXiv:1512.07679 ([n.d.])."},{"key":"e_1_3_2_1_24_1","volume-title":"Challenges of real-world reinforcement learning. arXiv preprint arXiv:1904.12901","author":"Dulac-Arnold Gabriel","year":"2019","unstructured":"Gabriel Dulac-Arnold, Daniel Mankowitz, and Todd Hester. 2019. Challenges of real-world reinforcement learning. arXiv preprint arXiv:1904.12901 (2019)."},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1109\/4235.771166"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1162\/EVCO_a_00148"},{"key":"e_1_3_2_1_27_1","volume-title":"ICML (Proceedings of Machine Learning Research","volume":"1865","author":"Haarnoja Tuomas","year":"2018","unstructured":"Tuomas Haarnoja, Aurick Zhou, Pieter Abbeel, and Sergey Levine. 2018. Soft Actor-Critic: Off-Policy Maximum Entropy Deep Reinforcement Learning with a Stochastic Actor.. In ICML (Proceedings of Machine Learning Research, Vol. 80), Jennifer G. Dy and Andreas Krause (Eds.). PMLR, 1856--1865. http:\/\/dblp.uni-trier.de\/db\/conf\/icml\/icml2018.html#HaarnojaZAL18"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D16-1189"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.11694"},{"key":"e_1_3_2_1_30_1","volume-title":"International conference on machine learning. PMLR, 9072--9098","author":"Hu Hao","year":"2022","unstructured":"Hao Hu, Yiqin Yang, Qianchuan Zhao, and Chongjie Zhang. 2022. On the role of discount factor in offline reinforcement learning. In International conference on machine learning. PMLR, 9072--9098."},{"key":"e_1_3_2_1_31_1","volume-title":"Reproducibility of benchmarked deep reinforcement learning tasks for continuous control. arXiv preprint arXiv:1708.04133","author":"Islam Riashat","year":"2017","unstructured":"Riashat Islam, Peter Henderson, Maziar Gomrokchi, and Doina Precup. 2017. Reproducibility of benchmarked deep reinforcement learning tasks for continuous control. arXiv preprint arXiv:1708.04133 (2017)."},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-29178-4_37"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1023\/B:NACO.0000023416.59689.4e"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1023\/B:NACO.0000023416.59689.4e"},{"volume-title":"Proceedings of the 3rd International Conference on Learning Representations, (ICLR'15)","author":"Diederik","key":"e_1_3_2_1_35_1","unstructured":"Diederik P. Kingma and Jimmy Ba. 2015. Adam: A Method for Stochastic Optimization. In Proceedings of the 3rd International Conference on Learning Representations, (ICLR'15), Yoshua Bengio and Yann LeCun (Eds.)."},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1007\/S00453-012-9616-8"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1007\/S42979-022-01203-Z"},{"key":"e_1_3_2_1_38_1","volume-title":"4th International Conference on Learning Representations, ICLR 2016, San Juan, Puerto Rico, May 2-4, 2016, Conference Track Proceedings, Yoshua Bengio and Yann LeCun (Eds.). http:\/\/arxiv.org\/abs\/1509","author":"Lillicrap Timothy P.","year":"2016","unstructured":"Timothy P. Lillicrap, Jonathan J. Hunt, Alexander Pritzel, Nicolas Heess, Tom Erez, Yuval Tassa, David Silver, and Daan Wierstra. 2016. Continuous control with deep reinforcement learning. In 4th International Conference on Learning Representations, ICLR 2016, San Juan, Puerto Rico, May 2-4, 2016, Conference Track Proceedings, Yoshua Bengio and Yann LeCun (Eds.). http:\/\/arxiv.org\/abs\/1509.02971"},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.orp.2016.09.002"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1145\/3638529.3653996"},{"key":"e_1_3_2_1_41_1","volume-title":"Playing atari with deep reinforcement learning. arXiv preprint arXiv:1312.5602","author":"Mnih Volodymyr","year":"2013","unstructured":"Volodymyr Mnih. 2013. Playing atari with deep reinforcement learning. arXiv preprint arXiv:1312.5602 (2013)."},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1145\/3712256.3726395"},{"key":"e_1_3_2_1_43_1","unstructured":"Tai Nguyen Phong Le Carola Doerr and Nguyen Dang. 2025. https:\/\/girhub.com\/taindp98\/OneMax-MPDAC.git."},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2020.3034141"},{"volume-title":"Friedrich Fromman Verlag (G\u00fcnther Holzboog KG)","author":"Rechenberg Ingo","key":"e_1_3_2_1_45_1","unstructured":"Ingo Rechenberg. 1973. Evolutionsstrategie. Friedrich Fromman Verlag (G\u00fcnther Holzboog KG), Stuttgart."},{"key":"e_1_3_2_1_46_1","volume-title":"Proximal policy optimization algorithms. arXiv preprint arXiv: 1707.06347","author":"Schulman John","year":"2017","unstructured":"John Schulman, Filip Wolski, Prafulla Dhariwal, Alec Radford, and Oleg Klimov. 2017. Proximal policy optimization algorithms. arXiv preprint arXiv: 1707.06347 (2017)."},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.1109\/TAC.1968.1098903"},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"publisher","DOI":"10.1145\/3321707.3321813"},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"publisher","DOI":"10.1609\/icaps.v31i1.16008"},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"publisher","DOI":"10.1109\/TNN.1998.712192"},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.11798"},{"key":"e_1_3_2_1_52_1","doi-asserted-by":"publisher","DOI":"10.1145\/3520304.3533983"},{"key":"e_1_3_2_1_53_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v30i1.10295"},{"key":"e_1_3_2_1_54_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-70068-2_2"},{"key":"e_1_3_2_1_55_1","doi-asserted-by":"crossref","unstructured":"Christopher JCH Watkins and Peter Dayan. 1992. Q-learning. Machine learning 8(1992) 279--292.","DOI":"10.1023\/A:1022676722315"},{"key":"e_1_3_2_1_56_1","first-page":"20147","article-title":"Multi-agent dynamic algorithm configuration","volume":"35","author":"Xue Ke","year":"2022","unstructured":"Ke Xue, Jiacheng Xu, Lei Yuan, Miqing Li, Chao Qian, Zongzhang Zhang, and Yang Yu. 2022. Multi-agent dynamic algorithm configuration. Advances in Neural Information Processing Systems 35 (2022), 20147--20161.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_57_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.ins.2023.119639"},{"key":"e_1_3_2_1_58_1","doi-asserted-by":"publisher","DOI":"10.1109\/TEVC.2022.3197298"},{"key":"e_1_3_2_1_59_1","volume-title":"Learn what not to learn: Action elimination with deep reinforcement learning. Advances in neural information processing systems 31","author":"Zahavy Tom","year":"2018","unstructured":"Tom Zahavy, Matan Haroush, Nadav Merlis, Daniel J Mankowitz, and Shie Mannor. 2018. Learn what not to learn: Action elimination with deep reinforcement learning. Advances in neural information processing systems 31 (2018)."}],"event":{"name":"FOGA '25: Foundations of Genetic Algorithms XVIII","sponsor":["SIGEVO ACM Special Interest Group on Genetic and Evolutionary Computation"],"location":"Leiden Netherlands","acronym":"FOGA '25"},"container-title":["Proceedings of the 18th ACM\/SIGEVO Conference on Foundations of Genetic Algorithms"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3729878.3746703","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,29]],"date-time":"2025-09-29T16:22:13Z","timestamp":1759162933000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3729878.3746703"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,8,27]]},"references-count":59,"alternative-id":["10.1145\/3729878.3746703","10.1145\/3729878"],"URL":"https:\/\/doi.org\/10.1145\/3729878.3746703","relation":{},"subject":[],"published":{"date-parts":[[2025,8,27]]},"assertion":[{"value":"2025-08-27","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}