{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,26]],"date-time":"2025-03-26T22:41:02Z","timestamp":1743028862472,"version":"3.40.3"},"publisher-location":"Singapore","reference-count":51,"publisher":"Springer Nature Singapore","isbn-type":[{"type":"print","value":"9789819795352"},{"type":"electronic","value":"9789819795369"}],"license":[{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-981-97-9536-9_3","type":"book-chapter","created":{"date-parts":[[2025,2,12]],"date-time":"2025-02-12T11:19:58Z","timestamp":1739359198000},"page":"39-56","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Curriculum Reinforcement Learning for\u00a0Tokamak Control"],"prefix":"10.1007","author":[{"given":"Samy","family":"Kerboua-Benlarbi","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"R\u00e9my","family":"Nouailletas","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Blaise","family":"Faugeras","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Philippe","family":"Moreau","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2025,2,13]]},"reference":[{"key":"3_CR1","unstructured":"Abdolmaleki, A., et\u00a0al.: Maximum a posteriori policy optimisation. arXiv preprint arXiv:1806.06920 (2018)"},{"key":"3_CR2","unstructured":"Abdolmaleki, A., et\u00a0al.: Relative entropy regularized policy iteration. arXiv preprint arXiv:1812.02256 (2018)"},{"issue":"4","key":"3_CR3","doi-asserted-by":"publisher","first-page":"559","DOI":"10.1109\/TCST.2005.847331","volume":"13","author":"KH Ang","year":"2005","unstructured":"Ang, K.H., Chong, G., Li, Y.: PID control system analysis, design, and technology. IEEE Trans. Control Syst. Technol. 13(4), 559\u2013576 (2005)","journal-title":"IEEE Trans. Control Syst. Technol."},{"key":"3_CR4","doi-asserted-by":"publisher","unstructured":"Ariola, M., Pironti, A.: Magnetic Control of Tokamak Plasmas. Springer, London (2008). https:\/\/doi.org\/10.1007\/978-1-84800-324-8","DOI":"10.1007\/978-1-84800-324-8"},{"key":"3_CR5","doi-asserted-by":"crossref","unstructured":"Bengio, Y., Louradour, J., Collobert, R., Weston, J.: Curriculum learning. In: Proceedings of the 26th Annual International Conference on Machine Learning, pp. 41\u201348. Association for Computing Machinery (2009)","DOI":"10.1145\/1553374.1553380"},{"issue":"6","key":"3_CR6","doi-asserted-by":"publisher","first-page":"063","DOI":"10.1088\/0029-5515\/55\/6\/063017","volume":"55","author":"C Bourdelle","year":"2015","unstructured":"Bourdelle, C., Artaud, J.F., et al.: West physics basis. Nucl. Fusion 55(6), 063\u2013017 (2015)","journal-title":"Nucl. Fusion"},{"key":"3_CR7","doi-asserted-by":"crossref","unstructured":"Brohan, A., Brown, N., et\u00a0al.: RT-1: robotics transformer for real-world control at scale (2023)","DOI":"10.15607\/RSS.2023.XIX.025"},{"key":"3_CR8","unstructured":"Bucalossi, J., et\u00a0al.: Operating a full tungsten actively cooled tokamak: overview of west first phase of operation. Nucl. Fusion 62(4), 042007 (2022)"},{"key":"3_CR9","unstructured":"Carpanese, F.: Development of free-boundary equilibrium and transport solvers for simulation and real-time interpretation of tokamak experiments, p.\u00a0238 (2021)"},{"key":"3_CR10","unstructured":"Char, I., Abbate, J., et\u00a0al.: Offline model-based reinforcement learning for tokamak control. In: Proceedings of Machine Learning Research, vol.\u00a0211, pp. 1357\u20131372. PMLR (2023)"},{"key":"3_CR11","doi-asserted-by":"crossref","unstructured":"De Tommasi, G., Dubbioso, S., et\u00a0al.: A RL-based vertical stabilization system for the east tokamak. In: 2022 American Control Conference (ACC), pp. 5328\u20135333 (2022)","DOI":"10.23919\/ACC53348.2022.9867499"},{"issue":"7897","key":"3_CR12","doi-asserted-by":"publisher","first-page":"414","DOI":"10.1038\/s41586-021-04301-9","volume":"602","author":"J Degrave","year":"2022","unstructured":"Degrave, J., Felici, F., et al.: Magnetic control of tokamak plasmas through deep reinforcement learning. Nature 602(7897), 414\u2013419 (2022)","journal-title":"Nature"},{"key":"3_CR13","doi-asserted-by":"crossref","unstructured":"Dubbioso, S., De Tommasi, G., et\u00a0al.: A deep reinforcement learning approach for vertical stabilization of tokamak plasmas. Fusion Eng. Des. 194, 113725 (2023)","DOI":"10.1016\/j.fusengdes.2023.113725"},{"key":"3_CR14","doi-asserted-by":"publisher","DOI":"10.1016\/j.fusengdes.2020.112020","volume":"160","author":"B Faugeras","year":"2020","unstructured":"Faugeras, B.: An overview of the numerical methods for tokamak plasma equilibrium computation implemented in the nice code. Fusion Eng. Des. 160, 112020 (2020)","journal-title":"Fusion Eng. Des."},{"key":"3_CR15","unstructured":"Fujimoto, S., Hoof, H., Meger, D.: Addressing function approximation error in actor-critic methods. In: International Conference on Machine Learning, pp. 1587\u20131596. PMLR (2018)"},{"key":"3_CR16","unstructured":"Goodfellow, I.J., Mirza, M., Xiao, D., Courville, A., Bengio, Y.: An empirical investigation of catastrophic forgetting in gradient-based neural networks (2015)"},{"key":"3_CR17","unstructured":"Graves, A., Bellemare, M.G., Menick, J., Munos, R., Kavukcuoglu, K.: Automated curriculum learning for neural networks (2017)"},{"key":"3_CR18","doi-asserted-by":"crossref","unstructured":"Grondman, I., Busoniu, L., Lopes, G.A.D., Babuska, R.: A survey of actor-critic reinforcement learning: standard and natural policy gradients. IEEE Trans. Syst. Man Cybern. Part C (Appl. Rev.) 42(6), 1291\u20131307 (2012)","DOI":"10.1109\/TSMCC.2012.2218595"},{"key":"3_CR19","unstructured":"Haarnoja, T., Zhou, A., Abbeel, P., Levine, S.: Soft actor-critic: off-policy maximum entropy deep reinforcement learning with a stochastic actor. In: International Conference on Machine Learning, pp. 1861\u20131870. PMLR (2018)"},{"key":"3_CR20","doi-asserted-by":"crossref","unstructured":"Han, D., Mulyana, B., Stankovic, V., Cheng, S.: A survey on deep reinforcement learning algorithms for robotic manipulation. Sensors 23(7) (2023)","DOI":"10.3390\/s23073762"},{"key":"3_CR21","doi-asserted-by":"publisher","unstructured":"Harutyunyan, A., Devlin, S., Vrancx, P., Nowe, A.: Expressing arbitrary reward functions as potential-based advice. Proc. AAAI Conf. Artif. Intell. 29(1) (2015). https:\/\/doi.org\/10.1609\/aaai.v29i1.9628","DOI":"10.1609\/aaai.v29i1.9628"},{"key":"3_CR22","doi-asserted-by":"crossref","unstructured":"Heumann, H.: A Galerkin method for the weak formulation of current diffusion and force balance in tokamak plasmas. J. Comput. Phys. 442 (2021)","DOI":"10.1016\/j.jcp.2021.110483"},{"key":"3_CR23","unstructured":"Hoffman, M.W., et\u00a0al.: Acme: a research framework for distributed reinforcement learning. arXiv preprint arXiv:2006.00979 (2020)"},{"key":"3_CR24","doi-asserted-by":"crossref","unstructured":"Ivanovic, B., Harrison, J., et\u00a0al.: BaRC: backward reachability curriculum for robotic reinforcement learning. In: 2019 International Conference on Robotics and Automation (ICRA), pp. 15\u201321. IEEE (2019)","DOI":"10.1109\/ICRA.2019.8794206"},{"key":"3_CR25","unstructured":"Kapturowski, S., Ostrovski, G., et\u00a0al.: Recurrent experience replay in distributed reinforcement learning. In: International Conference on Learning Representations (2018)"},{"key":"3_CR26","doi-asserted-by":"publisher","unstructured":"Kerboua-Benlarbi, S., Nouailletas, R., Faugeras, B., Nardon, E., Moreau, P.: Magnetic control of west plasmas through deep reinforcement learning. IEEE Trans. Plasma Sci., 1\u20130 (2024). https:\/\/doi.org\/10.1109\/TPS.2024.3377811","DOI":"10.1109\/TPS.2024.3377811"},{"issue":"6","key":"3_CR27","doi-asserted-by":"publisher","first-page":"4909","DOI":"10.1109\/TITS.2021.3054625","volume":"23","author":"BR Kiran","year":"2022","unstructured":"Kiran, B.R., Sobh, I., et al.: Deep reinforcement learning for autonomous driving: a survey. IEEE Trans. Intell. Transp. Syst. 23(6), 4909\u20134926 (2022)","journal-title":"IEEE Trans. Intell. Transp. Syst."},{"key":"3_CR28","unstructured":"Levine, S.: Reinforcement learning and control as probabilistic inference: tutorial and review (2018)"},{"key":"3_CR29","unstructured":"Lillicrap, T.P., Hunt, J.J., et\u00a0al.: Continuous control with deep reinforcement learning. arXiv preprint arXiv:1509.02971 (2015)"},{"key":"3_CR30","doi-asserted-by":"publisher","first-page":"21","DOI":"10.1016\/j.artint.2017.09.001","volume":"254","author":"P MacAlpine","year":"2018","unstructured":"MacAlpine, P., Stone, P.: Overlapping layered learning. Artif. Intell. 254, 21\u201343 (2018)","journal-title":"Artif. Intell."},{"issue":"1","key":"3_CR31","doi-asserted-by":"publisher","DOI":"10.1088\/0029-5515\/50\/1\/014004","volume":"50","author":"D Meade","year":"2009","unstructured":"Meade, D.: 50 years of fusion research. Nucl. Fusion 50(1), 014004 (2009). https:\/\/doi.org\/10.1088\/0029-5515\/50\/1\/014004","journal-title":"Nucl. Fusion"},{"key":"3_CR32","unstructured":"Mnih, V., Badia, A.P., et\u00a0al.: Asynchronous methods for deep reinforcement learning. In: International Conference on Machine Learning, pp. 1928\u20131937. PMLR (2016)"},{"issue":"1","key":"3_CR33","first-page":"7382","volume":"21","author":"S Narvekar","year":"2020","unstructured":"Narvekar, S., Peng, B., et al.: Curriculum learning for reinforcement learning domains: a framework and survey. J. Mach. Learn. Res. 21(1), 7382\u20137431 (2020)","journal-title":"J. Mach. Learn. Res."},{"key":"3_CR34","doi-asserted-by":"publisher","DOI":"10.1016\/j.fusengdes.2023.113582","volume":"192","author":"R Nouailletas","year":"2023","unstructured":"Nouailletas, R., Moreau, P., et al.: West plasma control system status. Fusion Eng. Des. 192, 113582 (2023)","journal-title":"Fusion Eng. Des."},{"key":"3_CR35","unstructured":"Rolnick, D., Ahuja, A., Schwarz, J., Lillicrap, T.P., Wayne, G.: Experience replay for continual learning. CoRR abs\/1811.11682 (2018)"},{"key":"3_CR36","unstructured":"Rusu, A.A., Colmenarejo, S.G., et\u00a0al.: Policy distillation (2016)"},{"key":"3_CR37","unstructured":"Rusu, A.A., Rabinowitz, N.C., et\u00a0al.: Progressive neural networks. CoRR abs\/1606.04671 (2016). http:\/\/arxiv.org\/abs\/1606.04671"},{"key":"3_CR38","unstructured":"Schulman, J., Levine, S., et\u00a0al.: Trust region policy optimization. In: International Conference on Machine Learning, pp. 1889\u20131897. PMLR (2015)"},{"key":"3_CR39","unstructured":"Schulman, J., Wolski, F., et\u00a0al.: Proximal policy optimization algorithms. arXiv preprint arXiv:1707.06347 (2017)"},{"key":"3_CR40","doi-asserted-by":"publisher","unstructured":"Seo, J., Kim, S., Jalalvand, A., et\u00a0al.: Avoiding fusion plasma tearing instability with deep reinforcement learning. Nature 626, 746\u2013751 (2024). https:\/\/doi.org\/10.1038\/s41586-024-07024-9","DOI":"10.1038\/s41586-024-07024-9"},{"issue":"10","key":"3_CR41","doi-asserted-by":"publisher","DOI":"10.1088\/1741-4326\/ac121b","volume":"61","author":"J Seo","year":"2021","unstructured":"Seo, J., Na, Y.S., et al.: Feedforward beta control in the KSTAR tokamak by deep reinforcement learning. Nucl. Fusion 61(10), 106010 (2021)","journal-title":"Nucl. Fusion"},{"issue":"6","key":"3_CR42","doi-asserted-by":"publisher","first-page":"1526","DOI":"10.1007\/s11263-022-01611-x","volume":"130","author":"P Soviany","year":"2022","unstructured":"Soviany, P., Ionescu, R.T., Rota, P., Sebe, N.: Curriculum learning: a survey. Int. J. Comput. Vision 130(6), 1526\u20131565 (2022)","journal-title":"Int. J. Comput. Vision"},{"key":"3_CR43","unstructured":"Stanley, K.O., Bryant, B.D., Miikkulainen, R.: Evolving neural network agents in the Nero video game (2005)"},{"key":"3_CR44","unstructured":"Sutton, R.S., Barto, A.G.: Reinforcement Learning: An Introduction. MIT Press (2018)"},{"key":"3_CR45","unstructured":"Tracey, B.D., Michi, A., The TCV Team, et\u00a0al.: Towards practical reinforcement learning for tokamak magnetic control. ArXiv abs\/2307.11546 (2023)"},{"issue":"6","key":"3_CR46","doi-asserted-by":"publisher","DOI":"10.1088\/1741-4326\/ab1571","volume":"59","author":"T Wakatsuki","year":"2019","unstructured":"Wakatsuki, T., Suzuki, T., et al.: Safety factor profile control with reduced central solenoid flux consumption during plasma current ramp-up phase using a reinforcement learning technique. Nucl. Fusion 59(6), 066022 (2019)","journal-title":"Nucl. Fusion"},{"issue":"4","key":"3_CR47","doi-asserted-by":"publisher","DOI":"10.1088\/1741-4326\/abe68d","volume":"61","author":"T Wakatsuki","year":"2021","unstructured":"Wakatsuki, T., Suzuki, T., Oyama, N., Hayashi, N.: Ion temperature gradient control using reinforcement learning technique. Nucl. Fusion 61(4), 046036 (2021)","journal-title":"Nucl. Fusion"},{"key":"3_CR48","doi-asserted-by":"publisher","unstructured":"Wesson, J.: Tokamaks 3rd edition. J. Plasma Phys. 71(3), 377 (2004). https:\/\/doi.org\/10.1017\/S0022377804003058","DOI":"10.1017\/S0022377804003058"},{"key":"3_CR49","unstructured":"Wo\u0142czyk, M., Zaj\u0105c, M., Pascanu, R., Kuci\u0144ski, \u0141., Mi\u0142o\u015b, P.: Disentangling transfer in continual reinforcement learning (2022)"},{"key":"3_CR50","unstructured":"Wu, Y., Tian, Y.: Training agent for first-person shooter game with actor-critic curriculum learning. In: International Conference on Learning Representations (2017)"},{"key":"3_CR51","doi-asserted-by":"publisher","unstructured":"Zhu, Z., Lin, K., Jain, A.K., Zhou, J.: Transfer learning in deep reinforcement learning: a survey. IEEE Trans. Pattern Anal. Mach. Intell. 45(11) (2023). https:\/\/doi.org\/10.1109\/TPAMI.2023.3292075","DOI":"10.1109\/TPAMI.2023.3292075"}],"container-title":["Lecture Notes in Computer Science","Artificial Intelligence for Research and Democracy"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-97-9536-9_3","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,2,12]],"date-time":"2025-02-12T11:20:23Z","timestamp":1739359223000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-97-9536-9_3"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025]]},"ISBN":["9789819795352","9789819795369"],"references-count":51,"URL":"https:\/\/doi.org\/10.1007\/978-981-97-9536-9_3","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2025]]},"assertion":[{"value":"13 February 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"The authors have no competing interests to declare that\u00a0are relevant to the content of this article.","order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Disclosure of Interests"}},{"value":"IJCAI","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Joint Conference on Artificial Intelligence","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Jeju Island","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Korea (Republic of)","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2 August 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"8 August 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"33","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ijcai2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/ijcai24.org\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}