{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,2]],"date-time":"2026-04-02T02:33:32Z","timestamp":1775097212602,"version":"3.50.1"},"reference-count":123,"publisher":"Informa UK Limited","issue":"1","license":[{"start":{"date-parts":[[2024,8,5]],"date-time":"2024-08-05T00:00:00Z","timestamp":1722816000000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"DOI":"10.13039\/501100002809","name":"Generalitat de Catalunya","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100002809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100003030","name":"AGAUR","doi-asserted-by":"publisher","award":["2019 DI 87"],"award-info":[{"award-number":["2019 DI 87"]}],"id":[{"id":"10.13039\/501100003030","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["www.tandfonline.com"],"crossmark-restriction":true},"short-container-title":["Applied Artificial Intelligence"],"published-print":{"date-parts":[[2024,12,31]]},"DOI":"10.1080\/08839514.2024.2383101","type":"journal-article","created":{"date-parts":[[2024,8,5]],"date-time":"2024-08-05T10:51:38Z","timestamp":1722855098000},"update-policy":"https:\/\/doi.org\/10.1080\/tandf_crossmark_01","source":"Crossref","is-referenced-by-count":26,"title":["Reinforcement Learning for Autonomous Process Control in Industry 4.0: Advantages and Challenges"],"prefix":"10.1080","volume":"38","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-4949-1800","authenticated-orcid":false,"given":"Nuria","family":"Nievas","sequence":"first","affiliation":[{"name":"Eurecat, Centre Tecnol\u00f2gic de Catalunya, Unit of Applied Artificial Intelligence, Lleida, Spain"},{"name":"Economy and Business Department, Universitat de Lleida, Lleida, Spain"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0444-1220","authenticated-orcid":false,"given":"Adela","family":"Pag\u00e8s-Bernaus","sequence":"additional","affiliation":[{"name":"Economy and Business Department, Universitat de Lleida, Lleida, Spain"},{"name":"AGROTECNIO-CERCA Center, Lleida, Spain"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3946-6433","authenticated-orcid":false,"given":"Francesc","family":"Bonada","sequence":"additional","affiliation":[{"name":"Eurecat, Centre Tecnol\u00f2gic de Catalunya, Unit of Applied Artificial Intelligence, Lleida, Spain"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4037-5192","authenticated-orcid":false,"given":"Llu\u00eds","family":"Echeverria","sequence":"additional","affiliation":[{"name":"Eurecat, Centre Tecnol\u00f2gic de Catalunya, Unit of Applied Artificial Intelligence, Lleida, Spain"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8363-9082","authenticated-orcid":false,"given":"Xavier","family":"Domingo","sequence":"additional","affiliation":[{"name":"Eurecat, Centre Tecnol\u00f2gic de Catalunya, Unit of Applied Artificial Intelligence, Lleida, Spain"}]}],"member":"301","published-online":{"date-parts":[[2024,8,5]]},"reference":[{"key":"e_1_3_4_2_1","doi-asserted-by":"publisher","DOI":"10.1613\/jair.1.13922"},{"key":"e_1_3_4_3_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.robot.2023.104399"},{"key":"e_1_3_4_4_1","doi-asserted-by":"publisher","DOI":"10.1109\/CCECE.2019.8861718"},{"key":"e_1_3_4_5_1","article-title":"Fitted Q-iteration in continuous action-space MDPs","volume":"20","author":"Antos A.","year":"2007","unstructured":"Antos, A., C. Szepesv\u00e1ri, and R. Munos. 2007. Fitted Q-iteration in continuous action-space MDPs. Advances in Neural Information Processing Systems 20. Curran Associates, Inc. https:\/\/proceedings.neurips.cc\/paper\/2007\/file\/da0d1111d2dc5d489242e60ebcbaf988-Paper.pdf.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_4_6_1","first-page":"1","volume-title":"2021 IEEE International Conference on Omni-Layer Intelligent Systems (COINS)","author":"Atae J.-T.","year":"2021","unstructured":"Atae, J.-T., and D. P. Gruber. 2021. Reinforcement-learning-based control of an industrial robotic arm for following a randomly-generated 2D-Trajectory. 2021 IEEE International Conference on Omni-Layer Intelligent Systems (COINS), Barcelona, Spain, 1\u20136. IEEE, August."},{"key":"e_1_3_4_7_1","doi-asserted-by":"publisher","DOI":"10.3182\/20140824-6-ZA-1003.02511"},{"key":"e_1_3_4_8_1","doi-asserted-by":"publisher","DOI":"10.1007\/s10462-020-09942-2"},{"key":"e_1_3_4_9_1","doi-asserted-by":"publisher","DOI":"10.3390\/app10196923"},{"key":"e_1_3_4_10_1","doi-asserted-by":"publisher","DOI":"10.35833\/MPCE.2020.000552"},{"key":"e_1_3_4_11_1","first-page":"1125","volume-title":"International Conference on Machine Learning","author":"Dai B.","year":"2018","unstructured":"Dai, B., A. Shaw, L. Li, L. Xiao, N. He, Z. Liu, J. Chen, and L. Song. 2018. Sbeed: Convergent reinforcement learning with nonlinear function approximation. International Conference on Machine Learning, 1125\u201334. https:\/\/proceedings.mlr.press\/v80\/dai18c.html."},{"key":"e_1_3_4_12_1","doi-asserted-by":"publisher","DOI":"10.1038\/s41586-021-04301-9"},{"key":"e_1_3_4_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/ACC.2012.6315022"},{"key":"e_1_3_4_14_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.ins.2023.03.019"},{"key":"e_1_3_4_15_1","doi-asserted-by":"publisher","DOI":"10.1007\/s10994-021-05961-4"},{"key":"e_1_3_4_16_1","doi-asserted-by":"publisher","DOI":"10.3390\/en16165920"},{"key":"e_1_3_4_17_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.jcp.2023.112381"},{"key":"e_1_3_4_18_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.procir.2017.12.213"},{"key":"e_1_3_4_19_1","first-page":"1587","volume-title":"International conference on machine learning","author":"Fujimoto S.","year":"2018","unstructured":"Fujimoto, S., H. Hoof, and D. Meger. 2018. Addressing function approximation error in actor-critic methods. International conference on machine learning, Stockholm, Sweden, 1587\u201396, PMLR."},{"issue":"1","key":"e_1_3_4_20_1","first-page":"1437","article-title":"A comprehensive survey on safe reinforcement learning","volume":"16","author":"Garc\u0131a J.","year":"2015","unstructured":"Garc\u0131a, J., and F. Fern\u00e1ndez. 2015. A comprehensive survey on safe reinforcement learning. Journal of Machine Learning Research 16 (1):1437\u201380. https:\/\/www.jmlr.org\/papers\/volume16\/garcia15a\/garcia15a.pdf.","journal-title":"Journal of Machine Learning Research"},{"key":"e_1_3_4_21_1","doi-asserted-by":"publisher","DOI":"10.1109\/TSMCC.2012.2218595"},{"key":"e_1_3_4_22_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.protcy.2014.09.007"},{"key":"e_1_3_4_23_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.mechatronics.2015.09.004"},{"key":"e_1_3_4_24_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.asoc.2019.105828"},{"key":"e_1_3_4_25_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.jii.2021.100287"},{"key":"e_1_3_4_26_1","first-page":"1861","volume-title":"International conference on machine learning, PMLR","author":"Haarnoja T.","year":"2018","unstructured":"Haarnoja, T., A. Zhou, P. Abbeel, and S. Levine. 2018. Soft actor-critic: Off-policy maximum entropy deep reinforcement learning with a stochastic actor. International conference on machine learning, PMLR, Stockholm, Sweden, 1861\u201370. July."},{"key":"e_1_3_4_27_1","unstructured":"Hallak A. D. Di Castro and S. Mannor. 2015. Contextual markov decision processes. http:\/\/arxiv.org\/abs\/1502.02259."},{"key":"e_1_3_4_28_1","doi-asserted-by":"publisher","DOI":"10.1111\/mafi.12382"},{"key":"e_1_3_4_29_1","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2020.3011351"},{"key":"e_1_3_4_30_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.ijepes.2022.108686"},{"key":"e_1_3_4_31_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.est.2023.108517"},{"key":"e_1_3_4_32_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.knosys.2020.106685"},{"key":"e_1_3_4_33_1","doi-asserted-by":"publisher","DOI":"10.1007\/s00170-018-2864-2"},{"key":"e_1_3_4_34_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.cpc.2023.108962"},{"key":"e_1_3_4_35_1","doi-asserted-by":"publisher","DOI":"10.1109\/TITS.2021.3054625"},{"key":"e_1_3_4_36_1","doi-asserted-by":"publisher","DOI":"10.1613\/jair.1.14174"},{"key":"e_1_3_4_37_1","doi-asserted-by":"publisher","DOI":"10.1007\/s10845-020-01612-y"},{"key":"e_1_3_4_38_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.procir.2019.02.101"},{"key":"e_1_3_4_39_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.procir.2019.03.041"},{"key":"e_1_3_4_40_1","first-page":"11784","article-title":"Stabilizing off-policy Q-learning via bootstrapping error reduction","volume":"32","author":"Kumar A.","year":"2019","unstructured":"Kumar, A., J. Fu, G. Tucker, and S. Levine. 2019. Stabilizing off-policy Q-learning via bootstrapping error reduction. Advances in Neural Information Processing Systems (NeurIPS) 32:11784\u201311794.","journal-title":"Advances in Neural Information Processing Systems (NeurIPS)"},{"key":"e_1_3_4_41_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.inffus.2022.03.003"},{"key":"e_1_3_4_42_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v31i1.10827"},{"key":"e_1_3_4_43_1","doi-asserted-by":"publisher","DOI":"10.1007\/s12599-014-0334-4"},{"key":"e_1_3_4_44_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.eswa.2021.116222"},{"key":"e_1_3_4_45_1","first-page":"1","article-title":"Offline reinforcement learning: Tutorial","author":"Levine S.","year":"2020","unstructured":"Levine, S., A. Kumar, G. Tucker, and J. Fu. 2020. Offline reinforcement learning: Tutorial. Review, and Perspectives on Open Problems 1\u201343. http:\/\/arxiv.org\/abs\/2005.01643.","journal-title":"Review, and Perspectives on Open Problems"},{"key":"e_1_3_4_46_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.jprocont.2022.05.006"},{"key":"e_1_3_4_47_1","doi-asserted-by":"publisher","DOI":"10.1613\/jair.301"},{"key":"e_1_3_4_48_1","doi-asserted-by":"publisher","DOI":"10.3390\/s20123515"},{"key":"e_1_3_4_49_1","doi-asserted-by":"publisher","DOI":"10.1109\/TSMC.2020.3042876"},{"key":"e_1_3_4_50_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.promfg.2020.08.081"},{"key":"e_1_3_4_51_1","doi-asserted-by":"publisher","DOI":"10.1007\/s13042-022-01639-y"},{"key":"e_1_3_4_52_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.rcim.2022.102488"},{"key":"e_1_3_4_53_1","doi-asserted-by":"publisher","DOI":"10.1007\/s00521-023-09112-9"},{"key":"e_1_3_4_54_1","first-page":"103803","article-title":"Adaptive laser welding control: A reinforcement learning approach","volume":"8","author":"Masinelli G.","year":"2020","unstructured":"Masinelli, G., T. Le-Quang, S. Zanoli, K. Wasmer, and S. A. Shevchik. 2020. Adaptive laser welding control: A reinforcement learning approach. Institute of Electrical and Electronics Engineers Access 8:103803\u201314. https:\/\/ieeexplore.ieee.org\/document\/9102251\/.","journal-title":"Institute of Electrical and Electronics Engineers Access"},{"key":"e_1_3_4_55_1","doi-asserted-by":"publisher","DOI":"10.1007\/s40194-023-01641-0"},{"key":"e_1_3_4_56_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.cag.2021.01.011"},{"key":"e_1_3_4_57_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.procir.2017.03.095"},{"key":"e_1_3_4_58_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.promfg.2018.07.143"},{"key":"e_1_3_4_59_1","first-page":"1928","article-title":"Asynchronous methods for deep reinforcement learning","author":"Mnih V.","year":"2016","unstructured":"Mnih, V., A. P. Badia, M. Mirza, A. Graves, T. Lillicrap, T. Harley, D. Silver, and K. Kavukcuoglu. 2016. Asynchronous methods for deep reinforcement learning. International Conference on Machine Learning, PMLR 1928\u201337. Jun. https:\/\/proceedings.mlr.press\/v48\/mniha16.html.","journal-title":"International Conference on Machine Learning, PMLR"},{"key":"e_1_3_4_60_1","doi-asserted-by":"publisher","DOI":"10.1038\/nature14236"},{"key":"e_1_3_4_61_1","doi-asserted-by":"publisher","DOI":"10.1109\/TSG.2018.2834219"},{"key":"e_1_3_4_62_1","doi-asserted-by":"publisher","DOI":"10.1177\/1059712313511648"},{"key":"e_1_3_4_63_1","first-page":"597","volume-title":"Algorithmic learning theory","author":"Modi A.","year":"2018","unstructured":"Modi, A., N. Jiang, S. Singh, and A. Tewari. 2018. Markov decision processes with continuous side information. In Algorithmic learning theory, ed. F. Janoos, M. Mohri, and K. Sridharan, 597\u2013618. Cambridge, MA, USA: PMLR."},{"key":"e_1_3_4_64_1","doi-asserted-by":"publisher","DOI":"10.1561\/2200000086"},{"key":"e_1_3_4_65_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.ifacol.2021.08.046"},{"key":"e_1_3_4_66_1","unstructured":"Nagabandi A. I. Clavera S. Liu R. S. Fearing P. Abbeel S. Levine and C. Finn. 2018. Learning to adapt in dynamic real-world environments through meta-reinforcement learning. https:\/\/arxiv.org\/abs\/1803.11347."},{"key":"e_1_3_4_67_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA57147.2024.10610103"},{"key":"e_1_3_4_68_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.compchemeng.2020.106886"},{"key":"e_1_3_4_69_1","doi-asserted-by":"publisher","DOI":"10.3390\/ma15144825"},{"key":"e_1_3_4_70_1","article-title":"Improving stability in deep reinforcement learning with weight averaging","author":"Nikishin E.","year":"2018","unstructured":"Nikishin, E., P. Izmailov, B. Athiwaratkun, D. Podoprikhin, T. Garipov, P. Shvechikov, D. Vetrov, and A. G. Wilson. 2018. Improving stability in deep reinforcement learning with weight averaging. Uncertainty in Artificial Intelligence Workshop on Uncertainty in Deep Learning. http:\/\/www.gatsby.ucl.ac.uk\/balaji\/udl-camera-ready\/UDL-24.pdf.","journal-title":"Uncertainty in Artificial Intelligence Workshop on Uncertainty in Deep Learning"},{"key":"e_1_3_4_71_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.neunet.2022.10.022"},{"key":"e_1_3_4_72_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.engappai.2018.11.006"},{"key":"e_1_3_4_73_1","doi-asserted-by":"publisher","DOI":"10.1613\/jair.1.13596"},{"key":"e_1_3_4_74_1","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2020.3042874"},{"key":"e_1_3_4_75_1","doi-asserted-by":"publisher","DOI":"10.1002\/9780470182963"},{"key":"e_1_3_4_76_1","first-page":"1","volume-title":"IEEE Transactions on Neural Networks and Learning Systems","author":"Prudencio R. F.","year":"2023","unstructured":"Prudencio, R. F., M. R. O. A. Maximo, and E. Luna Colombini. 2023. A survey on offline reinforcement learning: Taxonomy, review, and open problems. IEEE Transactions on Neural Networks and Learning Systems, 1\u20130. https:\/\/ieeexplore.ieee.org\/document\/10078377\/."},{"key":"e_1_3_4_77_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-57321-8_5"},{"key":"e_1_3_4_78_1","doi-asserted-by":"publisher","DOI":"10.1108\/CW-03-2020-0044"},{"key":"e_1_3_4_79_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.chemolab.2018.03.010"},{"key":"e_1_3_4_80_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.procir.2022.08.074"},{"key":"e_1_3_4_81_1","first-page":"19914","article-title":"Bridging the reality gap between virtual and physical environments through reinforcement learning","volume":"11","author":"Ranaweera M.","year":"2023","unstructured":"Ranaweera, M., and Q. H. Mahmoud. 2023. Bridging the reality gap between virtual and physical environments through reinforcement learning. Institute of Electrical and Electronics Engineers Access 11:19914\u201327. https:\/\/ieeexplore.ieee.org\/document\/10054009\/.","journal-title":"Institute of Electrical and Electronics Engineers Access"},{"key":"e_1_3_4_82_1","doi-asserted-by":"publisher","DOI":"10.3390\/pr9071084"},{"key":"e_1_3_4_83_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.apenergy.2019.03.027"},{"key":"e_1_3_4_84_1","doi-asserted-by":"publisher","DOI":"10.1155\/2019\/1591204"},{"key":"e_1_3_4_85_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11740-023-01209-3"},{"key":"e_1_3_4_86_1","volume-title":"International Conference on Learning Representations","author":"Schaul T.","year":"2016","unstructured":"Schaul, T., J. Quan, I. Antonoglou, and D. Silver. 2016. Prioritized experience replay. International Conference on Learning Representations, San Juan, Puerto Rico."},{"key":"e_1_3_4_87_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.promfg.2020.10.126"},{"key":"e_1_3_4_88_1","first-page":"1889","volume-title":"International conference on machine learning. PMLR","author":"Schulman J.","year":"2015","unstructured":"Schulman, J., S. Levine, P. Abbeel, M. Jordan, and P. Moritz. 2015. Trust region policy optimization. International conference on machine learning. PMLR, 1889\u201397. https:\/\/proceedings.mlr.press\/v37\/schulman15.html."},{"key":"e_1_3_4_89_1","unstructured":"Schulman J. F. Wolski P. Dhariwal A. Radford and O. Klimov. 2017. Proximal policy optimization algorithms. https:\/\/arxiv.org\/abs\/1707.06347."},{"key":"e_1_3_4_90_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIE.2019.2962465"},{"key":"e_1_3_4_91_1","doi-asserted-by":"publisher","DOI":"10.1126\/science.aar6404"},{"key":"e_1_3_4_92_1","first-page":"387","volume-title":"International conference on machine learning, PMLR","author":"Silver D.","year":"2014","unstructured":"Silver, D., G. Lever, N. Heess, T. Degris, D. Wierstra, and M. Riedmiller. 2014. Deterministic policy gradient algorithms. International conference on machine learning, PMLR, 387\u201395, January. https:\/\/proceedings.mlr.press\/v32\/silver14.html."},{"key":"e_1_3_4_93_1","doi-asserted-by":"publisher","DOI":"10.1007\/s10462-021-09997-9"},{"key":"e_1_3_4_94_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-27645-3_12"},{"key":"e_1_3_4_95_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.cirp.2018.04.041"},{"key":"e_1_3_4_96_1","volume-title":"Reinforcement learning: An introduction","author":"Sutton R. S.","year":"2018","unstructured":"Sutton, R. S., and A. G. Barto. 2018. Reinforcement learning: An introduction. Cambridge, MA, USA: MIT press."},{"key":"e_1_3_4_97_1","first-page":"12","article-title":"Policy gradient methods for reinforcement learning with function approximation","author":"Sutton R. S.","year":"1999","unstructured":"Sutton, R. S., D. McAllester, S. Singh, and Y. Mansour. 1999. Policy gradient methods for reinforcement learning with function approximation. Advances in Neural Information Processing Systems 12. https:\/\/proceedings.neurips.cc\/paper\/1999\/file\/464d828b85b0bed98e80ade0a5c43b0f-Paper.pdf.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_4_98_1","doi-asserted-by":"publisher","DOI":"10.1007\/s42524-020-0126-0"},{"key":"e_1_3_4_99_1","doi-asserted-by":"publisher","DOI":"10.1109\/WSC48552.2020.9384048"},{"key":"e_1_3_4_100_1","doi-asserted-by":"publisher","DOI":"10.1109\/MIE.2012.2207818"},{"key":"e_1_3_4_101_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.engappai.2023.107300"},{"key":"e_1_3_4_102_1","doi-asserted-by":"publisher","DOI":"10.1007\/s10462-022-10205-5"},{"key":"e_1_3_4_103_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v30i1.10295"},{"key":"e_1_3_4_104_1","first-page":"29252","volume-title":"Advances in neural information processing systems","author":"Wachi A.","year":"2023","unstructured":"Wachi, A., H. Wataru, S. Xun, and H. Kazumune. 2023. Safe exploration in reinforcement learning: A generalized formulation and algorithms. In Advances in neural information processing systems, ed. A. Oh, T. Neumann, A. Globerson, K. Saenko, M. Hardt, and S. Levine, vol. 36, 29252\u201372. NY, USA: Curran Associates, Inc."},{"key":"e_1_3_4_105_1","doi-asserted-by":"publisher","DOI":"10.3390\/app9030520"},{"key":"e_1_3_4_106_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.jii.2023.100471"},{"key":"e_1_3_4_107_1","doi-asserted-by":"publisher","DOI":"10.1007\/s10489-022-03326-5"},{"key":"e_1_3_4_108_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.procir.2018.03.212"},{"key":"e_1_3_4_109_1","doi-asserted-by":"publisher","DOI":"10.1007\/BF00992698"},{"key":"e_1_3_4_110_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.isatra.2021.06.007"},{"key":"e_1_3_4_111_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.actamat.2024.120017"},{"key":"e_1_3_4_112_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.ins.2013.08.037"},{"key":"e_1_3_4_113_1","first-page":"5997","volume-title":"International Conference on Artificial Intelligence and Statistics","author":"Yang J.","year":"2023","unstructured":"Yang, J., T. Dzanic, B. Petersen, J. Kudo, K. Mittal, V. Tomov, J.-S. Camier. 2023. Reinforcement learning for adaptive mesh refinement. International Conference on Artificial Intelligence and Statistics, Valencia, Spain, 5997\u20136014, PMLR."},{"key":"e_1_3_4_114_1","doi-asserted-by":"publisher","DOI":"10.1109\/IEDM.2013.6724540"},{"key":"e_1_3_4_115_1","doi-asserted-by":"publisher","DOI":"10.1061\/(ASCE)EY.1943-7897.0000519"},{"key":"e_1_3_4_116_1","doi-asserted-by":"publisher","DOI":"10.1145\/3477600"},{"key":"e_1_3_4_117_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPWRS.2014.2357079"},{"key":"e_1_3_4_118_1","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2018\/820"},{"key":"e_1_3_4_119_1","doi-asserted-by":"publisher","DOI":"10.1109\/CCDC49329.2020.9164440"},{"key":"e_1_3_4_120_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v36i4.20393"},{"issue":"1","key":"e_1_3_4_121_1","first-page":"213","article-title":"Deep reinforcement learning for power system: An overview","volume":"6","author":"Zhang Z.","year":"2019","unstructured":"Zhang, Z., D. Zhang, and R. C. Qiu. 2019. Deep reinforcement learning for power system: An overview. CSEE Journal of Power & Energy Systems 6 (1):213\u201325.","journal-title":"CSEE Journal of Power & Energy Systems"},{"key":"e_1_3_4_122_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.ress.2022.108541"},{"key":"e_1_3_4_123_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.anucene.2023.109685"},{"key":"e_1_3_4_124_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2023.3292075"}],"container-title":["Applied Artificial Intelligence"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/www.tandfonline.com\/doi\/pdf\/10.1080\/08839514.2024.2383101","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,12,16]],"date-time":"2024-12-16T16:23:12Z","timestamp":1734366192000},"score":1,"resource":{"primary":{"URL":"https:\/\/www.tandfonline.com\/doi\/full\/10.1080\/08839514.2024.2383101"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,8,5]]},"references-count":123,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2024,12,31]]}},"alternative-id":["10.1080\/08839514.2024.2383101"],"URL":"https:\/\/doi.org\/10.1080\/08839514.2024.2383101","relation":{},"ISSN":["0883-9514","1087-6545"],"issn-type":[{"value":"0883-9514","type":"print"},{"value":"1087-6545","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,8,5]]},"assertion":[{"value":"The publishing and review policy for this title is described in its Aims & Scope.","order":1,"name":"peerreview_statement","label":"Peer Review Statement"},{"value":"http:\/\/www.tandfonline.com\/action\/journalInformation?show=aimsScope&journalCode=uaai20","URL":"http:\/\/www.tandfonline.com\/action\/journalInformation?show=aimsScope&journalCode=uaai20","order":2,"name":"aims_and_scope_url","label":"Aim & Scope"},{"value":"2024-04-16","order":0,"name":"received","label":"Received","group":{"name":"publication_history","label":"Publication History"}},{"value":"2024-05-30","order":1,"name":"revised","label":"Revised","group":{"name":"publication_history","label":"Publication History"}},{"value":"2024-07-12","order":2,"name":"accepted","label":"Accepted","group":{"name":"publication_history","label":"Publication History"}},{"value":"2024-08-05","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}],"article-number":"2383101"}}