{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,7]],"date-time":"2026-04-07T16:17:06Z","timestamp":1775578626915,"version":"3.50.1"},"reference-count":71,"publisher":"Springer Science and Business Media LLC","issue":"5","license":[{"start":{"date-parts":[[2024,5,2]],"date-time":"2024-05-02T00:00:00Z","timestamp":1714608000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,5,2]],"date-time":"2024-05-02T00:00:00Z","timestamp":1714608000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/100000183","name":"United States Department of Defense | United States Army | U.S. Army Research, Development and Engineering Command | Army Research Office","doi-asserted-by":"publisher","award":["W911NF-19-1-0233"],"award-info":[{"award-number":["W911NF-19-1-0233"]}],"id":[{"id":"10.13039\/100000183","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000183","name":"United States Department of Defense | United States Army | U.S. Army Research, Development and Engineering Command | Army Research Office","doi-asserted-by":"publisher","award":["W911NF-19-1-0233"],"award-info":[{"award-number":["W911NF-19-1-0233"]}],"id":[{"id":"10.13039\/100000183","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000006","name":"United States Department of Defense | United States Navy | Office of Naval Research","doi-asserted-by":"publisher","award":["N00014-21-1-2706"],"award-info":[{"award-number":["N00014-21-1-2706"]}],"id":[{"id":"10.13039\/100000006","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000006","name":"United States Department of Defense | United States Navy | Office of Naval Research","doi-asserted-by":"publisher","award":["N00014-21-1-2706"],"award-info":[{"award-number":["N00014-21-1-2706"]}],"id":[{"id":"10.13039\/100000006","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Nat Mach Intell"],"DOI":"10.1038\/s42256-024-00829-3","type":"journal-article","created":{"date-parts":[[2024,5,2]],"date-time":"2024-05-02T10:03:33Z","timestamp":1714644213000},"page":"504-514","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":16,"title":["Maximum diffusion reinforcement learning"],"prefix":"10.1038","volume":"6","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-3781-0934","authenticated-orcid":false,"given":"Thomas A.","family":"Berrueta","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3095-8856","authenticated-orcid":false,"given":"Allison","family":"Pinosky","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2262-8176","authenticated-orcid":false,"given":"Todd D.","family":"Murphey","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,5,2]]},"reference":[{"key":"829_CR1","doi-asserted-by":"publisher","first-page":"414","DOI":"10.1038\/s41586-021-04301-9","volume":"602","author":"J Degrave","year":"2022","unstructured":"Degrave, J. et al. Magnetic control of tokamak plasmas through deep reinforcement learning. 
Nature 602, 414\u2013419 (2022).","journal-title":"Nature"},{"key":"829_CR2","doi-asserted-by":"publisher","first-page":"eabb9764","DOI":"10.1126\/scirobotics.abb9764","volume":"5","author":"D-O Won","year":"2020","unstructured":"Won, D.-O., M\u00fcller, K.-R. & Lee, S.-W. An adaptive deep reinforcement learning framework enables curling robots with human-like performance in real-world conditions. Sci. Robot. 5, eabb9764 (2020).","journal-title":"Sci. Robot."},{"key":"829_CR3","unstructured":"Irpan, A. Deep reinforcement learning doesn\u2019t work yet. Sorta Insightful www.alexirpan.com\/2018\/02\/14\/rl-hard.html (2018)."},{"key":"829_CR4","doi-asserted-by":"crossref","unstructured":"Henderson, P. et al. Deep reinforcement learning that matters. In Proc. 32nd AAAI Conference on Artificial Intelligence (eds McIlraith, S. & Weinberger, K.) 3207\u20133214 (AAAI, 2018).","DOI":"10.1609\/aaai.v32i1.11694"},{"key":"829_CR5","doi-asserted-by":"publisher","first-page":"698","DOI":"10.1177\/0278364920987859","volume":"40","author":"J Ibarz","year":"2021","unstructured":"Ibarz, J. et al. How to train your robot with deep reinforcement learning: lessons we have learned. Int. J. Rob. Res. 40, 698\u2013721 (2021).","journal-title":"Int. J. Rob. Res."},{"key":"829_CR6","unstructured":"Lillicrap, T. P. et al. Proc. 4th International Conference on Learning Representations (ICLR, 2016)."},{"key":"829_CR7","unstructured":"Haarnoja, T., Zhou, A., Abbeel, P. & Levine, S. Soft actor-critic: off-policy maximum entropy deep reinforcement learning with a stochastic actor. In Proc. 35th International Conference on Machine Learning (eds Dy, J. & Krause, A.) 1861\u20131870 (PMLR, 2018)."},{"key":"829_CR8","unstructured":"Plappert, M. et al. Proc. 6th International Conference on Learning Representations (ICLR, 2018)."},{"key":"829_CR9","doi-asserted-by":"publisher","first-page":"293","DOI":"10.1007\/BF00992699","volume":"8","author":"L-J Lin","year":"1992","unstructured":"Lin, L.-J. Self-improving reactive agents based on reinforcement learning, planning and teaching. Mach. Learn. 8, 293\u2013321 (1992).","journal-title":"Mach. Learn."},{"key":"829_CR10","unstructured":"Schaul, T., Quan, J., Antonoglou, I. & Silver, D. Proc. 4th International Conference on Learning Representations (ICLR, 2016)."},{"key":"829_CR11","unstructured":"Andrychowicz, M. et al. Hindsight experience replay. In Proc. Advances in Neural Information Processing Systems 30 (eds Guyon, I. et al.) 5049\u20135059 (Curran Associates, 2017)."},{"key":"829_CR12","unstructured":"Zhang, S. & Sutton, R. S. A deeper look at experience replay. Preprint at https:\/\/arxiv.org\/abs\/1712.01275 (2017)."},{"key":"829_CR13","unstructured":"Wang, Z. et al. Proc. 5th International Conference on Learning Representations (ICLR, 2017)."},{"key":"829_CR14","doi-asserted-by":"crossref","unstructured":"Hessel, M. et al. Rainbow: combining improvements in deep reinforcement learning. In Proc. 32nd AAAI Conference on Artificial Intelligence (eds McIlraith, S. and Weinberger, K.) 3215\u20133222 (AAAI Press, 2018).","DOI":"10.1609\/aaai.v32i1.11796"},{"key":"829_CR15","unstructured":"Fedus, W. et al. Revisiting fundamentals of experience replay. In Proc. 37th International Conference on Machine Learning (eds Daum\u00e9 III, H. & Singh, A.) 3061\u20133071 (JMLR.org, 2020)."},{"key":"829_CR16","doi-asserted-by":"publisher","first-page":"529","DOI":"10.1038\/nature14236","volume":"518","author":"V Mnih","year":"2015","unstructured":"Mnih, V. et al. 
Human-level control through deep reinforcement learning. Nature 518, 529\u2013533 (2015).","journal-title":"Nature"},{"key":"829_CR17","unstructured":"Ziebart, B. D., Maas, A. L., Bagnell, J. A. & Dey, A. K. Maximum entropy inverse reinforcement learning. In Proc. 23rd AAAI Conference on Artificial Intelligence (ed. Cohn, A.) 1433\u20131438 (AAAI, 2008)."},{"key":"829_CR18","unstructured":"Ziebart, B. D., Bagnell, J. A. & Dey, A. K. Modeling interaction via the principle of maximum causal entropy. In Proc. 27th International Conference on Machine Learning (eds F\u00fcrnkranz, J. & Joachims, T.) 1255\u20131262 (Omnipress, 2010)."},{"key":"829_CR19","unstructured":"Ziebart, B. D. Modeling Purposeful Adaptive Behavior with the Principle of Maximum Causal Entropy. PhD thesis, Carnegie Mellon Univ. (2010)."},{"key":"829_CR20","doi-asserted-by":"publisher","first-page":"11478","DOI":"10.1073\/pnas.0710743106","volume":"106","author":"E Todorov","year":"2009","unstructured":"Todorov, E. Efficient computation of optimal actions. Proc. Natl Acad. Sci. USA 106, 11478\u201311483 (2009).","journal-title":"Proc. Natl Acad. Sci. USA"},{"key":"829_CR21","doi-asserted-by":"crossref","unstructured":"Toussaint, M. Robot trajectory optimization using approximate inference. In Proc. 26th International Conference on Machine Learning (eds Bottou, L. & Littman, M.) 1049\u20131056 (ACM, 2009).","DOI":"10.1145\/1553374.1553508"},{"key":"829_CR22","doi-asserted-by":"crossref","unstructured":"Rawlik, K., Toussaint, M. & Vijayakumar, S. On stochastic optimal control and reinforcement learning by approximate inference. In Proc. Robotics: Science and Systems VIII (eds Roy, N. et al.) 353\u2013361 (MIT, 2012).","DOI":"10.15607\/RSS.2012.VIII.045"},{"key":"829_CR23","unstructured":"Levine, S. & Koltun, V. Guided policy search. In Proc. 30th International Conference on Machine Learning (eds Dasgupta, S. & McAllester, D.) 1\u20139 (JMLR.org, 2013)."},{"key":"829_CR24","unstructured":"Haarnoja, T., Tang, H., Abbeel, P. & Levine, S. Reinforcement learning with deep energy-based policies. In Proc. 34th International Conference on Machine Learning (eds Precup, D. & Teh, Y. W.) 1352\u20131361 (JMLR.org, 2017)."},{"key":"829_CR25","doi-asserted-by":"crossref","unstructured":"Haarnoja, T. et al. Learning to walk via deep reinforcement learning. In Proc. Robotics: Science and Systems XV (eds Bicchi, A. et al.) (RSS, 2019).","DOI":"10.15607\/RSS.2019.XV.011"},{"key":"829_CR26","unstructured":"Eysenbach, B. & Levine, S. Proc. 10th International Conference on Learning Representations (ICLR, 2022)."},{"key":"829_CR27","doi-asserted-by":"crossref","unstructured":"Chen, M. et al. Top-K off-policy correction for a REINFORCE recommender system. In Proc. 12th ACM International Conference on Web Search and Data Mining (eds Bennett, P. N. & Lerman, K.) 456\u2013464 (ACM, 2019).","DOI":"10.1145\/3289600.3290999"},{"key":"829_CR28","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3543846","volume":"55","author":"MM Afsar","year":"2022","unstructured":"Afsar, M. M., Crump, T. & Far, B. Reinforcement learning based recommender systems: a survey. ACM Comput. Surv. 55, 1\u201338 (2022).","journal-title":"ACM Comput. Surv."},{"key":"829_CR29","doi-asserted-by":"publisher","first-page":"110335","DOI":"10.1016\/j.knosys.2023.110335","volume":"264","author":"X Chen","year":"2023","unstructured":"Chen, X., Yao, L., McAuley, J., Zhou, G. & Wang, X. Deep reinforcement learning in recommender systems: a survey and new perspectives. Knowl. 
Based Syst. 264, 110335 (2023).","journal-title":"Knowl. Based Syst."},{"key":"829_CR30","unstructured":"Sontag, E. D. Mathematical Control Theory: Deterministic Finite Dimensional Systems (Springer, 2013)."},{"key":"829_CR31","doi-asserted-by":"crossref","unstructured":"Hespanha, J. P. Linear Systems Theory 2nd edn (Princeton Univ. Press, 2018).","DOI":"10.23943\/9781400890088"},{"key":"829_CR32","doi-asserted-by":"publisher","first-page":"1101","DOI":"10.1049\/piee.1969.0206","volume":"116","author":"D Mitra","year":"1969","unstructured":"Mitra, D. W-matrix and the geometry of model equivalence and reduction. Proc. Inst. Electr. Eng. 116, 1101\u20131106 (1969).","journal-title":"Proc. Inst. Electr. Eng."},{"key":"829_CR33","doi-asserted-by":"publisher","first-page":"633","DOI":"10.1007\/s10208-019-09426-y","volume":"20","author":"S Dean","year":"2020","unstructured":"Dean, S., Mania, H., Matni, N., Recht, B. & Tu, S. On the sample complexity of the linear quadratic regulator. Found. Comput. Math. 20, 633\u2013679 (2020).","journal-title":"Found. Comput. Math."},{"key":"829_CR34","doi-asserted-by":"crossref","unstructured":"Tsiamis, A. & Pappas, G. J. Linear systems can be hard to learn. In Proc. 60th IEEE Conference on Decision and Control (ed. Prandini, M.) 2903\u20132910 (IEEE, 2021).","DOI":"10.1109\/CDC45484.2021.9682778"},{"key":"829_CR35","unstructured":"Tsiamis, A., Ziemann, I. M., Morari, M., Matni, N. & Pappas, G. J. Learning to control linear systems can be hard. In Proc. 35th Conference on Learning Theory (eds Loh, P.-L. & Raginsky, M.) 3820\u20133857 (PMLR, 2022)."},{"key":"829_CR36","doi-asserted-by":"crossref","unstructured":"Williams, G. et al. Information theoretic MPC for model-based reinforcement learning. In Proc. IEEE International Conference on Robotics and Automation (ed. Nakamura, Y.) 1714\u20131721 (IEEE, 2017).","DOI":"10.1109\/ICRA.2017.7989202"},{"key":"829_CR37","doi-asserted-by":"crossref","unstructured":"So, O., Wang, Z. & Theodorou, E. A. Maximum entropy differential dynamic programming. In Proc. IEEE International Conference on Robotics and Automation (ed. Kress-Gazit, H.) 3422\u20133428 (IEEE, 2022).","DOI":"10.1109\/ICRA46639.2022.9812228"},{"key":"829_CR38","unstructured":"Thrun, S. B. Efficient Exploration in Reinforcement Learning. Technical report (Carnegie Mellon Univ., 1992)."},{"key":"829_CR39","unstructured":"Amin, S., Gomrokchi, M., Satija, H., van Hoof, H. & Precup, D. A survey of exploration methods in reinforcement learning. Preprint at https:\/\/arXiv.org\/2109.00157 (2021)."},{"key":"829_CR40","doi-asserted-by":"publisher","first-page":"620","DOI":"10.1103\/PhysRev.106.620","volume":"106","author":"ET Jaynes","year":"1957","unstructured":"Jaynes, E. T. Information theory and statistical mechanics. Phys. Rev. 106, 620\u2013630 (1957).","journal-title":"Phys. Rev."},{"key":"829_CR41","doi-asserted-by":"publisher","first-page":"010901","DOI":"10.1063\/1.5012990","volume":"148","author":"PD Dixit","year":"2018","unstructured":"Dixit, P. D. et al. Perspective: maximum caliber is a general variational principle for dynamical systems. J. Chem. Phys. 148, 010901 (2018).","journal-title":"J. Chem. Phys."},{"key":"829_CR42","doi-asserted-by":"publisher","first-page":"90","DOI":"10.1126\/science.abc6182","volume":"371","author":"P Chvykov","year":"2021","unstructured":"Chvykov, P. et al. Low rattling: a predictive principle for self-organization in active collectives. 
Science 371, 90\u201395 (2021).","journal-title":"Science"},{"key":"829_CR43","unstructured":"Kapur, J. N. Maximum Entropy Models in Science and Engineering (Wiley, 1989)."},{"key":"829_CR44","doi-asserted-by":"publisher","first-page":"1907","DOI":"10.1073\/pnas.1421798112","volume":"112","author":"CC Moore","year":"2015","unstructured":"Moore, C. C. Ergodic theorem, ergodic theory, and statistical mechanics. Proc. Natl Acad. Sci. USA 112, 1907\u20131911 (2015).","journal-title":"Proc. Natl Acad. Sci. USA"},{"key":"829_CR45","doi-asserted-by":"publisher","first-page":"102576","DOI":"10.1016\/j.mechatronics.2021.102576","volume":"77","author":"AT Taylor","year":"2021","unstructured":"Taylor, A. T., Berrueta, T. A. & Murphey, T. D. Active learning in robotics: a review of control principles. Mechatronics 77, 102576 (2021).","journal-title":"Mechatronics"},{"key":"829_CR46","unstructured":"Seo, Y. et al. State entropy maximization with random encoders for efficient exploration. In Proc. 38th International Conference on Machine Learning, Virtual (eds Meila, M. & Zhang, T.) 9443\u20139454 (ICML, 2021)."},{"key":"829_CR47","doi-asserted-by":"publisher","DOI":"10.1038\/s41467-022-31795-2","volume":"13","author":"A Prabhakar","year":"2022","unstructured":"Prabhakar, A. & Murphey, T. Mechanical intelligence for learning embodied sensor-object relationships. Nat. Commun. 13, 4108 (2022).","journal-title":"Nat. Commun."},{"key":"829_CR48","unstructured":"Chentanez, N., Barto, A. & Singh, S. Intrinsically motivated reinforcement learning. In Proc. Advances in Neural Information Processing Systems 17 (eds Saul, L. et al.) 1281\u20131288 (MIT, 2004)."},{"key":"829_CR49","doi-asserted-by":"crossref","unstructured":"Pathak, D., Agrawal, P., Efros, A. A. & Darrell, T. Curiosity-driven exploration by self-supervised prediction. In Proc. 34th International Conference on Machine Learning (eds Precup, D. & Teh, Y. W.) 2778\u20132787 (JLMR.org, 2017).","DOI":"10.1109\/CVPRW.2017.70"},{"key":"829_CR50","unstructured":"Taiga, A. A., Fedus, W., Machado, M. C., Courville, A. & Bellemare, M. G. Proc. 8th International Conference on Learning Representations (ICLR, 2020)."},{"key":"829_CR51","doi-asserted-by":"publisher","first-page":"164121","DOI":"10.1063\/1.5090594","volume":"150","author":"X Wang","year":"2019","unstructured":"Wang, X., Deng, W. & Chen, Y. Ergodic properties of heterogeneous diffusion processes in a potential well. J. Chem. Phys. 150, 164121 (2019).","journal-title":"J. Chem. Phys."},{"key":"829_CR52","doi-asserted-by":"publisher","first-page":"669","DOI":"10.1080\/00018738200101438","volume":"31","author":"RG Palmer","year":"1982","unstructured":"Palmer, R. G. Broken ergodicity. Adv. Phys. 31, 669\u2013735 (1982).","journal-title":"Adv. Phys."},{"key":"829_CR53","unstructured":"Islam, R., Henderson, P., Gomrokchi, M. & Precup, D. Reproducibility of benchmarked deep reinforcement learning tasks for continuous control. Preprint at https:\/\/arXiv.org\/1708.04133 (2017)."},{"key":"829_CR54","doi-asserted-by":"publisher","first-page":"276","DOI":"10.3390\/make4010013","volume":"4","author":"J Moos","year":"2022","unstructured":"Moos, J. et al. Robust reinforcement learning: a review of foundations and recent advances. Mach. Learn. Knowl. Extr. 4, 276\u2013315 (2022).","journal-title":"Mach. Learn. Knowl. Extr."},{"key":"829_CR55","doi-asserted-by":"crossref","unstructured":"Strehl, A. L., Li, L., Wiewiora, E., Langford, J. & Littman, M. L. PAC model-free reinforcement learning. In Proc. 
23rd International Conference on Machine Learning (eds Cohen, W. W. & Moore, A.) 881\u2013888 (ICML, 2006).","DOI":"10.1145\/1143844.1143955"},{"key":"829_CR56","unstructured":"Strehl, A. L., Li, L. & Littman, M. L. Reinforcement learning in finite MDPs: PAC analysis. J. Mach. Learn. Res. 10, 2413\u20132444 (2009)."},{"key":"829_CR57","doi-asserted-by":"publisher","first-page":"201","DOI":"10.1613\/jair.1.14174","volume":"76","author":"R Kirk","year":"2023","unstructured":"Kirk, R., Zhang, A., Grefenstette, E. & Rockt\u00e4aschel, T. A survey of zero-shot generalisation in deep reinforcement learning. J. Artif. Intell. Res. 76, 201\u2013264 (2023).","journal-title":"J. Artif. Intell. Res."},{"key":"829_CR58","unstructured":"Oh, J., Singh, S., Lee, H. & Kohli, P. Zero-shot task generalization with multi-task deep reinforcement learning. In Proc. 34th International Conference on Machine Learning (eds Precup, D. & Teh, Y. W.) 2661\u20132670 (JLMR.org, 2017)."},{"key":"829_CR59","doi-asserted-by":"crossref","unstructured":"Krakauer, J. W., Hadjiosif, A. M., Xu, J., Wong, A. L. & Haith, A. M. Motor learning. Compr. Physiol. 9, 613\u2013663 (2019).","DOI":"10.1002\/cphy.c170043"},{"key":"829_CR60","unstructured":"Lu, K., Grover, A., Abbeel, P. & Mordatch, I. Proc. 9th International Conference on Learning Representations (ICLR, 2021)."},{"key":"829_CR61","unstructured":"Chen, A., Sharma, A., Levine, S. & Finn, C. You only live once: single-life reinforcement learning. In Proc. Advances in Neural Information Processing Systems 35 (eds Koyejo, S. et al.) 14784\u201314797 (NeurIPS, 2022)."},{"key":"829_CR62","doi-asserted-by":"crossref","unstructured":"Ames, A., Grizzle, J. & Tabuada, P. Control barrier function based quadratic programs with application to adaptive cruise control. In Proc. 53rd IEEE Conference on Decision and Control 6271\u20136278 (IEEE, 2014).","DOI":"10.1109\/CDC.2014.7040372"},{"key":"829_CR63","unstructured":"Taylor, A., Singletary, A., Yue, Y. & Ames, A. Learning for safety-critical control with control barrier functions. In Proc. 2nd Conference on Learning for Dynamics and Control (eds Bayen, A. et al.) 708\u2013717 (PLMR, 2020)."},{"key":"829_CR64","doi-asserted-by":"crossref","unstructured":"Xiao, W. et al. BarrierNet: differentiable control barrier functions for learning of safe robot control. IEEE Trans. Robot. 39, 2289\u20132307 (2023).","DOI":"10.1109\/TRO.2023.3249564"},{"key":"829_CR65","doi-asserted-by":"publisher","first-page":"6056","DOI":"10.1103\/PhysRevA.45.6056","volume":"45","author":"HS Seung","year":"1992","unstructured":"Seung, H. S., Sompolinsky, H. & Tishby, N. Statistical mechanics of learning from examples. Phys. Rev. A 45, 6056\u20136091 (1992).","journal-title":"Phys. Rev. A"},{"key":"829_CR66","doi-asserted-by":"publisher","first-page":"e52371","DOI":"10.7554\/eLife.52371","volume":"9","author":"C Chen","year":"2020","unstructured":"Chen, C., Murphey, T. D. & MacIver, M. A. Tuning movement for sensing in an uncertain world. eLife 9, e52371 (2020).","journal-title":"eLife"},{"key":"829_CR67","doi-asserted-by":"publisher","first-page":"126","DOI":"10.1186\/s12984-021-00919-y","volume":"18","author":"S Song","year":"2021","unstructured":"Song, S. et al. Deep reinforcement learning for modeling human locomotion control in neuromechanical simulation. J. Neuroeng. Rehabil. 18, 126 (2021).","journal-title":"J. Neuroeng. 
Rehabil."},{"key":"829_CR68","doi-asserted-by":"publisher","first-page":"2300111","DOI":"10.1002\/aisy.202300111","volume":"6","author":"TA Berrueta","year":"2024","unstructured":"Berrueta, T. A., Murphey, T. D. & Truby, R. L. Materializing autonomy in soft robots across scales. Adv. Intell. Syst. 6, 2300111 (2024).","journal-title":"Adv. Intell. Syst."},{"key":"829_CR69","unstructured":"Sutton, R. S. & Barto, A. G. Reinforcement Learning: An Introduction (MIT, 2018)."},{"key":"829_CR70","doi-asserted-by":"publisher","first-page":"261","DOI":"10.1038\/s41592-019-0686-2","volume":"17","author":"P Virtanen","year":"2020","unstructured":"Virtanen, P. et al. SciPy 1.0: fundamental algorithms for scientific computing in Python. Nat. Methods 17, 261\u2013272 (2020).","journal-title":"Nat. Methods"},{"key":"829_CR71","doi-asserted-by":"publisher","unstructured":"Berrueta, T. A., Pinosky, A. & Murphey, T. D. Maximum diffusion reinforcement learning repository. Zenodo https:\/\/doi.org\/10.5281\/zenodo.10723320 (2024).","DOI":"10.5281\/zenodo.10723320"}],"container-title":["Nature Machine Intelligence"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/www.nature.com\/articles\/s42256-024-00829-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/www.nature.com\/articles\/s42256-024-00829-3","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/www.nature.com\/articles\/s42256-024-00829-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,5,23]],"date-time":"2024-05-23T23:03:45Z","timestamp":1716505425000},"score":1,"resource":{"primary":{"URL":"https:\/\/www.nature.com\/articles\/s42256-024-00829-3"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,5,2]]},"references-count":71,"journal-issue":{"issue":"5","published-online":{"date-parts":[[2024,5]]}},"alternative-id":["829"],"URL":"https:\/\/doi.org\/10.1038\/s42256-024-00829-3","relation":{},"ISSN":["2522-5839"],"issn-type":[{"value":"2522-5839","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,5,2]]},"assertion":[{"value":"3 August 2023","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"19 March 2024","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"2 May 2024","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"The authors declare no competing interests.","order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Competing interests"}}]}}