{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,27]],"date-time":"2025-03-27T19:42:33Z","timestamp":1743104553678,"version":"3.40.3"},"publisher-location":"Singapore","reference-count":74,"publisher":"Springer Nature Singapore","isbn-type":[{"type":"print","value":"9789819754885"},{"type":"electronic","value":"9789819754892"}],"license":[{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024]]},"DOI":"10.1007\/978-981-97-5489-2_17","type":"book-chapter","created":{"date-parts":[[2024,7,26]],"date-time":"2024-07-26T03:48:02Z","timestamp":1721965682000},"page":"188-202","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Reinforcement Learning for\u00a0Scientific Application: A Survey"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-3459-5105","authenticated-orcid":false,"given":"Zhikuang","family":"Xin","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0009-0000-0526-6172","authenticated-orcid":false,"given":"Zhenghong","family":"Wu","sequence":"additional","affiliation":[]},{"given":"Dong","family":"Zhu","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5400-5243","authenticated-orcid":false,"given":"Xiaoguang","family":"Wang","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5573-9986","authenticated-orcid":false,"given":"Jue","family":"Wang","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5694-1617","authenticated-orcid":false,"given":"Yangang","family":"Wang","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,7,27]]},"reference":[{"issue":"1","key":"17_CR1","first-page":"1582","volume":"17","author":"S Abdallah","year":"2016","unstructured":"Abdallah, S., Kaisers, M.: Addressing environment non-stationarity by repeating q-learning updates. J. Mach. Learn. Res. 17(1), 1582\u20131612 (2016)","journal-title":"J. Mach. Learn. Res."},{"issue":"2","key":"17_CR2","doi-asserted-by":"publisher","first-page":"1143","DOI":"10.1109\/LRA.2020.2966414","volume":"5","author":"A Amini","year":"2020","unstructured":"Amini, A., et al.: Learning robust control policies for end-to-end autonomous driving from data-driven simulation. IEEE Robot. Autom. Lett. 5(2), 1143\u20131150 (2020). https:\/\/doi.org\/10.1109\/LRA.2020.2966414","journal-title":"IEEE Robot. Autom. Lett."},{"issue":"2","key":"17_CR3","doi-asserted-by":"publisher","first-page":"740","DOI":"10.1109\/TITS.2020.3024655","volume":"23","author":"S Aradi","year":"2022","unstructured":"Aradi, S.: Survey of deep reinforcement learning for motion planning of autonomous vehicles. IEEE Trans. Intell. Transp. Syst. 23(2), 740\u2013759 (2022). https:\/\/doi.org\/10.1109\/TITS.2020.3024655","journal-title":"IEEE Trans. Intell. Transp. Syst."},{"issue":"7836","key":"17_CR4","doi-asserted-by":"publisher","first-page":"77","DOI":"10.1038\/s41586-020-2939-8","volume":"588","author":"MG Bellemare","year":"2020","unstructured":"Bellemare, M.G., et al.: Autonomous navigation of stratospheric balloons using reinforcement learning. Nature 588(7836), 77\u201382 (2020)","journal-title":"Nature"},{"key":"17_CR5","doi-asserted-by":"crossref","unstructured":"Bellinger, C., Drozdyuk, A., Crowley, M., Tamblyn, I.: Balancing information with observation costs in deep reinforcement learning, 12 p. (2022). https:\/\/caiac.pubpub.org\/pub\/0jmy7gpd\/release\/1","DOI":"10.21428\/594757db.8e09102d"},{"issue":"1","key":"17_CR6","doi-asserted-by":"publisher","first-page":"62","DOI":"10.1080\/1350486X.2022.2136727","volume":"29","author":"P Casgrain","year":"2022","unstructured":"Casgrain, P., Ning, B., Jaimungal, S.: Deep q-learning for Nash equilibria: Nash-DQN. Appl. Math. Financ. 29(1), 62\u201378 (2022)","journal-title":"Appl. Math. Financ."},{"issue":"11","key":"17_CR7","doi-asserted-by":"publisher","first-page":"13702","DOI":"10.1109\/TVT.2020.3023733","volume":"69","author":"YJ Chen","year":"2020","unstructured":"Chen, Y.J., Chang, D.K., Zhang, C.: Autonomous tracking using a swarm of UAVs: a constrained multi-agent reinforcement learning approach. IEEE Trans. Veh. Technol. 69(11), 13702\u201313717 (2020)","journal-title":"IEEE Trans. Veh. Technol."},{"issue":"3","key":"17_CR8","doi-asserted-by":"publisher","first-page":"1086","DOI":"10.1109\/TITS.2019.2901791","volume":"21","author":"T Chu","year":"2019","unstructured":"Chu, T., Wang, J., Codec\u00e0, L., Li, Z.: Multi-agent deep reinforcement learning for large-scale traffic signal control. IEEE Trans. Intell. Transp. Syst. 21(3), 1086\u20131095 (2019)","journal-title":"IEEE Trans. Intell. Transp. Syst."},{"key":"17_CR9","doi-asserted-by":"crossref","unstructured":"Ding, R., Yang, Y., Liu, J., Li, H., Gao, F.: Packet routing against network congestion: a deep multi-agent reinforcement learning approach. In: 2020 International Conference on Computing, Networking and Communications (ICNC), pp. 932\u2013937. IEEE (2020)","DOI":"10.1109\/ICNC47757.2020.9049759"},{"issue":"2","key":"17_CR10","doi-asserted-by":"publisher","first-page":"213","DOI":"10.1177\/0278364907084980","volume":"27","author":"G Endo","year":"2008","unstructured":"Endo, G., Morimoto, J., Matsubara, T., Nakanishi, J., Cheng, G.: Learning CPG-based biped locomotion with a policy gradient method: application to a humanoid robot. Int. J. Robot. Res. 27(2), 213\u2013228 (2008)","journal-title":"Int. J. Robot. Res."},{"issue":"4","key":"17_CR11","doi-asserted-by":"publisher","first-page":"2432","DOI":"10.1109\/COMST.2017.2707140","volume":"19","author":"ZM Fadlullah","year":"2017","unstructured":"Fadlullah, Z.M., et al.: State-of-the-art deep learning: evolving machine intelligence toward tomorrow\u2019s intelligent network traffic control systems. IEEE Commun. Surv. Tutor. 19(4), 2432\u20132455 (2017)","journal-title":"IEEE Commun. Surv. Tutor."},{"issue":"4","key":"17_CR12","doi-asserted-by":"publisher","first-page":"890","DOI":"10.1109\/TGCN.2018.2837618","volume":"2","author":"Q Fan","year":"2018","unstructured":"Fan, Q., Ansari, N.: Towards throughput aware and energy aware traffic load balancing in heterogeneous networks with hybrid power supplies. IEEE Trans. Green Commun. Network. 2(4), 890\u2013898 (2018)","journal-title":"IEEE Trans. Green Commun. Network."},{"key":"17_CR13","doi-asserted-by":"crossref","unstructured":"Fawzi, A., et\u00a0al.: Discovering faster matrix multiplication algorithms with reinforcement learning. Nature 610(7930), 47\u201353 (2022)","DOI":"10.1038\/s41586-022-05172-4"},{"key":"17_CR14","doi-asserted-by":"crossref","unstructured":"Foerster, J., Farquhar, G., Afouras, T., Nardelli, N., Whiteson, S.: Counterfactual multi-agent policy gradients. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol.\u00a032 (2018)","DOI":"10.1609\/aaai.v32i1.11794"},{"issue":"3","key":"17_CR15","volume":"8","author":"T F\u00f6sel","year":"2018","unstructured":"F\u00f6sel, T., Tighineanu, P., Weiss, T., Marquardt, F.: Reinforcement learning with neural networks for quantum feedback. Phys. Rev. X 8(3), 031084 (2018)","journal-title":"Phys. Rev. X"},{"key":"17_CR16","unstructured":"Garaffa, L.C., Basso, M., Konzen, A.A., de\u00a0Freitas, E.P.: Reinforcement learning for mobile robotics exploration: a survey. IEEE Trans. Neural Netw. Learn. Syst. (2021)"},{"key":"17_CR17","doi-asserted-by":"crossref","unstructured":"Graves, A., Graves, A.: Long short-term memory. In: Supervised Sequence Labelling with Recurrent Neural Networks, pp. 37\u201345 (2012)","DOI":"10.1007\/978-3-642-24797-2_4"},{"key":"17_CR18","unstructured":"Hausknecht, M., Stone, P.: Deep recurrent q-learning for partially observable MDPS. In: 2015 AAAI fall Symposium Series (2015)"},{"key":"17_CR19","unstructured":"Hoffman, M.W., et al.: ACME: a research framework for distributed reinforcement learning. arXiv preprint arXiv:2006.00979 (2020). https:\/\/arxiv.org\/abs\/2006.00979"},{"issue":"6","key":"17_CR20","doi-asserted-by":"publisher","first-page":"1421","DOI":"10.23919\/JSEE.2021.000121","volume":"32","author":"Z Jiandong","year":"2021","unstructured":"Jiandong, Z., Qiming, Y., Guoqing, S., Yi, L., Yong, W.: UAV cooperative air combat maneuver decision based on multi-agent reinforcement learning. J. Syst. Eng. Electron. 32(6), 1421\u20131438 (2021)","journal-title":"J. Syst. Eng. Electron."},{"key":"17_CR21","doi-asserted-by":"crossref","unstructured":"Kalakrishnan, M., Righetti, L., Pastor, P., Schaal, S.: Learning force control policies for compliant manipulation. In: 2011 IEEE\/RSJ International Conference on Intelligent Robots and Systems, pp. 4639\u20134644. IEEE (2011)","DOI":"10.1109\/IROS.2011.6095096"},{"issue":"6","key":"17_CR22","doi-asserted-by":"publisher","first-page":"4909","DOI":"10.1109\/TITS.2021.3054625","volume":"23","author":"BR Kiran","year":"2021","unstructured":"Kiran, B.R., et al.: Deep reinforcement learning for autonomous driving: a survey. IEEE Trans. Intell. Transp. Syst. 23(6), 4909\u20134926 (2021)","journal-title":"IEEE Trans. Intell. Transp. Syst."},{"issue":"6","key":"17_CR23","doi-asserted-by":"publisher","first-page":"4909","DOI":"10.1109\/TITS.2021.3054625","volume":"23","author":"BR Kiran","year":"2022","unstructured":"Kiran, B.R., et al.: Deep reinforcement learning for autonomous driving: a survey. IEEE Trans. Intell. Transp. Syst. 23(6), 4909\u20134926 (2022). https:\/\/doi.org\/10.1109\/TITS.2021.3054625","journal-title":"IEEE Trans. Intell. Transp. Syst."},{"key":"17_CR24","doi-asserted-by":"crossref","unstructured":"Kohl, N., Stone, P.: Policy gradient reinforcement learning for fast quadrupedal locomotion. In: IEEE International Conference on Robotics and Automation. Proceedings. ICRA\u201904, vol.\u00a03, pp. 2619\u20132624. IEEE (2004)","DOI":"10.1109\/ROBOT.2004.1307456"},{"key":"17_CR25","doi-asserted-by":"crossref","unstructured":"Kurach, K., et\u00a0al.: Google research football: a novel reinforcement learning environment. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol.\u00a034, pp. 4501\u20134510 (2020)","DOI":"10.1609\/aaai.v34i04.5878"},{"key":"17_CR26","unstructured":"Lillicrap, T.P., et al.: Continuous control with deep reinforcement learning. arXiv preprint arXiv:1509.02971 (2015)"},{"key":"17_CR27","unstructured":"Lowe, R., Wu, Y.I., Tamar, A., Harb, J., Pieter\u00a0Abbeel, O., Mordatch, I.: Multi-agent actor-critic for mixed cooperative-competitive environments. In: Advances in Neural Information Processing Systems, vol. 30 (2017)"},{"key":"17_CR28","unstructured":"Mirhoseini, A., et\u00a0al.: Chip placement with deep reinforcement learning. arXiv preprint arXiv:2004.10746 (2020)"},{"key":"17_CR29","unstructured":"Mnih, V., et al.: Playing Atari with deep reinforcement learning. arXiv preprint arXiv:1312.5602 (2013)"},{"key":"17_CR30","doi-asserted-by":"crossref","unstructured":"Mnih, V., et\u00a0al.: Human-level control through deep reinforcement learning. Nature 518(7540), 529\u2013533 (2015)","DOI":"10.1038\/nature14236"},{"key":"17_CR31","unstructured":"Moritz, P., et\u00a0al.: Ray: a distributed framework for emerging $$\\{$$AI$$\\}$$ applications. In: 13th USENIX Symposium on Operating Systems Design and Implementation (OSDI 18), pp. 561\u2013577 (2018)"},{"key":"17_CR32","doi-asserted-by":"crossref","unstructured":"Now\u00e9, A., Vrancx, P., De\u00a0Hauwere, Y.M.: Game theory and multi-agent reinforcement learning. In: Reinforcement Learning: State-of-the-Art, pp. 441\u2013470 (2012)","DOI":"10.1007\/978-3-642-27645-3_14"},{"key":"17_CR33","unstructured":"Ouyang, L., et al.: Training language models to follow instructions with human feedback. In: Advances in Neural Information Processing Systems, vol. 35, pp. 27730\u201327744 (2022)"},{"key":"17_CR34","doi-asserted-by":"crossref","unstructured":"Peters, J., Mulling, K., Altun, Y.: Relative entropy policy search. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol.\u00a024, pp. 1607\u20131612 (2010)","DOI":"10.1609\/aaai.v24i1.7727"},{"issue":"4","key":"17_CR35","doi-asserted-by":"publisher","first-page":"682","DOI":"10.1016\/j.neunet.2008.02.003","volume":"21","author":"J Peters","year":"2008","unstructured":"Peters, J., Schaal, S.: Reinforcement learning of motor skills with policy gradients. Neural Netw. 21(4), 682\u2013697 (2008)","journal-title":"Neural Netw."},{"key":"17_CR36","doi-asserted-by":"crossref","unstructured":"Popova, M., Isayev, O., Tropsha, A.: Deep reinforcement learning for de novo drug design. Sci. Adv. 4(7), eaap7885 (2018)","DOI":"10.1126\/sciadv.aap7885"},{"issue":"1","key":"17_CR37","first-page":"7234","volume":"21","author":"T Rashid","year":"2020","unstructured":"Rashid, T., Samvelyan, M., De Witt, C.S., Farquhar, G., Foerster, J., Whiteson, S.: Monotonic value function factorisation for deep multi-agent reinforcement learning. J. Mach. Learn. Res. 21(1), 7234\u20137284 (2020)","journal-title":"J. Mach. Learn. Res."},{"issue":"5","key":"17_CR38","doi-asserted-by":"publisher","first-page":"5003","DOI":"10.1109\/TVT.2021.3074304","volume":"70","author":"A Sacco","year":"2021","unstructured":"Sacco, A., Esposito, F., Marchetto, G., Montuschi, P.: Sustainable task offloading in UAV networks via multi-agent reinforcement learning. IEEE Trans. Veh. Technol. 70(5), 5003\u20135015 (2021)","journal-title":"IEEE Trans. Veh. Technol."},{"key":"17_CR39","unstructured":"Samvelyan, M., et al.: The Starcraft multi-agent challenge. arXiv preprint arXiv:1902.04043 (2019)"},{"issue":"7698","key":"17_CR40","doi-asserted-by":"publisher","first-page":"604","DOI":"10.1038\/nature25978","volume":"555","author":"MH Segler","year":"2018","unstructured":"Segler, M.H., Preuss, M., Waller, M.P.: Planning chemical syntheses with deep neural networks and symbolic AI. Nature 555(7698), 604\u2013610 (2018)","journal-title":"Nature"},{"key":"17_CR41","doi-asserted-by":"crossref","unstructured":"Silver, D., et\u00a0al.: Mastering the game of go with deep neural networks and tree search. Nature 529(7587), 484\u2013489 (2016)","DOI":"10.1038\/nature16961"},{"key":"17_CR42","unstructured":"Son, K., Ahn, S., Reyes, R.D., Shin, J., Yi, Y.: Qtran++: improved value transformation for cooperative multi-agent reinforcement learning. arXiv preprint arXiv:2006.12010 (2020)"},{"key":"17_CR43","unstructured":"Son, K., Kim, D., Kang, W.J., Hostallero, D.E., Yi, Y.: Qtran: learning to factorize with transformation for cooperative multi-agent reinforcement learning. In: International Conference on Machine Learning, pp. 5887\u20135896. PMLR (2019)"},{"key":"17_CR44","unstructured":"Stooke, A., Abbeel, P.: rlpyt: A research code base for deep reinforcement learning in PyTorch. arXiv preprint arXiv:1909.01500 (2019)"},{"issue":"10","key":"17_CR45","doi-asserted-by":"publisher","first-page":"1433","DOI":"10.1021\/acsmaterialslett.1c00390","volume":"3","author":"F Sui","year":"2021","unstructured":"Sui, F., Guo, R., Zhang, Z., Gu, G.X., Lin, L.: Deep reinforcement learning for digital materials design. ACS Materi. Lett. 3(10), 1433\u20131439 (2021)","journal-title":"ACS Materi. Lett."},{"key":"17_CR46","unstructured":"Sunehag, P., et\u00a0al.: Value-decomposition networks for cooperative multi-agent learning. arXiv preprint arXiv:1706.05296 (2017)"},{"key":"17_CR47","unstructured":"Sutton, R.S., Barto, A.G.: Reinforcement Learning: An Introduction. MIT Press, Cambridge (2018)"},{"issue":"4","key":"17_CR48","doi-asserted-by":"publisher","DOI":"10.1371\/journal.pone.0172395","volume":"12","author":"A Tampuu","year":"2017","unstructured":"Tampuu, A., et al.: Multiagent cooperation and competition with deep reinforcement learning. PLoS ONE 12(4), e0172395 (2017)","journal-title":"PLoS ONE"},{"key":"17_CR49","doi-asserted-by":"crossref","unstructured":"Theodorou, E., Buchli, J., Schaal, S.: Reinforcement learning of motor skills in high dimensions: a path integral approach. In: 2010 IEEE International Conference on Robotics and Automation, pp. 2397\u20132403. IEEE (2010)","DOI":"10.1109\/ROBOT.2010.5509336"},{"issue":"3","key":"17_CR50","doi-asserted-by":"publisher","DOI":"10.1088\/2632-2153\/ac7ddc","volume":"3","author":"LA Thiede","year":"2022","unstructured":"Thiede, L.A., Krenn, M., Nigam, A., Aspuru-Guzik, A.: Curiosity in exploring chemical spaces: intrinsic rewards for molecular reinforcement learning. Mach. Learn. Sci. Technol. 3(3), 035008 (2022)","journal-title":"Mach. Learn. Sci. Technol."},{"key":"17_CR51","doi-asserted-by":"publisher","unstructured":"Todorov, E., Erez, T., Tassa, Y.: Mujoco: a physics engine for model-based control. In: 2012 IEEE\/RSJ International Conference on Intelligent Robots and Systems, pp. 5026\u20135033. IEEE (2012). https:\/\/doi.org\/10.1109\/IROS.2012.6386109","DOI":"10.1109\/IROS.2012.6386109"},{"key":"17_CR52","doi-asserted-by":"crossref","unstructured":"Van\u00a0Hasselt, H., Guez, A., Silver, D.: Deep reinforcement learning with double q-learning. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol.\u00a030 (2016)","DOI":"10.1609\/aaai.v30i1.10295"},{"key":"17_CR53","unstructured":"Ramakrishnaiah, V., et al.: Easily extendable architecture for reinforcement learning (exarl) (2020). https:\/\/github.com\/exalearn\/EXARL"},{"key":"17_CR54","unstructured":"Wang, J., Ren, Z., Liu, T., Yu, Y., Zhang, C.: Qplex: duplex dueling multi-agent q-learning. arXiv preprint arXiv:2008.01062 (2020)"},{"issue":"40","key":"17_CR55","doi-asserted-by":"publisher","first-page":"10959","DOI":"10.1039\/D0SC04184J","volume":"11","author":"X Wang","year":"2020","unstructured":"Wang, X., et al.: Towards efficient discovery of green synthetic pathways with Monte Carlo tree search and reinforcement learning. Chem. Sci. 11(40), 10959\u201310972 (2020)","journal-title":"Chem. Sci."},{"issue":"5","key":"17_CR56","doi-asserted-by":"publisher","first-page":"673","DOI":"10.1631\/FITEE.1900637","volume":"22","author":"Y Wang","year":"2021","unstructured":"Wang, Y., Zheng, K., Tian, D., Duan, X., Zhou, J.: Pre-training with asynchronous supervised learning for reinforcement learning based autonomous driving. Front. Inf. Technol. Electron. Eng. 22(5), 673\u2013686 (2021)","journal-title":"Front. Inf. Technol. Electron. Eng."},{"key":"17_CR57","unstructured":"Wang, Z., Schaul, T., Hessel, M., Hasselt, H., Lanctot, M., Freitas, N.: Dueling network architectures for deep reinforcement learning. In: International Conference on Machine Learning, pp. 1995\u20132003. PMLR (2016)"},{"key":"17_CR58","doi-asserted-by":"crossref","unstructured":"Wen, G., Fu, J., Dai, P., Zhou, J.: DTDE: a new cooperative multi-agent reinforcement learning framework. Innovation 2(4) (2021)","DOI":"10.1016\/j.xinn.2021.100162"},{"key":"17_CR59","unstructured":"Weng, J., et al.: Tianshou: a highly modularized deep reinforcement learning library. J. Mach. Learn. Res. 23(267), 1\u20136 (2022). http:\/\/jmlr.org\/papers\/v23\/21-1127.html"},{"key":"17_CR60","doi-asserted-by":"publisher","first-page":"229","DOI":"10.1007\/BF00992696","volume":"8","author":"RJ Williams","year":"1992","unstructured":"Williams, R.J.: Simple statistical gradient-following algorithms for connectionist reinforcement learning. Mach. Learn. 8, 229\u2013256 (1992)","journal-title":"Mach. Learn."},{"key":"17_CR61","doi-asserted-by":"publisher","DOI":"10.1016\/j.knosys.2019.07.026","volume":"183","author":"S Yang","year":"2019","unstructured":"Yang, S., Yang, B., Wong, H.S., Kang, Z.: Cooperative traffic signal control using multi-step return and off-policy asynchronous advantage actor-critic graph algorithm. Knowl.-Based Syst. 183, 104855 (2019)","journal-title":"Knowl.-Based Syst."},{"issue":"1","key":"17_CR62","doi-asserted-by":"publisher","first-page":"972","DOI":"10.1080\/14686996.2017.1401424","volume":"18","author":"X Yang","year":"2017","unstructured":"Yang, X., Zhang, J., Yoshizoe, K., Terayama, K., Tsuda, K.: Chemts: an efficient python library for de novo molecular generation. Sci. Technol. Adv. Mater. 18(1), 972\u2013976 (2017)","journal-title":"Sci. Technol. Adv. Mater."},{"key":"17_CR63","doi-asserted-by":"publisher","unstructured":"Ye, F., Cheng, X., Wang, P., Chan, C., Zhang, J.: Automated lane change strategy using proximal policy optimization-based deep reinforcement learning. In: IEEE Intelligent Vehicles Symposium, IV 2020, Las Vegas, NV, USA, 19 October\u201313 November 2020, pp. 1746\u20131752. IEEE (2020). https:\/\/doi.org\/10.1109\/IV47402.2020.9304668","DOI":"10.1109\/IV47402.2020.9304668"},{"key":"17_CR64","doi-asserted-by":"publisher","unstructured":"Ye, F., Wang, P., Chan, C., Zhang, J.: Meta reinforcement learning-based lane change strategy for autonomous vehicles. In: IEEE Intelligent Vehicles Symposium, IV 2021, Nagoya, Japan, 11\u201317 July 2021, pp. 223\u2013230. IEEE (2021). https:\/\/doi.org\/10.1109\/IV48863.2021.9575379","DOI":"10.1109\/IV48863.2021.9575379"},{"issue":"6","key":"17_CR65","doi-asserted-by":"publisher","first-page":"5185","DOI":"10.1109\/TSG.2021.3103917","volume":"12","author":"Y Ye","year":"2021","unstructured":"Ye, Y., Tang, Y., Wang, H., Zhang, X.P., Strbac, G.: A scalable privacy-preserving multi-agent deep reinforcement learning approach for large-scale peer-to-peer transactive energy trading. IEEE Trans. Smart Grid 12(6), 5185\u20135200 (2021)","journal-title":"IEEE Trans. Smart Grid"},{"issue":"2","key":"17_CR66","doi-asserted-by":"publisher","first-page":"855","DOI":"10.1109\/TSMC.2020.3012832","volume":"52","author":"X You","year":"2020","unstructured":"You, X., Li, X., Xu, Y., Feng, H., Zhao, J., Yan, H.: Toward packet routing with fully distributed multiagent deep reinforcement learning. IEEE Trans. Syst. Man Cybern. Syst. 52(2), 855\u2013868 (2020)","journal-title":"IEEE Trans. Syst. Man Cybern. Syst."},{"issue":"2","key":"17_CR67","doi-asserted-by":"publisher","first-page":"735","DOI":"10.1109\/TITS.2019.2893683","volume":"21","author":"C Yu","year":"2019","unstructured":"Yu, C., et al.: Distributed multiagent coordinated learning for autonomous driving in highways based on dynamic coordination graphs. IEEE Trans. Intell. Transp. Syst. 21(2), 735\u2013748 (2019)","journal-title":"IEEE Trans. Intell. Transp. Syst."},{"key":"17_CR68","doi-asserted-by":"crossref","unstructured":"Zhang, Y., Yang, Q., An, D., Li, D., Wu, Z.: Multistep multiagent reinforcement learning for optimal energy schedule strategy of charging stations in smart grid. IEEE Trans. Cybern. (2022)","DOI":"10.1109\/TCYB.2022.3165074"},{"key":"17_CR69","doi-asserted-by":"publisher","unstructured":"Zhao, W., Queralta, J.P., Westerlund, T.: Sim-to-real transfer in deep reinforcement learning for robotics: a survey. In: 2020 IEEE Symposium Series on Computational Intelligence. SSCI 2020, Canberra, Australia, 1\u20134 December 2020, pp. 737\u2013744. IEEE (2020). https:\/\/doi.org\/10.1109\/SSCI47803.2020.9308468","DOI":"10.1109\/SSCI47803.2020.9308468"},{"key":"17_CR70","doi-asserted-by":"crossref","unstructured":"Zhao, X., Xia, L., Zhang, L., Ding, Z., Yin, D., Tang, J.: Deep reinforcement learning for page-wise recommendations. In: Proceedings of the 12th ACM Conference on Recommender Systems, pp. 95\u2013103 (2018)","DOI":"10.1145\/3240323.3240374"},{"key":"17_CR71","doi-asserted-by":"crossref","unstructured":"Zheng, B., Zheng, Z., Gu, G.X.: Designing mechanically tough graphene oxide materials using deep reinforcement learning. NPJ Comput. Mater. 8(1), 225 (2022)","DOI":"10.1038\/s41524-022-00919-z"},{"key":"17_CR72","series-title":"Lecture Notes in Computer Science (Lecture Notes in Artificial Intelligence)","doi-asserted-by":"publisher","first-page":"168","DOI":"10.1007\/978-3-030-86517-7_11","volume-title":"Machine Learning and Knowledge Discovery in Databases. Applied Data Science Track","author":"B Zhou","year":"2021","unstructured":"Zhou, B., Zeng, H., Liu, Y., Li, K., Wang, F., Tian, H.: Action set based policy optimization for safe power grid management. In: Dong, Y., Kourtellis, N., Hammer, B., Lozano, J.A. (eds.) ECML PKDD 2021, Part V. LNCS (LNAI), vol. 12979, pp. 168\u2013181. Springer, Cham (2021). https:\/\/doi.org\/10.1007\/978-3-030-86517-7_11"},{"key":"17_CR73","unstructured":"Zhou, M., et al.: Malib: a parallel framework for population-based multi-agent reinforcement learning. J. Mach. Learn. Res. 24(150), 1\u201312 (2023). http:\/\/jmlr.org\/papers\/v24\/22-0169.html"},{"issue":"1","key":"17_CR74","doi-asserted-by":"publisher","first-page":"10752","DOI":"10.1038\/s41598-019-47148-x","volume":"9","author":"Z Zhou","year":"2019","unstructured":"Zhou, Z., Kearnes, S., Li, L., Zare, R.N., Riley, P.: Optimization of molecules via deep reinforcement learning. Sci. Rep. 9(1), 10752 (2019)","journal-title":"Sci. Rep."}],"container-title":["Lecture Notes in Computer Science","Knowledge Science, Engineering and Management"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-97-5489-2_17","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,7,26]],"date-time":"2024-07-26T03:52:04Z","timestamp":1721965924000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-97-5489-2_17"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024]]},"ISBN":["9789819754885","9789819754892"],"references-count":74,"URL":"https:\/\/doi.org\/10.1007\/978-981-97-5489-2_17","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2024]]},"assertion":[{"value":"27 July 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"KSEM","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Knowledge Science, Engineering and Management","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Birmingham","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"United Kingdom","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"16 August 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18 August 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"17","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ksem2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/ai-edge.net\/index.html","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}