{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,8]],"date-time":"2025-12-08T22:30:25Z","timestamp":1765233025288,"version":"3.37.3"},"reference-count":32,"publisher":"Springer Science and Business Media LLC","issue":"17","license":[{"start":{"date-parts":[[2021,1,7]],"date-time":"2021-01-07T00:00:00Z","timestamp":1609977600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2021,1,7]],"date-time":"2021-01-07T00:00:00Z","timestamp":1609977600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100010909","name":"Young Scientists Fund","doi-asserted-by":"publisher","award":["61803162"],"award-info":[{"award-number":["61803162"]}],"id":[{"id":"10.13039\/501100010909","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Neural Comput &amp; Applic"],"published-print":{"date-parts":[[2023,6]]},"DOI":"10.1007\/s00521-020-05663-3","type":"journal-article","created":{"date-parts":[[2021,1,7]],"date-time":"2021-01-07T11:20:18Z","timestamp":1610018418000},"page":"12271-12282","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":9,"title":["Evaluate, explain, and explore the state more exactly: an improved Actor-Critic algorithm for complex environment"],"prefix":"10.1007","volume":"35","author":[{"given":"ZhongYi","family":"Zha","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8588-4284","authenticated-orcid":false,"given":"Bo","family":"Wang","sequence":"additional","affiliation":[]},{"given":"XueSong","family":"Tang","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2021,1,7]]},"reference":[{"key":"5663_CR1","unstructured":"Bellemare MG, Dabney W, Munos R (2017) A distributional perspective on reinforcement learning. In: Proceedings of the 34th international conference on machine learning-volume 70, pp 449\u2013458. JMLR. org"},{"key":"5663_CR2","unstructured":"Burda Y, Edwards H, Pathak D, Storkey A, Darrell T, Efros A.A (2018) Large-scale study of curiosity-driven learning. arXiv preprint arXiv:1808.04355"},{"key":"5663_CR3","unstructured":"Fortunato M, Azar MG, Piot B, Menick J, Osband I, Graves A, Mnih V, Munos R, Hassabis D, Pietquin O, et\u00a0al (2017) Noisy networks for exploration. arXiv preprint arXiv:1706.10295"},{"issue":"4","key":"5663_CR4","doi-asserted-by":"publisher","first-page":"541","DOI":"10.1016\/j.neunet.2010.01.001","volume":"23","author":"M Grze\u015b","year":"2010","unstructured":"Grze\u015b M, Kudenko D (2010) Online learning of shaping rewards in reinforcement learning. Neural Netw 23(4):541\u2013550","journal-title":"Neural Netw"},{"key":"5663_CR5","doi-asserted-by":"crossref","unstructured":"Hessel M, Modayil J, Van Hasselt H, Schaul T, Ostrovski G, Dabney W, Horgan D, Piot B, Azar M, Silver D (2018) Rainbow: combining improvements in deep reinforcement learning. In: Thirty-second AAAI conference on artificial intelligence","DOI":"10.1609\/aaai.v32i1.11796"},{"key":"5663_CR6","doi-asserted-by":"crossref","unstructured":"Hou Y, Liu L, Wei Q, Xu X, Chen C (2017) A novel ddpg method with prioritized experience replay. In: 2017 IEEE international conference on systems, man, and cybernetics (SMC), pp 316\u2013321. IEEE","DOI":"10.1109\/SMC.2017.8122622"},{"key":"5663_CR7","unstructured":"Kingma DP, Ba J (2014) Adam: a method for stochastic optimization. arXiv preprint arXiv:1412.6980"},{"key":"5663_CR8","doi-asserted-by":"crossref","unstructured":"Konda VR, Borkar VS (1999) Actor-critic-type learning algorithms for markov decision processes. SIAM J Control Optim","DOI":"10.1137\/S036301299731669X"},{"key":"5663_CR9","doi-asserted-by":"crossref","unstructured":"Leo R, Milton R, Sibi, S (2014) Reinforcement learning for optimal energy management of a solar microgrid. In: 2014 IEEE global humanitarian technology conference-south asia satellite (GHTC-SAS), pp 183\u2013188. IEEE","DOI":"10.1109\/GHTC-SAS.2014.6967580"},{"key":"5663_CR10","doi-asserted-by":"crossref","unstructured":"Li J, Monroe W, Ritter A, Galley M, Gao J, Jurafsky D (2016) Deep reinforcement learning for dialogue generation. arXiv preprint arXiv:1606.01541","DOI":"10.18653\/v1\/D16-1127"},{"key":"5663_CR11","doi-asserted-by":"crossref","unstructured":"Mahadevan, S (1994) To discount or not to discount in reinforcement learning: a case study comparing r learning and q learning. In: Machine learning proceedings 1994, pp 164\u2013172. Elsevier","DOI":"10.1016\/B978-1-55860-335-6.50028-3"},{"key":"5663_CR12","unstructured":"Mnih V, Badia A P , Mirza M, Graves A, Lillicrap T, Harley T, Silver D, Kavukcuoglu, K (2016) Asynchronous methods for deep reinforcement learning. In: International conference on machine learning, pp. 1928\u20131937"},{"issue":"7540","key":"5663_CR13","doi-asserted-by":"publisher","first-page":"529","DOI":"10.1038\/nature14236","volume":"518","author":"V Mnih","year":"2015","unstructured":"Mnih V, Kavukcuoglu K, Silver D, Rusu AA, Veness J, Bellemare MG, Graves A, Riedmiller M, Fidjeland AK, Ostrovski G et al (2015) Human-level control through deep reinforcement learning. Nature 518(7540):529\u2013533","journal-title":"Nature"},{"issue":"4","key":"5663_CR14","doi-asserted-by":"publisher","first-page":"293","DOI":"10.1109\/TCIAIG.2013.2286295","volume":"5","author":"S Ontan\u00f3n","year":"2013","unstructured":"Ontan\u00f3n S, Synnaeve G, Uriarte A, Richoux F, Churchill D, Preuss M (2013) A survey of real-time strategy game ai research and competition in starcraft. IEEE Trans Comput Intell AI Games 5(4):293\u2013311","journal-title":"IEEE Trans Comput Intell AI Games"},{"key":"5663_CR15","doi-asserted-by":"crossref","unstructured":"Pathak D, Agrawal P, Efros AA, Darrell, T (2017) Curiosity-driven exploration by self-supervised prediction. In: Proceedings of the IEEE conference on computer vision and pattern recognition workshops, pp 16\u201317","DOI":"10.1109\/CVPRW.2017.70"},{"key":"5663_CR16","unstructured":"Prasad N, et\u00a0al (2020) Methods for reinforcement learning in clinical decision support"},{"key":"5663_CR17","doi-asserted-by":"publisher","first-page":"70","DOI":"10.2352\/ISSN.2470-1173.2017.19.AVM-023","volume":"19","author":"AE Sallab","year":"2017","unstructured":"Sallab AE, Abdou M, Perot E, Yogamani S (2017) Deep reinforcement learning framework for autonomous driving. Electron Imaging 19:70\u201376","journal-title":"Electron Imaging"},{"key":"5663_CR18","unstructured":"Santoro A, Raposo D, Barrett DG, Malinowski M, Pascanu R, Battaglia P, Lillicrap T (2017) A simple neural network module for relational reasoning. In: Advances in neural information processing systems, pp 4967\u20134976"},{"key":"5663_CR19","unstructured":"Schulman J, Wolski F, Dhariwal P, Radford A, Klimov O (2017) Proximal policy optimization algorithms. arXiv preprint arXiv:1707.06347"},{"key":"5663_CR20","doi-asserted-by":"crossref","unstructured":"Shantia A, Begue E, Wiering M (2011) Connectionist reinforcement learning for intelligent unit micro management in starcraft. In: The 2011 international joint conference on neural networks, pp 1794\u20131801. IEEE","DOI":"10.1109\/IJCNN.2011.6033442"},{"issue":"1","key":"5663_CR21","doi-asserted-by":"publisher","first-page":"73","DOI":"10.1109\/TETCI.2018.2823329","volume":"3","author":"K Shao","year":"2018","unstructured":"Shao K, Zhu Y, Zhao D (2018) Starcraft micromanagement with reinforcement learning and curriculum transfer learning. IEEE Trans Emerg Topics Comput Intell 3(1):73\u201384","journal-title":"IEEE Trans Emerg Topics Comput Intell"},{"issue":"6419","key":"5663_CR22","doi-asserted-by":"publisher","first-page":"1140","DOI":"10.1126\/science.aar6404","volume":"362","author":"D Silver","year":"2018","unstructured":"Silver D, Hubert T, Schrittwieser J, Antonoglou I, Lai M, Guez A, Lanctot M, Sifre L, Kumaran D, Graepel T et al (2018) A general reinforcement learning algorithm that masters chess, shogi, and go through self-play. Science 362(6419):1140\u20131144","journal-title":"Science"},{"issue":"7676","key":"5663_CR23","doi-asserted-by":"publisher","first-page":"354","DOI":"10.1038\/nature24270","volume":"550","author":"D Silver","year":"2017","unstructured":"Silver D, Schrittwieser J, Simonyan K, Antonoglou I, Huang A, Guez A, Hubert T, Baker L, Lai M, Bolton A et al (2017) Mastering the game of go without human knowledge. Nature 550(7676):354\u2013359","journal-title":"Nature"},{"key":"5663_CR24","doi-asserted-by":"crossref","unstructured":"Van Hasselt H, Guez, A, Silver D (2016) Deep reinforcement learning with double q-learning. In: Thirtieth AAAI conference on artificial intelligence","DOI":"10.1609\/aaai.v30i1.10295"},{"issue":"7782","key":"5663_CR25","doi-asserted-by":"publisher","first-page":"350","DOI":"10.1038\/s41586-019-1724-z","volume":"575","author":"O Vinyals","year":"2019","unstructured":"Vinyals O, Babuschkin I, Czarnecki WM, Mathieu M, Dudzik A, Chung J, Choi DH, Powell R, Ewalds T, Georgiev P et al (2019) Grandmaster level in starcraft ii using multi-agent reinforcement learning. Nature 575(7782):350\u2013354","journal-title":"Nature"},{"key":"5663_CR26","unstructured":"Vinyals O, Ewalds T, Bartunov S, Georgiev P, Vezhnevets AS, Yeo M, Makhzani A,m K\u00fcttler H, Agapiou J, Schrittwieser J, et\u00a0al (2017) Starcraft ii: a new challenge for reinforcement learning. arXiv preprint arXiv:1708.04782"},{"key":"5663_CR27","unstructured":"Wang Z, Schaul T, Hessel M, Hasselt H, Lanctot M, Freitas N(2016) Dueling network architectures for deep reinforcement learning. In: International conference on machine learning, pp 1995\u20132003"},{"issue":"3\u20134","key":"5663_CR28","doi-asserted-by":"publisher","first-page":"279","DOI":"10.1007\/BF00992698","volume":"8","author":"CJ Watkins","year":"1992","unstructured":"Watkins CJ, Dayan P (1992) Q-learning. Mach Learn 8(3\u20134):279\u2013292","journal-title":"Mach Learn"},{"key":"5663_CR29","unstructured":"Wu Y, Mansimov E, Grosse RB, Liao S, Ba J (2017) Scalable trust-region method for deep reinforcement learning using kronecker-factored approximation. In: Advances in neural information processing systems, pp 5279\u20135288"},{"key":"5663_CR30","unstructured":"Xingjian S, Chen Z, Wang H, Yeung DY, Wong WK, Woo WC (2015) Convolutional lstm network: a machine learning approach for precipitation nowcasting. In: Advances in neural information processing systems, pp 802\u2013810"},{"key":"5663_CR31","unstructured":"Zhelo O, Zhang J, Tai L, Liu M, Burgard W (2018) Curiosity-driven exploration for mapless navigation with deep reinforcement learning. arXiv preprint arXiv:1804.00456"},{"key":"5663_CR32","doi-asserted-by":"crossref","unstructured":"Zhu Y, Mottaghi R, Kolve E, Lim JJ, Gupta A, Fei-Fei L, Farhadi A (2017) Target-driven visual navigation in indoor scenes using deep reinforcement learning. In: 2017 IEEE international conference on robotics and automation (ICRA), pp 3357\u20133364. IEEE","DOI":"10.1109\/ICRA.2017.7989381"}],"container-title":["Neural Computing and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00521-020-05663-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00521-020-05663-3\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00521-020-05663-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,5,11]],"date-time":"2023-05-11T17:27:31Z","timestamp":1683826051000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00521-020-05663-3"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,1,7]]},"references-count":32,"journal-issue":{"issue":"17","published-print":{"date-parts":[[2023,6]]}},"alternative-id":["5663"],"URL":"https:\/\/doi.org\/10.1007\/s00521-020-05663-3","relation":{},"ISSN":["0941-0643","1433-3058"],"issn-type":[{"type":"print","value":"0941-0643"},{"type":"electronic","value":"1433-3058"}],"subject":[],"published":{"date-parts":[[2021,1,7]]},"assertion":[{"value":"28 August 2020","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"27 December 2020","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"7 January 2021","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Compliance with ethical standards"}},{"value":"We declare that we have no financial and personal relationships with other people or organizations that can inappropriately influence our work, and there is no professional or other personal interest of any nature or kind in any product, service and company that could be construed as influencing the position presented in, or the review of, the manuscript entitled, \u201cEvaluate, explain, and explore the state more exactly: an improved Actor-Critic algorithm for complex environment.\u201d","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}]}}