{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,2,21]],"date-time":"2025-02-21T03:21:22Z","timestamp":1740108082569,"version":"3.37.3"},"reference-count":30,"publisher":"Springer Science and Business Media LLC","issue":"15","license":[{"start":{"date-parts":[[2024,2,23]],"date-time":"2024-02-23T00:00:00Z","timestamp":1708646400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,2,23]],"date-time":"2024-02-23T00:00:00Z","timestamp":1708646400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["71771096"],"award-info":[{"award-number":["71771096"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Neural Comput &amp; Applic"],"published-print":{"date-parts":[[2024,5]]},"DOI":"10.1007\/s00521-024-09513-4","type":"journal-article","created":{"date-parts":[[2024,2,23]],"date-time":"2024-02-23T15:02:28Z","timestamp":1708700548000},"page":"8485-8500","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["A double Actor-Critic learning system embedding improved Monte Carlo tree search"],"prefix":"10.1007","volume":"36","author":[{"given":"Hongjun","family":"Zhu","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8151-2400","authenticated-orcid":false,"given":"Yong","family":"Xie","sequence":"additional","affiliation":[]},{"given":"Suijun","family":"Zheng","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,2,23]]},"reference":[{"key":"9513_CR1","unstructured":"Mnih V, Kavukcuoglu K, Silver D, Graves A, Antonoglou I, Wierstra D, Riedmiller M (2013) Playing atari with deep reinforcement learning. arXiv preprint arXiv:1312.5602"},{"key":"9513_CR2","unstructured":"Rashid T, Samvelyan M, Schroeder C, Farquhar G, Foerster J, Whiteson S (2018) Qmix: monotonic value function factorisation for deep multi-agent reinforcement learning. In: International conference on machine learning, pp 4295\u20134304. PMLR"},{"issue":"11","key":"9513_CR3","doi-asserted-by":"publisher","first-page":"1238","DOI":"10.1177\/0278364913495721","volume":"32","author":"J Kober","year":"2013","unstructured":"Kober J, Bagnell JA, Peters J (2013) Reinforcement learning in robotics: a survey. Int J Robot Res 32(11):1238\u20131274","journal-title":"Int J Robot Res"},{"key":"9513_CR4","doi-asserted-by":"publisher","first-page":"102202","DOI":"10.1016\/j.rcim.2021.102202","volume":"72","author":"T Zhou","year":"2021","unstructured":"Zhou T, Tang D, Zhu H, Zhang Z (2021) Multi-agent reinforcement learning for online scheduling in smart factories. Robot Comput-Integr Manuf 72:102202","journal-title":"Robot Comput-Integr Manuf"},{"issue":"2","key":"9513_CR5","doi-asserted-by":"publisher","first-page":"654","DOI":"10.1016\/j.ejor.2017.11.054","volume":"270","author":"T Fischer","year":"2018","unstructured":"Fischer T, Krauss C (2018) Deep learning with long short-term memory networks for financial market predictions. Eur J Oper Res 270(2):654\u2013669","journal-title":"Eur J Oper Res"},{"issue":"11","key":"9513_CR6","doi-asserted-by":"publisher","first-page":"393","DOI":"10.3390\/a15110393","volume":"15","author":"A Namdari","year":"2022","unstructured":"Namdari A, Samani MA, Durrani TS (2022) Lithium-ion battery prognostics through reinforcement learning based on entropy measures. Algorithms 15(11):393","journal-title":"Algorithms"},{"issue":"9","key":"9513_CR7","doi-asserted-by":"publisher","first-page":"1699","DOI":"10.1007\/s10994-019-05849-4","volume":"109","author":"S-A Chen","year":"2020","unstructured":"Chen S-A, Tangkaratt V, Lin H-T, Sugiyama M (2020) Active deep q-learning with demonstration. Mach Learn 109(9):1699\u20131725","journal-title":"Mach Learn"},{"key":"9513_CR8","doi-asserted-by":"crossref","unstructured":"Van\u00a0Hasselt H, Guez A, Silver D (2016) Deep reinforcement learning with double q-learning. In: Proceedings of the AAAI conference on artificial intelligence, vol 30","DOI":"10.1609\/aaai.v30i1.10295"},{"key":"9513_CR9","doi-asserted-by":"crossref","unstructured":"Meng L, Yazidi A, Goodwin M, Engelstad P (2022) Expert q-learning: deep reinforcement learning with coarse state values from offline expert examples. In: Proceedings of the northern lights deep learning workshop, vol 3","DOI":"10.7557\/18.6237"},{"issue":"17","key":"9513_CR10","doi-asserted-by":"publisher","first-page":"11415","DOI":"10.1007\/s00521-020-05639-3","volume":"33","author":"TS Panag","year":"2021","unstructured":"Panag TS, Dhillon J (2021) Predator-prey optimization based clustering algorithm for wireless sensor networks. Neural Comput Appl 33(17):11415\u201311435","journal-title":"Neural Comput Appl"},{"key":"9513_CR11","unstructured":"Thrun S, Schwartz A (1993) Issues in using function approximation for reinforcement learning. In: Proceedings of the 1993 connectionist models summer school Hillsdale, NJ. Lawrence Erlbaum, vol 6, pp 1\u20139"},{"key":"9513_CR12","unstructured":"Lillicrap TP, Hunt JJ, Pritzel A, Heess N, Erez T, Tassa Y, Silver D, Wierstra D (2015) Continuous control with deep reinforcement learning. arXiv preprint arXiv:1509.02971"},{"key":"9513_CR13","unstructured":"Sutton RS, McAllester D, Singh S, Mansour Y (1999) Policy gradient methods for reinforcement learning with function approximation. Adv Neural Inf Process Syst 12"},{"issue":"5","key":"9513_CR14","first-page":"3111","volume":"52","author":"P Lv","year":"2020","unstructured":"Lv P, Wang X, Cheng Y, Duan Z, Chen CP (2020) Integrated double estimator architecture for reinforcement learning. IEEE Trans Cybern 52(5):3111\u20133122","journal-title":"IEEE Trans Cybern"},{"key":"9513_CR15","unstructured":"Fujimoto S, Hoof H, Meger D (2018) Addressing function approximation error in Actor-Critic methods. In: International conference on machine learning, pp 1587\u20131596. PMLR"},{"key":"9513_CR16","doi-asserted-by":"publisher","first-page":"94","DOI":"10.1016\/j.neucom.2021.12.039","volume":"474","author":"H Wu","year":"2022","unstructured":"Wu H, Zhang J, Wang Z, Lin Y, Li H (2022) Sub-avg: overestimation reduction for cooperative multi-agent reinforcement learning. Neurocomputing 474:94\u2013106","journal-title":"Neurocomputing"},{"issue":"1","key":"9513_CR17","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1109\/TCIAIG.2012.2186810","volume":"4","author":"CB Browne","year":"2012","unstructured":"Browne CB, Powley E, Whitehouse D, Lucas SM, Cowling PI, Rohlfshagen P, Tavener S, Perez D, Samothrakis S, Colton S (2012) A survey of Monte Carlo tree search methods. IEEE Trans Comput Intell AI Games 4(1):1\u201343","journal-title":"IEEE Trans Comput Intell AI Games"},{"issue":"14","key":"9513_CR18","doi-asserted-by":"publisher","first-page":"8495","DOI":"10.1007\/s00521-020-05602-2","volume":"33","author":"Q Lu","year":"2021","unstructured":"Lu Q, Tao F, Zhou S, Wang Z (2021) Incorporating Actor-Critic in Monte Carlo tree search for symbolic regression. Neural Comput Appl 33(14):8495\u20138511","journal-title":"Neural Comput Appl"},{"key":"9513_CR19","unstructured":"Brockman G, Cheung V, Pettersson L, Schneider J, Schulman J, Tang J, Zaremba W (2016) Openai gym. arXiv preprint arXiv:1606.01540"},{"key":"9513_CR20","unstructured":"Baxter J, Tridgell A, Weaver L (1999) Knightcap: a chess program that learns by combining td (lambda) with game-tree search. arXiv preprint arXiv:cs\/9901002"},{"key":"9513_CR21","unstructured":"Silver D, Lever G, Heess N, Degris T, Wierstra D, Riedmiller M (2014) Deterministic policy gradient algorithms. In: International conference on machine learning, pp 387\u2013395. PMLR"},{"key":"9513_CR22","unstructured":"Mnih V, Badia AP, Mirza M, Graves A, Lillicrap T, Harley T, Silver D, Kavukcuoglu K (2016) Asynchronous methods for deep reinforcement learning. In: International conference on machine learning, pp 1928\u20131937. PMLR"},{"key":"9513_CR23","doi-asserted-by":"publisher","first-page":"450","DOI":"10.1016\/j.ins.2017.08.049","volume":"460","author":"K Wal\u0229dzik","year":"2018","unstructured":"Wal\u0229dzik K, Ma\u0144dziuk J (2018) Applying hybrid Monte Carlo tree search methods to risk-aware project scheduling problem. Inf Sci 460:450\u2013468","journal-title":"Inf Sci"},{"key":"9513_CR24","doi-asserted-by":"crossref","unstructured":"Kocsis L, Szepesv\u00e1ri C (2006) Bandit based monte-carlo planning. In: European conference on machine learning, pp 282\u2013293. Springer","DOI":"10.1007\/11871842_29"},{"key":"9513_CR25","doi-asserted-by":"publisher","first-page":"106208","DOI":"10.1016\/j.asoc.2020.106208","volume":"91","author":"S Luo","year":"2020","unstructured":"Luo S (2020) Dynamic scheduling for flexible job shop with new job insertions by deep reinforcement learning. Appl Soft Comput 91:106208","journal-title":"Appl Soft Comput"},{"issue":"2","key":"9513_CR26","doi-asserted-by":"publisher","first-page":"444","DOI":"10.1016\/S0377-2217(03)00360-6","volume":"158","author":"RD Snyder","year":"2004","unstructured":"Snyder RD, Koehler AB, Hyndman RJ, Ord JK (2004) Exponential smoothing models: means and variances for lead-time demand. Eur J Oper Res 158(2):444\u2013455","journal-title":"Eur J Oper Res"},{"issue":"1","key":"9513_CR27","doi-asserted-by":"publisher","first-page":"61","DOI":"10.1007\/s00521-016-2401-x","volume":"29","author":"M Jiang","year":"2018","unstructured":"Jiang M, Liang Y, Feng X, Fan X, Pei Z, Xue Y, Guan R (2018) Text classification based on deep belief network and softmax regression. Neural Comput Appl 29(1):61\u201370","journal-title":"Neural Comput Appl"},{"key":"9513_CR28","unstructured":"Sabry M, Khalifa A (2019) On the reduction of variance and overestimation of deep q-learning. arXiv preprint arXiv:1910.05983"},{"key":"9513_CR29","doi-asserted-by":"crossref","unstructured":"Jadon S (2020) A survey of loss functions for semantic segmentation. In: 2020 IEEE conference on computational intelligence in bioinformatics and computational biology (CIBCB), pp 1\u20137. IEEE","DOI":"10.1109\/CIBCB48159.2020.9277638"},{"key":"9513_CR30","unstructured":"Kingma DP, Ba J (2014) Adam: a method for stochastic optimization. arXiv preprint arXiv:1412.6980"}],"container-title":["Neural Computing and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00521-024-09513-4.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00521-024-09513-4\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00521-024-09513-4.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,4,16]],"date-time":"2024-04-16T15:23:08Z","timestamp":1713280988000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00521-024-09513-4"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,2,23]]},"references-count":30,"journal-issue":{"issue":"15","published-print":{"date-parts":[[2024,5]]}},"alternative-id":["9513"],"URL":"https:\/\/doi.org\/10.1007\/s00521-024-09513-4","relation":{},"ISSN":["0941-0643","1433-3058"],"issn-type":[{"type":"print","value":"0941-0643"},{"type":"electronic","value":"1433-3058"}],"subject":[],"published":{"date-parts":[[2024,2,23]]},"assertion":[{"value":"5 January 2023","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"14 January 2024","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"23 February 2024","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"All authors disclosed no relevant relationships.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}]}}