{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,31]],"date-time":"2026-03-31T07:09:36Z","timestamp":1774940976075,"version":"3.50.1"},"reference-count":28,"publisher":"Springer Science and Business Media LLC","issue":"12","license":[{"start":{"date-parts":[[2019,6,11]],"date-time":"2019-06-11T00:00:00Z","timestamp":1560211200000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2019,6,11]],"date-time":"2019-06-11T00:00:00Z","timestamp":1560211200000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Appl Intell"],"published-print":{"date-parts":[[2019,12]]},"DOI":"10.1007\/s10489-019-01501-9","type":"journal-article","created":{"date-parts":[[2019,6,11]],"date-time":"2019-06-11T15:10:33Z","timestamp":1560265833000},"page":"4303-4318","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":19,"title":["An effective asynchronous framework for small scale reinforcement learning problems"],"prefix":"10.1007","volume":"49","author":[{"given":"Shifei","family":"Ding","sequence":"first","affiliation":[]},{"given":"Xingyu","family":"Zhao","sequence":"additional","affiliation":[]},{"given":"Xinzheng","family":"Xu","sequence":"additional","affiliation":[]},{"given":"Tongfeng","family":"Sun","sequence":"additional","affiliation":[]},{"given":"Weikuan","family":"Jia","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2019,6,11]]},"reference":[{"key":"1501_CR1","doi-asserted-by":"publisher","first-page":"137","DOI":"10.1016\/j.knosys.2019.03.001","volume":"174","author":"Chongsheng Zhang","year":"2019","unstructured":"Zhang C, Bi J, Xu S et al (2019) Multi-Imbalance: An open-source software for multi-class imbalance learning. Knowl-Based Syst. \nhttps:\/\/doi.org\/10.1016\/j.knosys.2019.03.001","journal-title":"Knowledge-Based Systems"},{"key":"1501_CR2","doi-asserted-by":"publisher","first-page":"231","DOI":"10.1016\/j.ins.2019.02.065","volume":"486","author":"H Fujita","year":"2019","unstructured":"Fujita H, Cimr D (2019) Computer Aided detection for fibrillations and flutters using deep convolutional neural network. Inf Sci 486:231\u2013239","journal-title":"Inf Sci"},{"key":"1501_CR3","doi-asserted-by":"publisher","first-page":"128","DOI":"10.1016\/j.eswa.2017.04.003","volume":"82","author":"C Zhang","year":"2017","unstructured":"Zhang C, Liu C, Zhang X et al (2017) An up-to-date comparison of state-of-the-art classification algorithms. Expert Syst Appl 82:128\u2013150","journal-title":"Expert Syst Appl"},{"key":"1501_CR4","doi-asserted-by":"publisher","first-page":"118","DOI":"10.1016\/j.knosys.2019.03.023","volume":"175","author":"Qiu Xiao","year":"2019","unstructured":"Xiao Q, Dai J, Luo J et al (2019) Multi-view manifold regularized learning-based method for prioritizing candidate disease miRNAs. Knowl-Based Syst. \nhttps:\/\/doi.org\/10.1016\/j.knosys.2019.03.023","journal-title":"Knowledge-Based Systems"},{"key":"1501_CR5","volume-title":"Reinforcement learning: An introduction","author":"R Sutton","year":"1998","unstructured":"Sutton R, Barto A (1998) Reinforcement learning: An introduction. MIT press, Cambridge"},{"issue":"7676","key":"1501_CR6","doi-asserted-by":"publisher","first-page":"354","DOI":"10.1038\/nature24270","volume":"550","author":"D Silver","year":"2017","unstructured":"Silver D, Schrittwieser J, Simonyan K et al (2017) Mastering the game of Go without human knowledge. Nature 550(7676):354\u2013359","journal-title":"Nature"},{"issue":"7587","key":"1501_CR7","doi-asserted-by":"publisher","first-page":"484","DOI":"10.1038\/nature16961","volume":"529","author":"D Silver","year":"2016","unstructured":"Silver D, Huang A, Maddison M et al (2016) Mastering the game of Go with deep neural networks and tree search. Nature 529(7587):484\u2013489","journal-title":"Nature"},{"issue":"7540","key":"1501_CR8","doi-asserted-by":"publisher","first-page":"529","DOI":"10.1038\/nature14236","volume":"518","author":"V Mnih","year":"2015","unstructured":"Mnih V, Kavukcuoglu K, Silver D et al (2015) Human-level control through deep reinforcement learning. Nature 518(7540):529\u2013533","journal-title":"Nature"},{"key":"1501_CR9","first-page":"201","volume-title":"Playing atari with deep reinforcement learning","author":"V Mnih","year":"2013","unstructured":"Mnih V, Kavukcuoglu K, Silver D et al (2013) Playing atari with deep reinforcement learning. Proceedings of Workshops at the 26th Neural Information Processing Systems, Lake Tahoe, pp 201\u2013220"},{"key":"1501_CR10","first-page":"173","volume-title":"Springer Proceedings in Advanced Robotics","author":"Sergey Levine","year":"2017","unstructured":"Levine S, Pastor P, Krizhevsky A et al (2016) Learning Hand-Eye Coordination for Robotic Grasping with Large-Scale Data Collection. International Symposium on Experimental Robotics. Springer, Cham, 173\u2013184"},{"key":"1501_CR11","doi-asserted-by":"crossref","unstructured":"Lenz I, Knepper R, Saxena A (2015) Deepmpc: learning deep latent features for model predictive control. In: Proceedings of the Robotics Science and Systems, Rome, pp 201\u2013209","DOI":"10.15607\/RSS.2015.XI.012"},{"key":"1501_CR12","first-page":"110","volume-title":"Simultaneous machine translation using deep reinforcement learning","author":"H Satija","year":"2016","unstructured":"Satija H, Pineau J (2016) Simultaneous machine translation using deep reinforcement learning. Proceedings of the Workshops of International Conference on Machine Learning, New York, pp 110\u2013119"},{"key":"1501_CR13","first-page":"1","volume-title":"Generating text with deep reinforcement learning","author":"H Guo","year":"2015","unstructured":"Guo H (2015) Generating text with deep reinforcement learning. Proceedings of the Workshops of Advances in Neural Information Processing Systems, Montreal, pp 1\u20139"},{"key":"1501_CR14","first-page":"1192","volume-title":"Deep reinforcement learning for dialogue generation","author":"J Li","year":"2016","unstructured":"Li J, Monroe W, Ritter A et al (2016) Deep reinforcement learning for dialogue generation. Proceedings of the Conference on Empirical Methods in Natural Language Processing, Austin, pp 1192\u20131202"},{"key":"1501_CR15","doi-asserted-by":"crossref","unstructured":"Caicedo J, Lazebnik S (2015) Active Object Localization with Deep Reinforcement Learning. IEEE International Conference on Computer Vision. IEEE, 2488\u20132496","DOI":"10.1109\/ICCV.2015.286"},{"issue":"1","key":"1501_CR16","first-page":"9","volume":"3","author":"R Sutton","year":"1988","unstructured":"Sutton R (1988) Learning to predict by the methods of temporal differences. Mach Learn 3(1):9\u201344","journal-title":"Mach Learn"},{"key":"1501_CR17","unstructured":"Watkins C (1989) Learning from delayed rewards. King's College, Cambridge"},{"key":"1501_CR18","volume-title":"On-line Q-learning using connectionist systems","author":"G Rummery","year":"1994","unstructured":"Rummery G, Niranjan M (1994) On-line Q-learning using connectionist systems. University of Cambridge, Department of Engineering, Cambridge"},{"key":"1501_CR19","doi-asserted-by":"crossref","unstructured":"Singh S, Sutton R (1996) Reinforcement learning with replacing eligibility traces. Recent Advances in Reinforcement Learning, 123\u2013158","DOI":"10.1007\/978-0-585-33656-5_7"},{"issue":"3","key":"1501_CR20","first-page":"185","volume":"16","author":"J Tsitsiklis","year":"1994","unstructured":"Tsitsiklis J (1994) Asynchronous stochastic approximation and Q-learning. Mach Learn 16(3):185\u2013202","journal-title":"Mach Learn"},{"key":"1501_CR21","unstructured":"Mnih V, Badia A, Mirza M et al (2016) Asynchronous methods for deep reinforcement learning. International Conference on Machine Learning, 1928\u20131937"},{"issue":"12","key":"1501_CR22","doi-asserted-by":"publisher","first-page":"4889","DOI":"10.1007\/s10489-018-1241-z","volume":"48","author":"X Zhao","year":"2018","unstructured":"Zhao X, Ding S, An Y et al (2018) Asynchronous Reinforcement Learning Algorithms for Solving Discrete Space Path Planning Problems. Appl Intell 48(12):4889\u20134904","journal-title":"Appl Intell"},{"issue":"2","key":"1501_CR23","doi-asserted-by":"publisher","first-page":"581","DOI":"10.1007\/s10489-018-1296-x","volume":"49","author":"X Zhao","year":"2019","unstructured":"Zhao X, Ding S, An Y et al (2019) Applications of asynchronous deep reinforcement learning based on dynamic updating weights. Appl Intell 49(2):581\u2013591","journal-title":"Appl Intell"},{"key":"1501_CR24","unstructured":"Zhao X, Ding S, An Y (2018) A new asynchronous architecture for tabular reinforcement learning algorithms. Proceedings of the Eighth International Conference on Extreme Learning Machines, 172\u2013180"},{"key":"1501_CR25","unstructured":"Nair A, Srinivasan P, Blackwell S et al (2015) Massively parallel methods for deep reinforcement learning. arXiv preprint arXiv:1507.04296"},{"key":"1501_CR26","first-page":"2613","volume":"23","author":"H van Hasselt","year":"2010","unstructured":"van Hasselt H (2010) Double Q-learning. Adv Neural Inf Proces Syst 23:2613\u20132621","journal-title":"Adv Neural Inf Proces Syst"},{"issue":"9","key":"1501_CR27","doi-asserted-by":"publisher","first-page":"2184","DOI":"10.1016\/j.engappai.2013.06.016","volume":"26","author":"Y-H Wang","year":"2013","unstructured":"Wang Y-H, Li T-H, Lin C-J (2013) Backward Q-learning: The combination of Sarsa algorithm and Q-learning. Eng Appl Artif Intell 26(9):2184\u20132193","journal-title":"Eng Appl Artif Intell"},{"key":"1501_CR28","first-page":"335","volume-title":"Value-difference based exploration: adaptive control between epsilon-greedy and softmax","author":"M Tokic","year":"2011","unstructured":"Tokic M, Palm G (2011) Value-difference based exploration: adaptive control between epsilon-greedy and softmax. Annual Conference on Artificial Intelligence, Berlin, pp 335\u2013346"}],"container-title":["Applied Intelligence"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10489-019-01501-9.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s10489-019-01501-9\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10489-019-01501-9.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2020,6,9]],"date-time":"2020-06-09T23:26:21Z","timestamp":1591745181000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s10489-019-01501-9"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019,6,11]]},"references-count":28,"journal-issue":{"issue":"12","published-print":{"date-parts":[[2019,12]]}},"alternative-id":["1501"],"URL":"https:\/\/doi.org\/10.1007\/s10489-019-01501-9","relation":{},"ISSN":["0924-669X","1573-7497"],"issn-type":[{"value":"0924-669X","type":"print"},{"value":"1573-7497","type":"electronic"}],"subject":[],"published":{"date-parts":[[2019,6,11]]},"assertion":[{"value":"11 June 2019","order":1,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}