{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,3]],"date-time":"2025-12-03T21:28:37Z","timestamp":1764797317510,"version":"3.37.3"},"reference-count":49,"publisher":"Springer Science and Business Media LLC","issue":"5","license":[{"start":{"date-parts":[[2020,4,6]],"date-time":"2020-04-06T00:00:00Z","timestamp":1586131200000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2020,4,6]],"date-time":"2020-04-06T00:00:00Z","timestamp":1586131200000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["SN Appl. Sci."],"published-print":{"date-parts":[[2020,5]]},"DOI":"10.1007\/s42452-020-2560-3","type":"journal-article","created":{"date-parts":[[2020,4,6]],"date-time":"2020-04-06T10:02:46Z","timestamp":1586167366000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":12,"title":["Reinforcement learning applied to games"],"prefix":"10.1007","volume":"2","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-6173-7802","authenticated-orcid":false,"given":"Jo\u00e3o","family":"Crespo","sequence":"first","affiliation":[]},{"given":"Andreas","family":"Wichert","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2020,4,6]]},"reference":[{"key":"2560_CR1","doi-asserted-by":"publisher","first-page":"24","DOI":"10.1007\/978-3-642-31866-5_3","volume-title":"Advances in computer games","author":"P Baudi\u0161","year":"2012","unstructured":"Baudi\u0161 P, Gailly J (2012) Pachi: state of the art open source go program. In: van den Herik HJ, Plaat A (eds) Advances in computer games. Springer, Berlin, pp 24\u201338"},{"key":"2560_CR2","unstructured":"Bellemare MG, Naddaf Y, Veness J, Bowling M (2015) The arcade learning environment: an evaluation platform for general agents. In: Proceedings of the 24th international conference on artificial intelligence, IJCAI\u201915, AAAI Press, pp 4148\u20134152. http:\/\/dl.acm.org\/citation.cfm?id=2832747.2832830"},{"issue":"2","key":"2560_CR3","doi-asserted-by":"publisher","first-page":"157","DOI":"10.1109\/72.279181","volume":"5","author":"Y Bengio","year":"1994","unstructured":"Bengio Y, Simard P, Frasconi P (1994) Learning long-term dependencies with gradient descent is difficult. IEEE Trans Neural Netw 5(2):157\u2013166. https:\/\/doi.org\/10.1109\/72.279181","journal-title":"IEEE Trans Neural Netw"},{"key":"2560_CR4","doi-asserted-by":"publisher","unstructured":"Bouzy B, Helmstetter B (2004) Monte-Carlo Go developments. Springer, Boston, pp 159\u2013174. https:\/\/doi.org\/10.1007\/978-0-387-35706-5_11","DOI":"10.1007\/978-0-387-35706-5_11"},{"key":"2560_CR5","unstructured":"Baker B, Kanitscheider I, Markov T, Wu Y, Powell G, McGrew B, Mordatch I (2019) Emergent tool use from multi-agent autocurricula. arXiv e-prints arXiv:1909.07528"},{"issue":"1","key":"2560_CR6","doi-asserted-by":"publisher","first-page":"57","DOI":"10.1016\/S0004-3702(01)00129-1","volume":"134","author":"M Campbell","year":"2002","unstructured":"Campbell M, Hoane A, hsiung Hsu F (2002) Deep blue. Artif Intell 134(1):57\u201383. https:\/\/doi.org\/10.1016\/S0004-3702(01)00129-1","journal-title":"Artif Intell"},{"key":"2560_CR7","doi-asserted-by":"crossref","unstructured":"Caruana R (1993) Multitask learning: a knowledge-based source of inductive bias. In: Proceedings of the tenth international conference on machine learning, Morgan Kaufmann, pp 41\u201348","DOI":"10.1016\/B978-1-55860-307-3.50012-5"},{"issue":"9","key":"2560_CR8","doi-asserted-by":"publisher","first-page":"1342","DOI":"10.1038\/s41591-018-0107-6","volume":"24","author":"JD Fauw","year":"2018","unstructured":"Fauw JD, Ledsam JR, Romera-Paredes B, Nikolov S, Tomasev N, Blackwell S, Askham H, Glorot X, O\u2019Donoghue B, Visentin D, van den Driessche G, Lakshminarayanan B, Meyer C, Mackinder F, Bouton S, Ayoub K, Chopra R, King D, Karthikesalingam A, Hughes CO, Raine R, Hughes J, Sim DA, Egan C, Tufail A, Montgomery H, Hassabis D, Rees G, Back T, Khaw PT, Suleyman M, Cornebise J, Keane PA, Ronneberger O (2018) Clinically applicable deep learning for diagnosis and referral in retinal disease. Nat Med 24(9):1342\u20131350. https:\/\/doi.org\/10.1038\/s41591-018-0107-6","journal-title":"Nat Med"},{"issue":"11","key":"2560_CR9","doi-asserted-by":"publisher","first-page":"1367","DOI":"10.1287\/mnsc.35.11.1367","volume":"35","author":"PW Glynn","year":"1989","unstructured":"Glynn PW, Iglehart DL (1989) Importance sampling for stochastic simulations. Manag Sci 35(11):1367\u20131392","journal-title":"Manag Sci"},{"issue":"1","key":"2560_CR10","doi-asserted-by":"publisher","first-page":"17","DOI":"10.1016\/0893-6080(88)90021-4","volume":"1","author":"S Grossberg","year":"1988","unstructured":"Grossberg S (1988) Nonlinear neural networks: principles, mechanisms, and architectures. Neural Netw 1(1):17\u201361. https:\/\/doi.org\/10.1016\/0893-6080(88)90021-4","journal-title":"Neural Netw"},{"key":"2560_CR11","doi-asserted-by":"publisher","unstructured":"He K, Zhang X, Ren S, Sun J (2016) Deep residual learning for image recognition. In: 2016 IEEE conference on computer vision and pattern recognition (CVPR), pp 770\u2013778. https:\/\/doi.org\/10.1109\/CVPR.2016.90","DOI":"10.1109\/CVPR.2016.90"},{"key":"2560_CR12","doi-asserted-by":"publisher","unstructured":"He K, Zhang X, Ren S, Sun J (2016) Identity mappings in deep residual networks. In: Computer vision\u2014ECCV 2016, Springer International Publishing, pp 630\u2013645. https:\/\/doi.org\/10.1007\/978-3-319-46493-0_38","DOI":"10.1007\/978-3-319-46493-0_38"},{"key":"2560_CR13","unstructured":"Ioffe S, Szegedy C (2015) Batch normalization: accelerating deep network training by reducing internal covariate shift. In: Bach F, Blei D (eds) Proceedings of the 32nd international conference on machine learning. Proceedings of machine learning research, vol\u00a037, PMLR, Lille, France, pp 448\u2013456. http:\/\/proceedings.mlr.press\/v37\/ioffe15.html"},{"key":"2560_CR14","unstructured":"Kakade S, Langford J (2002) Approximately optimal approximate reinforcement learning. In: Proceedings of the nineteenth international conference on machine learning, ICML \u201902, Morgan Kaufmann Publishers Inc., San Francisco, CA, USA, pp 267\u2013274. http:\/\/dl.acm.org\/citation.cfm?id=645531.656005"},{"issue":"3","key":"2560_CR15","doi-asserted-by":"publisher","first-page":"462","DOI":"10.1214\/aoms\/1177729392","volume":"23","author":"J Kiefer","year":"1952","unstructured":"Kiefer J, Wolfowitz J (1952) Stochastic estimation of the maximum of a regression function. Ann Math Stat 23(3):462\u2013466. https:\/\/doi.org\/10.1214\/aoms\/1177729392","journal-title":"Ann Math Stat"},{"issue":"4","key":"2560_CR16","doi-asserted-by":"publisher","first-page":"293","DOI":"10.1016\/0004-3702(75)90019-3","volume":"6","author":"DE Knuth","year":"1975","unstructured":"Knuth DE, Moore RW (1975) An analysis of alpha\u2013beta pruning. Artif Intell 6(4):293\u2013326. https:\/\/doi.org\/10.1016\/0004-3702(75)90019-3","journal-title":"Artif Intell"},{"issue":"4","key":"2560_CR17","doi-asserted-by":"publisher","first-page":"1143","DOI":"10.1137\/S0363012901385691","volume":"42","author":"VR Konda","year":"2003","unstructured":"Konda VR, Tsitsiklis JN (2003) OnActor-critic algorithms. SIAM J Control Optim 42(4):1143\u20131166","journal-title":"SIAM J Control Optim"},{"key":"2560_CR18","unstructured":"Krizhevsky A, Sutskever I, Hinton GE (2012) Imagenet classification with deep convolutional neural networks. In: Proceedings of the 25th international conference on neural information processing systems, vol 1, NIPS\u201912, Curran Associates Inc., USA, pp 1097\u20131105"},{"issue":"1","key":"2560_CR19","doi-asserted-by":"publisher","first-page":"79","DOI":"10.1214\/aoms\/1177729694","volume":"22","author":"S Kullback","year":"1951","unstructured":"Kullback S, Leibler RA (1951) On information and sufficiency. Ann Math Stat 22(1):79\u201386. https:\/\/doi.org\/10.1214\/aoms\/1177729694","journal-title":"Ann Math Stat"},{"issue":"11","key":"2560_CR20","doi-asserted-by":"publisher","first-page":"2278","DOI":"10.1109\/5.726791","volume":"86","author":"Y Lecun","year":"1998","unstructured":"Lecun Y, Bottou L, Bengio Y, Haffner P (1998) Gradient-based learning applied to document recognition. Proc IEEE 86(11):2278\u20132324. https:\/\/doi.org\/10.1109\/5.726791","journal-title":"Proc IEEE"},{"key":"2560_CR21","unstructured":"Lin LJ (1992) Reinforcement learning for robots using neural networks. PhD thesis, Pittsburgh, PA, USA, uMI Order No. GAX93-22750"},{"issue":"7540","key":"2560_CR22","doi-asserted-by":"publisher","first-page":"529","DOI":"10.1038\/nature14236","volume":"518","author":"V Mnih","year":"2015","unstructured":"Mnih V, Kavukcuoglu K, Silver D, Rusu AA, Veness J, Bellemare MG, Graves A, Riedmiller M, Fidjeland AK, Ostrovski G, Petersen S, Beattie C, Sadik A, Antonoglou I, King H, Kumaran D, Wierstra D, Legg S, Hassabis D (2015) Human-level control through deep reinforcement learning. Nature 518(7540):529\u2013533. https:\/\/doi.org\/10.1038\/nature14236","journal-title":"Nature"},{"key":"2560_CR23","unstructured":"Mnih V, Puigdom\u00e8nech Badia A, Mirza M, Graves A, Lillicrap TP, Harley T, Silver D, Kavukcuoglu K (2016) Asynchronous methods for deep reinforcement learning. arXiv e-prints arXiv:1602.01783"},{"key":"2560_CR24","unstructured":"Ng AY (2004) Feature selection, l1 versus l2 regularization, and rotational invariance. In: Proceedings of the twenty-first international conference on machine learning, ICML\u201904, ACM, New York, NY, USA, p\u00a078"},{"key":"2560_CR25","unstructured":"Pavlov IP (1941) Lectures on conditioned reflexes. In: Conditioned reflexes and psychiatry, vol II, International Publishers, New York"},{"key":"2560_CR26","unstructured":"Raschka S (2014) Naive Bayes and text classification I: introduction and theory. arXiv e-prints arXiv:1410.5329"},{"key":"2560_CR27","unstructured":"Evans R, Jumper J, Kirkpatrick J, Sifre L, Green TFG, Qin C, Zidek A, Nelson A, Bridgland A, Penedones H, Petersen S, Simonyan K, Crossan S, Jones DT, Silver D, Kavukcuoglu K, Hassabis D, Senior AW (Dec 2018) De novo structure prediction with deep-learning based scoring"},{"key":"2560_CR28","doi-asserted-by":"publisher","first-page":"386","DOI":"10.1037\/h0042519","volume":"65","author":"F Rosenblatt","year":"1958","unstructured":"Rosenblatt F (1958) The perceptron: a probabilistic model for information storage and organization in the brain. Psychol Rev 65:386\u2013408","journal-title":"Psychol Rev"},{"issue":"3","key":"2560_CR29","doi-asserted-by":"publisher","first-page":"203","DOI":"10.1007\/s10472-011-9258-6","volume":"61","author":"CD Rosin","year":"2011","unstructured":"Rosin CD (2011) Multi-armed bandits with episode context. Ann Math Artif Intell 61(3):203\u2013230. https:\/\/doi.org\/10.1007\/s10472-011-9258-6","journal-title":"Ann Math Artif Intell"},{"key":"2560_CR30","unstructured":"Ruder S (2016) An overview of gradient descent optimization algorithms. arXiv e-prints arXiv:1609.04747"},{"key":"2560_CR31","unstructured":"Rumelhart DE, Hinton GE, Williams RJ (1986) Parallel distributed processing: explorations in the microstructure of cognition, Learning internal representations by error propagation, vol 1, MIT Press, Cambridge, MA, pp 318\u2013362. http:\/\/dl.acm.org\/citation.cfm?id=104279.104293"},{"key":"2560_CR32","doi-asserted-by":"publisher","unstructured":"Scherer D, M\u00fcller A, Behnke S (2010) Evaluation of pooling operations in convolutional architectures for object recognition. In: Artificial neural networks\u2014ICANN 2010, Springer, Berlin, pp 92\u2013101. https:\/\/doi.org\/10.1007\/978-3-642-15825-4_10","DOI":"10.1007\/978-3-642-15825-4_10"},{"key":"2560_CR33","unstructured":"Schulman J, Levine S, Moritz P, Jordan M, Abbeel P (2015) Trust region policy optimization. In: Proceedings of the 32Nd international conference on international conference on machine learning, ICML\u201915, JMLR.org, vol 37, pp 1889\u20131897. http:\/\/dl.acm.org\/citation.cfm?id=3045118.3045319"},{"key":"2560_CR34","unstructured":"Schulman J, Wolski F, Dhariwal P, Radford A, Klimov O (2017) Proximal policy optimization algorithms. arXiv e-prints arXiv:1707.06347"},{"issue":"7587","key":"2560_CR35","doi-asserted-by":"publisher","first-page":"484","DOI":"10.1038\/nature16961","volume":"529","author":"D Silver","year":"2016","unstructured":"Silver D, Huang A, Maddison CJ, Guez A, Sifre L, van den Driessche G, Schrittwieser J, Antonoglou I, Panneershelvam V, Lanctot M, Dieleman S, Grewe D, Nham J, Kalchbrenner N, Sutskever I, Lillicrap T, Leach M, Kavukcuoglu K, Graepel T, Hassabis D (2016) Mastering the game of go with deep neural networks and tree search. Nature 529(7587):484\u2013489. https:\/\/doi.org\/10.1038\/nature16961","journal-title":"Nature"},{"key":"2560_CR36","unstructured":"Silver D, Hubert T, Schrittwieser J, Antonoglou I, Lai M, Guez A, Lanctot M, Sifre L, Kumaran D, Graepel T, Lillicrap T, Simonyan K, Hassabis D (2017) Mastering Chess and Shogi by self-play with a general reinforcement learning algorithm. arXiv e-prints arXiv:1712.01815"},{"issue":"6419","key":"2560_CR37","doi-asserted-by":"publisher","first-page":"1140","DOI":"10.1126\/science.aar6404","volume":"362","author":"D Silver","year":"2018","unstructured":"Silver D, Hubert T, Schrittwieser J, Antonoglou I, Lai M, Guez A, Lanctot M, Sifre L, Kumaran D, Graepel T, Lillicrap T, Simonyan K, Hassabis D (2018) A general reinforcement learning algorithm that masters chess, shogi, and go through self-play. Science 362(6419):1140\u20131144. https:\/\/doi.org\/10.1126\/science.aar6404","journal-title":"Science"},{"issue":"7676","key":"2560_CR38","doi-asserted-by":"publisher","first-page":"354","DOI":"10.1038\/nature24270","volume":"550","author":"D Silver","year":"2017","unstructured":"Silver D, Schrittwieser J, Simonyan K, Antonoglou I, Huang A, Guez A, Hubert T, Baker L, Lai M, Bolton A, Chen Y, Lillicrap T, Hui F, Sifre L, van den Driessche G, Graepel T, Hassabis D (2017) Mastering the game of go without human knowledge. Nature 550(7676):354\u2013359. https:\/\/doi.org\/10.1038\/nature24270","journal-title":"Nature"},{"key":"2560_CR39","first-page":"1929","volume":"15","author":"N Srivastava","year":"2014","unstructured":"Srivastava N, Hinton G, Krizhevsky A, Sutskever I, Salakhutdinov R (2014) Dropout: a simple way to prevent neural networks from overfitting. J Mach Learn Res 15:1929\u20131958","journal-title":"J Mach Learn Res"},{"issue":"1","key":"2560_CR40","doi-asserted-by":"publisher","first-page":"9","DOI":"10.1007\/bf00115009","volume":"3","author":"RS Sutton","year":"1988","unstructured":"Sutton RS (1988) Learning to predict by the methods of temporal differences. Mach Learn 3(1):9\u201344. https:\/\/doi.org\/10.1007\/bf00115009","journal-title":"Mach Learn"},{"key":"2560_CR41","volume-title":"Reinforcement learning: an introduction","author":"RS Sutton","year":"2018","unstructured":"Sutton RS, Barto AG (2018) Reinforcement learning: an introduction. MIT Press, New York"},{"key":"2560_CR42","unstructured":"Sutton RS, McAllester D, Singh S, Mansour Y (1999) Policy gradient methods for reinforcement learning with function approximation. In: Proceedings of the 12th international conference on neural information processing systems, NIPS\u201999, MIT Press, Cambridge, MA, USA, pp 1057\u20131063. http:\/\/dl.acm.org\/citation.cfm?id=3009657.3009806"},{"key":"2560_CR43","doi-asserted-by":"publisher","unstructured":"Tesauro G (1990) Neurogammon: a neural-network backgammon program. In: 1990 IJCNN international joint conference on neural networks, vol 3, pp 33\u201339. https:\/\/doi.org\/10.1109\/IJCNN.1990.137821","DOI":"10.1109\/IJCNN.1990.137821"},{"issue":"3","key":"2560_CR44","doi-asserted-by":"publisher","first-page":"58","DOI":"10.1145\/203330.203343","volume":"38","author":"G Tesauro","year":"1995","unstructured":"Tesauro G (1995) Temporal difference learning and td-gammon. Commun ACM 38(3):58\u201368. https:\/\/doi.org\/10.1145\/203330.203343","journal-title":"Commun ACM"},{"key":"2560_CR45","doi-asserted-by":"publisher","unstructured":"Tokic M (2010) Adaptive $$\\epsilon$$-greedy exploration in reinforcement learning based on value differences. In: KI 2010: advances in artificial intelligence, Springer, Berlin, pp 203\u2013210. https:\/\/doi.org\/10.1007\/978-3-642-16111-7_23","DOI":"10.1007\/978-3-642-16111-7_23"},{"key":"2560_CR46","unstructured":"Vinyals O, Babuschkin I, Chung J, Mathieu M, Jaderberg M, Czarnecki WM, Dudzik A, Huang A, Georgiev P, Powell R, Ewalds T, Horgan D, Kroiss M, Danihelka I, Agapiou J, Oh J, Dalibard V, Choi D, Sifre L, Sulsky Y, Vezhnevets S, Molloy J, Cai T, Budden D, Paine T, Gulcehre C, Wang Z, Pfaff T, Pohlen T, Wu Y, Yogatama D, Cohen J, McKinney K, Smith O, Schaul T, Lillicrap T, Apps C, Kavukcuoglu K, Hassabis D, Silver D (2019) AlphaStar: mastering the real-time strategy game StarCraft II. https:\/\/deepmind.com\/blog\/alphastar-mastering-real-time-strategy-game-starcraft-ii\/"},{"key":"2560_CR47","unstructured":"Watkins CJCH (1989) Learning from delayed rewards. PhD thesis, King\u2019s College, Cambridge"},{"issue":"3","key":"2560_CR48","doi-asserted-by":"publisher","first-page":"229","DOI":"10.1007\/BF00992696","volume":"8","author":"RJ Williams","year":"1992","unstructured":"Williams RJ (1992) Simple statistical gradient-following algorithms for connectionist reinforcement learning. Mach Learn 8(3):229\u2013256. https:\/\/doi.org\/10.1007\/BF00992696","journal-title":"Mach Learn"},{"key":"2560_CR49","unstructured":"Yang LC, Chou SY, Yang YH (2017) MidiNet: a convolutional generative adversarial network for symbolic-domain music generation. arXiv e-prints arXiv: 1703.10847"}],"container-title":["SN Applied Sciences"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s42452-020-2560-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s42452-020-2560-3\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s42452-020-2560-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2021,4,5]],"date-time":"2021-04-05T23:18:39Z","timestamp":1617664719000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s42452-020-2560-3"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020,4,6]]},"references-count":49,"journal-issue":{"issue":"5","published-print":{"date-parts":[[2020,5]]}},"alternative-id":["2560"],"URL":"https:\/\/doi.org\/10.1007\/s42452-020-2560-3","relation":{},"ISSN":["2523-3963","2523-3971"],"issn-type":[{"type":"print","value":"2523-3963"},{"type":"electronic","value":"2523-3971"}],"subject":[],"published":{"date-parts":[[2020,4,6]]},"assertion":[{"value":"23 November 2019","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"17 March 2020","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"6 April 2020","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Compliance with ethical standards"}},{"value":"On behalf of all authors, the corresponding author states that there is no conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}},{"value":"This content has been made available to all.","name":"free","label":"Free to read"}],"article-number":"824"}}