{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,10,29]],"date-time":"2024-10-29T13:07:56Z","timestamp":1730207276268,"version":"3.28.0"},"reference-count":36,"publisher":"IEEE","license":[{"start":{"date-parts":[[2019,8,1]],"date-time":"2019-08-01T00:00:00Z","timestamp":1564617600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2019,8,1]],"date-time":"2019-08-01T00:00:00Z","timestamp":1564617600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2019,8,1]],"date-time":"2019-08-01T00:00:00Z","timestamp":1564617600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2019,8]]},"DOI":"10.1109\/cig.2019.8848037","type":"proceedings-article","created":{"date-parts":[[2019,9,26]],"date-time":"2019-09-26T21:49:14Z","timestamp":1569534554000},"page":"1-8","source":"Crossref","is-referenced-by-count":1,"title":["Learning Policies from Self-Play with Policy Gradients and MCTS Value Estimates"],"prefix":"10.1109","author":[{"given":"Dennis J. N. J.","family":"Soemers","sequence":"first","affiliation":[]},{"given":"Eric","family":"Piette","sequence":"additional","affiliation":[]},{"given":"Matthew","family":"Stephenson","sequence":"additional","affiliation":[]},{"given":"Cameron","family":"Browne","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1023\/A:1013689704352"},{"article-title":"Generating sequences with recurrent neural networks","year":"2013","author":"graves","key":"ref32"},{"article-title":"Understanding the impact of entropy on policy optimization","year":"2019","author":"ahmed","key":"ref31"},{"key":"ref30","article-title":"Automatic generation and evaluation of recombination games","author":"browne","year":"2008","journal-title":"Ph D Dissertation"},{"key":"ref36","first-page":"4765","article-title":"A unified approach to interpreting model predictions","author":"lundberg","year":"2017","journal-title":"Adv Neural Inf Process Syst"},{"key":"ref35","first-page":"3207","article-title":"Deep reinforcement learning that matters","author":"henderson","year":"2018","journal-title":"Proc 32nd AAAI Conf Artif Intell AAAI"},{"key":"ref34","doi-asserted-by":"crossref","DOI":"10.1201\/9780429246593","author":"efron","year":"1994","journal-title":"An Introduction to the Bootstrap"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1145\/1553374.1553495"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/TCIAIG.2010.2100396"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-17928-0_8"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/CIG.2011.6031996"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-31866-5_7"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1007\/s10994-012-5280-0"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1016\/j.tcs.2016.06.029"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/CIG.2016.7860411"},{"key":"ref18","doi-asserted-by":"crossref","first-page":"484","DOI":"10.1038\/nature16961","article-title":"Mastering the game of Go with deep neural networks and tree search","volume":"529","author":"silver","year":"2016","journal-title":"Nature"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1016\/j.tcs.2016.06.024"},{"article-title":"Mean actor critic","year":"2018","author":"allen","key":"ref28"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1038\/nature24270"},{"key":"ref27","first-page":"457","article-title":"Off-policy actor-critic","author":"degris","year":"2012","journal-title":"Proc 29th Int Conf Mach Learn"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/TCIAIG.2012.2186810"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1126\/science.aar6404"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/CEC.2019.8790141"},{"key":"ref5","first-page":"5360","article-title":"Thinking fast and slow with deep learning and tree search","author":"anthony","year":"2017","journal-title":"Adv Neural Inf Process Syst"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.3233\/ICG-2007-30403"},{"article-title":"Modification of UCT with patterns in Monte-Carlo Go","year":"2006","author":"gelly","key":"ref7"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-540-75538-8_7"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1145\/1273496.1273531"},{"key":"ref1","doi-asserted-by":"crossref","first-page":"282","DOI":"10.1007\/11871842_29","article-title":"Bandit based Monte-Carlo planning","volume":"4212","author":"kocsis","year":"0","journal-title":"Proc Mach Learn ECML 2006"},{"journal-title":"Reinforcement Learning An Introduction","year":"2018","author":"sutton","key":"ref20"},{"key":"ref22","first-page":"70","article-title":"Strategic features for general games","author":"browne","year":"2019","journal-title":"Proc 2nd Workshop on Knowledge Extraction from Games (KEG)"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/CIG.2018.8490420"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1007\/BF00992696"},{"key":"ref23","first-page":"1057","article-title":"Policy gradient methods for reinforcement learning with function approximation","author":"sutton","year":"2000","journal-title":"Adv Neural Inf Process Syst"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1613\/jair.5507"},{"key":"ref25","article-title":"High-dimensional continuous control using generalized advantage estimation","author":"schulman","year":"2016","journal-title":"4th International Conference on Learning Representations (ICLR 2016)"}],"event":{"name":"2019 IEEE Conference on Games (CoG)","start":{"date-parts":[[2019,8,20]]},"location":"London, United Kingdom","end":{"date-parts":[[2019,8,23]]}},"container-title":["2019 IEEE Conference on Games (CoG)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/8844551\/8847948\/08848037.pdf?arnumber=8848037","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,7,18]],"date-time":"2022-07-18T10:47:05Z","timestamp":1658141225000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/8848037\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019,8]]},"references-count":36,"URL":"https:\/\/doi.org\/10.1109\/cig.2019.8848037","relation":{},"subject":[],"published":{"date-parts":[[2019,8]]}}}