{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,10,30]],"date-time":"2024-10-30T05:33:45Z","timestamp":1730266425496,"version":"3.28.0"},"reference-count":34,"publisher":"IEEE","license":[{"start":{"date-parts":[[2021,7,18]],"date-time":"2021-07-18T00:00:00Z","timestamp":1626566400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2021,7,18]],"date-time":"2021-07-18T00:00:00Z","timestamp":1626566400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2021,7,18]],"date-time":"2021-07-18T00:00:00Z","timestamp":1626566400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021,7,18]]},"DOI":"10.1109\/ijcnn52387.2021.9534274","type":"proceedings-article","created":{"date-parts":[[2021,9,22]],"date-time":"2021-09-22T20:32:37Z","timestamp":1632342757000},"page":"1-8","source":"Crossref","is-referenced-by-count":2,"title":["Mastering the Game of Amazons Fast by Decoupling Network Learning"],"prefix":"10.1109","author":[{"given":"Guoqi","family":"Zhang","sequence":"first","affiliation":[]},{"given":"Xiaoyang","family":"Chen","sequence":"additional","affiliation":[]},{"given":"Ruidong","family":"Chang","sequence":"additional","affiliation":[]},{"given":"Yuhang","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"Cong","family":"Wang","sequence":"additional","affiliation":[]},{"given":"Luyi","family":"Bai","sequence":"additional","affiliation":[]},{"given":"Junwei","family":"Wang","sequence":"additional","affiliation":[]},{"given":"Changming","family":"Xu","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref33","doi-asserted-by":"crossref","first-page":"119","DOI":"10.1016\/j.patcog.2019.01.006","article-title":"Wider or deeper: revisiting the ResNet model for visual recognition","volume":"90","author":"zifeng","year":"2019","journal-title":"Pattern Recognition"},{"key":"ref32","first-page":"4897","article-title":"Monte-Carlo tree search by best arm identification","volume":"30","author":"emilie","year":"0","journal-title":"NIPS 2017 &#x2014; 31st Annual Conference on Neural Information Processing Systems"},{"key":"ref31","article-title":"Monte-Carlo tree search solver","volume":"5131","author":"mark","year":"2008","journal-title":"Lecture Notes in Computer Science"},{"key":"ref30","first-page":"26","article-title":"Exact-Win strategy for overcoming AlphaZero","author":"yen-chi","year":"0","journal-title":"Proceedings of the 2018 International Conference on Computational Intelligence and Intelligent Systems"},{"key":"ref34","first-page":"1346","article-title":"Needle target-insertion trajectory planning based on reforcement learning expert's skill","author":"bi","year":"0","journal-title":"2009 IEEE International Conference on Robotics and Biomimetics (ROBIO)"},{"key":"ref10","doi-asserted-by":"crossref","first-page":"11","DOI":"10.15358\/0340-1650-2018-12-11","article-title":"Monte Carlo tree search","volume":"47","author":"kristina","year":"2018","journal-title":"WiSt - Wirtschaftswissenschaftliches Studium"},{"key":"ref11","first-page":"3215","article-title":"Rainbow: combining improvements in deep reinforcement learning","author":"matteo","year":"2017","journal-title":"AAAI"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1038\/s41586-020-03051-4"},{"key":"ref13","first-page":"6118","article-title":"Value prediction network","volume":"30","author":"junhyuk","year":"2017","journal-title":"Advances in neural information processing systems"},{"journal-title":"Reinforcement Learning An Introduction","year":"2018","author":"sutton","key":"ref14"},{"key":"ref15","doi-asserted-by":"crossref","first-page":"181","DOI":"10.1016\/S0004-3702(99)00052-1","article-title":"Between MDPs and Semi-MDPs: a framework for temporal abstraction in reinforcement learning","volume":"112","author":"s","year":"1999","journal-title":"Artificial Intelligence"},{"key":"ref16","first-page":"4949","article-title":"Value iteration networks","author":"aviv","year":"0","journal-title":"IJCAI'17 Proceedings of the 26th International Joint Conference on Artificial Intelligence"},{"key":"ref17","first-page":"4754","article-title":"Deep reinforcement learning in a handful of trials using probabilistic dynamics models","volume":"31","author":"kurtland","year":"2018","journal-title":"Advances in neural information processing systems"},{"key":"ref18","article-title":"From pixels to torques: policy learning with deep dynamical models","author":"niklas","year":"2015","journal-title":"ArXiv Preprint"},{"key":"ref19","doi-asserted-by":"crossref","first-page":"84","DOI":"10.1145\/3065386","article-title":"ImageNet classification with deep convolutional neural networks","volume":"60","author":"alex","year":"2017","journal-title":"Communications of the ACM"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/TCIAIG.2014.2309077"},{"key":"ref4","doi-asserted-by":"crossref","first-page":"354","DOI":"10.1038\/nature24270","article-title":"Mastering the game of Go without human knowledge","volume":"550","author":"david","year":"2017","journal-title":"Nature"},{"key":"ref27","first-page":"3338","article-title":"Deep learning for real-time atari game play using offline monte-carlo tree search planning","volume":"27","author":"guo","year":"2014","journal-title":"Advances in Neural Information Processing Systems 27"},{"key":"ref3","first-page":"57","volume":"134","author":"murray","year":"2002","journal-title":"Deep Blue &#x201D; Artificial Intelligence - Chips Challenging Champions Games Computers and Artificial Intelligence Archive"},{"key":"ref6","doi-asserted-by":"crossref","first-page":"484","DOI":"10.1038\/nature16961","article-title":"Mastering the game of Go with deep neural networks and tree search","volume":"529","author":"david","year":"2016","journal-title":"Nature"},{"key":"ref29","first-page":"50","article-title":"Research on evaluation function computer game of amazon","volume":"48","author":"guo","year":"2012","journal-title":"Jisuanji Gongcheng Yu Yingyong"},{"key":"ref5","article-title":"Mastering Chess and Shogi by Self-Play with a general reinforcement learning algorithm","author":"david","year":"2017","journal-title":"ArXiv Preprint"},{"key":"ref8","first-page":"125","article-title":"Deep reinforcement learning","author":"li","year":"2018","journal-title":"ArXiv Preprint"},{"key":"ref7","article-title":"Monte-Carlo tree search as regularized policy optimization","author":"grill","year":"2020","journal-title":"ICML"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1016\/j.tcs.2005.09.048"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"ref1","doi-asserted-by":"crossref","first-page":"529","DOI":"10.1038\/nature14236","article-title":"Human-level control through deep reinforcement learning","volume":"518","author":"volodymyr","year":"2015","journal-title":"Nature"},{"key":"ref20","first-page":"3338","article-title":"Deep learning for real-time Atari game play using offline Monte-Carlo tree search planning","volume":"27","author":"guo","year":"2014","journal-title":"Adv Neural Inf Process Syst"},{"journal-title":"Deep reinforcement learning from self-play in imperfect-information games","year":"2016","author":"johannes","key":"ref22"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/TCIAIG.2012.2186810"},{"key":"ref24","first-page":"1766","article-title":"Training deep convolutional neural networks to play Go","author":"christopher","year":"0","journal-title":"Proceedings of The 32nd International Conference on Machine Learning"},{"key":"ref23","article-title":"Move evaluation in Go using deep convolutional neural networks","author":"chris","year":"0","journal-title":"International Conference on Learning Representations ICLR 2015"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.3233\/ICG-1991-14311"},{"journal-title":"AlphaGo master online series of games","year":"2017","author":"huang","key":"ref25"}],"event":{"name":"2021 International Joint Conference on Neural Networks (IJCNN)","start":{"date-parts":[[2021,7,18]]},"location":"Shenzhen, China","end":{"date-parts":[[2021,7,22]]}},"container-title":["2021 International Joint Conference on Neural Networks (IJCNN)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9533266\/9533267\/09534274.pdf?arnumber=9534274","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,5,10]],"date-time":"2022-05-10T15:45:52Z","timestamp":1652197552000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9534274\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,7,18]]},"references-count":34,"URL":"https:\/\/doi.org\/10.1109\/ijcnn52387.2021.9534274","relation":{},"subject":[],"published":{"date-parts":[[2021,7,18]]}}}