{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,2,21]],"date-time":"2025-02-21T01:17:43Z","timestamp":1740100663282,"version":"3.37.3"},"reference-count":23,"publisher":"IEEE","license":[{"start":{"date-parts":[[2022,5,23]],"date-time":"2022-05-23T00:00:00Z","timestamp":1653264000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2022,5,23]],"date-time":"2022-05-23T00:00:00Z","timestamp":1653264000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100006190","name":"Research and Development","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100006190","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100012166","name":"National Key Research and Development Program of China","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100012166","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100018913","name":"Tsinghua Shenzhen International Graduate School","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100018913","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2022,5,23]]},"DOI":"10.1109\/icassp43922.2022.9746211","type":"proceedings-article","created":{"date-parts":[[2022,4,27]],"date-time":"2022-04-27T19:50:34Z","timestamp":1651089034000},"page":"4023-4027","source":"Crossref","is-referenced-by-count":5,"title":["Efficient and Stable Information Directed Exploration for Continuous Reinforcement Learning"],"prefix":"10.1109","author":[{"given":"Mingzhe","family":"Chen","sequence":"first","affiliation":[{"name":"Tsinghua University,Shenzhen International Graduate School,Shenzhen,China"}]},{"given":"Xi","family":"Xiao","sequence":"additional","affiliation":[{"name":"Tsinghua University,Shenzhen International Graduate School,Shenzhen,China"}]},{"given":"Wanpeng","family":"Zhang","sequence":"additional","affiliation":[{"name":"Tsinghua University,Shenzhen International Graduate School,Shenzhen,China"}]},{"given":"Xiaotian","family":"Gao","sequence":"additional","affiliation":[{"name":"Microsoft Research Asia,Beijing,China"}]}],"member":"263","reference":[{"doi-asserted-by":"publisher","key":"ref10","DOI":"10.1561\/2200000070"},{"key":"ref11","article-title":"Deep exploration via randomized value functions","author":"osband","year":"2017","journal-title":"Ph D thesis"},{"year":"2017","author":"chen","article-title":"UCB exploration via q-ensembles","key":"ref12"},{"key":"ref13","first-page":"4033","article-title":"Deep exploration via bootstrapped DQN","author":"osband","year":"2016","journal-title":"Advances in neural information processing systems"},{"doi-asserted-by":"publisher","key":"ref14","DOI":"10.1287\/opre.2017.1663"},{"key":"ref15","first-page":"358","article-title":"Information directed sampling and bandits with heteroscedastic noise","author":"kirschner","year":"2018","journal-title":"Conference on Learning Theory"},{"key":"ref16","article-title":"Information directed reinforcement learning","author":"zanette","year":"2017","journal-title":"tech rep Technical report Technical Report"},{"year":"2018","author":"nikolov","article-title":"Information-directed exploration for deep reinforcement learning","key":"ref17"},{"year":"2021","author":"chen","article-title":"Randomized ensembled double q-learning: Learning fast without a model","key":"ref18"},{"doi-asserted-by":"publisher","key":"ref19","DOI":"10.1109\/IROS.2012.6386109"},{"year":"2015","author":"lillicrap","article-title":"Continuous control with deep reinforcement learning","key":"ref4"},{"year":"2019","author":"berner","article-title":"Dota 2 with large scale deep reinforcement learning","key":"ref3"},{"key":"ref6","first-page":"1334","article-title":"End-to-end training of deep visuomotor policies","volume":"17","author":"levine","year":"2016","journal-title":"The Journal of Machine Learning Research"},{"key":"ref5","first-page":"1329","article-title":"Benchmarking deep reinforcement learning for continuous control","author":"duan","year":"2016","journal-title":"International Conference on Machine Learning"},{"doi-asserted-by":"publisher","key":"ref8","DOI":"10.1023\/A:1013689704352"},{"doi-asserted-by":"publisher","key":"ref7","DOI":"10.1016\/0196-8858(85)90002-8"},{"doi-asserted-by":"publisher","key":"ref2","DOI":"10.1038\/s41586-019-1724-z"},{"key":"ref1","doi-asserted-by":"crossref","first-page":"354","DOI":"10.1038\/nature24270","article-title":"Mastering the game of go without human knowledge","volume":"550","author":"silver","year":"2017","journal-title":"Nature"},{"doi-asserted-by":"publisher","key":"ref9","DOI":"10.2307\/2332286"},{"year":"2016","author":"brockman","article-title":"Openai gym","key":"ref20"},{"year":"2019","author":"wang","article-title":"Exploring model-based planning with policy networks","key":"ref22"},{"year":"2020","author":"lee","article-title":"Sunrise: A simple unified framework for ensemble learning in deep reinforcement learning","key":"ref21"},{"key":"ref23","first-page":"1861","article-title":"Soft actor-critic: Off-policy maximum entropy deep reinforcement learning with a stochastic actor","author":"haarnoja","year":"2018","journal-title":"International Conference on Machine Learning"}],"event":{"name":"ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","start":{"date-parts":[[2022,5,23]]},"location":"Singapore, Singapore","end":{"date-parts":[[2022,5,27]]}},"container-title":["ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9745891\/9746004\/09746211.pdf?arnumber=9746211","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,8,15]],"date-time":"2022-08-15T20:07:27Z","timestamp":1660594047000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9746211\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,5,23]]},"references-count":23,"URL":"https:\/\/doi.org\/10.1109\/icassp43922.2022.9746211","relation":{},"subject":[],"published":{"date-parts":[[2022,5,23]]}}}