{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,26]],"date-time":"2026-03-26T15:36:26Z","timestamp":1774539386547,"version":"3.50.1"},"reference-count":22,"publisher":"IEEE","license":[{"start":{"date-parts":[[2019,10,1]],"date-time":"2019-10-01T00:00:00Z","timestamp":1569888000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2019,10,1]],"date-time":"2019-10-01T00:00:00Z","timestamp":1569888000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2019,10,1]],"date-time":"2019-10-01T00:00:00Z","timestamp":1569888000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2019,10]]},"DOI":"10.1109\/wcsp.2019.8928124","type":"proceedings-article","created":{"date-parts":[[2019,12,27]],"date-time":"2019-12-27T12:06:26Z","timestamp":1577448386000},"page":"1-6","source":"Crossref","is-referenced-by-count":5,"title":["Selector-Actor-Critic and Tuner-Actor-Critic Algorithms for Reinforcement Learning"],"prefix":"10.1109","author":[{"given":"Ala'eddin","family":"Masadeh","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zhengdao","family":"Wang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ahmed E.","family":"Kamal","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4614-6675-8_674"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/IJCNN.2014.6889733"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/TSMCB.2011.2170565"},{"key":"ref13","author":"lillicrap","year":"2015","journal-title":"Continuous control with deep reinforcement learning"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1016\/B978-1-55860-141-3.50030-4"},{"key":"ref15","first-page":"320","article-title":"Towards a data efficient off-policy policy gradient","author":"hanna","year":"2018","journal-title":"Proc of the AAAI Spring Symposium on Data Efficient Reinforcement Learning"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1007\/BF00992698"},{"key":"ref17","author":"degris","year":"2012","journal-title":"Off-policy Actor-critic"},{"key":"ref18","first-page":"719","article-title":"Toward off-policy learning control with function approximation","author":"maei","year":"2010","journal-title":"Proc of the International Conference on Machine Learning (ICML)"},{"key":"ref19","author":"hanna","year":"2017","journal-title":"Data-efficient policy evaluation through behavior policy search"},{"key":"ref4","doi-asserted-by":"crossref","first-page":"354","DOI":"10.1038\/nature24270","article-title":"Mastering the game of go without human knowledge","volume":"550","author":"silver","year":"2017","journal-title":"Nature"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/AFGR.2002.1004150"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2017.2654539"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1038\/nature16961"},{"key":"ref8","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1561\/2300000021","article-title":"A survey on policy search for robotics","volume":"2","author":"deisenroth","year":"2013","journal-title":"Foundations and Trends in Robotics"},{"key":"ref7","author":"sutton","year":"2018","journal-title":"Reinforcement Learning An Introduction"},{"key":"ref2","first-page":"2829","article-title":"Continuous deep q-learning with model-based acceleration","author":"gu","year":"2016","journal-title":"Proc of the International Conference on Machine Learning"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/TNN.1998.712192"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2018.8463189"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/TNN.2007.899161"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/WCNC.2016.7564863"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1016\/j.automatica.2009.07.008"}],"event":{"name":"2019 11th International Conference on Wireless Communications and Signal Processing (WCSP)","location":"Xi'an, China","start":{"date-parts":[[2019,10,23]]},"end":{"date-parts":[[2019,10,25]]}},"container-title":["2019 11th International Conference on Wireless Communications and Signal Processing (WCSP)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/8914721\/8927844\/08928124.pdf?arnumber=8928124","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,7,19]],"date-time":"2022-07-19T16:23:11Z","timestamp":1658247791000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/8928124\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019,10]]},"references-count":22,"URL":"https:\/\/doi.org\/10.1109\/wcsp.2019.8928124","relation":{},"subject":[],"published":{"date-parts":[[2019,10]]}}}