{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,1,12]],"date-time":"2024-01-12T19:08:42Z","timestamp":1705086522797},"reference-count":9,"publisher":"Institute of Electronics, Information and Communications Engineers (IEICE)","issue":"9","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEICE Trans. Inf. &amp; Syst."],"published-print":{"date-parts":[[2018,9,1]]},"DOI":"10.1587\/transinf.2018edl8011","type":"journal-article","created":{"date-parts":[[2018,8,31]],"date-time":"2018-08-31T22:44:41Z","timestamp":1535755481000},"page":"2409-2412","source":"Crossref","is-referenced-by-count":2,"title":["Reward-Based Exploration: Adaptive Control for Deep Reinforcement Learning"],"prefix":"10.1587","volume":"E101.D","author":[{"given":"Zhi-xiong","family":"XU","sequence":"first","affiliation":[{"name":"Institute of Command Information System, Army Engineering University"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Lei","family":"CAO","sequence":"additional","affiliation":[{"name":"Institute of Command Information System, Army Engineering University"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xi-liang","family":"CHEN","sequence":"additional","affiliation":[{"name":"Institute of Command Information System, Army Engineering University"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Chen-xi","family":"LI","sequence":"additional","affiliation":[{"name":"Institute of Command Information System, Army Engineering University"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"532","reference":[{"key":"1","doi-asserted-by":"crossref","unstructured":"[1] R.S. Sutton and A.G. Barto, Reinforcement Learning: An Introduction, MIT Press, 1998.","DOI":"10.1109\/TNN.1998.712192"},{"key":"2","unstructured":"[2] F. Zhang, J. Leitner, M. Milford, et al., \u201cTowards vision-based deep reinforcement learning for robotic motion control,\u201d Computer Science, vol.56, no.2, pp.12-32, 2015."},{"key":"3","doi-asserted-by":"publisher","unstructured":"[3] V. Mnih, K. Kavukcuoglu, D. Silver, A.A. Rusu, J. Veness, M.G. Bellemare, A. Graves, M. Riedmiller, A.K. Fidjeland, G. Ostrovski, S. Petersen, C. Beattie, A. Sadik, I. Antonoglou, H. King, D. Kumaran, D. Wierstra, S. Legg, and D. Hassabis, \u201cHuman-level control through deep reinforcement learning,\u201d Nature, vol.518, no.7540, p.529, 2015. 10.1038\/nature14236","DOI":"10.1038\/nature14236"},{"key":"4","unstructured":"[4] J. Schulman, S. Levine, P. Moritz, et al., \u201cTrust region policy optimization,\u201d ICML, 2015."},{"key":"5","unstructured":"[5] M. Tokic and G. Palm, \u201cValue-difference based exploration: Adaptive control between epsilon-greedy and softmax,\u201d Advances in Artificial Intelligence, Lecture Notes in Computer Science, vol.7006, pp.335-346, Springer Berlin Heidelberg, Berlin, Heidelberg, 2011."},{"key":"6","unstructured":"[6] H. Tang, R. Houthooft, D. Foote, et al., \u201cExploration: A study of count-based exploration for deep reinforcement learning,\u201d vol.64, no.3, pp.65-98, 2016."},{"key":"7","unstructured":"[7] R. Houthooft, X. Chen, Y. Duan, et al., \u201cVIME: Variational information maximizing exploration,\u201d Neural Information Processing Systems, 2016."},{"key":"8","unstructured":"[8] T.P. Lillicrap, J.J. Hunt, A. Pritzel, et al., \u201cContinuous control with deep reinforcement learning,\u201d arXiv preprint arXiv:1509.02971, 2015."},{"key":"9","unstructured":"[9] G. Brockman, V. Cheung, and L. Pettersson, et al., \u201cOpenAI gym,\u201d arXiv preprint arXiv:1606.01540, 2016."}],"container-title":["IEICE Transactions on Information and Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/www.jstage.jst.go.jp\/article\/transinf\/E101.D\/9\/E101.D_2018EDL8011\/_pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,10,23]],"date-time":"2019-10-23T10:09:00Z","timestamp":1571825340000},"score":1,"resource":{"primary":{"URL":"https:\/\/www.jstage.jst.go.jp\/article\/transinf\/E101.D\/9\/E101.D_2018EDL8011\/_article"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018,9,1]]},"references-count":9,"journal-issue":{"issue":"9","published-print":{"date-parts":[[2018]]}},"URL":"https:\/\/doi.org\/10.1587\/transinf.2018edl8011","relation":{},"ISSN":["0916-8532","1745-1361"],"issn-type":[{"value":"0916-8532","type":"print"},{"value":"1745-1361","type":"electronic"}],"subject":[],"published":{"date-parts":[[2018,9,1]]}}}