{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,19]],"date-time":"2025-10-19T15:58:39Z","timestamp":1760889519479,"version":"3.28.0"},"reference-count":26,"publisher":"IEEE","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2017,9]]},"DOI":"10.1109\/iros.2017.8205960","type":"proceedings-article","created":{"date-parts":[[2017,12,14]],"date-time":"2017-12-14T22:12:59Z","timestamp":1513289579000},"page":"1545-1550","source":"Crossref","is-referenced-by-count":10,"title":["Deep dynamic policy programming for robot control with raw images"],"prefix":"10.1109","author":[{"given":"Yoshihisa","family":"Tsurumine","sequence":"first","affiliation":[]},{"given":"Yunduan","family":"Cui","sequence":"additional","affiliation":[]},{"given":"Eiji","family":"Uchibe","sequence":"additional","affiliation":[]},{"given":"Takamitsu","family":"Matsubara","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref10","first-page":"2094","article-title":"Deep reinforcement learning with double Q-learning","author":"van hasselt","year":"2016","journal-title":"Association for the Advancement of Artificial Intelligence (AAAI)"},{"key":"ref11","first-page":"1995","article-title":"Dueling network architectures for deep reinforcement learning","author":"wang","year":"2016","journal-title":"International Conference on Machine Learning (ICML) ICML'16"},{"key":"ref12","first-page":"119","article-title":"Dynamic policy programming with function approximation","author":"azar","year":"2011","journal-title":"International Conference on Artificial Intelligence and Statistics (AISTATS)"},{"key":"ref13","first-page":"3207","article-title":"Dynamic policy programming","volume":"13","author":"azar","year":"2012","journal-title":"The Journal of Machine Learning Research"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1007\/BF00992698"},{"key":"ref15","first-page":"1038","article-title":"Generalization in reinforcement learning: Successful examples using sparse coarse coding","author":"sutton","year":"1996","journal-title":"Advances in Neural Information Processing Systems (NIPS)"},{"key":"ref16","first-page":"1107","article-title":"Least-squares policy iteration","volume":"4","author":"lagoudakis","year":"2003","journal-title":"The Journal of Machine Learning Research"},{"key":"ref17","first-page":"1369","article-title":"Linearly-solvable markov decision problems","author":"todorov","year":"2006","journal-title":"Advances in Neural Information Processing Systems (NIPS)"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1080\/01691864.2016.1274680"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/HUMANOIDS.2016.7803345"},{"key":"ref4","first-page":"1097","article-title":"Imagenet classification with deep convolutional neural networks","author":"krizhevsky","year":"2012","journal-title":"Advances in Neural Information Processing Systems (NIPS)"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/MRA.2010.936957"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298594"},{"journal-title":"Machine Learning for Aerial Image Labeling","year":"2013","author":"mnih","key":"ref5"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2013.6638947"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/TASL.2011.2134090"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1177\/0278364913495721"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1038\/nature14236"},{"journal-title":"Reinforcement Learning An Introduction","year":"1998","author":"sutton","key":"ref1"},{"journal-title":"Tensorflow Large-scale machine learning on heterogeneous distributed systems","year":"2016","author":"abadi","key":"ref20"},{"journal-title":"PGQ Combining policy gradient and Q-learning","year":"2016","author":"o'donoghue","key":"ref22"},{"journal-title":"Keras","year":"2015","author":"chollet","key":"ref21"},{"journal-title":"Continuous control with deep reinforcement learning","year":"2015","author":"lillicrap","key":"ref24"},{"key":"ref23","first-page":"1928","article-title":"Asynchronous methods for deep reinforcement learning","author":"mnih","year":"2016","journal-title":"International Conference on Machine Learning (ICML)"},{"key":"ref26","article-title":"Q-prop: Sample-efficient policy gradient with an off-policy critic","author":"gu","year":"2017","journal-title":"International Conference on Learning Representations (ICLR)"},{"key":"ref25","article-title":"Sample efficient actor-critic with experience replay","author":"wang","year":"2017","journal-title":"International Conference on Learning Representations (ICLR)"}],"event":{"name":"2017 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS)","start":{"date-parts":[[2017,9,24]]},"location":"Vancouver, BC","end":{"date-parts":[[2017,9,28]]}},"container-title":["2017 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/8119304\/8202121\/08205960.pdf?arnumber=8205960","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2018,2,28]],"date-time":"2018-02-28T20:40:05Z","timestamp":1519850405000},"score":1,"resource":{"primary":{"URL":"http:\/\/ieeexplore.ieee.org\/document\/8205960\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2017,9]]},"references-count":26,"URL":"https:\/\/doi.org\/10.1109\/iros.2017.8205960","relation":{},"subject":[],"published":{"date-parts":[[2017,9]]}}}