{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,10,29]],"date-time":"2024-10-29T19:20:50Z","timestamp":1730229650907,"version":"3.28.0"},"reference-count":12,"publisher":"IEEE","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2017,7]]},"DOI":"10.1109\/icar.2017.8023674","type":"proceedings-article","created":{"date-parts":[[2017,8,31]],"date-time":"2017-08-31T16:38:37Z","timestamp":1504197517000},"page":"608-613","source":"Crossref","is-referenced-by-count":8,"title":["Model-based Q-learning for humanoid robots"],"prefix":"10.1109","author":[{"given":"Than D.","family":"Le","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"An T.","family":"Le","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Duy T.","family":"Nguyen","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref4","volume":"ii","author":"bertsekas","year":"2007","journal-title":"Dynamic Programming and Optimal Control"},{"journal-title":"Approximate Dynamic Programming Solving the Curses of Dimensionality","year":"2007","author":"warren","key":"ref3"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-27645-3_7"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1002\/wics.1314"},{"journal-title":"Learning from delayed rewards","year":"1989","author":"watkins","key":"ref11"},{"key":"ref5","first-page":"279292","volume":"8","author":"watkins","year":"1992","journal-title":"Q-learning Machine Learning"},{"journal-title":"Q-learning Machine Learning","year":"1992","key":"ref12"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1126\/science.275.5306.1593"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1007\/BF00115009"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1177\/0278364913495721"},{"key":"ref9","article-title":"Temporal Difference Learning and TD-Gammon","volume":"38","author":"gerald","year":"1995","journal-title":"Communications of the ACM"},{"journal-title":"Introduction to Reinforcement Learning","year":"1998","author":"sutton","key":"ref1"}],"event":{"name":"2017 18th International Conference on Advanced Robotics (ICAR)","start":{"date-parts":[[2017,7,10]]},"location":"Hong Kong, China","end":{"date-parts":[[2017,7,12]]}},"container-title":["2017 18th International Conference on Advanced Robotics (ICAR)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/8016525\/8023485\/08023674.pdf?arnumber=8023674","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2017,9,11]],"date-time":"2017-09-11T19:11:57Z","timestamp":1505157117000},"score":1,"resource":{"primary":{"URL":"http:\/\/ieeexplore.ieee.org\/document\/8023674\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2017,7]]},"references-count":12,"URL":"https:\/\/doi.org\/10.1109\/icar.2017.8023674","relation":{},"subject":[],"published":{"date-parts":[[2017,7]]}}}