{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,5]],"date-time":"2025-12-05T12:12:35Z","timestamp":1764936755291,"version":"3.28.0"},"reference-count":11,"publisher":"IEEE","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2014,12]]},"DOI":"10.1109\/adprl.2014.7010633","type":"proceedings-article","created":{"date-parts":[[2015,1,19]],"date-time":"2015-01-19T21:48:03Z","timestamp":1421704083000},"page":"1-9","source":"Crossref","is-referenced-by-count":2,"title":["Pseudo-MDPs and factored linear action models"],"prefix":"10.1109","author":[{"given":"Hengshuai","family":"Yao","sequence":"first","affiliation":[]},{"given":"Csaba","family":"Szepesvari","sequence":"additional","affiliation":[]},{"given":"Bernardo Avila","family":"Pires","sequence":"additional","affiliation":[]},{"given":"Xinhua","family":"Zhang","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref4","first-page":"1107","article-title":"Least-squares policy iteration","volume":"4","author":"lagoudakis","year":"2003","journal-title":"JMLR"},{"key":"ref3","first-page":"535","article-title":"Modelling transition dynamics in MDPs with RKHS embeddings","author":"gr\u00fcnew\u00e4lder","year":"2012","journal-title":"ICML"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1561\/2200000016"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1287\/moor.1060.0188"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1145\/1390156.1390191"},{"journal-title":"Neuro-Dynamic Programming","year":"1996","author":"bertsekas","key":"ref5"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1023\/A:1017928328829"},{"key":"ref7","article-title":"Approximate policy iteration with linear action models","author":"yao","year":"2012","journal-title":"AAAI"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.2200\/S00268ED1V01Y201005AIM009"},{"article-title":"The simplex method is strongly polynomial for the markov decision problem with a fixed discount rate","year":"2010","author":"ye","key":"ref9"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/TNN.1998.712192"}],"event":{"name":"2014 IEEE Symposium on Adaptive Dynamic Programming and Reinforcement Learning (ADPRL)","start":{"date-parts":[[2014,12,9]]},"location":"Orlando, FL, USA","end":{"date-parts":[[2014,12,12]]}},"container-title":["2014 IEEE Symposium on Adaptive Dynamic Programming and Reinforcement Learning (ADPRL)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/7000183\/7010603\/07010633.pdf?arnumber=7010633","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2017,3,23]],"date-time":"2017-03-23T20:48:04Z","timestamp":1490302084000},"score":1,"resource":{"primary":{"URL":"http:\/\/ieeexplore.ieee.org\/document\/7010633\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2014,12]]},"references-count":11,"URL":"https:\/\/doi.org\/10.1109\/adprl.2014.7010633","relation":{},"subject":[],"published":{"date-parts":[[2014,12]]}}}