{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,31]],"date-time":"2026-03-31T08:35:50Z","timestamp":1774946150640,"version":"3.50.1"},"reference-count":15,"publisher":"IEEE","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2012,6]]},"DOI":"10.1109\/acc.2012.6314997","type":"proceedings-article","created":{"date-parts":[[2014,7,16]],"date-time":"2014-07-16T17:55:22Z","timestamp":1405533322000},"page":"793-799","source":"Crossref","is-referenced-by-count":3,"title":["Model estimation within planning and learning"],"prefix":"10.1109","author":[{"given":"Alborz","family":"Geramifard","sequence":"first","affiliation":[]},{"given":"J. D.","family":"Redding","sequence":"additional","affiliation":[]},{"given":"J.","family":"Joseph","sequence":"additional","affiliation":[]},{"given":"N.","family":"Roy","sequence":"additional","affiliation":[]},{"given":"J. P.","family":"How","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"15","article-title":"Reduced-order models for data-limited reinforcement learning","author":"joseph","year":"2011","journal-title":"ICML 2011 Workshop on Planning and Acting with Uncertain Models"},{"key":"13","doi-asserted-by":"publisher","DOI":"10.1162\/153244303765208377"},{"key":"14","doi-asserted-by":"publisher","DOI":"10.1145\/1390156.1390240"},{"key":"11","doi-asserted-by":"publisher","DOI":"10.1007\/BF00115009"},{"key":"12","author":"bertsekas","year":"1996","journal-title":"Neuro-Dynamic Programming (Optimization and Neural Computation Series 3)"},{"key":"3","doi-asserted-by":"publisher","DOI":"10.1109\/TRO.2009.2022423"},{"key":"2","doi-asserted-by":"publisher","DOI":"10.1109\/TRA.2002.805653"},{"key":"1","doi-asserted-by":"publisher","DOI":"10.2514\/6.2005-6957"},{"key":"10","author":"rummery","year":"1994","journal-title":"Online Q-learning Using Connectionist Systems (Tech Rep No Cued\/f-infeng\/tr 166)"},{"key":"7","first-page":"573","article-title":"Optimal unbiased estimators for evaluating agent performance","volume":"1","author":"zinkevich","year":"2006","journal-title":"Proceedings of the 21st National Conference on Artificial Intelligence"},{"key":"6","doi-asserted-by":"publisher","DOI":"10.1145\/1390156.1390166"},{"key":"5","doi-asserted-by":"publisher","DOI":"10.1109\/ACC.2011.5991309"},{"key":"4","doi-asserted-by":"publisher","DOI":"10.2514\/6.2010-7586"},{"key":"9","author":"sutton","year":"1998","journal-title":"Reinforcement Learning An Introduction"},{"key":"8","author":"white","year":"2009","journal-title":"A General Framework for Reducing Variance in Agent Evaluation"}],"event":{"name":"2012 American Control Conference - ACC 2012","location":"Montreal, QC","start":{"date-parts":[[2012,6,27]]},"end":{"date-parts":[[2012,6,29]]}},"container-title":["2012 American Control Conference (ACC)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx5\/6297579\/6314593\/06314997.pdf?arnumber=6314997","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2017,3,21]],"date-time":"2017-03-21T19:32:40Z","timestamp":1490124760000},"score":1,"resource":{"primary":{"URL":"http:\/\/ieeexplore.ieee.org\/document\/6314997\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2012,6]]},"references-count":15,"URL":"https:\/\/doi.org\/10.1109\/acc.2012.6314997","relation":{},"subject":[],"published":{"date-parts":[[2012,6]]}}}