{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,23]],"date-time":"2025-12-23T05:02:14Z","timestamp":1766466134393,"version":"3.28.0"},"reference-count":15,"publisher":"IEEE","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"DOI":"10.1109\/ijcnn.2004.1380086","type":"proceedings-article","created":{"date-parts":[[2005,4,5]],"date-time":"2005-04-05T13:41:25Z","timestamp":1112708485000},"page":"1091-1096","source":"Crossref","is-referenced-by-count":6,"title":["Model-free off-policy reinforcement learning in continuous environment"],"prefix":"10.1109","volume":"2","author":[{"given":"P.","family":"Wawrzynski","sequence":"first","affiliation":[]},{"given":"A.","family":"Pacut","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"15","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-540-24844-6_145"},{"key":"13","doi-asserted-by":"publisher","DOI":"10.1007\/BF00992698"},{"journal-title":"A Simple Actor-critic Algorithm for Continuous Environments","year":"2003","author":"wawrzynski","key":"14"},{"journal-title":"Reinforcement Learning An Introduction","year":"1998","author":"sutton","key":"11"},{"key":"12","first-page":"1057","article-title":"Policy gradient methods for reinforcement learning with function approximation","volume":"12","author":"sutton","year":"2000","journal-title":"Advances in Information Processing Systems"},{"key":"3","doi-asserted-by":"publisher","DOI":"10.1162\/089976600300015961"},{"journal-title":"Reinforcement learning using neural networks with applications to motor control","year":"2002","author":"coulom","key":"2"},{"key":"1","doi-asserted-by":"publisher","DOI":"10.1109\/TSMC.1983.6313077"},{"key":"10","doi-asserted-by":"publisher","DOI":"10.1016\/B978-1-55860-141-3.50030-4"},{"key":"7","article-title":"Eligibility traces for off-policy policy evaluation","author":"precup","year":"2000","journal-title":"Proceedings of the 17th International Conference on Machine Learning"},{"key":"6","doi-asserted-by":"publisher","DOI":"10.1007\/BF00993104"},{"key":"5","article-title":"Model-free least-squares policy iteration","volume":"14","author":"lagoudakis","year":"2002","journal-title":"Advances in neural information processing systems"},{"key":"4","doi-asserted-by":"publisher","DOI":"10.1137\/S0363012901385691"},{"key":"9","doi-asserted-by":"crossref","DOI":"10.1002\/9780470316511","author":"rubinstein","year":"1981","journal-title":"Simulation and the Monte Carlo Method"},{"key":"8","article-title":"Off-policy temporal-difference learning with function approximation","author":"precup","year":"2001","journal-title":"Proceedings of the Eighteenth International Conference on Machine Learning"}],"event":{"name":"2004 IEEE International Joint Conference on Neural Networks (IEEE Cat. No.04CH37541)","acronym":"IJCNN-04","location":"Budapest, Hungary"},"container-title":["2004 IEEE International Joint Conference on Neural Networks (IEEE Cat. No.04CH37541)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx5\/9486\/30097\/01380086.pdf?arnumber=1380086","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2017,6,16]],"date-time":"2017-06-16T18:00:43Z","timestamp":1497636043000},"score":1,"resource":{"primary":{"URL":"http:\/\/ieeexplore.ieee.org\/document\/1380086\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[null]]},"references-count":15,"URL":"https:\/\/doi.org\/10.1109\/ijcnn.2004.1380086","relation":{},"subject":[]}}