{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,22]],"date-time":"2025-03-22T11:57:57Z","timestamp":1742644677739,"version":"3.28.0"},"reference-count":17,"publisher":"IEEE","license":[{"start":{"date-parts":[[2022,6,8]],"date-time":"2022-06-08T00:00:00Z","timestamp":1654646400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2022,6,8]],"date-time":"2022-06-08T00:00:00Z","timestamp":1654646400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2022,6,8]]},"DOI":"10.23919\/acc53348.2022.9867896","type":"proceedings-article","created":{"date-parts":[[2022,9,5]],"date-time":"2022-09-05T20:24:10Z","timestamp":1662409450000},"page":"2892-2898","source":"Crossref","is-referenced-by-count":3,"title":["Sample efficient transfer in reinforcement learning for high variable cost environments with an inaccurate source reward model"],"prefix":"10.23919","author":[{"given":"Md Ferdous","family":"Alam","sequence":"first","affiliation":[{"name":"The Ohio State University,Department of Mechanical and Aerospace Engineering,Columbus,OH,USA,43210"}]},{"given":"Max","family":"Shtein","sequence":"additional","affiliation":[{"name":"University of Michigan,Department of Materials Science and Engineering,Ann Arbor,Michigan,USA,48109"}]},{"given":"Kira","family":"Barton","sequence":"additional","affiliation":[{"name":"University of Michigan,Department of Mechanical Engineering,Ann Arbor,Michigan,USA,48109"}]},{"given":"David J.","family":"Hoelzle","sequence":"additional","affiliation":[{"name":"The Ohio State University,Department of Mechanical and Aerospace Engineering,Columbus,OH,USA,43210"}]}],"member":"263","reference":[{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1016\/S0004-3702(99)00052-1"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1016\/S0927-0507(05)80172-0"},{"article-title":"Temporal abstraction in reinforcement learning","year":"2001","author":"precup","key":"ref12"},{"key":"ref13","article-title":"Value function based reinforcement learning in changing markovian environments","volume":"9","author":"cs\u00e1ji","year":"2008","journal-title":"Journal of Machine Learning Research"},{"key":"ref14","article-title":"Successor features for transfer in reinforcement learning","author":"barreto","year":"2016","journal-title":"arXiv preprint arXiv 1606 05312"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1145\/1160633.1160762"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1115\/MSEC2020-8472"},{"key":"ref17","first-page":"63","article-title":"Gaussian processes in machine learning","author":"rasmussen","year":"2003","journal-title":"Machine Learning Summer School"},{"key":"ref4","article-title":"Continuous control with deep reinforcement learning","author":"lillicrap","year":"2015","journal-title":"arXiv preprint arXiv 1509 02971"},{"key":"ref3","doi-asserted-by":"crossref","first-page":"529","DOI":"10.1038\/nature14236","article-title":"Human-level control through deep reinforcement learning","volume":"518","author":"mnih","year":"2015","journal-title":"Nature"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.2351\/1.4977476"},{"key":"ref5","first-page":"465","article-title":"Pilco: A model-based and data-efficient approach to policy search","author":"deisenroth","year":"2011","journal-title":"Proceedings of the 28th International Conference on Machine Learning (ICML-11)"},{"key":"ref8","article-title":"Transfer learning for reinforcement learning domains: A survey","volume":"10","author":"taylor","year":"2009","journal-title":"Journal of Machine Learning Research"},{"key":"ref7","article-title":"A physics guided reinforcement learning framework for an autonomous manufacturing system with expensive data","author":"max shtein","year":"2021","journal-title":"2021 American Control Conference (ACC)"},{"key":"ref2","doi-asserted-by":"crossref","first-page":"354","DOI":"10.1038\/nature24270","article-title":"Mastering the game of go without human knowledge","volume":"550","author":"silver","year":"2017","journal-title":"Nature"},{"key":"ref1","volume":"135","author":"sutton","year":"1998","journal-title":"Introduction to Reinforcement Learning"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-27645-3_5"}],"event":{"name":"2022 American Control Conference (ACC)","start":{"date-parts":[[2022,6,8]]},"location":"Atlanta, GA, USA","end":{"date-parts":[[2022,6,10]]}},"container-title":["2022 American Control Conference (ACC)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9866948\/9867142\/09867896.pdf?arnumber=9867896","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,10,10]],"date-time":"2022-10-10T20:24:18Z","timestamp":1665433458000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9867896\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,6,8]]},"references-count":17,"URL":"https:\/\/doi.org\/10.23919\/acc53348.2022.9867896","relation":{},"subject":[],"published":{"date-parts":[[2022,6,8]]}}}