{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,10,30]],"date-time":"2024-10-30T02:35:24Z","timestamp":1730255724269,"version":"3.28.0"},"reference-count":52,"publisher":"IEEE","license":[{"start":{"date-parts":[[2022,5,23]],"date-time":"2022-05-23T00:00:00Z","timestamp":1653264000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2022,5,23]],"date-time":"2022-05-23T00:00:00Z","timestamp":1653264000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2022,5,23]]},"DOI":"10.1109\/icra46639.2022.9811963","type":"proceedings-article","created":{"date-parts":[[2022,7,12]],"date-time":"2022-07-12T19:36:40Z","timestamp":1657654600000},"page":"3616-3623","source":"Crossref","is-referenced-by-count":3,"title":["Offline Learning of Counterfactual Predictions for Real-World Robotic Reinforcement Learning"],"prefix":"10.1109","author":[{"given":"Jun","family":"Jin","sequence":"first","affiliation":[{"name":"Huawei Technologies Canada, Ltd.,Noah&#x0027;s Ark Lab,Edmonton,AB.,Canada"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Daniel","family":"Graves","sequence":"additional","affiliation":[{"name":"Huawei Technologies Canada, Ltd.,Noah&#x0027;s Ark Lab,Edmonton,AB.,Canada"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Cameron","family":"Haigh","sequence":"additional","affiliation":[{"name":"Huawei Technologies Canada, Ltd.,Noah&#x0027;s Ark Lab,Edmonton,AB.,Canada"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jun","family":"Luo","sequence":"additional","affiliation":[{"name":"Huawei Technologies Canada, Ltd.,Noah&#x0027;s Ark Lab,Edmonton,AB.,Canada"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Martin","family":"Jagersand","sequence":"additional","affiliation":[{"name":"University of Alberta,Department of Computing Science,Edmonton,AB.,Canada"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"doi-asserted-by":"publisher","key":"ref39","DOI":"10.1109\/ICRA48506.2021.9560734"},{"key":"ref38","article-title":"Dream to con-trol: Learning behaviors by latent imagination","author":"hafner","year":"2019","journal-title":"ArXiv Preprint"},{"doi-asserted-by":"publisher","key":"ref33","DOI":"10.1108\/AA-03-2018-039"},{"doi-asserted-by":"publisher","key":"ref32","DOI":"10.1109\/IROS.2017.8202244"},{"doi-asserted-by":"publisher","key":"ref31","DOI":"10.1109\/IROS.2018.8594353"},{"key":"ref30","article-title":"Transferable force-torque dynamics model for peg-in-hole task","author":"ding","year":"2019","journal-title":"ArXiv Preprint"},{"key":"ref37","first-page":"2555","article-title":"Learning latent dynamics for planning from pixels","author":"hafner","year":"0","journal-title":"International Conference on Machine Learning"},{"doi-asserted-by":"publisher","key":"ref36","DOI":"10.1109\/ICRA.2019.8793649"},{"doi-asserted-by":"publisher","key":"ref35","DOI":"10.1109\/IROS.2014.6943202"},{"doi-asserted-by":"publisher","key":"ref34","DOI":"10.1109\/AIM.2016.7576815"},{"doi-asserted-by":"publisher","key":"ref28","DOI":"10.1109\/ICRA.2019.8794074"},{"doi-asserted-by":"publisher","key":"ref27","DOI":"10.1109\/ICRA.2019.8793485"},{"doi-asserted-by":"publisher","key":"ref29","DOI":"10.1109\/COASE.2019.8842940"},{"doi-asserted-by":"publisher","key":"ref2","DOI":"10.1145\/345124.345153"},{"key":"ref1","article-title":"The ingredients of real-world robotic reinforcement learning","author":"zhu","year":"2020","journal-title":"ArXiv Preprint"},{"doi-asserted-by":"publisher","key":"ref20","DOI":"10.1109\/ICRA.2018.8460528"},{"key":"ref22","article-title":"Leveraging demonstrations for deep reinforcement learning on robotics problems with sparse rewards","author":"vecerik","year":"2017","journal-title":"ArXiv Preprint"},{"doi-asserted-by":"publisher","key":"ref21","DOI":"10.1109\/ICRA.2019.8793789"},{"key":"ref24","first-page":"8538","article-title":"Variational inverse control with events: A general framework for data-driven reward definition","author":"fu","year":"2018","journal-title":"Advances in neural information processing systems"},{"key":"ref23","first-page":"5048","article-title":"Hindsight experience replay","author":"andrychowicz","year":"2017","journal-title":"Advances in neural information processing systems"},{"key":"ref26","article-title":"Compare contact model-based control and contact model-free learning: A survey of robotic peg-in-hole assembly strategies","author":"xu","year":"2019","journal-title":"ArXiv Preprint"},{"key":"ref25","article-title":"Auto-encoding variational bayes","author":"kingma","year":"2013","journal-title":"arXivprint"},{"key":"ref50","article-title":"Continuous control with deep reinforce-ment learning","author":"lillicrap","year":"2015","journal-title":"ArXiv Preprint"},{"doi-asserted-by":"publisher","key":"ref51","DOI":"10.1109\/ICRA.2018.8461044"},{"doi-asserted-by":"publisher","key":"ref52","DOI":"10.1109\/MRA.2007.339609"},{"key":"ref10","article-title":"Perception as prediction using general value functions in autonomous driving applications","author":"graves","year":"2020","journal-title":"ArXiv Preprint"},{"key":"ref11","first-page":"761","article-title":"Horde: A scalable real-time architecture for learning knowledge from unsupervised sensorimotor interaction","volume":"2","author":"sutton","year":"0","journal-title":"The 10th International Conference on Autonomous Agents and Multiagent Systems"},{"key":"ref40","article-title":"Contrastive variational reinforcement learning for complex observations","author":"ma","year":"2020","journal-title":"ArXiv Preprint"},{"key":"ref12","article-title":"Challenges of real-world reinforcement learning","author":"dulac-arnold","year":"2019","journal-title":"ArXiv Preprint"},{"key":"ref13","first-page":"4635","article-title":"Setting up a reinforcement learning task with a real-world robot","author":"mahmood","year":"0","journal-title":"2018 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS)"},{"key":"ref14","article-title":"Benchmarking reinforcement learning algorithms on real-world robots","author":"mahmood","year":"2018","journal-title":"ArXiv Preprint"},{"key":"ref15","first-page":"1","article-title":"Guided policy search","author":"levine","year":"0","journal-title":"International Conference on Machine Learning"},{"doi-asserted-by":"publisher","key":"ref16","DOI":"10.1109\/ICRA.2017.7989385"},{"key":"ref17","article-title":"A dissection of overfitting and generalization in continuous reinforcement learning","author":"zhang","year":"2018","journal-title":"ArXiv Preprint"},{"key":"ref18","first-page":"1","article-title":"Abstraction and general-ization in reinforcement learning: A summary and framework","author":"ponsen","year":"0","journal-title":"Adaptive and Learning Agents Workshop"},{"key":"ref19","article-title":"Episodic curiosity through reachability","author":"savinov","year":"2018","journal-title":"ArXiv Preprint"},{"doi-asserted-by":"publisher","key":"ref4","DOI":"10.1017\/S0140525X12000477"},{"doi-asserted-by":"publisher","key":"ref3","DOI":"10.1093\/acprof:oso\/9780199832798.003.0002"},{"doi-asserted-by":"publisher","key":"ref6","DOI":"10.1364\/JOSAA.20.001434"},{"doi-asserted-by":"publisher","key":"ref5","DOI":"10.1038\/4580"},{"key":"ref8","article-title":"Better generalization with forecasts","author":"schaul","year":"0","journal-title":"Twenty-Third International Joint Conference on Artificial Intelligence"},{"doi-asserted-by":"publisher","key":"ref7","DOI":"10.1038\/4594"},{"key":"ref49","article-title":"Soft actor-critic: Off-policy maximum entropy deep reinforcement learning with a stochastic actor","author":"haarnoja","year":"2018","journal-title":"arXivpreprintarXiv"},{"key":"ref9","article-title":"Offline reinforcement learning: Tutorial, review, and perspectives on open problems","author":"levine","year":"2020","journal-title":"ArXiv Preprint"},{"key":"ref46","first-page":"1377","article-title":"Temporal-difference networks","author":"sutton","year":"2005","journal-title":"Advances in neural information processing systems"},{"key":"ref45","first-page":"9310","article-title":"Discovery of useful questions as auxiliary tasks","author":"veeriah","year":"2019","journal-title":"Advances in neural information processing systems"},{"key":"ref48","article-title":"Importance resampling for off-policy prediction","author":"schlegel","year":"2019","journal-title":"ArXiv Preprint"},{"key":"ref47","article-title":"Playing atari with deep reinforcement learning","author":"mnih","year":"2013","journal-title":"ArXiv Preprint"},{"key":"ref42","article-title":"A comparison of general value functions and temporal-difference networks","author":"patterson","year":"0","journal-title":"International Conference on Autonomous Agents and Multiagent Systems (AAMAS)"},{"key":"ref41","article-title":"Learning predictive representations for deformable objects using contrastive estimation","author":"yan","year":"2020","journal-title":"ArXiv Preprint"},{"doi-asserted-by":"publisher","key":"ref44","DOI":"10.1109\/ICRA48506.2021.9561450"},{"doi-asserted-by":"publisher","key":"ref43","DOI":"10.1109\/ICRA40945.2020.9197148"}],"event":{"name":"2022 IEEE International Conference on Robotics and Automation (ICRA)","start":{"date-parts":[[2022,5,23]]},"location":"Philadelphia, PA, USA","end":{"date-parts":[[2022,5,27]]}},"container-title":["2022 International Conference on Robotics and Automation (ICRA)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9811522\/9811357\/09811963.pdf?arnumber=9811963","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,11,3]],"date-time":"2022-11-03T23:08:51Z","timestamp":1667516931000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9811963\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,5,23]]},"references-count":52,"URL":"https:\/\/doi.org\/10.1109\/icra46639.2022.9811963","relation":{},"subject":[],"published":{"date-parts":[[2022,5,23]]}}}