{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T13:02:32Z","timestamp":1780318952059,"version":"3.54.1"},"reference-count":32,"publisher":"IEEE","license":[{"start":{"date-parts":[[2021,9,27]],"date-time":"2021-09-27T00:00:00Z","timestamp":1632700800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2021,9,27]],"date-time":"2021-09-27T00:00:00Z","timestamp":1632700800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/100000001","name":"National Science Foundation","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100008562","name":"University of Texas at Austin","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100008562","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021,9,27]]},"DOI":"10.1109\/iros51168.2021.9636020","type":"proceedings-article","created":{"date-parts":[[2021,12,16]],"date-time":"2021-12-16T20:45:38Z","timestamp":1639687538000},"page":"2369-2375","source":"Crossref","is-referenced-by-count":36,"title":["Self-Supervised Online Reward Shaping in Sparse-Reward Environments"],"prefix":"10.1109","author":[{"given":"Farzan","family":"Memarian","sequence":"first","affiliation":[{"name":"Oden Institute for Computational Engineering and Sciences, University of Texas,Austin,TX,USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Wonjoon","family":"Goo","sequence":"additional","affiliation":[{"name":"Personal Autonomous Robotics Lab (PeARL), The University of Texas,Austin,TX,USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Rudolf","family":"Lioutikov","sequence":"additional","affiliation":[{"name":"Personal Autonomous Robotics Lab (PeARL), The University of Texas,Austin,TX,USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Scott","family":"Niekum","sequence":"additional","affiliation":[{"name":"Personal Autonomous Robotics Lab (PeARL), The University of Texas,Austin,TX,USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Ufuk","family":"Topcu","sequence":"additional","affiliation":[{"name":"University of Texas,Department of Aerospace Engineering and Engineering Mechanics,Austin,TX,USA"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"263","reference":[{"key":"ref32","article-title":"Generative adversarial self-imitation learning","author":"guo","year":"2018","journal-title":"arXiv preprint arXiv 1812 08942"},{"key":"ref31","first-page":"3878","article-title":"Self-imitation learning","author":"oh","year":"2018","journal-title":"International Conference on Machine Learning"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1613\/jair.1190"},{"key":"ref10","first-page":"278","article-title":"Policy invariance under reward transformations: Theory and application to reward shaping","volume":"99","author":"ng","year":"1999","journal-title":"ICML"},{"key":"ref11","first-page":"792","article-title":"Principled methods for advising reinforcement learning agents","author":"wiewiora","year":"2003","journal-title":"Proceedings of the 20th International Conference on Machine Learning (ICML-03)"},{"key":"ref12","article-title":"Reinforcement learning from demonstration through shaping","author":"brys","year":"2015","journal-title":"Twenty-Fourth International Joint Conference on Artificial Intelligence"},{"key":"ref13","first-page":"783","article-title":"Extrapolating beyond suboptimal demonstrations via inverse reinforcement learning from observations","volume":"97","author":"brown","year":"0"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2012.6386109"},{"key":"ref15","first-page":"1861","article-title":"Soft actor-critic: Off-policy maximum entropy deep reinforcement learning with a stochastic actor","author":"haarnoja","year":"2018","journal-title":"International Conference on Machine Learning"},{"key":"ref16","first-page":"433","article-title":"Dynamic potential-based reward shaping","author":"devlin","year":"2012","journal-title":"Proc of International Conference on Autonomous Agents and Multiagent Systems"},{"key":"ref17","article-title":"Keeping your distance: Solving sparse reward tasks using self-balancing shaped rewards","author":"trott","year":"2019","journal-title":"arXiv preprint arXiv 1911 12945"},{"key":"ref18","article-title":"Reward shaping via meta-learning","author":"zou","year":"2019","journal-title":"arXiv preprint arXiv 1901 04668"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW.2017.70"},{"key":"ref28","article-title":"Safe imitation learning via fast bayesian reward inference from preferences","author":"brown","year":"2020","journal-title":"Proc of the International Conference on Machine Learning (ICML)"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2017.7989385"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.33017749"},{"key":"ref3","article-title":"Deep reinforcement learning with double q-learning","author":"van hasselt","year":"2015","journal-title":"arXiv preprint arXiv 1509 06461"},{"key":"ref6","first-page":"1334","article-title":"End-to-end training of deep visuomotor policies","volume":"17","author":"levine","year":"2016","journal-title":"The Journal of Machine Learning Research"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1177\/0278364913495721"},{"key":"ref29","first-page":"950","article-title":"Fast differentiable sorting and ranking","author":"blondel","year":"2020","journal-title":"Proceedings of the 37th International Conference on Machine Learning"},{"key":"ref8","author":"sutton","year":"2018","journal-title":"Reinforcement Learning An Introduction"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1145\/1015330.1015430"},{"key":"ref2","first-page":"1928","article-title":"Asynchronous methods for deep reinforcement learning","author":"mnih","year":"2016","journal-title":"International Conference on Machine Learning"},{"key":"ref9","first-page":"6765","article-title":"Inverse reward design","author":"hadfield-menell","year":"2017","journal-title":"Advances in neural information processing systems"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1038\/nature14236"},{"key":"ref20","article-title":"Learning by playing-solving sparse reward tasks from scratch","author":"riedmiller","year":"2018","journal-title":"arXiv preprint arXiv 1802 10363"},{"key":"ref22","first-page":"4299","article-title":"Deep reinforcement learning from human preferences","author":"christiano","year":"2017","journal-title":"Advances in neural information processing systems"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/CDC42340.2020.9304190"},{"key":"ref24","article-title":"Better-than-demonstrator imitation learning via automatically-ranked demonstrations","author":"brown","year":"2019","journal-title":"Proceedings of The 3rd Conference on Robot Learning"},{"key":"ref23","first-page":"8022","article-title":"Reward learning from human preferences and demonstrations in atari","author":"ibarz","year":"2018","journal-title":"Advances in neural information processing systems"},{"key":"ref26","first-page":"663","article-title":"Algorithms for inverse reinforcement learning","author":"ng","year":"2000","journal-title":"Proc Seventh Int Conf Machine Learning"},{"key":"ref25","article-title":"The optimal reward problem: Designing effective reward for bounded agents","author":"sorg","year":"2011","journal-title":"Ph D Dissertation"}],"event":{"name":"2021 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS)","location":"Prague, Czech Republic","start":{"date-parts":[[2021,9,27]]},"end":{"date-parts":[[2021,10,1]]}},"container-title":["2021 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9635848\/9635849\/09636020.pdf?arnumber=9636020","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,12,5]],"date-time":"2022-12-05T23:53:55Z","timestamp":1670284435000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9636020\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,9,27]]},"references-count":32,"URL":"https:\/\/doi.org\/10.1109\/iros51168.2021.9636020","relation":{},"subject":[],"published":{"date-parts":[[2021,9,27]]}}}