{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,7,2]],"date-time":"2026-07-02T17:24:33Z","timestamp":1783013073435,"version":"3.54.6"},"reference-count":34,"publisher":"IEEE","license":[{"start":{"date-parts":[[2024,5,13]],"date-time":"2024-05-13T00:00:00Z","timestamp":1715558400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,5,13]],"date-time":"2024-05-13T00:00:00Z","timestamp":1715558400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/100006785","name":"Google","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100006785","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024,5,13]]},"DOI":"10.1109\/icra57147.2024.10610391","type":"proceedings-article","created":{"date-parts":[[2024,8,8]],"date-time":"2024-08-08T17:51:05Z","timestamp":1723139465000},"page":"10807-10813","source":"Crossref","is-referenced-by-count":11,"title":["Barrier Functions Inspired Reward Shaping for Reinforcement Learning"],"prefix":"10.1109","author":[{"family":"Nilaksh","sequence":"first","affiliation":[{"name":"Indian Institute of Technology (IIT),Kharagpur"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Abhishek","family":"Ranjan","sequence":"additional","affiliation":[{"name":"Indian Institute of Science (IISc),Bangalore"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Shreenabh","family":"Agrawal","sequence":"additional","affiliation":[{"name":"Indian Institute of Science (IISc),Bangalore"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Aayush","family":"Jain","sequence":"additional","affiliation":[{"name":"Indian Institute of Technology (IIT),Kharagpur"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Pushpak","family":"Jagtap","sequence":"additional","affiliation":[{"name":"RBCCPS, IISc"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Shishir","family":"Kolathaya","sequence":"additional","affiliation":[{"name":"CSA &#x0026; RBCCPS, IISc"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"263","reference":[{"key":"ref1","article-title":"Mastering diverse domains through world models","author":"Hafner","year":"2023"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1038\/nature14236"},{"key":"ref3","article-title":"Improving alignment of dialogue agents via targeted human judgements","author":"Glaese","year":"2022"},{"key":"ref4","first-page":"27 730","article-title":"Training language models to follow instructions with human feedback","volume":"35","author":"Ouyang","year":"2022","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1002\/rob.4620010203"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1177\/0278364913495721"},{"key":"ref7","article-title":"Deep reinforcement learning from human preferences","volume":"30","author":"Christiano","year":"2017","journal-title":"Advances in neural information processing systems"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1145\/1273496.1273572"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/IS.2008.4670492"},{"key":"ref10","first-page":"278","article-title":"Policy invariance under reward transformations: Theory and application to reward shaping","volume":"99","author":"Ng","year":"1999","journal-title":"Icml"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2020.02.008"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1023\/A:1022140919877"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1126\/science.153.3731.34"},{"key":"ref14","article-title":"High-dimensional continuous control using generalized advantage estimation","author":"Schulman","year":"2015"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2012.6386025"},{"key":"ref16","article-title":"Go1","year":"2022"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/IS.2008.4670492"},{"key":"ref18","first-page":"433","article-title":"Dynamic potential-based reward shaping","volume-title":"Proceedings of the 11th international conference on autonomous agents and multiagent systems","author":"Devlin"},{"key":"ref19","article-title":"Reward shaping in episodic reinforcement learning","author":"Grzes","year":"2017"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.11741"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/IROS40897.2019.8968254"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2023.3341775"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.33013387"},{"key":"ref24","article-title":"Reachability constrained reinforcement learning","author":"Yu","year":"2022"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-62822-2_11"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1145\/1273496.1273572"},{"key":"ref27","article-title":"Your value function is a control barrier function: Verification of learned policies using control theory","author":"Tan","year":"2023"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.23919\/ECC.2019.8796030"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/TSMC.1983.6313077"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-04921-7_39"},{"key":"ref31","first-page":"1587","article-title":"Addressing function approximation error in actor-critic methods","volume-title":"International conference on machine learning","author":"Fujimoto"},{"key":"ref32","article-title":"Walk these ways: Tuning robot control for generalization with multiplicity of behavior","volume-title":"Conference on Robot Learning","author":"Margolis"},{"key":"ref33","article-title":"Isaac gym: High performance gpu-based physics simulation for robot learning","author":"Makoviychuk","year":"2021"},{"key":"ref34","article-title":"Proximal policy optimization algorithms","author":"Schulman","year":"2017"}],"event":{"name":"2024 IEEE International Conference on Robotics and Automation (ICRA)","location":"Yokohama, Japan","start":{"date-parts":[[2024,5,13]]},"end":{"date-parts":[[2024,5,17]]}},"container-title":["2024 IEEE International Conference on Robotics and Automation (ICRA)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/10609961\/10609862\/10610391.pdf?arnumber=10610391","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,8,10]],"date-time":"2024-08-10T05:20:17Z","timestamp":1723267217000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10610391\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,5,13]]},"references-count":34,"URL":"https:\/\/doi.org\/10.1109\/icra57147.2024.10610391","relation":{},"subject":[],"published":{"date-parts":[[2024,5,13]]}}}