{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,24]],"date-time":"2026-01-24T18:28:03Z","timestamp":1769279283480,"version":"3.49.0"},"reference-count":29,"publisher":"IEEE","license":[{"start":{"date-parts":[[2020,7,1]],"date-time":"2020-07-01T00:00:00Z","timestamp":1593561600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2020,7,1]],"date-time":"2020-07-01T00:00:00Z","timestamp":1593561600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2020,7,1]],"date-time":"2020-07-01T00:00:00Z","timestamp":1593561600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2020,7]]},"DOI":"10.1109\/ijcnn48605.2020.9207302","type":"proceedings-article","created":{"date-parts":[[2020,9,30]],"date-time":"2020-09-30T00:40:33Z","timestamp":1601426433000},"page":"1-8","source":"Crossref","is-referenced-by-count":6,"title":["Adaptive Inner-reward Shaping in Sparse Reward Games"],"prefix":"10.1109","author":[{"given":"Dong","family":"Yang","sequence":"first","affiliation":[]},{"given":"Yuhua","family":"Tang","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref10","article-title":"Adam: A method for stochastic optimization","author":"kingma","year":"2014","journal-title":"arXiv preprint arXiv 1412 6980"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1177\/0278364913495721"},{"key":"ref12","article-title":"Google research football: A novel reinforcement learning environment","author":"kurach","year":"2019","journal-title":"arXiv preprint arXiv 1907 
11634"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/JRPROC.1961.287775"},{"key":"ref14","first-page":"1928","article-title":"Asynchronous methods for deep reinforcement learning","author":"mnih","year":"2016","journal-title":"International Conference on Machine Learning"},{"key":"ref15","article-title":"Playing atari with deep reinforcement learning","author":"mnih","year":"2013","journal-title":"arXiv preprint arXiv 1312 5602"},{"key":"ref16","doi-asserted-by":"crossref","first-page":"529","DOI":"10.1038\/nature14236","article-title":"Human-level control through deep reinforcement learning","volume":"518","author":"mnih","year":"2015","journal-title":"Nature"},{"key":"ref17","first-page":"278","article-title":"Policy invariance under reward transformations: Theory and application to reward shaping","volume":"99","author":"ng","year":"1999","journal-title":"ICML"},{"key":"ref18","article-title":"Shaping and policy search in reinforcement learning","author":"ng","year":"2003","journal-title":"PhD thesis"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW.2017.70"},{"key":"ref28","author":"sutton","year":"2018","journal-title":"Reinforcement Learning An Introduction"},{"key":"ref4","article-title":"Large-scale study of curiosity-driven learning","author":"burda","year":"2018","journal-title":"arXiv preprint arXiv 1808 04355"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1037\/h0049039"},{"key":"ref3","article-title":"End to end learning for self-driving cars","author":"bojarski","year":"2016","journal-title":"arXiv preprint arXiv 1604 07316"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/TITS.2013.2255286"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1145\/3292500.3330724"},{"key":"ref5","first-page":"1922","article-title":"Landmark based reward shaping in reinforcement learning with hidden states","author":"demir","year":"2019","journal-title":"Proc of International Conference on Autonomous Agents 
and Multiagent Systems"},{"key":"ref8","article-title":"Soft actor-critic algorithms and applications","author":"haarnoja","year":"2018","journal-title":"arXiv preprint arXiv 1812 02588"},{"key":"ref7","article-title":"Learning shaping rewards in model-based reinforcement learning","volume":"115","author":"grzes","year":"2009","journal-title":"Proc AAMAS 2009 Workshop on Adaptive Learning Agents"},{"key":"ref2","article-title":"Chauffeurnet: Learning to drive by imitating the best and synthesizing the worst","author":"bansal","year":"2018","journal-title":"arXiv preprint arXiv 1812 02588"},{"key":"ref9","first-page":"267","article-title":"Approximately optimal approximate reinforcement learning","volume":"2","author":"kakade","year":"2002","journal-title":"ICML"},{"key":"ref1","first-page":"604","article-title":"Potential-based shaping in model-based reinforcement learning","author":"asmuth","year":"2008","journal-title":"AAAI"},{"key":"ref20","first-page":"463","article-title":"Learning to drive a bicycle using reinforcement learning and shaping","volume":"98","author":"randl\u00f8v","year":"1998","journal-title":"ICML"},{"key":"ref22","first-page":"222","article-title":"A possibility for implementing curiosity and boredom in model-building neural controllers","author":"schmidhuber","year":"1991","journal-title":"Proc of the International Conference on Simulation of Adaptive Behavior From Animals to Animats"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1006\/ceps.1999.1020"},{"key":"ref24","article-title":"Proximal policy optimization algorithms","author":"schulman","year":"2017","journal-title":"arXiv preprint arXiv 1707 06347"},{"key":"ref23","article-title":"High-dimensional continuous control using generalized advantage estimation","author":"schulman","year":"2015","journal-title":"arXiv preprint arXiv 1506 
02349"},{"key":"ref26","doi-asserted-by":"crossref","first-page":"157","DOI":"10.1093\/oxfordhb\/9780195399820.013.0010","article-title":"Curiosity and motivation","author":"silvia","year":"2012","journal-title":"The Oxford Handbook of Human Motivation"},{"key":"ref25","doi-asserted-by":"crossref","first-page":"484","DOI":"10.1038\/nature16961","article-title":"Mastering the game of go with deep neural networks and tree search","volume":"529","author":"silver","year":"2016","journal-title":"Nature"}],"event":{"name":"2020 International Joint Conference on Neural Networks (IJCNN)","location":"Glasgow, United Kingdom","start":{"date-parts":[[2020,7,19]]},"end":{"date-parts":[[2020,7,24]]}},"container-title":["2020 International Joint Conference on Neural Networks (IJCNN)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9200848\/9206590\/09207302.pdf?arnumber=9207302","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,10,8]],"date-time":"2023-10-08T17:40:04Z","timestamp":1696786804000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9207302\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020,7]]},"references-count":29,"URL":"https:\/\/doi.org\/10.1109\/ijcnn48605.2020.9207302","relation":{},"subject":[],"published":{"date-parts":[[2020,7]]}}}