{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,10,30]],"date-time":"2024-10-30T06:37:19Z","timestamp":1730270239309,"version":"3.28.0"},"reference-count":19,"publisher":"IEEE","license":[{"start":{"date-parts":[[2020,10,24]],"date-time":"2020-10-24T00:00:00Z","timestamp":1603497600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2020,10,24]],"date-time":"2020-10-24T00:00:00Z","timestamp":1603497600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2020,10,24]],"date-time":"2020-10-24T00:00:00Z","timestamp":1603497600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100003725","name":"National Research Foundation of Korea","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100003725","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2020,10,24]]},"DOI":"10.1109\/iros45743.2020.9341014","type":"proceedings-article","created":{"date-parts":[[2021,3,15]],"date-time":"2021-03-15T10:49:56Z","timestamp":1615805396000},"page":"6134-6139","source":"Crossref","is-referenced-by-count":0,"title":["Exploration Strategy based on Validity of Actions in Deep Reinforcement Learning"],"prefix":"10.1109","author":[{"given":"Hyung-Suk","family":"Yoon","sequence":"first","affiliation":[]},{"given":"Sang-Hyun","family":"Lee","sequence":"additional","affiliation":[]},{"given":"Seung-Woo","family":"Seo","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"journal-title":"Diversity-driven exploration strategy for deep reinforcement learning","year":"2018","author":"hong","key":"ref10"},{"journal-title":"End to End Learning 
for Self-Driving Cars","year":"2016","author":"bojarski","key":"ref11"},{"journal-title":"Multi-agent connected autonomous driving using deep reinforcement learning","year":"2019","author":"palanisamy","key":"ref12"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2018.8460487"},{"key":"ref14","first-page":"4565","article-title":"Generative adversarial imitation learning","author":"ho","year":"2016","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref15","first-page":"2672","article-title":"Generative adversarial nets","author":"goodfellow","year":"2014","journal-title":"Advances in Neural Information Processing Systems 27"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.13"},{"key":"ref17","first-page":"5074","article-title":"Learning to poke by poking: Experiential learning of intuitive physics","author":"agrawal","year":"2016","journal-title":"Advances in neural information processing systems"},{"key":"ref18","article-title":"Proximal policy optimization algorithms","volume":"abs 1707 6347","author":"schulman","year":"2017","journal-title":"CoRR"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2012.6386109"},{"key":"ref4","article-title":"Unifying count-based exploration and intrinsic motivation","volume":"abs 1606 1868","author":"bellemare","year":"2016","journal-title":"CoRR"},{"key":"ref3","article-title":"Learning navigation behaviors end to end","volume":"abs 1809 10124","author":"chiang","year":"2018","journal-title":"CoRR"},{"key":"ref6","first-page":"1","article-title":"CARLA: An open urban driving simulator","author":"dosovitskiy","year":"2017","journal-title":"Proceedings of the 1st Annual Conference on Robot Learning"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW.2017.70"},{"key":"ref8","first-page":"213","article-title":"R-max-a general polynomial time algorithm for near-optimal reinforcement 
learning","volume":"3","author":"brafman","year":"2002","journal-title":"Journal of Machine Learning Research"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1023\/A:1017984413808"},{"key":"ref2","article-title":"End-to-end training of deep visuomotor policies","author":"levine","year":"2015","journal-title":"arXiv 1504 00702 [cs LG]"},{"key":"ref1","article-title":"Playing atari with deep reinforcement learning","author":"mnih","year":"2013","journal-title":"arXiv preprint arXiv 1312 5602"},{"key":"ref9","first-page":"2753","article-title":"#exploration: A study of count-based exploration for deep reinforcement learning","author":"tang","year":"2017","journal-title":"Advances in Neural Information Processing Systems"}],"event":{"name":"2020 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS)","start":{"date-parts":[[2020,10,24]]},"location":"Las Vegas, NV, USA","end":{"date-parts":[[2021,1,24]]}},"container-title":["2020 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9340668\/9340635\/09341014.pdf?arnumber=9341014","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,6,28]],"date-time":"2022-06-28T17:51:45Z","timestamp":1656438705000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9341014\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020,10,24]]},"references-count":19,"URL":"https:\/\/doi.org\/10.1109\/iros45743.2020.9341014","relation":{},"subject":[],"published":{"date-parts":[[2020,10,24]]}}}