{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,29]],"date-time":"2026-05-29T11:20:07Z","timestamp":1780053607583,"version":"3.54.0"},"reference-count":21,"publisher":"IEEE","license":[{"start":{"date-parts":[[2018,5,1]],"date-time":"2018-05-01T00:00:00Z","timestamp":1525132800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2018,5,1]],"date-time":"2018-05-01T00:00:00Z","timestamp":1525132800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2018,5]]},"DOI":"10.1109\/icra.2018.8463213","type":"proceedings-article","created":{"date-parts":[[2018,9,21]],"date-time":"2018-09-21T18:28:03Z","timestamp":1537554483000},"page":"7548-7555","source":"Crossref","is-referenced-by-count":95,"title":["Deep Reinforcement Learning Supervised Autonomous Exploration in Office Environments"],"prefix":"10.1109","author":[{"given":"Delong","family":"Zhu","sequence":"first","affiliation":[{"name":"Department of Electronic Engineering, The Chinese University of Hong Kong, Shatin, N.T., Hong Kong, SAR, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Tingguang","family":"Li","sequence":"additional","affiliation":[{"name":"Department of Electronic Engineering, The Chinese University of Hong Kong, Shatin, N.T., Hong Kong, SAR, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Danny","family":"Ho","sequence":"additional","affiliation":[{"name":"Department of Electronic Engineering, The Chinese University of Hong Kong, Shatin, N.T., Hong Kong, SAR, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Chaoqun","family":"Wang","sequence":"additional","affiliation":[{"name":"Department of Electronic Engineering, The Chinese University of Hong Kong, Shatin, N.T., Hong Kong, SAR, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Max Q.-H.","family":"Meng","sequence":"additional","affiliation":[{"name":"Department of Electronic Engineering, The Chinese University of Hong Kong, Shatin, N.T., Hong Kong, SAR, China"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"263","reference":[{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2015.7139667"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2014.6907452"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2016.2520560"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1177\/0278364916687027"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2017.7989381"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/ROBOT.2006.1642157"},{"key":"ref16","author":"tai","year":"2017","journal-title":"Virtual-to-real Deep Reinforcement Learning Continuous Control of Mobile Robots for Mapless Navigation"},{"key":"ref17","volume":"1","author":"sutton","year":"1998","journal-title":"Reinforcement Learning An Introduction"},{"key":"ref18","first-page":"1928","article-title":"Asynchronous methods for deep reinforcement learning","author":"mnih","year":"2016","journal-title":"International Conference on Machine Learning"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/ICAR.2017.8023630"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1037\/h0061626"},{"key":"ref3","doi-asserted-by":"crossref","first-page":"529","DOI":"10.1038\/nature14236","article-title":"Human-level control through deep reinforcement learning","volume":"518","author":"mnih","year":"2015","journal-title":"Nature"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/34.584097"},{"key":"ref5","first-page":"146","article-title":"A frontier-based approach for autonomous exploration","author":"yamauchi","year":"1997","journal-title":"Computational Intelligence in Robotics and Automation 1997 ClRA'97 Proceedings 1997 IEEE International Symposium on IEEE"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1016\/j.robot.2014.08.009"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1177\/0278364902021010834"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1007\/s10514-012-9298-8"},{"key":"ref1","first-page":"569","article-title":"Probabilistic robotics","volume":"45","author":"thrun","year":"2005","journal-title":"Communications of the ACM"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2014.6907754"},{"key":"ref20","first-page":"2951","article-title":"Practical bayesian optimization of machine learning algorithms","author":"snoek","year":"2012","journal-title":"Advances in neural information processing systems"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-662-38527-2_55"}],"event":{"name":"2018 IEEE International Conference on Robotics and Automation (ICRA)","location":"Brisbane, QLD, Australia","start":{"date-parts":[[2018,5,21]]},"end":{"date-parts":[[2018,5,25]]}},"container-title":["2018 IEEE International Conference on Robotics and Automation (ICRA)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/8449910\/8460178\/08463213.pdf?arnumber=8463213","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,1,22]],"date-time":"2026-01-22T20:59:43Z","timestamp":1769115583000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/8463213\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018,5]]},"references-count":21,"URL":"https:\/\/doi.org\/10.1109\/icra.2018.8463213","relation":{},"subject":[],"published":{"date-parts":[[2018,5]]}}}