{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,29]],"date-time":"2025-11-29T07:07:48Z","timestamp":1764400068194,"version":"3.46.0"},"reference-count":16,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,10,22]],"date-time":"2025-10-22T00:00:00Z","timestamp":1761091200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,10,22]],"date-time":"2025-10-22T00:00:00Z","timestamp":1761091200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,10,22]]},"DOI":"10.1109\/apsipaasc65261.2025.11249318","type":"proceedings-article","created":{"date-parts":[[2025,11,28]],"date-time":"2025-11-28T18:40:26Z","timestamp":1764355226000},"page":"1423-1428","source":"Crossref","is-referenced-by-count":0,"title":["HasRL Robot: A Heterogeneous Asynchronous Reinforcement Learning System for High-Dimensional Bipedal Control"],"prefix":"10.1109","author":[{"given":"Jingyang","family":"Mai","sequence":"first","affiliation":[{"name":"National University of Singapore,Singapore"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zechen","family":"Guo","sequence":"additional","affiliation":[{"name":"National University of Singapore,Singapore"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zhengding","family":"Luo","sequence":"additional","affiliation":[{"name":"Nanyang Technological University,Singapore"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Haozhe","family":"Ma","sequence":"additional","affiliation":[{"name":"National University of Singapore,Singapore"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2018.8593722"},{"key":"ref2","article-title":"Proximal policy optimization algorithms","author":"Schulman","year":"2017","journal-title":"arXiv preprint"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA48506.2021.9560769"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.15607\/RSS.2021.XVII.061"},{"key":"ref5","article-title":"Asynchronous methods for deep reinforcement learning","author":"Mnih","year":"2016","journal-title":"arXiv preprint"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/IROS40897.2019.8968056"},{"key":"ref7","article-title":"Catching two birds with one stone: Reward shaping with dual random networks for balancing exploration and exploitation","volume-title":"Forty-second International Conference on Machine Learning","author":"Ma","year":"2025"},{"journal-title":"Centralized reward agent for knowledge sharing and transfer in multi-task reinforcement learning","year":"2025","author":"Ma","key":"ref8"},{"key":"ref9","article-title":"Reward shaping for reinforcement learning with an assistant reward agent","volume-title":"Forty-first International Conference on Machine Learning","author":"Ma","year":"2024"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1016\/j.neunet.2024.106687"},{"key":"ref11","first-page":"1328","article-title":"Mixed-initiative bayesian sub-goal optimization in hierarchical reinforcement learning","volume-title":"Proceedings of the 23rd international conference on autonomous agents and multiagent systems","author":"Ma","year":"2024"},{"key":"ref12","first-page":"2310","article-title":"Hierarchical reinforcement learning with human-ai collaborative subgoals optimization","volume-title":"Proceedings of the 22nd international conference on autonomous agents and multiagent systems","author":"Ma","year":"2023"},{"volume-title":"Understanding LSTM networks","year":"2015","author":"Olah","key":"ref13"},{"volume-title":"Lecture5: Model-free control, Part of Lectures on Reinforcement Learning","year":"2015","author":"Silver","key":"ref14"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2012.6386109"},{"journal-title":"Openai gym","year":"2016","author":"Brockman","key":"ref16"}],"event":{"name":"2025 Asia Pacific Signal and Information Processing Association Annual Summit and Conference (APSIPA ASC)","start":{"date-parts":[[2025,10,22]]},"location":"Singapore, Singapore","end":{"date-parts":[[2025,10,24]]}},"container-title":["2025 Asia Pacific Signal and Information Processing Association Annual Summit and Conference (APSIPA ASC)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11248853\/11248968\/11249318.pdf?arnumber=11249318","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,11,29]],"date-time":"2025-11-29T07:05:47Z","timestamp":1764399947000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11249318\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,22]]},"references-count":16,"URL":"https:\/\/doi.org\/10.1109\/apsipaasc65261.2025.11249318","relation":{},"subject":[],"published":{"date-parts":[[2025,10,22]]}}}