{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,2]],"date-time":"2026-01-02T07:39:08Z","timestamp":1767339548534,"version":"3.28.0"},"reference-count":24,"publisher":"IEEE","license":[{"start":{"date-parts":[[2022,12,5]],"date-time":"2022-12-05T00:00:00Z","timestamp":1670198400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2022,12,5]],"date-time":"2022-12-05T00:00:00Z","timestamp":1670198400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2022,12,5]]},"DOI":"10.1109\/robio55434.2022.10011784","type":"proceedings-article","created":{"date-parts":[[2023,1,18]],"date-time":"2023-01-18T13:51:38Z","timestamp":1674049898000},"page":"253-258","source":"Crossref","is-referenced-by-count":2,"title":["Battery Management for Warehouse Robots via Average-Reward Reinforcement Learning"],"prefix":"10.1109","author":[{"given":"Yongjin","family":"Mu","sequence":"first","affiliation":[{"name":"Harbin Institute of Technology (Shenzhen),Department of Control Science and Engineering,Shenzhen,China"}]},{"given":"Yanjie","family":"Li","sequence":"additional","affiliation":[{"name":"Harbin Institute of Technology (Shenzhen),Department of Control Science and Engineering,Shenzhen,China"}]},{"given":"Ke","family":"Lin","sequence":"additional","affiliation":[{"name":"Harbin Institute of Technology (Shenzhen),Department of Control Science and Engineering,Shenzhen,China"}]},{"given":"Ki","family":"Deng","sequence":"additional","affiliation":[{"name":"Harbin Institute of Technology (Shenzhen),Department of Control Science and Engineering,Shenzhen,China"}]},{"given":"Qi","family":"Liu","sequence":"additional","affiliation":[{"name":"Harbin Institute of Technology (Shenzhen),Department of Control Science and Engineering,Shenzhen,China"}]}],"member":"263","reference":[{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1016\/j.robot.2020.103629"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-64096-5_9"},{"journal-title":"Proximal policy optimization algorithms","year":"2017","author":"schulman","key":"ref12"},{"journal-title":"Soft actor-critic algorithms and applications","year":"2018","author":"haarnoja","key":"ref13"},{"journal-title":"Playing atari with deep reinforcement learning","year":"2013","author":"mnih","key":"ref14"},{"journal-title":"Continuous control with deep reinforcement learning","year":"2015","author":"lillicrap","key":"ref15"},{"key":"ref16","first-page":"1587","article-title":"Addressing function approxi-mation error in actor-critic methods","author":"fujimoto","year":"2018","journal-title":"International Conference on Machine Learning"},{"key":"ref17","article-title":"(more) efficient reinforcement learning via posterior sampling","volume":"26","author":"osband","year":"2013","journal-title":"Advances in neural information processing systems"},{"key":"ref18","article-title":"Logarithmic online regret bounds for undis-counted reinforcement learning","volume":"19","author":"auer","year":"2006","journal-title":"Advances in neural information processing systems"},{"key":"ref19","article-title":"Near-optimal regret bounds for reinforcement learning","volume":"21","author":"auer","year":"2008","journal-title":"Advances in neural information processing systems"},{"journal-title":"Reinforcement Learning An Introduction","year":"2018","author":"sutton","key":"ref4"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1016\/j.simpat.2020.102124"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1016\/j.ejor.2005.01.036"},{"journal-title":"Average-reward model-free reinforcement learning a systematic re-view and literature mapping","year":"2020","author":"dewanto","key":"ref5"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1016\/j.ejor.2017.12.008"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/IEEM44572.2019.8978958"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1016\/j.ejor.2016.06.063"},{"key":"ref1","doi-asserted-by":"crossref","first-page":"103729","DOI":"10.1016\/j.robot.2021.103729","article-title":"Robotic mobile ful-fillment systems: A survey on recent developments and research opportunities","volume":"137","author":"da","year":"2021","journal-title":"Robotics and Autonomous Systems"},{"key":"ref9","article-title":"Pick, pack, & survive: Charging robots in a modern warehouse based on online connected dominating sets","author":"hamann","year":"2018","journal-title":"9th International Conference on Fun with Algorithms (FUN 2018)"},{"key":"ref20","first-page":"298","article-title":"A reinforcement learning method for maximizing undis-counted rewards","volume":"298","author":"schwartz","year":"1993","journal-title":"Proceedings of the Tenth International Conference on Machine Learning"},{"key":"ref22","first-page":"3442","author":"ma","year":"2021","journal-title":"Average-reward reinforcement learning with trust region methods"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1137\/S0363012999361974"},{"journal-title":"Adam A method for stochastic optimization","year":"2014","author":"kingma","key":"ref24"},{"key":"ref23","first-page":"12535","article-title":"On-policy deep reinforcement learning for the average-reward criterion","author":"zhang","year":"2021","journal-title":"International Conference on Machine Learning"}],"event":{"name":"2022 IEEE International Conference on Robotics and Biomimetics (ROBIO)","start":{"date-parts":[[2022,12,5]]},"location":"Jinghong, China","end":{"date-parts":[[2022,12,9]]}},"container-title":["2022 IEEE International Conference on Robotics and Biomimetics (ROBIO)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/10011626\/10011636\/10011784.pdf?arnumber=10011784","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,2,20]],"date-time":"2023-02-20T16:52:35Z","timestamp":1676911955000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10011784\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,12,5]]},"references-count":24,"URL":"https:\/\/doi.org\/10.1109\/robio55434.2022.10011784","relation":{},"subject":[],"published":{"date-parts":[[2022,12,5]]}}}