{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,2]],"date-time":"2026-02-02T21:42:07Z","timestamp":1770068527956,"version":"3.49.0"},"reference-count":37,"publisher":"IEEE","license":[{"start":{"date-parts":[[2023,6,18]],"date-time":"2023-06-18T00:00:00Z","timestamp":1687046400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2023,6,18]],"date-time":"2023-06-18T00:00:00Z","timestamp":1687046400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62250710682,61906083"],"award-info":[{"award-number":["62250710682,61906083"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100017607","name":"Shenzhen Fundamental Research Program","doi-asserted-by":"publisher","award":["JCYJ20190809121403553"],"award-info":[{"award-number":["JCYJ20190809121403553"]}],"id":[{"id":"10.13039\/501100017607","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023,6,18]]},"DOI":"10.1109\/ijcnn54540.2023.10191999","type":"proceedings-article","created":{"date-parts":[[2023,8,2]],"date-time":"2023-08-02T17:30:03Z","timestamp":1690997403000},"page":"1-9","source":"Crossref","is-referenced-by-count":3,"title":["Constrained Reinforcement Learning for Dynamic Material Handling"],"prefix":"10.1109","author":[{"given":"Chengpeng","family":"Hu","sequence":"first","affiliation":[{"name":"Research Institute of Trustworthy Autonomous Systems (RITAS), Southern University of Science and Technology,Shenzhen,China"}]},{"given":"Ziming","family":"Wang","sequence":"additional","affiliation":[{"name":"Research Institute of 
Trustworthy Autonomous Systems (RITAS), Southern University of Science and Technology,Shenzhen,China"}]},{"given":"Jialin","family":"Liu","sequence":"additional","affiliation":[{"name":"Southern University of Science and Technology,Guangdong Key Laboratory of Brain-inspired Intelligent Computation,Department of Computer Science and Engineering,Shenzhen,China"}]},{"given":"Junyi","family":"Wen","sequence":"additional","affiliation":[{"name":"Huawei Technologies Co., Ltd,Trustworthiness Theory Research Center,Shenzhen,China"}]},{"given":"Bifei","family":"Mao","sequence":"additional","affiliation":[{"name":"Huawei Technologies Co., Ltd,Trustworthiness Theory Research Center,Shenzhen,China"}]},{"given":"Xin","family":"Yao","sequence":"additional","affiliation":[{"name":"Southern University of Science and Technology,Guangdong Key Laboratory of Brain-inspired Intelligent Computation,Department of Computer Science and Engineering,Shenzhen,China"}]}],"member":"263","reference":[{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1080\/00207548208947745"},{"key":"ref35","author":"schulman","year":"2017","journal-title":"Proximal policy optimization algorithms"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1007\/s10951-008-0090-8"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i04.5932"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1007\/s10845-015-1069-x"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2021\/614"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1080\/00207543.2010.510486"},{"key":"ref36","first-page":"1","article-title":"Tianshou: A highly modularized deep reinforcement learning library","volume":"23","author":"weng","year":"2022","journal-title":"Journal of Machine Learning Research"},{"key":"ref31","doi-asserted-by":"crossref","first-page":"529","DOI":"10.1038\/nature14236","article-title":"Human-level control through deep reinforcement 
learning","volume":"518","author":"mnih","year":"2015","journal-title":"Nature"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.32473\/flairs.v35i.130584"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1016\/j.cor.2021.105517"},{"key":"ref33","author":"sutton","year":"2018","journal-title":"Reinforcement Learning An Introduction"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1016\/j.ejor.2021.08.008"},{"key":"ref32","first-page":"1928","article-title":"Asynchronous methods for deep reinforcement learning","author":"mnih","year":"2016","journal-title":"International Conference on Machine Learning"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1016\/0925-5273(93)90044-L"},{"key":"ref1","first-page":"32","article-title":"A multi-agent based approach to dynamic scheduling of machines and automated guided vehicles (AGV) in manufacturing systems by considering AGV breakdowns","volume":"7","author":"kaplanoğlu","year":"2015","journal-title":"International Journal of Engineering Research & Innovation"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1023\/A:1011253011638"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/WF-IoT.2018.8355151"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1016\/j.rcim.2019.101849"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1016\/j.ijpe.2014.12.029"},{"key":"ref24","author":"altman","year":"1999","journal-title":"Constrained Markov Decision Processes Stochastic Modeling"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1007\/s44163-021-00003-3"},{"key":"ref26","article-title":"Reward constrained policy optimization","author":"tessler","year":"0","journal-title":"International Conference on Learning Representations"},{"key":"ref25","first-page":"22","article-title":"Constrained policy optimization","author":"achiam","year":"2017","journal-title":"International Conference on Machine Learning"},{"key":"ref20","first-page":"1861","article-title":"Soft 
actor-critic: Off-policy maximum entropy deep reinforcement learning with a stochastic actor","author":"haarnoja","year":"2018","journal-title":"International Conference on Machine Learning"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1016\/j.procir.2020.05.210"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1007\/s10845-013-0852-9"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/ICNP49622.2020.9259378"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1609\/icaps.v29i1.3528"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i04.6144"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1080\/00207540110091712"},{"key":"ref7","author":"brockman","year":"2016","journal-title":"OpenAI Gym"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/ROBIO.2015.7419049"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1155\/2019\/7237459"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1080\/002075498193877"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1016\/j.cie.2020.106749"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/ICIT.2018.8352413"}],"event":{"name":"2023 International Joint Conference on Neural Networks (IJCNN)","location":"Gold Coast, Australia","start":{"date-parts":[[2023,6,18]]},"end":{"date-parts":[[2023,6,23]]}},"container-title":["2023 International Joint Conference on Neural Networks 
(IJCNN)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/10190990\/10190992\/10191999.pdf?arnumber=10191999","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,8,21]],"date-time":"2023-08-21T17:44:50Z","timestamp":1692639890000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10191999\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,6,18]]},"references-count":37,"URL":"https:\/\/doi.org\/10.1109\/ijcnn54540.2023.10191999","relation":{},"subject":[],"published":{"date-parts":[[2023,6,18]]}}}