{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T04:21:56Z","timestamp":1750220516315,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":15,"publisher":"ACM","license":[{"start":{"date-parts":[[2020,12,11]],"date-time":"2020-12-11T00:00:00Z","timestamp":1607644800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2020,12,11]]},"DOI":"10.1145\/3445815.3445836","type":"proceedings-article","created":{"date-parts":[[2021,3,17]],"date-time":"2021-03-17T17:05:28Z","timestamp":1616000728000},"page":"122-127","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["A Reward Shaping Method based on Meta-LSTM for Continuous Control of Robot"],"prefix":"10.1145","author":[{"given":"Jixun","family":"Yao","sequence":"first","affiliation":[{"name":"School of Computer Science and Technology Northwestern Polytechnical University, Xi'an Shaanxi 710072 P. R., China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xiaoan","family":"Li","sequence":"additional","affiliation":[{"name":"School of Computer Science and Technology Northwestern Polytechnical University, Xi'an Shaanxi 710072 P. R., China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Dengshan","family":"Huang","sequence":"additional","affiliation":[{"name":"School of Computer Science and Technology Northwestern Polytechnical University, Xi'an Shaanxi 710072 P. R., China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2021,3,17]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"P. H\u00e4m\u00e4l\u00e4inen A. Babadi X. Ma and J. J. Lehtinen. 2018. PPO-CMA: Proximal policy optimization with covariance matrix adaptation in arXiv preprint arXiv:1810.02541.  P. H\u00e4m\u00e4l\u00e4inen A. Babadi X. Ma and J. J. Lehtinen. 2018. PPO-CMA: Proximal policy optimization with covariance matrix adaptation in arXiv preprint arXiv:1810.02541."},{"key":"e_1_3_2_1_2_1","unstructured":"J. Schulman F. Wolski P. Dhariwal A. Radford and O. Klimov. 2017. Proximal policy optimization algorithms in arXiv preprint arXiv:1707.06347.  J. Schulman F. Wolski P. Dhariwal A. Radford and O. Klimov. 2017. Proximal policy optimization algorithms in arXiv preprint arXiv:1707.06347."},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1177\/1729881419898342"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"crossref","unstructured":"Z. Hu K. Wan X. Gao and Y. Zhai 2019 A Dynamic Adjusting Reward Function Method for Deep Reinforcement Learning with Adjustable Parameters Mathematical Problems in Engineering 11 10(NOV.2019).  Z. Hu K. Wan X. Gao and Y. Zhai 2019 A Dynamic Adjusting Reward Function Method for Deep Reinforcement Learning with Adjustable Parameters Mathematical Problems in Engineering 11 10(NOV.2019).","DOI":"10.1155\/2019\/7619483"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2016.08.155"},{"volume-title":"Proceedings of the 32th AAAI Conference on Artificial Intelligence. 3762-3769","author":"Marom O.","key":"e_1_3_2_1_6_1","unstructured":"O. Marom and B. Rosman , 2018. Belief reward shaping in reinforcement learning . in Proceedings of the 32th AAAI Conference on Artificial Intelligence. 3762-3769 . O. Marom and B. Rosman, 2018. Belief reward shaping in reinforcement learning. in Proceedings of the 32th AAAI Conference on Artificial Intelligence. 3762-3769."},{"key":"e_1_3_2_1_7_1","unstructured":"J. X. Wang 2016 Learning to reinforcement learn in arXiv preprint arXiv:1611.05763.  J. X. Wang 2016 Learning to reinforcement learn in arXiv preprint arXiv:1611.05763."},{"key":"e_1_3_2_1_8_1","unstructured":"Y. Duan J. Schulman X. Chen P. L. Bartlett I. Sutskever and P. Abbeel 2016. Rl2: Fast reinforcement learning via slow reinforcement learning. in arXiv preprint arXiv:1611.02779.  Y. Duan J. Schulman X. Chen P. L. Bartlett I. Sutskever and P. Abbeel 2016. Rl2: Fast reinforcement learning via slow reinforcement learning. in arXiv preprint arXiv:1611.02779."},{"key":"e_1_3_2_1_9_1","volume-title":"Proceedings of the 29th Advances in neural information processing systems, 3981-3989","author":"Andry M.","year":"2016","unstructured":"M. Andry , 2016 . Learning to learn by gradient descent by gradient descent , in Proceedings of the 29th Advances in neural information processing systems, 3981-3989 . M. Andry , 2016. Learning to learn by gradient descent by gradient descent, in Proceedings of the 29th Advances in neural information processing systems, 3981-3989."},{"volume-title":"Proceedings of the 13th Advances in neural information processing systems.1057-1063","author":"Sutton R. S.","key":"e_1_3_2_1_10_1","unstructured":"R. S. Sutton , D. A. McAllester , and Y. Mansour , 2000. Policy gradient methods for reinforcement learning with function approximation , in Proceedings of the 13th Advances in neural information processing systems.1057-1063 . R. S. Sutton, D. A. McAllester, and Y. Mansour, 2000. Policy gradient methods for reinforcement learning with function approximation, in Proceedings of the 13th Advances in neural information processing systems.1057-1063."},{"volume-title":"Proceedings of the 13th Advances in neural information processing system, 1008-1014","author":"Konda V. R.","key":"e_1_3_2_1_11_1","unstructured":"V. R. Konda and J. N. Tsitsiklis , 2000. Actor-critic algorithms , in Proceedings of the 13th Advances in neural information processing system, 1008-1014 . V. R. Konda and J. N. Tsitsiklis, 2000. Actor-critic algorithms, in Proceedings of the 13th Advances in neural information processing system, 1008-1014."},{"key":"e_1_3_2_1_12_1","volume-title":"Meta-sgd: Learning to learn quickly for few-shot learning, in arXiv preprint arXiv:1707.09835.","author":"Li Z.","year":"2017","unstructured":"Z. Li , F. Zhou , F. Chen , and H. Li , 2017 . Meta-sgd: Learning to learn quickly for few-shot learning, in arXiv preprint arXiv:1707.09835. Z. Li, F. Zhou, F. Chen, and H. Li, 2017. Meta-sgd: Learning to learn quickly for few-shot learning, in arXiv preprint arXiv:1707.09835."},{"volume-title":"Proceedings of the 32th AAAI Conference on Artificial Intelligence. 5070-5077","author":"Chen J.","key":"e_1_3_2_1_13_1","unstructured":"J. Chen , X. Qiu , P. Liu , and X. Huang . 2018. Meta multi-task learning for sequence modeling , in Proceedings of the 32th AAAI Conference on Artificial Intelligence. 5070-5077 . J. Chen, X. Qiu, P. Liu, and X. Huang. 2018. Meta multi-task learning for sequence modeling, in Proceedings of the 32th AAAI Conference on Artificial Intelligence. 5070-5077."},{"key":"e_1_3_2_1_14_1","first-page":"5026","article-title":"in the Proceedings of the 25th IEEE\/RSJ International Conference on Intelligent Robots and Systems","author":"Todorov E.","year":"2012","unstructured":"E. Todorov , T. Erez , and Y. Tassa , Mujoco: A physics engine for model-based control , 2012 . in the Proceedings of the 25th IEEE\/RSJ International Conference on Intelligent Robots and Systems , IEEE , 5026 - 5033 . E. Todorov, T. Erez, and Y. Tassa, Mujoco: A physics engine for model-based control, 2012. in the Proceedings of the 25th IEEE\/RSJ International Conference on Intelligent Robots and Systems, IEEE, 5026-5033.","journal-title":"IEEE"},{"volume-title":"Proceedings of the 33rd International Conference on International Conference on Machine Learning. 1329-1338","author":"Duan Y.","key":"e_1_3_2_1_15_1","unstructured":"Y. Duan , X. Chen , R. Houthooft , J. Schulman , and P. Abbeel , 2016 Benchmarking deep reinforcement learning for continuous control , in Proceedings of the 33rd International Conference on International Conference on Machine Learning. 1329-1338 . Y. Duan, X. Chen, R. Houthooft, J. Schulman, and P. Abbeel, 2016 Benchmarking deep reinforcement learning for continuous control, in Proceedings of the 33rd International Conference on International Conference on Machine Learning. 1329-1338."}],"event":{"name":"CSAI 2020: 2020 4th International Conference on Computer Science and Artificial Intelligence","acronym":"CSAI 2020","location":"Zhuhai China"},"container-title":["2020 4th International Conference on Computer Science and Artificial Intelligence"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3445815.3445836","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3445815.3445836","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T21:24:33Z","timestamp":1750195473000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3445815.3445836"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020,12,11]]},"references-count":15,"alternative-id":["10.1145\/3445815.3445836","10.1145\/3445815"],"URL":"https:\/\/doi.org\/10.1145\/3445815.3445836","relation":{},"subject":[],"published":{"date-parts":[[2020,12,11]]},"assertion":[{"value":"2021-03-17","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}