{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T04:30:11Z","timestamp":1750221011099,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":30,"publisher":"ACM","license":[{"start":{"date-parts":[[2019,7,25]],"date-time":"2019-07-25T00:00:00Z","timestamp":1564012800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"the National Key R&D Program of China","award":["SQ2017YFGH001005"],"award-info":[{"award-number":["SQ2017YFGH001005"]}]},{"name":"National Natural Science Foundation of China","award":["61627810","U1509211"],"award-info":[{"award-number":["61627810","U1509211"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2019,7,25]]},"DOI":"10.1145\/3292500.3330892","type":"proceedings-article","created":{"date-parts":[[2019,7,26]],"date-time":"2019-07-26T13:17:26Z","timestamp":1564147046000},"page":"1471-1479","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":2,"title":["Separated Trust Regions Policy Optimization Method"],"prefix":"10.1145","author":[{"given":"Luobao","family":"Zou","sequence":"first","affiliation":[{"name":"Shanghai Jiao Tong University, Shanghai, China"}]},{"given":"Zhiwei","family":"Zhuang","sequence":"additional","affiliation":[{"name":"Shanghai Jiao Tong University, Shanghai, China"}]},{"given":"Yin","family":"Cheng","sequence":"additional","affiliation":[{"name":"Shanghai Jiao Tong University, Shanghai, China"}]},{"given":"Xuechun","family":"Wang","sequence":"additional","affiliation":[{"name":"Shanghai Jiao Tong University, Shanghai, China"}]},{"given":"Weidong","family":"Zhang","sequence":"additional","affiliation":[{"name":"Shanghai Jiao Tong University, Shanghai, China"}]}],"member":"320","published-online":{"date-parts":[[2019,7,25]]},"reference":[{"volume-title":"Openai gym. arXiv preprint arXiv:1606.01540","year":"2016","author":"Brockman Greg","key":"e_1_3_2_2_1_1"},{"volume-title":"Openai baselines. https:\/\/github.com\/openai\/baselines","year":"2017","author":"Dhariwal Prafulla","key":"e_1_3_2_2_2_1"},{"volume-title":"Taming the noise in reinforcement learning via soft updates. arXiv preprint arXiv:1512.08562","year":"2015","author":"Fox Roy","key":"e_1_3_2_2_3_1"},{"volume-title":"Marc G Bellemare, and Remi Munos. The reactor: A sample-efficient actor-critic architecture. arXiv preprint arXiv:1704.04651","year":"2017","author":"Gruslys Audrunas","key":"e_1_3_2_2_4_1"},{"volume-title":"Reinforcement learning with deep energy-based policies. arXiv preprint arXiv:1702.08165","year":"2017","author":"Haarnoja Tuomas","key":"e_1_3_2_2_5_1"},{"volume-title":"Soft actorcritic: Off-policy maximum entropy deep reinforcement learning with a stochastic actor. arXiv preprint arXiv:1801.01290","year":"2018","author":"Haarnoja Tuomas","key":"e_1_3_2_2_6_1"},{"key":"e_1_3_2_2_7_1","first-page":"267","volume-title":"ICML","volume":"2","author":"Kakade Sham","year":"2002"},{"key":"e_1_3_2_2_8_1","doi-asserted-by":"publisher","DOI":"10.5555\/2946645.2946684"},{"volume-title":"Continuous control with deep reinforcement learning. arXiv preprint arXiv:1509.02971","year":"2015","author":"Lillicrap Timothy P","key":"e_1_3_2_2_9_1"},{"key":"e_1_3_2_2_10_1","first-page":"1928","volume-title":"International conference on machine learning","author":"Mnih Volodymyr","year":"2016"},{"volume-title":"Playing atari with deep reinforcement learning. arXiv preprint arXiv:1312.5602","year":"2013","author":"Mnih Volodymyr","key":"e_1_3_2_2_11_1"},{"key":"e_1_3_2_2_12_1","doi-asserted-by":"publisher","DOI":"10.1038\/nature14236"},{"key":"e_1_3_2_2_13_1","first-page":"2775","volume-title":"Advances in Neural Information Processing Systems","author":"Nachum Ofir","year":"2017"},{"volume-title":"Trustpcl: An off-policy trust region method for continuous control. arXiv preprint arXiv:1707.01891","year":"2017","author":"Nachum Ofir","key":"e_1_3_2_2_14_1"},{"key":"e_1_3_2_2_15_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2007.11.026"},{"volume-title":"Prioritized experience replay. arXiv preprint arXiv:1511.05952","year":"2015","author":"Schaul Tom","key":"e_1_3_2_2_16_1"},{"key":"e_1_3_2_2_17_1","doi-asserted-by":"publisher","DOI":"10.1162\/08997660260028683"},{"key":"e_1_3_2_2_18_1","first-page":"1889","volume-title":"International Conference on Machine Learning","author":"Schulman John","year":"2015"},{"volume-title":"High-dimensional continuous control using generalized advantage estimation. arXiv preprint arXiv:1506.02438","year":"2015","author":"Schulman John","key":"e_1_3_2_2_19_1"},{"volume-title":"Proximal policy optimization algorithms. arXiv preprint arXiv:1707.06347","year":"2017","author":"Schulman John","key":"e_1_3_2_2_20_1"},{"key":"e_1_3_2_2_21_1","doi-asserted-by":"crossref","unstructured":"D Silver. D. silver a. huang cj maddison a. guez l. sifre g. van den driessche j. schrittwieser i. antonoglou v. panneershelvam m. lanctot s. dieleman d. grewe j. nham n. kalchbrenner i. sutskever t. lillicrap m. leach k. kavukcuoglu t. graepel and d. hassabis nature (london) 529 484 (2016). Nature (London) 529:484 2016.  D Silver. D. silver a. huang cj maddison a. guez l. sifre g. van den driessche j. schrittwieser i. antonoglou v. panneershelvam m. lanctot s. dieleman d. grewe j. nham n. kalchbrenner i. sutskever t. lillicrap m. leach k. kavukcuoglu t. graepel and d. hassabis nature (london) 529 484 (2016). Nature (London) 529:484 2016.","DOI":"10.1038\/nature16961"},{"volume-title":"Julian Schrittwieser, Ioannis Antonoglou, Veda Panneershelvam, Marc Lanctot, et al. Mastering the game of go with deep neural networks and tree search. nature, 529(7587):484","year":"2016","author":"Silver David","key":"e_1_3_2_2_22_1"},{"volume-title":"ICML","year":"2014","author":"Silver David","key":"e_1_3_2_2_23_1"},{"volume-title":"Reinforcement learning: An introduction","year":"2018","author":"Sutton Richard S","key":"e_1_3_2_2_24_1"},{"key":"e_1_3_2_2_25_1","doi-asserted-by":"publisher","DOI":"10.1145\/1553374.1553508"},{"first-page":"5","volume-title":"AAAI","author":"Hasselt Hado Van","key":"e_1_3_2_2_26_1"},{"volume-title":"Marc Lanctot, and Nando De Freitas. Dueling network architectures for deep reinforcement learning. arXiv preprint arXiv:1511.06581","year":"2015","author":"Wang Ziyu","key":"e_1_3_2_2_27_1"},{"key":"e_1_3_2_2_28_1","first-page":"5279","volume-title":"Advances in neural information processing systems","author":"Mansimov Elman","year":"2017"},{"volume-title":"Modeling purposeful adaptive behavior with the principle of maximum causal entropy","year":"2010","author":"Ziebart Brian D","key":"e_1_3_2_2_29_1"},{"key":"e_1_3_2_2_30_1","first-page":"1433","volume-title":"AAAI","author":"Ziebart Brian D","year":"2008"}],"event":{"name":"KDD '19: The 25th ACM SIGKDD Conference on Knowledge Discovery and Data Mining","sponsor":["SIGMOD ACM Special Interest Group on Management of Data","SIGKDD ACM Special Interest Group on Knowledge Discovery in Data"],"location":"Anchorage AK USA","acronym":"KDD '19"},"container-title":["Proceedings of the 25th ACM SIGKDD International Conference on Knowledge Discovery &amp; Data Mining"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3292500.3330892","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3292500.3330892","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T00:26:03Z","timestamp":1750206363000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3292500.3330892"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019,7,25]]},"references-count":30,"alternative-id":["10.1145\/3292500.3330892","10.1145\/3292500"],"URL":"https:\/\/doi.org\/10.1145\/3292500.3330892","relation":{},"subject":[],"published":{"date-parts":[[2019,7,25]]},"assertion":[{"value":"2019-07-25","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}