{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T04:32:10Z","timestamp":1750221130490,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":38,"publisher":"ACM","license":[{"start":{"date-parts":[[2019,3,8]],"date-time":"2019-03-08T00:00:00Z","timestamp":1552003200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"National Natural Science Foundation of China","award":["61473271, 61836011"],"award-info":[{"award-number":["61473271, 61836011"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2019,3,8]]},"DOI":"10.1145\/3318265.3318294","type":"proceedings-article","created":{"date-parts":[[2019,4,9]],"date-time":"2019-04-09T12:34:06Z","timestamp":1554813246000},"page":"38-44","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Two-stage population based training method for deep reinforcement learning"],"prefix":"10.1145","author":[{"given":"Yinda","family":"Zhou","sequence":"first","affiliation":[{"name":"University of Science and Technology of China, Hefei, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Weiming","family":"Liu","sequence":"additional","affiliation":[{"name":"University of Science and Technology of China, Hefei, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Bin","family":"Li","sequence":"additional","affiliation":[{"name":"University of Science and Technology of China, Hefei, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2019,3,8]]},"reference":[{"volume-title":"Reinforcement learning: An introduction{M}","year":"2018","author":"Sutton R S","key":"e_1_3_2_1_1_1"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1038\/nature14236"},{"volume-title":"A Deep Hierarchical Approach to Lifelong Learning in Minecraft{C}\/\/AAAI","year":"2017","author":"Tessler C","key":"e_1_3_2_1_3_1"},{"volume-title":"Starcraft ii: A new challenge for reinforcement learning{J}. arXiv preprint arXiv:1708.04782","year":"2017","author":"Vinyals O","key":"e_1_3_2_1_4_1"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1038\/nature24270"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1177\/0278364913495721"},{"volume-title":"Sim2real view invariant visual servoing by recurrent control{J}. arXiv preprint arXiv:1712.07642","year":"2017","author":"Sadeghi F","key":"e_1_3_2_1_7_1"},{"key":"e_1_3_2_1_8_1","first-page":"1","article-title":"Sim-to-real transfer of robotic control with dynamics randomization{C}\/\/2018 IEEE International Conference on Robotics and Automation (ICRA)","volume":"2018","author":"Peng X B","journal-title":"IEEE"},{"volume-title":"Deep reinforcement learning-based image captioning with embedding reward{J}. arXiv preprint arXiv:1704.03899","year":"2017","author":"Ren Z","key":"e_1_3_2_1_9_1"},{"volume-title":"CVPR","year":"2017","author":"Yoo S","key":"e_1_3_2_1_10_1"},{"volume-title":"Multi-shot Pedestrian Re-identification via Sequential Decision Making{J}. arXiv preprint arXiv:1712.07257","year":"2017","author":"Zhang J","key":"e_1_3_2_1_11_1"},{"volume-title":"Subgoal Discovery for Hierarchical Dialogue Policy Learning{J}. arXiv preprint arXiv:1804.07855","year":"2018","author":"Tang D","key":"e_1_3_2_1_12_1"},{"volume-title":"An actor-critic algorithm for sequence prediction{J}. arXiv preprint arXiv:1607.07086","year":"2016","author":"Bahdanau D","key":"e_1_3_2_1_13_1"},{"volume-title":"Deep reinforcement learning that matters{J}. arXiv preprint arXiv:1709.06560","year":"2017","author":"Henderson P","key":"e_1_3_2_1_14_1"},{"key":"e_1_3_2_1_15_1","first-page":"281","article-title":"Random search for hyper-parameter optimization{J}","volume":"13","author":"Bergstra J","year":"2012","journal-title":"Journal of Machine Learning Research"},{"volume-title":"Making a science of model search: Hyperparameter optimization in hundreds of dimensions for vision architectures{J}","year":"2013","author":"Bergstra J","key":"e_1_3_2_1_16_1"},{"key":"e_1_3_2_1_17_1","unstructured":"Bergstra J S Bardenet R Bengio Y etal Algorithms for hyper-parameter optimization{C}\/\/Advances in neural information processing systems. 2011: 2546--2554.   Bergstra J S Bardenet R Bengio Y et al. Algorithms for hyper-parameter optimization{C}\/\/Advances in neural information processing systems. 2011: 2546--2554."},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-25566-3_40"},{"key":"e_1_3_2_1_19_1","unstructured":"Snoek J Larochelle H Adams R P. Practical bayesian optimization of machine learning algorithms{C}\/\/Advances in neural information processing systems. 2012: 2951--2959.   Snoek J Larochelle H Adams R P. Practical bayesian optimization of machine learning algorithms{C}\/\/Advances in neural information processing systems. 2012: 2951--2959."},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1145\/3205455.3205486"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-10677-4_60"},{"volume-title":"Population based training of neural networks{J}. arXiv preprint arXiv:1711.09846","year":"2017","author":"Jaderberg M","key":"e_1_3_2_1_22_1"},{"volume-title":"Deep Reinforcement Learning with Double Q-Learning{C}\/\/AAAI","year":"2016","author":"Van Hasselt H","key":"e_1_3_2_1_23_1"},{"volume-title":"Prioritized experience replay{J}. arXiv preprint arXiv:1511.05952","year":"2015","author":"Schaul T","key":"e_1_3_2_1_24_1"},{"volume-title":"Dueling network architectures for deep reinforcement learning{J}. arXiv preprint arXiv:1511.06581","year":"2015","author":"Wang Z","key":"e_1_3_2_1_25_1"},{"volume-title":"Shimkin N. Averaged-dqn: Variance reduction and stabilization for deep reinforcement learning{J}. arXiv preprint arXiv:1611.01929","year":"2016","author":"Anschel O","key":"e_1_3_2_1_26_1"},{"volume-title":"Noisy networks for exploration{J}. arXiv preprint arXiv:1706.10295","year":"2017","author":"Fortunato M","key":"e_1_3_2_1_27_1"},{"volume-title":"Rainbow: Combining improvements in deep reinforcement learning{J}. arXiv preprint arXiv:1710.02298","year":"2017","author":"Hessel M","key":"e_1_3_2_1_28_1"},{"volume-title":"Continuous control with deep reinforcement learning{J}. arXiv preprint arXiv:1509.02971","year":"2015","author":"Lillicrap T P","key":"e_1_3_2_1_29_1"},{"key":"e_1_3_2_1_30_1","unstructured":"Schulman J Levine S Abbeel P etal Trust region policy optimization{C}\/\/International Conference on Machine Learning. 2015: 1889--1897.   Schulman J Levine S Abbeel P et al. Trust region policy optimization{C}\/\/International Conference on Machine Learning. 2015: 1889--1897."},{"volume-title":"Proximal policy optimization algorithms{J}. arXiv preprint arXiv:1707.06347","year":"2017","author":"Schulman J","key":"e_1_3_2_1_31_1"},{"key":"e_1_3_2_1_32_1","unstructured":"Mnih V Badia A P Mirza M etal Asynchronous methods for deep reinforcement learning{C}\/\/International conference on machine learning. 2016: 1928--1937.   Mnih V Badia A P Mirza M et al. Asynchronous methods for deep reinforcement learning{C}\/\/International conference on machine learning. 2016: 1928--1937."},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1145\/1390156.1390240"},{"key":"e_1_3_2_1_34_1","unstructured":"Downey C Sanner S. Temporal Difference Bayesian Model Averaging: A Bayesian Perspective on Adapting Lambda{C}\/\/ICML. 2010: 311--318.   Downey C Sanner S. Temporal Difference Bayesian Model Averaging: A Bayesian Perspective on Adapting Lambda{C}\/\/ICML. 2010: 311--318."},{"volume-title":"How to discount deep reinforcement learning: Towards new dynamic strategies{J}. arXiv preprint arXiv:1512.02011","year":"2015","author":"Fran\u00e7ois-Lavet V","key":"e_1_3_2_1_35_1"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1016\/S0893-6080(02)00056-4"},{"volume-title":"Adaptive Lambda Least-Squares Temporal Difference Learning{J}. arXiv preprint arXiv:1612.09465","year":"2016","author":"Mann T A","key":"e_1_3_2_1_37_1"},{"volume-title":"Divide the gradient by a running average of its recent magnitude{J}. COURSERA: Neural networks for machine learning","year":"2012","author":"Tieleman T","key":"e_1_3_2_1_38_1"}],"event":{"name":"HP3C '19: 2019 the 3rd International Conference on High Performance Compilation, Computing and Communications","acronym":"HP3C '19","location":"Xi'an China"},"container-title":["Proceedings of the 3rd International Conference on High Performance Compilation, Computing and Communications"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3318265.3318294","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3318265.3318294","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T01:02:24Z","timestamp":1750208544000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3318265.3318294"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019,3,8]]},"references-count":38,"alternative-id":["10.1145\/3318265.3318294","10.1145\/3318265"],"URL":"https:\/\/doi.org\/10.1145\/3318265.3318294","relation":{},"subject":[],"published":{"date-parts":[[2019,3,8]]},"assertion":[{"value":"2019-03-08","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}