{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,14]],"date-time":"2026-01-14T00:59:18Z","timestamp":1768352358425,"version":"3.49.0"},"publisher-location":"New York, NY, USA","reference-count":24,"publisher":"ACM","license":[{"start":{"date-parts":[[2018,6,27]],"date-time":"2018-06-27T00:00:00Z","timestamp":1530057600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2018,6,27]]},"DOI":"10.1145\/3234804.3234816","type":"proceedings-article","created":{"date-parts":[[2018,9,7]],"date-time":"2018-09-07T12:51:23Z","timestamp":1536324683000},"page":"27-36","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":8,"title":["Survey of Deep Q-Network variants in PyGame Learning Environment"],"prefix":"10.1145","author":[{"given":"Evalds","family":"Urtans","sequence":"first","affiliation":[{"name":"Riga Technical University, Riga, Latvia"}]},{"given":"Agris","family":"Nikitenko","sequence":"additional","affiliation":[{"name":"Riga Technical University, Riga, Latvia"}]}],"member":"320","published-online":{"date-parts":[[2018,6,27]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Deep Reinforcement Learning with Averaged Target DQN. NIPS Workshop.","author":"Anschel O.","year":"2016"},{"key":"e_1_3_2_1_2_1","unstructured":"Babaeizadeh M. Frosio I. Tyree S. Clemons J. and Kautz J. 2017. GA3C: GPU-based A3C for Deep Reinforcement Learning. ICLR. (2017).  Babaeizadeh M. Frosio I. Tyree S. Clemons J. and Kautz J. 2017. GA3C: GPU-based A3C for Deep Reinforcement Learning. ICLR. (2017)."},{"key":"e_1_3_2_1_3_1","volume-title":"Benchmarking Deep Reinforcement Learning for Continuous Control. ICML. 48","author":"Duan Y.","year":"2016"},{"key":"e_1_3_2_1_4_1","unstructured":"Geron A. 2017. Hands-On Machine Learning with Scikit-Learn and TensorFlow: Concepts Tools and Techniques to Build Intelligent Systems. O'Reilly Media.   Geron A. 2017. Hands-On Machine Learning with Scikit-Learn and TensorFlow: Concepts Tools and Techniques to Build Intelligent Systems. O'Reilly Media."},{"key":"e_1_3_2_1_5_1","unstructured":"Hasselt H. van Guez A. and Silver D. 2015. Deep Reinforcement Learning with Double Q-learning. CoRR. abs\/1509.06461 (2015).   Hasselt H. van Guez A. and Silver D. 2015. Deep Reinforcement Learning with Double Q-learning. CoRR. abs\/1509.06461 (2015)."},{"key":"e_1_3_2_1_6_1","volume-title":"Deep Reinforcement Learning with Double Q-learning. Proceedings of AAAI. 13, (2016","author":"Hasselt H.","year":"2094"},{"key":"e_1_3_2_1_7_1","unstructured":"Hasselt H.V. 2010. Double Q-learning. Advances in Neural Information Processing Systems 23. J.D. Lafferty C.K.I. Williams J. Shawe-Taylor R.S. Zemel and A. Culotta eds. Curran Associates Inc. 2613--2621.   Hasselt H.V. 2010. Double Q-learning. Advances in Neural Information Processing Systems 23. J.D. Lafferty C.K.I. Williams J. Shawe-Taylor R.S. Zemel and A. Culotta eds. Curran Associates Inc. 2613--2621."},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"crossref","unstructured":"Henderson P. Islam R. Bachman P. Pineau J. Precup D. and Meger D. 2018. Deep Reinforcement Learning that Matters. (AAAI 2018).  Henderson P. Islam R. Bachman P. Pineau J. Precup D. and Meger D. 2018. Deep Reinforcement Learning that Matters. (AAAI 2018).","DOI":"10.1609\/aaai.v32i1.11694"},{"key":"e_1_3_2_1_9_1","volume-title":"Rainbow: Combining Improvements in Deep Reinforcement Learning. CoRR. abs\/1710.02298","author":"Hessel M.","year":"2017"},{"key":"e_1_3_2_1_10_1","unstructured":"Islam R. Henderson P. Gomrokchi M. and Precup D. 2017. Reproducibility of Benchmarked Deep Reinforcement Learning Tasks for Continuous Control. ICML. (2017).  Islam R. Henderson P. Gomrokchi M. and Precup D. 2017. Reproducibility of Benchmarked Deep Reinforcement Learning Tasks for Continuous Control. ICML. (2017)."},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"crossref","unstructured":"Kai Arulkumaran Marc Peter Deisenroth Miles Brundage and Anil Anthony Bharath 2017. A Brief Survey of Deep Reinforcement Learning. IEEE Signal Processing Magazine. (2017).  Kai Arulkumaran Marc Peter Deisenroth Miles Brundage and Anil Anthony Bharath 2017. A Brief Survey of Deep Reinforcement Learning. IEEE Signal Processing Magazine. (2017).","DOI":"10.1109\/MSP.2017.2743240"},{"key":"e_1_3_2_1_12_1","unstructured":"Lillicrap T.P. Hunt J.J. Pritzel A. Heess N. Erez T. Tassa Y. Silver D. and Wierstra D. 2015. Continuous control with deep reinforcement learning. US Patent 20170024643 A1. (2015).  Lillicrap T.P. Hunt J.J. Pritzel A. Heess N. Erez T. Tassa Y. Silver D. and Wierstra D. 2015. Continuous control with deep reinforcement learning. US Patent 20170024643 A1. (2015)."},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"crossref","unstructured":"Mnih V. et al. 2015. Human-level control through deep reinforcement learning. Nature. 518 7540 (2015) 529--533.  Mnih V. et al. 2015. Human-level control through deep reinforcement learning. Nature. 518 7540 (2015) 529--533.","DOI":"10.1038\/nature14236"},{"key":"e_1_3_2_1_14_1","volume-title":"Asynchronous Methods for Deep Reinforcement Learning. ICML. 48, (2016), 1928","author":"Mnih V.","year":"1937"},{"key":"e_1_3_2_1_15_1","volume-title":"Connecting Generative Adversarial Networks and Actor-Critic Methods. NIPS Workshop on Adversarial Training.","author":"Pfau D.","year":"2016"},{"key":"e_1_3_2_1_16_1","unstructured":"Schaul T. Quan J. Antonoglou I. and Silver D. 2016. Prioritized Experience Replay. ICLR. (2016).  Schaul T. Quan J. Antonoglou I. and Silver D. 2016. Prioritized Experience Replay. ICLR. (2016)."},{"key":"e_1_3_2_1_17_1","unstructured":"Schulman J. Levine S. Moritz P. Jordan M.I. and Abbeel P. 2015. Trust Region Policy Optimization. ICML. (2015) 1889--1897.   Schulman J. Levine S. Moritz P. Jordan M.I. and Abbeel P. 2015. Trust Region Policy Optimization. ICML. (2015) 1889--1897."},{"key":"e_1_3_2_1_18_1","unstructured":"Schulman J. Moritz P. Levine S. Jordan M.I. and Abbeel P. 2016. High-Dimensional Continuous Control Using Generalized Advantage Estimation. ICLR. (2016).  Schulman J. Moritz P. Levine S. Jordan M.I. and Abbeel P. 2016. High-Dimensional Continuous Control Using Generalized Advantage Estimation. ICLR. (2016)."},{"key":"e_1_3_2_1_19_1","unstructured":"Schulman J. Wolski F. Dhariwal P. Radford A. and Klimov O. 2017. Proximal Policy Optimization Algorithms. CoRR. abs\/1707.06347 (2017).  Schulman J. Wolski F. Dhariwal P. Radford A. and Klimov O. 2017. Proximal Policy Optimization Algorithms. CoRR. abs\/1707.06347 (2017)."},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"crossref","unstructured":"Selvaraju R.R. Das A. Vedantam R. Cogswell M. Parikh D. and Batra D. 2017. Grad-CAM: Why did you say that? Visual Explanations from Deep Networks via Gradient-based Localization. ICCV. (2017).  Selvaraju R.R. Das A. Vedantam R. Cogswell M. Parikh D. and Batra D. 2017. Grad-CAM: Why did you say that? Visual Explanations from Deep Networks via Gradient-based Localization. ICCV. (2017).","DOI":"10.1109\/ICCV.2017.74"},{"key":"e_1_3_2_1_21_1","unstructured":"Tasfi N. 2016. PyGame Learning Environment. GitHub repository. (2016).  Tasfi N. 2016. PyGame Learning Environment. GitHub repository. (2016)."},{"key":"e_1_3_2_1_22_1","unstructured":"Wang Z. Bapst V. Heess N. Mnih V. Munos R. Kavukcuoglu K. and de Freitas N. 2017. Sample Efficient Actor-Critic with Experience Replay. ICLR. (2017).  Wang Z. Bapst V. Heess N. Mnih V. Munos R. Kavukcuoglu K. and de Freitas N. 2017. Sample Efficient Actor-Critic with Experience Replay. ICLR. (2017)."},{"key":"e_1_3_2_1_23_1","unstructured":"Wang Z. Freitas N. de and Lanctot M. 2015. Dueling Network Architectures for Deep Reinforcement Learning. CoRR. abs\/1511.06581 (2015).  Wang Z. Freitas N. de and Lanctot M. 2015. Dueling Network Architectures for Deep Reinforcement Learning. CoRR. abs\/1511.06581 (2015)."},{"key":"e_1_3_2_1_24_1","volume-title":"Dueling Network Architectures for Deep Reinforcement Learning. ICML. 16, (2016), 1995","author":"Wang Z.","year":"2003"}],"event":{"name":"ICDLT '18: 2018 2nd International Conference on Deep Learning Technologies","location":"Chongqing China","acronym":"ICDLT '18","sponsor":["Chongqing University of Posts and Telecommunications","University of Electronic Science and Technology of China University of Electronic Science and Technology of China"]},"container-title":["Proceedings of the 2018 2nd International Conference on Deep Learning Technologies"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3234804.3234816","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3234804.3234816","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T01:39:20Z","timestamp":1750210760000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3234804.3234816"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018,6,27]]},"references-count":24,"alternative-id":["10.1145\/3234804.3234816","10.1145\/3234804"],"URL":"https:\/\/doi.org\/10.1145\/3234804.3234816","relation":{},"subject":[],"published":{"date-parts":[[2018,6,27]]},"assertion":[{"value":"2018-06-27","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}