{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,4]],"date-time":"2026-03-04T02:39:02Z","timestamp":1772591942890,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":63,"publisher":"ACM","license":[{"start":{"date-parts":[[2019,7,19]],"date-time":"2019-07-19T00:00:00Z","timestamp":1563494400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2019,7,19]]},"DOI":"10.1145\/3351917.3351989","type":"proceedings-article","created":{"date-parts":[[2019,9,23]],"date-time":"2019-09-23T12:10:23Z","timestamp":1569240623000},"page":"1-9","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":11,"title":["An Overview of Deep Reinforcement Learning"],"prefix":"10.1145","author":[{"given":"LiChun","family":"Cao","sequence":"first","affiliation":[{"name":"College of Computer Science and Techonlogy, Inner Mongolia normal university, Inner Mongolia autonomous region"}]},{"family":"ZhiMin","sequence":"additional","affiliation":[{"name":"College of Computer Science and Techonlogy, Inner Mongolia normal university, Inner Mongolia autonomous region"}]}],"member":"320","published-online":{"date-parts":[[2019,7,19]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1162\/neco.2006.18.7.1527"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1126\/science.1127647"},{"key":"e_1_3_2_1_3_1","first-page":"18","article-title":"[Lecture Notes in Computer Science] Algorithmic Learning Theory Volume 6925 || On the Expressive Power of Deep Architectures[C]\/\/ International Conference on Algorithmic Learning Theory","volume":"2011","author":"Kivinen J","journal-title":"Springer-Verlag"},{"key":"e_1_3_2_1_4_1","volume-title":"USA: Prineeton University","author":"Minsky M L","year":"1954"},{"key":"e_1_3_2_1_5_1","volume-title":"USA: MIT Press","author":"Sutton R S","year":"1998"},{"key":"e_1_3_2_1_6_1","first-page":"0","volume-title":"MA 1998, 322","author":"Johnson J D","year":"1939"},{"issue":"6","key":"e_1_3_2_1_7_1","first-page":"A187","article-title":"Continuous control with deep reinforcement learning","volume":"8","author":"Lillicrap T P","year":"2015","journal-title":"Computer Science"},{"key":"e_1_3_2_1_8_1","unstructured":"Mnih V Badia Adri\u00e0 Puigdom\u00e8nech Mirza M etal Asynchronous Methods for Deep Reinforcement Learning[J]. 2016.  Mnih V Badia Adri\u00e0 Puigdom\u00e8nech Mirza M et al. Asynchronous Methods for Deep Reinforcement Learning[J]. 2016."},{"key":"e_1_3_2_1_9_1","unstructured":"Schulman J Wolski F Dhariwal P etal Proximal Policy Optimization Algorithms[J]. 2017.  Schulman J Wolski F Dhariwal P et al. Proximal Policy Optimization Algorithms[J]. 2017."},{"key":"e_1_3_2_1_10_1","volume-title":"Computer Science","author":"Mnih V","year":"2013"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"crossref","unstructured":"Lan X Wang H Gong S etal Deep Reinforcement Learning Attention Selection for Person Re-Identification[J]. 2017.  Lan X Wang H Gong S et al. Deep Reinforcement Learning Attention Selection for Person Re-Identification[J]. 2017.","DOI":"10.5244\/C.31.121"},{"key":"e_1_3_2_1_12_1","volume-title":"Speech and Signal Processing","author":"Graves A","year":"2013"},{"key":"e_1_3_2_1_13_1","volume-title":"Harbin Institute of Technology","year":"2017"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"crossref","unstructured":"Clark K Manning C D. Deep Reinforcement Learning for Mention-Ranking Coreference Models[J]. 2016.  Clark K Manning C D. Deep Reinforcement Learning for Mention-Ranking Coreference Models[J]. 2016.","DOI":"10.18653\/v1\/D16-1245"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1073\/pnas.38.8.716"},{"key":"e_1_3_2_1_16_1","unstructured":"VAN HASSELT H GUEZ A SILVER D. Deep Reinforcement Learning with Double Q-Learning[C \/ OL]. [2018-12-26].https:\/\/arxiv.org\/pdf\/1509.06461.pdf.  VAN HASSELT H GUEZ A SILVER D. Deep Reinforcement Learning with Double Q-Learning[C \/ OL]. [2018-12-26].https:\/\/arxiv.org\/pdf\/1509.06461.pdf."},{"key":"e_1_3_2_1_17_1","unstructured":"HAUSKNECHT M STONE P. Deep Recurrent Q-Learning for Par-tially Observable MDPs[C\/OL]. [2018-2-26]. https:\/\/arxiv.org\/pdf\/1507.06527.pdf.  HAUSKNECHT M STONE P. Deep Recurrent Q-Learning for Par-tially Observable MDPs[C\/OL]. [2018-2-26]. https:\/\/arxiv.org\/pdf\/1507.06527.pdf."},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"crossref","unstructured":"Coulom R. Efficient Selectivity and Backup Operators in Monte-Carlo Tree Search[C]\/\/ Proc of the International Conference on Computer & Games. 2006.  Coulom R. Efficient Selectivity and Backup Operators in Monte-Carlo Tree Search[C]\/\/ Proc of the International Conference on Computer & Games. 2006.","DOI":"10.1007\/978-3-540-75538-8_7"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1023\/A:1025696116075"},{"issue":"3","key":"e_1_3_2_1_20_1","first-page":"1131","article-title":"Multi-task Reinforcement Learning in Partially Observable Stochastic Environments. [J]","volume":"10","author":"Hui L","year":"2009","journal-title":"Journal of Machine Learning Research"},{"issue":"4","key":"e_1_3_2_1_21_1","doi-asserted-by":"crossref","first-page":"e0172395","DOI":"10.1371\/journal.pone.0172395","article-title":"Multiagent cooperation and competition with deep reinforcement learning[J]","volume":"12","author":"Ardi T","year":"2017","journal-title":"PLOS ONE"},{"key":"e_1_3_2_1_22_1","unstructured":"Oh J Chockalingam V Singh S etal Control of Memory Active Perception and Action in Minecraft[J]. 2016.  Oh J Chockalingam V Singh S et al. Control of Memory Active Perception and Action in Minecraft[J]. 2016."},{"key":"e_1_3_2_1_23_1","volume-title":"IEEE","author":"Lange","year":"2010"},{"key":"e_1_3_2_1_24_1","volume-title":"IEEE","author":"Lange","year":"2012"},{"key":"e_1_3_2_1_25_1","volume-title":"Computer Science","author":"Mnih V","year":"2013"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1038\/nature14236"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1038\/nature16961"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1007\/BF00992699"},{"key":"e_1_3_2_1_29_1","volume-title":"Computer Science","author":"Van Hasselt H","year":"2015"},{"key":"e_1_3_2_1_30_1","unstructured":"Wang Z Schaul T Hessel M etal Dueling Network Architectures for Deep Reinforcement Learning[J]. 2015.  Wang Z Schaul T Hessel M et al. Dueling Network Architectures for Deep Reinforcement Learning[J]. 2015."},{"key":"e_1_3_2_1_31_1","volume-title":"Computer Science","author":"Hausknecht M","year":"2015"},{"key":"e_1_3_2_1_32_1","unstructured":"Mnih V Badia Adri\u00e0 Puigdom\u00e8nech Mirza M etal Asynchronous Methods for Deep Reinforcement Learning[J]. 2016.  Mnih V Badia Adri\u00e0 Puigdom\u00e8nech Mirza M et al. Asynchronous Methods for Deep Reinforcement Learning[J]. 2016."},{"key":"e_1_3_2_1_33_1","unstructured":"Fortunato M Azar M G Piot B etal Noisy Networks for Exploration[J]. 2017.  Fortunato M Azar M G Piot B et al. Noisy Networks for Exploration[J]. 2017."},{"key":"e_1_3_2_1_34_1","volume-title":"4th International Con-ference on Learning Representations","year":"2016"},{"key":"e_1_3_2_1_35_1","unstructured":"Bellemare M G Dabney W Munos R\u00e9mi. A Distributional Perspective on Reinforcement Learning[J]. 2017.  Bellemare M G Dabney W Munos R\u00e9mi. A Distributional Perspective on Reinforcement Learning[J]. 2017."},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"crossref","unstructured":"Hessel M Modayil J Van Hasselt H etal Rainbow: Combining Improvements in Deep Reinforcement Learning[J]. 2017.  Hessel M Modayil J Van Hasselt H et al. Rainbow: Combining Improvements in Deep Reinforcement Learning[J]. 2017.","DOI":"10.1609\/aaai.v32i1.11796"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"crossref","unstructured":"Caicedo J C Lazebnik S. Active Object Localization with Deep Reinforcement Learning[J]. 2015.  Caicedo J C Lazebnik S. Active Object Localization with Deep Reinforcement Learning[J]. 2015.","DOI":"10.1109\/ICCV.2015.286"},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"crossref","unstructured":"Li D Wu H Zhang J etal A2-RL: Aesthetics Aware Reinforcement Learning for Image Cropping[J]. 2017.  Li D Wu H Zhang J et al. A2-RL: Aesthetics Aware Reinforcement Learning for Image Cropping[J]. 2017.","DOI":"10.1109\/CVPR.2018.00855"},{"key":"e_1_3_2_1_39_1","unstructured":"Oh J Guo X Lee H etal Action-Conditional Video Prediction using Deep Networks in Atari Games[J]. 2015.  Oh J Guo X Lee H et al. Action-Conditional Video Prediction using Deep Networks in Atari Games[J]. 2015."},{"key":"e_1_3_2_1_40_1","unstructured":"Xie L Wang S Markham A etal Towards Monocular Vision based Obstacle Avoidance through Deep Reinforcement Learning[J]. 2017.  Xie L Wang S Markham A et al. Towards Monocular Vision based Obstacle Avoidance through Deep Reinforcement Learning[J]. 2017."},{"key":"e_1_3_2_1_41_1","unstructured":"Zhang M Feng J Montejo K etal Lift-the-Flap: Context Reasoning Using Object-Centered Graphs[J]. 2019.  Zhang M Feng J Montejo K et al. Lift-the-Flap: Context Reasoning Using Object-Centered Graphs[J]. 2019."},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"crossref","unstructured":"Chen T Wang Z Li G etal Recurrent Attentional Reinforcement Learning for Multi-label Image Recognition[J]. 2017.  Chen T Wang Z Li G et al. Recurrent Attentional Reinforcement Learning for Multi-label Image Recognition[J]. 2017.","DOI":"10.1609\/aaai.v32i1.12281"},{"key":"e_1_3_2_1_43_1","first-page":"1","article-title":"Generation text with deep reinforcement learning\/\/Proceedings of the Workshops of Advances in Neural Information Processing Systems","volume":"2015","author":"Guo H","journal-title":"Montreal, Canada"},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"crossref","unstructured":"Clark K Manning C D. Deep Reinforcement Learning for Mention-Ranking Coreference Models[J]. 2016.  Clark K Manning C D. Deep Reinforcement Learning for Mention-Ranking Coreference Models[J]. 2016.","DOI":"10.18653\/v1\/D16-1245"},{"key":"e_1_3_2_1_45_1","volume-title":"USA","author":"Satija H"},{"key":"e_1_3_2_1_46_1","volume-title":"Computer Science","author":"Narasimhan K","year":"2015"},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"crossref","unstructured":"Su P H Gasic M Mrksic N etal On-line Active Reward Learning for Policy Optimisation in Spoken Dialogue Systems[J]. 2016.  Su P H Gasic M Mrksic N et al. On-line Active Reward Learning for Policy Optimisation in Spoken Dialogue Systems[J]. 2016.","DOI":"10.18653\/v1\/P16-1230"},{"key":"e_1_3_2_1_48_1","unstructured":"Xia Y He D Qin T etal Dual Learning for Machine Translation[J]. 2016.  Xia Y He D Qin T et al. Dual Learning for Machine Translation[J]. 2016."},{"key":"e_1_3_2_1_49_1","first-page":"2852","article-title":"SeqGAN: sequence generative adversarial nets with policy gradient [C] \/\/Proceedings of the 31st AAAI Conference on Artificial Intelligence (AAAI)","volume":"2017","journal-title":"San Francisco: AAAI"},{"key":"e_1_3_2_1_50_1","volume-title":"IEEE","author":"Chebotar Y","year":"2016"},{"key":"e_1_3_2_1_51_1","unstructured":"Popov I Heess N Lillicrap T etal Data-efficient Deep Reinforcement Learning for Dexterous Manipulation[J]. 2017.  Popov I Heess N Lillicrap T et al. Data-efficient Deep Reinforcement Learning for Dexterous Manipulation[J]. 2017."},{"key":"e_1_3_2_1_52_1","unstructured":"Kalashnikov D Irpan A Pastor P etal QT-Opt: Scalable Deep Reinforcement Learning for Vision-Based Robotic Manipulation[J]. 2018.  Kalashnikov D Irpan A Pastor P et al. QT-Opt: Scalable Deep Reinforcement Learning for Vision-Based Robotic Manipulation[J]. 2018."},{"key":"e_1_3_2_1_53_1","first-page":"490","article-title":"Leveraging Deep Reinforce-ment Learning for Reaching Robotic Tasks \/\/","volume":"2017","journal-title":"Washington, USA: IEEE"},{"key":"e_1_3_2_1_54_1","volume-title":"Computer Science","author":"Zhang F","year":"2015"},{"key":"e_1_3_2_1_55_1","unstructured":"Finn C Levine S Abbeel P. Guided cost learning: deep inverse optimal control via policy optimization[C]\/\/ International Conference on International Conference on Machine Learning. 2016.  Finn C Levine S Abbeel P. Guided cost learning: deep inverse optimal control via policy optimization[C]\/\/ International Conference on International Conference on Machine Learning. 2016."},{"key":"e_1_3_2_1_56_1","doi-asserted-by":"publisher","DOI":"10.1001\/jama.2016.17216"},{"key":"e_1_3_2_1_57_1","doi-asserted-by":"publisher","DOI":"10.1038\/nature21056"},{"key":"e_1_3_2_1_58_1","volume-title":"IEEE","author":"Liu Y","year":"2017"},{"key":"e_1_3_2_1_59_1","unstructured":"Raghu A Komorowski M Celi L A etal Continuous State-Space Models for Optimal Sepsis Treatment - a Deep Reinforcement Learning Approach[J]. 2017.  Raghu A Komorowski M Celi L A et al. Continuous State-Space Models for Optimal Sepsis Treatment - a Deep Reinforcement Learning Approach[J]. 2017."},{"key":"e_1_3_2_1_60_1","doi-asserted-by":"crossref","unstructured":"Dhingra B Li L Li X etal Towards End-to-End Reinforcement Learning of Dialogue Agents for Information Access[J]. 2016.  Dhingra B Li L Li X et al. Towards End-to-End Reinforcement Learning of Dialogue Agents for Information Access[J]. 2016.","DOI":"10.18653\/v1\/P17-1045"},{"key":"e_1_3_2_1_61_1","doi-asserted-by":"crossref","unstructured":"Kendall A Hawke J Janz D etal Learning to Drive in a Day[J]. 2018.  Kendall A Hawke J Janz D et al. Learning to Drive in a Day[J]. 2018.","DOI":"10.1109\/ICRA.2019.8793742"},{"key":"e_1_3_2_1_62_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.asoc.2012.12.023"},{"key":"e_1_3_2_1_63_1","unstructured":"Baker B Gupta O Naik N etal Designing Neural Network Architectures using Reinforcement Learning[J]. 2016.  Baker B Gupta O Naik N et al. Designing Neural Network Architectures using Reinforcement Learning[J]. 2016."}],"event":{"name":"CACRE2019: 2019 4th International Conference on Automation, Control and Robotics Engineering","location":"Shenzhen China","acronym":"CACRE2019","sponsor":["Sichuan University"]},"container-title":["Proceedings of the 2019 4th International Conference on Automation, Control and Robotics Engineering"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3351917.3351989","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3351917.3351989","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T00:26:15Z","timestamp":1750206375000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3351917.3351989"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019,7,19]]},"references-count":63,"alternative-id":["10.1145\/3351917.3351989","10.1145\/3351917"],"URL":"https:\/\/doi.org\/10.1145\/3351917.3351989","relation":{},"subject":[],"published":{"date-parts":[[2019,7,19]]},"assertion":[{"value":"2019-07-19","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}