{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,24]],"date-time":"2026-01-24T17:37:04Z","timestamp":1769276224328,"version":"3.49.0"},"reference-count":37,"publisher":"IEEE","license":[{"start":{"date-parts":[[2020,9,22]],"date-time":"2020-09-22T00:00:00Z","timestamp":1600732800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2020,9,22]],"date-time":"2020-09-22T00:00:00Z","timestamp":1600732800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2020,9,22]],"date-time":"2020-09-22T00:00:00Z","timestamp":1600732800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2020,9,22]]},"DOI":"10.1109\/hpec43674.2020.9286150","type":"proceedings-article","created":{"date-parts":[[2020,12,22]],"date-time":"2020-12-22T21:07:15Z","timestamp":1608671235000},"page":"1-7","source":"Crossref","is-referenced-by-count":5,"title":["How to Efficiently Train Your AI Agent? Characterizing and Evaluating Deep Reinforcement Learning on Heterogeneous Platforms"],"prefix":"10.1109","author":[{"given":"Yuan","family":"Meng","sequence":"first","affiliation":[]},{"given":"Yang","family":"Yang","sequence":"additional","affiliation":[]},{"given":"Sanmukh","family":"Kuppannagari","sequence":"additional","affiliation":[]},{"given":"Rajgopal","family":"Kannan","sequence":"additional","affiliation":[]},{"given":"Viktor","family":"Prasanna","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref33","year":"0","journal-title":"Openai baselines implementation of a2c"},{"key":"ref32","author":"ruder","year":"2016","journal-title":"An overview of gradient descent optimization algorithms"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.7551\/mitpress\/7056.001.0001"},{"key":"ref30","year":"0","journal-title":"Xilinx alveo u200 data center accelerator card"},{"key":"ref37","author":"liang","year":"2017","journal-title":"RLlib Abstractions for Distributed Reinforcement Learning"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/W17-5518"},{"key":"ref35","first-page":"265","article-title":"Tensorflow: A system for large-scale machine learning","author":"abadi","year":"2016","journal-title":"12th USENIX Symposium on Operating Systems Design and Implementation ( OSDI 16)"},{"key":"ref34","year":"0","journal-title":"Subprocvecenv Vectorized environments"},{"key":"ref10","author":"schaul","year":"2015","journal-title":"Prioritized experience replay"},{"key":"ref11","year":"2017","journal-title":"OpenAI Baselines ACKTR and A2C"},{"key":"ref12","author":"dhariwal","year":"2017","journal-title":"OpenAI Baselines"},{"key":"ref13","year":"1972","journal-title":"Atari Games"},{"key":"ref14","author":"schulman","year":"2017","journal-title":"Prox-imal policy optimization algorithms"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2012.6386109"},{"key":"ref16","author":"suttan","year":"1998","journal-title":"Reinforcement Learning An Introduction MIT Press"},{"key":"ref17","year":"0","journal-title":"Openai spinningup implementation of ppo"},{"key":"ref18","doi-asserted-by":"crossref","first-page":"499","DOI":"10.1145\/3297858.3304058","article-title":"Fa3c: Fpga-accelerated deep reinforcement learning","author":"cho","year":"2019","journal-title":"Proceedings of the fourth international conference on Architectural support for programming languages and operating systems - AS"},{"key":"ref19","year":"0","journal-title":"TensorFlow&#x2122;"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2003.09.017"},{"key":"ref28","first-page":"1928","article-title":"Asynchronous methods for deep reinforcement learning","author":"mnih","year":"2016","journal-title":"International Conference on Machine Learning"},{"key":"ref3","author":"gankidi","year":"2016","journal-title":"Fpga accelerator architecture for q-learning and its applications in space exploration rovers"},{"key":"ref27","author":"espeholt","year":"2018","journal-title":"IM-PALA Scalable Distributed Deep-RL with Importance Weighted Actor-Learner Architectures"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/FCCM48280.2020.00012"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1016\/j.metabol.2017.01.011"},{"key":"ref29","year":"0","journal-title":"Nvidia titan xp"},{"key":"ref8","author":"lillicrap","year":"2015","journal-title":"Continuous control with deep reinforcement learning"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1007\/BF02055574"},{"key":"ref2","author":"shalev-shwartz","year":"2016","journal-title":"Safe multiagent reinforcement learning for autonomous driving"},{"key":"ref1","author":"sutton","year":"2006","journal-title":"Reinforcement Learning and Artificial Intelligence"},{"key":"ref9","first-page":"2829","article-title":"Continuous deep q-learning with model-based acceleration","author":"gu","year":"2016","journal-title":"International Conference on Machine Learning"},{"key":"ref20","author":"jia","year":"2018","journal-title":"Dissecting the NVIDIA Volta GPU Architecture via Microbenchmarking"},{"key":"ref22","year":"0","journal-title":"Xilinx white paper Performance of pci express systems"},{"key":"ref21","volume":"857","author":"bi","year":"2017","journal-title":"Embedded Systems Technology 15th National Conference ESTC 2017"},{"key":"ref24","author":"wang","year":"2019","journal-title":"Benchmarking model-based reinforcement learning"},{"key":"ref23","year":"0","journal-title":"Openai gym mujoco environments"},{"key":"ref26","doi-asserted-by":"crossref","first-page":"529","DOI":"10.1038\/nature14236","article-title":"Human-level control through deep reinforcement learning","volume":"518","author":"mnih","year":"2015","journal-title":"Nature"},{"key":"ref25","first-page":"1329","article-title":"Bench-marking deep reinforcement learning for continuous control","author":"duan","year":"2016","journal-title":"International Conference on Machine Learning"}],"event":{"name":"2020 IEEE High Performance Extreme Computing Conference (HPEC)","location":"Waltham, MA, USA","start":{"date-parts":[[2020,9,22]]},"end":{"date-parts":[[2020,9,24]]}},"container-title":["2020 IEEE High Performance Extreme Computing Conference (HPEC)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9285977\/9286137\/09286150.pdf?arnumber=9286150","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,6,27]],"date-time":"2022-06-27T15:33:08Z","timestamp":1656343988000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9286150\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020,9,22]]},"references-count":37,"URL":"https:\/\/doi.org\/10.1109\/hpec43674.2020.9286150","relation":{},"subject":[],"published":{"date-parts":[[2020,9,22]]}}}