{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,24]],"date-time":"2026-01-24T01:52:55Z","timestamp":1769219575256,"version":"3.49.0"},"reference-count":27,"publisher":"IEEE","license":[{"start":{"date-parts":[[2024,12,10]],"date-time":"2024-12-10T00:00:00Z","timestamp":1733788800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,12,10]],"date-time":"2024-12-10T00:00:00Z","timestamp":1733788800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100000038","name":"Natural Sciences and Engineering Research Council of Canada (NSERC)","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100000038","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024,12,10]]},"DOI":"10.1109\/icfpt64416.2024.11113396","type":"proceedings-article","created":{"date-parts":[[2025,8,18]],"date-time":"2025-08-18T19:38:13Z","timestamp":1755545893000},"page":"01-09","source":"Crossref","is-referenced-by-count":1,"title":["HEPPO: Hardware-Efficient Proximal Policy Optimization a Universal Pipelined Architecture for Generalized Advantage Estimation"],"prefix":"10.1109","author":[{"given":"Hazem","family":"Taha","sequence":"first","affiliation":[{"name":"McMaster University,Hamilton,Ontario,L8S 4L8"}]},{"given":"Ameer M. S.","family":"Abdelhadi","sequence":"additional","affiliation":[{"name":"McMaster University,Hamilton,Ontario,L8S 4L8"}]}],"member":"263","reference":[{"issue":"2","key":"ref1","first-page":"319","article-title":"Deep Reinforcement Learning for Robotic Manipulation-The State of the Art","volume":"4","author":"Levine","year":"2018","journal-title":"IEEE Robotics and Automation Letters"},{"key":"ref2","article-title":"Mastering Chess and Shogi by Self-Play with a General Reinforcement Learning Algorithm","volume-title":"arXiv preprint arXiv","author":"Silver","year":"2017"},{"key":"ref3","article-title":"Proximal Policy Optimization Algorithms","volume-title":"arXiv preprint arXiv","author":"Schulman","year":"2017"},{"key":"ref4","first-page":"1889","article-title":"Trust Region Policy Optimization","volume-title":"Proceedings of the 32nd International Conference on Machine Learning (ICML)","author":"Schulman","year":"2015"},{"key":"ref5","article-title":"Adam: A Method for Stochastic Optimization","volume-title":"Proceedings of the 3rd International Conference on Learning Representations (ICLR)","author":"Kingma"},{"key":"ref6","article-title":"High-Dimensional Continuous Control Using Generalized Advantage Estimation","volume-title":"arXiv preprint arXiv","author":"Schulman","year":"2015"},{"key":"ref7","article-title":"QuaRL: Quantization for Fast and Environmentally Sustainable Reinforcement Learning","volume-title":"arXiv preprint arXiv","author":"Krishnan","year":"2022"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/DAC18074.2021.9586213"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/FCCM48280.2020.00012"},{"key":"ref10","article-title":"EnvPool: A Highly Parallel Reinforcement Learning Environment Execution Engine","volume-title":"Proceedings of the 36th International Conference on Neural Information Processing Systems (NeurIPS)","author":"Weng"},{"key":"ref11","article-title":"Accelerating Reinforcement Learning through GPU Atari Emulation","volume-title":"Proceedings of the 34th International Conference on Neural Information Processing Systems (NeurIPS)","author":"Dalton"},{"key":"ref12","article-title":"GPU-Accelerated Robotic Simulation for Distributed Reinforcement Learning","volume-title":"Proceedings of the 2nd Conference on Robot Learning (CoRL)","author":"Liang"},{"issue":"3","key":"ref13","first-page":"419","article-title":"Note on a Method for Calculating Corrected Sums of Squares and Products","volume-title":"Technometrics","volume":"4","author":"Welford","year":"1962"},{"key":"ref14","article-title":"The Art of Computer Programming","volume-title":"Seminumerical Algorithms","volume":"2","author":"Knuth","year":"1998"},{"key":"ref15","volume-title":"Justifying Advantage Normalization for PPO","author":"Yang","year":"2021"},{"key":"ref16","article-title":"mbcel","volume-title":"Understanding Normalization of Advantage Function in PPO","year":"2018"},{"key":"ref17","volume-title":"Coding PPO from Scratch with PyTorch (Part 4\/4)","author":"Yu","year":"2023"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/FCCM.2013.63"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/ICFPT47387.2019.00073"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/FPL53798.2021.00062"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/FCCM51124.2021.00057"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/NorCAS51424.2020.9265139"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/NoCArc51382.2020.9234573"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/ASYNC.2017.20"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1145\/3620665.3640356"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1145\/3470496.3527438"},{"key":"ref27","first-page":"120","article-title":"Boveda: Building an On-Chip Deep Learning Memory Hierarchy Brick by Brick","volume-title":"Proceedings of Machine Learning and Systems (MLSys)","volume":"3","author":"Edo Vivancos","year":"2021"}],"event":{"name":"2024 International Conference on Field Programmable Technology (ICFPT)","location":"Sydney, Australia","start":{"date-parts":[[2024,12,10]]},"end":{"date-parts":[[2024,12,12]]}},"container-title":["2024 International Conference on Field Programmable Technology (ICFPT)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11113383\/11113387\/11113396.pdf?arnumber=11113396","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,19]],"date-time":"2025-08-19T04:47:15Z","timestamp":1755578835000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11113396\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,12,10]]},"references-count":27,"URL":"https:\/\/doi.org\/10.1109\/icfpt64416.2024.11113396","relation":{},"subject":[],"published":{"date-parts":[[2024,12,10]]}}}