{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,9,8]],"date-time":"2024-09-08T11:27:25Z","timestamp":1725794845796},"reference-count":151,"publisher":"IEEE","license":[{"start":{"date-parts":[[2024,5,5]],"date-time":"2024-05-05T00:00:00Z","timestamp":1714867200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,5,5]],"date-time":"2024-05-05T00:00:00Z","timestamp":1714867200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024,5,5]]},"DOI":"10.1109\/ispass61541.2024.00029","type":"proceedings-article","created":{"date-parts":[[2024,7,16]],"date-time":"2024-07-16T17:19:44Z","timestamp":1721150384000},"page":"217-229","source":"Crossref","is-referenced-by-count":1,"title":["SwiftRL: Towards Efficient Reinforcement Learning on Real Processing-In-Memory Systems"],"prefix":"10.1109","volume":"35","author":[{"given":"Kailash","family":"Gogineni","sequence":"first","affiliation":[{"name":"George Washington University,USA"}]},{"given":"Sai Santosh","family":"Dayapule","sequence":"additional","affiliation":[{"name":"George Washington University,USA"}]},{"given":"Juan","family":"G\u00f3mez-Luna","sequence":"additional","affiliation":[{"name":"ETH Z&#x00FC;rich,Switzerland"}]},{"given":"Karthikeya","family":"Gogineni","sequence":"additional","affiliation":[{"name":"Independent"}]},{"given":"Peng","family":"Wei","sequence":"additional","affiliation":[{"name":"George Washington University,USA"}]},{"given":"Tian","family":"Lan","sequence":"additional","affiliation":[{"name":"George Washington University,USA"}]},{"given":"Mohammad","family":"Sadrosadati","sequence":"additional","affiliation":[{"name":"ETH Z&#x00FC;rich,Switzerland"}]},{"given":"Onur","family":"Mutlu","sequence":"additional","affiliation":[{"name":"ETH Z&#x00FC;rich,Switzerland"}]},{"given":"Guru","family":"Venkataramani","sequence":"additional","affiliation":[{"name":"George Washington University,USA"}]}],"member":"263","reference":[{"journal-title":"A Bradford Book","article-title":"Reinforcement Learning: An Introduction. Second Ed","year":"2018","author":"Sutton","key":"ref1"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1038\/s41586-019-1724-z"},{"journal-title":"arXiv preprint","article-title":"Solving Rubiks Cube with a Robot Hand","year":"2019","author":"Akkaya","key":"ref3"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.13140\/RG.2.2.18893.74727"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1145\/3477600"},{"journal-title":"IEEE TNNLS","article-title":"A survey on offline reinforcement learning: Taxonomy, review, and open problems","year":"2023","author":"Figueiredo","key":"ref6"},{"journal-title":"arXiv preprint","article-title":"Offline Reinforcement Learning: Tutorial, Review, and Perspectives on Open Problems","year":"2020","author":"Levine","key":"ref7"},{"journal-title":"ICML","article-title":"An Optimistic Perspective on Offline Reinforcement Learning","year":"2020","author":"Agarwal","key":"ref8"},{"key":"ref9","article-title":"Leveraging Fac-tored Action Spaces for Efficient Offline Reinforcement Learning in Healthcare","volume":"35","author":"Tang","year":"2022","journal-title":"NeurIPS"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v36i4.20393"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1016\/j.micpro.2019.01.009"},{"journal-title":"Emerging Computing: From Devices to Systems: Looking Beyond Moore and Von Neumann","article-title":"A Modern Primer on Processing in Memory","year":"2022","author":"Mutlu","key":"ref12"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1145\/3296957.3173177"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/PACT52795.2021.00019"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1147\/JRD.2019.2934048"},{"journal-title":"arXiv preprint","article-title":"In-DRAM Bulk Bitwise Execution Engine","year":"2019","author":"Seshadri","key":"ref16"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1145\/3316781.3323476"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/HOTCHIPS.2019.8875680"},{"volume-title":"UPMEM Website","year":"2023","key":"ref19"},{"journal-title":"Introduction to UPMEM PIM. Processing-In-Memory (PIM) on DRAM Accelera-tor (White Paper)","year":"2018","key":"ref20"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/IGSC54211.2021.9651614"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2022.3174101"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/ISVLSI54635.2022.00064"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/ispass57527.2023.00013"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1145\/3508041"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1145\/3508041"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/ISPASS57527.2023.00031"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/PACT58117.2023.00017"},{"journal-title":"bioRxiv","article-title":"GAPiM: Discovering Genetic Variations on a Real Processing-in-Memory System","year":"2023","author":"Abecassis","key":"ref29"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPSW55747.2022.00039"},{"journal-title":"arXiv preprint","article-title":"Analysis of Distributed Optimization Algorithms on a Real Processing-In-Memory System","year":"2024","author":"Rhyner","key":"ref31"},{"journal-title":"arXiv preprint","article-title":"Accelerating Graph Neural Networks on Real Processing-In-Memory Systems","year":"2024","author":"Giannoula","key":"ref32"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1093\/bioinformatics\/btad155"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/IISWC59245.2023.00030"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/MM.2022.3202350"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/SOCC56010.2022.9908126"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1145\/3589258"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/DAC56929.2023.10247915"},{"journal-title":"arXiv preprint","article-title":"PIM-GPT: A Hybrid Process-in-Memory Ac-celerator for Autoregressive Transformers","year":"2023","author":"Wu","key":"ref39"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1007\/BF00992698"},{"key":"ref41","article-title":"Sample Complexity of Asynchronous Q-Learning: Sharper Analysis and Variance Reduction","volume":"33","author":"Li","year":"2020","journal-title":"NeurIPS"},{"journal-title":"arXiv preprint","article-title":"OpenAI GYM","year":"2016","author":"Brockman","key":"ref42"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPSW50202.2020.00024"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1145\/1498765.1498785"},{"volume-title":"Intel\u00ae Advisor User Guide","year":"2021","key":"ref45"},{"journal-title":"USENIX ATC","article-title":"A Case Study of {Processing-In-Memory} in {off-the-Shelf} Systems","year":"2021","author":"Nider","key":"ref46"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2018.00059"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2024.3373311"},{"journal-title":"arXiv preprint","article-title":"Accelerating Time Series Analysis via Processing using Non-Volatile Memories","year":"2022","author":"Fernandez","key":"ref49"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA57654.2024.00029"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1145\/3592980.3595312"},{"journal-title":"arXiv preprint","article-title":"PID-Comm: A Fast and Flexible Collective Communication Framework for Commodity Processing-in-DIMM Devices","year":"2024","author":"Noh","key":"ref52"},{"journal-title":"arXiv preprint","article-title":"CINM (Cinnamon): A Compilation Infrastructure for Heterogeneous Compute In-Memory and Compute Near-Memory Paradigms","year":"2022","author":"Khan","key":"ref53"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.1145\/3243176.3243187"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.1145\/3558481.3591070"},{"key":"ref56","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-48803-0_13"},{"key":"ref57","doi-asserted-by":"publisher","DOI":"10.1145\/3620665.3640428"},{"journal-title":"Parallelization of the Banded Needleman & Wunsch Algorithm on UPMEM PiM Architecture for Long DNA Sequence Alignment","author":"Mognol","key":"ref58"},{"key":"ref59","doi-asserted-by":"publisher","DOI":"10.1109\/PACT58117.2023.00018"},{"key":"ref60","doi-asserted-by":"publisher","DOI":"10.1145\/3534056.3534946"},{"journal-title":"HotStorage","article-title":"Processing in Storage Class Memory","year":"2020","author":"Nider","key":"ref61"},{"key":"ref62","doi-asserted-by":"publisher","DOI":"10.1109\/ICDEW58674.2023.00035"},{"key":"ref63","doi-asserted-by":"publisher","DOI":"10.1109\/IEEECONF59524.2023.10476816"},{"article-title":"BLAST on UPMEM","volume-title":"Ph.D. dissertation, INRIA Rennes-Bretagne Atlantique","year":"2016","author":"Lavenier","key":"ref64"},{"journal-title":"arXiv preprint ar Xiv","article-title":"Accelerating Regular Path Queries over Graph Database with Processing-in-Memory","year":"2024","author":"Ma","key":"ref65"},{"journal-title":"arXiv preprint","article-title":"The Landscape of Compute-near-memory and Compute-in-memory: A Research and Commercial Overview","year":"2024","author":"Khan","key":"ref66"},{"key":"ref67","doi-asserted-by":"publisher","DOI":"10.1109\/ISSCC42613.2021.9365862"},{"key":"ref68","doi-asserted-by":"publisher","DOI":"10.46506\/jica.2021.2.1.043"},{"key":"ref69","doi-asserted-by":"publisher","DOI":"10.1109\/MM.2021.3097700"},{"journal-title":"ISSCC","article-title":"A 1ynm 1.25 V 8Gb, 16Gb\/s\/pin GDDR6-based Accelerator-in-Memory Supporting 1 TFLOPS MAC Operation and Various Activation Functions for Deep- Learning Applications","year":"2022","author":"Lee","key":"ref70"},{"journal-title":"ISSCC","article-title":"184QPS\/W 64Mb\/mm 2 3D Logic-to-DRAM Hybrid Bonding with Process-Near-Memory Engine for Recommendation System","year":"2022","author":"Niu","key":"ref71"},{"article-title":"Lecture 20: Graphics Processing Units","volume-title":"video recording available","year":"2020","author":"Mutlu","key":"ref72"},{"journal-title":"UPMEM User Manual. Version 2023.1.0","year":"2023","key":"ref73"},{"volume-title":"UPMEM Software Development Kit (SDK)","year":"2023","key":"ref74"},{"key":"ref75","doi-asserted-by":"publisher","DOI":"10.1109\/CGO.2004.1281665"},{"journal-title":"Advanced computer architecture","article-title":"A Pipelined, Shared Resource MIMD Computer","year":"1986","author":"Smith","key":"ref76"},{"key":"ref77","doi-asserted-by":"publisher","DOI":"10.1117\/12.932535"},{"key":"ref78","doi-asserted-by":"publisher","DOI":"10.1109\/LCA.2024.3387472"},{"journal-title":"ar Xiv preprint","article-title":"Playing Atari with Deep Reinforcement Learning","year":"2013","author":"Mnih","key":"ref79"},{"journal-title":"arXiv preprint","article-title":"StarCraft II: A New Challenge for Reinforcement Learning","year":"2017","author":"Vinyals","key":"ref80"},{"journal-title":"AAMAS","article-title":"MAC-PO: Multi-Agent Experience Replay via Collective Priority Optimization","year":"2023","author":"Mei","key":"ref81"},{"key":"ref82","doi-asserted-by":"publisher","DOI":"10.1109\/ASAP57973.2023.00041"},{"journal-title":"MLArchSys, ISCA","article-title":"Towards Efficient Multi-Agent Learning Systems","year":"2023","author":"Gogineni","key":"ref83"},{"journal-title":"FastPath, ISPASS","article-title":"Scalability Bottlenecks in Multi-Agent Reinforcement Learning Systems","year":"2023","author":"Gogineni","key":"ref84"},{"key":"ref85","doi-asserted-by":"publisher","DOI":"10.1016\/j.rser.2020.110618"},{"key":"ref86","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2019.2941229"},{"key":"ref87","doi-asserted-by":"publisher","DOI":"10.1016\/B978-1-55860-335-6.50035-0"},{"key":"ref88","article-title":"Deep Q-Learning With Q-Matrix Transfer Learning for Novel Fire Evacuation Environment","volume":"51","author":"Sharma","year":"2020","journal-title":"IEEE SMC: Systems"},{"key":"ref89","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2019.2961174"},{"journal-title":"AISec","article-title":"Bridging Automated to Au-tonomous Cyber Defense: Foundational Analysis of Tabular Q-Learning","year":"2022","author":"Applebaum","key":"ref90"},{"journal-title":"ICML","article-title":"On the existence of fixed points for q-Iearning and sarsa in partially observable domains","year":"2002","author":"Perkins","key":"ref91"},{"volume":"37","volume-title":"On-line Q-Learning Using Connectionist Systems. University of Cambridge, Department of Engineering","year":"1994","author":"Rummery","key":"ref92"},{"key":"ref93","doi-asserted-by":"publisher","DOI":"10.1145\/3126495"},{"journal-title":"MICRO","article-title":"Pythia: A Customizable Hardware Pre fetching Framework Using Online Rein-forcement Learning","year":"2021","author":"Bera","key":"ref94"},{"key":"ref95","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA.2008.21"},{"journal-title":"IEEE TC","article-title":"A Q-Learning Based Self-Adaptive I\/O Communication for 2.5D Integrated Many-Core Microprocessor and Memory","year":"2015","author":"PD","key":"ref96"},{"key":"ref97","doi-asserted-by":"publisher","DOI":"10.23919\/DATE.2019.8714869"},{"key":"ref98","doi-asserted-by":"publisher","DOI":"10.1145\/3470496.3527442"},{"key":"ref99","article-title":"The Epoch-Greedy Algorithm for Contextual Multi-armed Bandits","volume":"20","author":"Langford","year":"2007","journal-title":"NeurIPS"},{"journal-title":"Reinforcement Learning Coach","year":"2017","author":"Caspi","key":"ref100"},{"journal-title":"arXiv preprint","article-title":"Is Independent Learning All You Need in the StarCraft Multi-Agent Challenge?","year":"2020","author":"De Witt","key":"ref101"},{"journal-title":"CoRL","article-title":"SMARTS: Scalable Multi-Agent Reinforcement Learning Training School for Autonomous Driving","year":"2021","author":"Zhou","key":"ref102"},{"journal-title":"arXiv preprint","article-title":"Safe, Multi-Agent, Reinforce-ment Learning for Autonomous Driving","year":"2016","author":"Shalev-Shwartz","key":"ref103"},{"journal-title":"IJCNN","article-title":"Multi-Agent Connected Autonomous Driving using Deep Rein-forcement Learning","year":"2020","author":"Palanisamy","key":"ref104"},{"journal-title":"AAMAS","article-title":"Training Cooperative Agents for Multi-Agent Reinforcement Learning","year":"2019","author":"Bhalla","key":"ref105"},{"journal-title":"International Congress of Mathematicians","article-title":"Independent Learning in Stochastic Games","year":"2021","author":"Ozdaglar","key":"ref106"},{"journal-title":"IEEE T-ITS","article-title":"Deep reinforcement learning for autonomous driving: A survey","year":"2021","author":"Kiran","key":"ref107"},{"journal-title":"arXiv preprint","article-title":"CM3: Cooperative Multi-goal Multi-Stage Multi-Agent Reinforcement Learning","year":"2018","author":"Yang","key":"ref108"},{"key":"ref109","doi-asserted-by":"publisher","DOI":"10.1017\/S0269888912000057"},{"journal-title":"arXiv preprint","article-title":"A Review of Safe Reinforcement Learning: Methods, Theory and Applications","year":"2022","author":"Gu","key":"ref110"},{"key":"ref111","doi-asserted-by":"publisher","DOI":"10.1016\/B978-1-55860-307-3.50049-6"},{"key":"ref112","doi-asserted-by":"publisher","DOI":"10.1145\/318123.318229"},{"volume-title":"Xeon\u00ae Silver 4110 Processor","author":"I. S. P.","key":"ref113"},{"volume-title":"NVIDIA Ampere Architecture","key":"ref114"},{"journal-title":"arXiv preprint a rXiv","article-title":"The Blessing of Heterogeneity in Federated Q- learning: Linear Speedup and Beyond","year":"2023","author":"Woo","key":"ref115"},{"journal-title":"arXiv preprint","article-title":"Federated Reinforcement Learning: Tech- niques, Applications, and Open Challenges","year":"2021","author":"Qi","key":"ref116"},{"journal-title":"ICML","article-title":"Asynchronous Methods for Deep Reinforcement Learning","year":"2016","author":"Mnih","key":"ref117"},{"key":"ref118","doi-asserted-by":"publisher","DOI":"10.1145\/3400302.3415663"},{"key":"ref119","doi-asserted-by":"publisher","DOI":"10.1109\/tc.2018.2876312"},{"key":"ref120","doi-asserted-by":"publisher","DOI":"10.1109\/ASP-DAC47756.2020.9045288"},{"key":"ref121","doi-asserted-by":"publisher","DOI":"10.1109\/ASP-DAC47756.2020.9045192"},{"key":"ref122","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO50266.2020.00040"},{"key":"ref123","doi-asserted-by":"publisher","DOI":"10.1145\/3195970.3196029"},{"key":"ref124","doi-asserted-by":"publisher","DOI":"10.1145\/3093315.3037702"},{"key":"ref125","doi-asserted-by":"publisher","DOI":"10.1145\/3307650.3322237"},{"key":"ref126","doi-asserted-by":"publisher","DOI":"10.1109\/jssc.2022.3200718"},{"key":"ref127","doi-asserted-by":"publisher","DOI":"10.1109\/PACT.2015.22"},{"key":"ref128","doi-asserted-by":"publisher","DOI":"10.1109\/CAHPC.2018.8645905"},{"journal-title":"arXiv preprint","article-title":"ORIGAMI: A Heterogeneous Split Architecture for In-Memory Acceleration of Learning","year":"2018","author":"Falahati","key":"ref129"},{"key":"ref130","doi-asserted-by":"publisher","DOI":"10.1145\/3386263.3407649"},{"key":"ref131","doi-asserted-by":"publisher","DOI":"10.1109\/MDAT.2022.3161126"},{"key":"ref132","doi-asserted-by":"publisher","DOI":"10.1016\/j.sysarc.2022.102561"},{"article-title":"Offloading embedding lookups to processing-in-memory for deep learning recommender models","volume-title":"Ph.D. dissertation","year":"2023","author":"Zarif","key":"ref133"},{"key":"ref134","doi-asserted-by":"publisher","DOI":"10.1145\/3628599"},{"key":"ref135","doi-asserted-by":"publisher","DOI":"10.1109\/BIBM49941.2020.9313351"},{"key":"ref136","doi-asserted-by":"publisher","DOI":"10.1007\/s13222-023-00456-z"},{"key":"ref137","doi-asserted-by":"publisher","DOI":"10.1145\/3639046"},{"journal-title":"ICLR","article-title":"GA3C: GPU-based A3C for Deep Reinforcement Learning","year":"2017","author":"Babaeizadeh","key":"ref138"},{"key":"ref139","doi-asserted-by":"publisher","DOI":"10.1145\/3297858.3304058"},{"key":"ref140","doi-asserted-by":"publisher","DOI":"10.1145\/3307650.3322259"},{"journal-title":"arXiv preprint","article-title":"ACME: A Research Framework for Distributed Reinforcement Learning","year":"2020","author":"Hoffman","key":"ref141"},{"journal-title":"arXiv preprint","article-title":"Accelerated Methods for Deep Reinforcement Learning","year":"2018","author":"Stooke","key":"ref142"},{"journal-title":"arXiv preprint","article-title":"Efficient Parallel Methods for Deep Reinforcement Learning","year":"2017","author":"Clemente","key":"ref143"},{"key":"ref144","doi-asserted-by":"publisher","DOI":"10.1109\/IROS47612.2022.9981441"},{"key":"ref145","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00038"},{"key":"ref146","article-title":"Quantized Neural Networks: Training Neural Networks with Low Precision Weights and Activations","volume":"18","author":"Hubara","year":"2017","journal-title":"JMLR"},{"key":"ref147","doi-asserted-by":"publisher","DOI":"10.1109\/DAC18072.2020.9218516"},{"journal-title":"ICML","article-title":"Improving Neural Network Quantization without Retraining using Outlier Channel Splitting","year":"2019","author":"Zhao","key":"ref148"},{"journal-title":"arXiv preprint","article-title":"Mixed Precision Training","year":"2017","author":"Micikevicius","key":"ref149"},{"journal-title":"ICML","article-title":"Low-Precision Reinforcement Learning: Running Soft Actor-Critic in Half Precision","year":"2021","author":"Bjorck","key":"ref150"},{"journal-title":"TMLR","article-title":"QuaRL: Quantization for Fast and Environmentally Sustainable Reinforcement Learning","year":"2022","author":"Krishnan","key":"ref151"}],"event":{"name":"2024 IEEE International Symposium on Performance Analysis of Systems and Software (ISPASS)","start":{"date-parts":[[2024,5,5]]},"location":"Indianapolis, IN, USA","end":{"date-parts":[[2024,5,7]]}},"container-title":["2024 IEEE International Symposium on Performance Analysis of Systems and Software (ISPASS)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/10589923\/10590014\/10590059.pdf?arnumber=10590059","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,7,19]],"date-time":"2024-07-19T04:55:37Z","timestamp":1721364937000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10590059\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,5,5]]},"references-count":151,"URL":"http:\/\/dx.doi.org\/10.1109\/ispass61541.2024.00029","relation":{},"subject":[],"published":{"date-parts":[[2024,5,5]]}}}