{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,5]],"date-time":"2026-03-05T15:45:21Z","timestamp":1772725521906,"version":"3.50.1"},"reference-count":151,"publisher":"IEEE","license":[{"start":{"date-parts":[[2024,5,5]],"date-time":"2024-05-05T00:00:00Z","timestamp":1714867200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,5,5]],"date-time":"2024-05-05T00:00:00Z","timestamp":1714867200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024,5,5]]},"DOI":"10.1109\/ispass61541.2024.00029","type":"proceedings-article","created":{"date-parts":[[2024,7,16]],"date-time":"2024-07-16T17:19:44Z","timestamp":1721150384000},"page":"217-229","source":"Crossref","is-referenced-by-count":9,"title":["SwiftRL: Towards Efficient Reinforcement Learning on Real Processing-In-Memory Systems"],"prefix":"10.1109","author":[{"given":"Kailash","family":"Gogineni","sequence":"first","affiliation":[{"name":"George Washington University,USA"}]},{"given":"Sai Santosh","family":"Dayapule","sequence":"additional","affiliation":[{"name":"George Washington University,USA"}]},{"given":"Juan","family":"G\u00f3mez-Luna","sequence":"additional","affiliation":[{"name":"ETH Z&#x00FC;rich,Switzerland"}]},{"given":"Karthikeya","family":"Gogineni","sequence":"additional","affiliation":[{"name":"Independent"}]},{"given":"Peng","family":"Wei","sequence":"additional","affiliation":[{"name":"George Washington University,USA"}]},{"given":"Tian","family":"Lan","sequence":"additional","affiliation":[{"name":"George Washington University,USA"}]},{"given":"Mohammad","family":"Sadrosadati","sequence":"additional","affiliation":[{"name":"ETH Z&#x00FC;rich,Switzerland"}]},{"given":"Onur","family":"Mutlu","sequence":"additional","affiliation":[{"name":"ETH Z&#x00FC;rich,Switzerland"}]},{"given":"Guru","family":"Venkataramani","sequence":"additional","affiliation":[{"name":"George Washington University,USA"}]}],"member":"263","reference":[{"key":"ref1","article-title":"Reinforcement Learning: An Introduction. Second Ed","author":"Sutton","year":"2018","journal-title":"A Bradford Book"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1038\/s41586-019-1724-z"},{"key":"ref3","article-title":"Solving Rubiks Cube with a Robot Hand","author":"Akkaya","year":"2019","journal-title":"arXiv preprint"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.13140\/RG.2.2.18893.74727"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1145\/3477600"},{"key":"ref6","article-title":"A survey on offline reinforcement learning: Taxonomy, review, and open problems","author":"Figueiredo","year":"2023","journal-title":"IEEE TNNLS"},{"key":"ref7","article-title":"Offline Reinforcement Learning: Tutorial, Review, and Perspectives on Open Problems","author":"Levine","year":"2020","journal-title":"arXiv preprint"},{"key":"ref8","article-title":"An Optimistic Perspective on Offline Reinforcement Learning","author":"Agarwal","year":"2020","journal-title":"ICML"},{"key":"ref9","article-title":"Leveraging Fac-tored Action Spaces for Efficient Offline Reinforcement Learning in Healthcare","volume":"35","author":"Tang","year":"2022","journal-title":"NeurIPS"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v36i4.20393"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1016\/j.micpro.2019.01.009"},{"key":"ref12","article-title":"A Modern Primer on Processing in Memory","author":"Mutlu","year":"2022","journal-title":"Emerging Computing: From Devices to Systems: Looking Beyond Moore and Von Neumann"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1145\/3296957.3173177"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/PACT52795.2021.00019"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1147\/JRD.2019.2934048"},{"key":"ref16","article-title":"In-DRAM Bulk Bitwise Execution Engine","author":"Seshadri","year":"2019","journal-title":"arXiv preprint"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1145\/3316781.3323476"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/HOTCHIPS.2019.8875680"},{"key":"ref19","volume-title":"UPMEM Website","year":"2023"},{"key":"ref20","year":"2018","journal-title":"Introduction to UPMEM PIM. Processing-In-Memory (PIM) on DRAM Accelera-tor (White Paper)"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/IGSC54211.2021.9651614"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2022.3174101"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/ISVLSI54635.2022.00064"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/ispass57527.2023.00013"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1145\/3508041"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1145\/3508041"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/ISPASS57527.2023.00031"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/PACT58117.2023.00017"},{"key":"ref29","article-title":"GAPiM: Discovering Genetic Variations on a Real Processing-in-Memory System","author":"Abecassis","year":"2023","journal-title":"bioRxiv"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPSW55747.2022.00039"},{"key":"ref31","article-title":"Analysis of Distributed Optimization Algorithms on a Real Processing-In-Memory System","author":"Rhyner","year":"2024","journal-title":"arXiv preprint"},{"key":"ref32","article-title":"Accelerating Graph Neural Networks on Real Processing-In-Memory Systems","author":"Giannoula","year":"2024","journal-title":"arXiv preprint"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1093\/bioinformatics\/btad155"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/IISWC59245.2023.00030"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/MM.2022.3202350"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/SOCC56010.2022.9908126"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1145\/3589258"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/DAC56929.2023.10247915"},{"key":"ref39","article-title":"PIM-GPT: A Hybrid Process-in-Memory Ac-celerator for Autoregressive Transformers","author":"Wu","year":"2023","journal-title":"arXiv preprint"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1007\/BF00992698"},{"key":"ref41","article-title":"Sample Complexity of Asynchronous Q-Learning: Sharper Analysis and Variance Reduction","volume":"33","author":"Li","year":"2020","journal-title":"NeurIPS"},{"key":"ref42","article-title":"OpenAI GYM","author":"Brockman","year":"2016","journal-title":"arXiv preprint"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPSW50202.2020.00024"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1145\/1498765.1498785"},{"key":"ref45","volume-title":"Intel\u00ae Advisor User Guide","year":"2021"},{"key":"ref46","article-title":"A Case Study of {Processing-In-Memory} in {off-the-Shelf} Systems","author":"Nider","year":"2021","journal-title":"USENIX ATC"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2018.00059"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2024.3373311"},{"key":"ref49","article-title":"Accelerating Time Series Analysis via Processing using Non-Volatile Memories","author":"Fernandez","year":"2022","journal-title":"arXiv preprint"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA57654.2024.00029"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1145\/3592980.3595312"},{"key":"ref52","article-title":"PID-Comm: A Fast and Flexible Collective Communication Framework for Commodity Processing-in-DIMM Devices","author":"Noh","year":"2024","journal-title":"arXiv preprint"},{"key":"ref53","article-title":"CINM (Cinnamon): A Compilation Infrastructure for Heterogeneous Compute In-Memory and Compute Near-Memory Paradigms","author":"Khan","year":"2022","journal-title":"arXiv preprint"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.1145\/3243176.3243187"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.1145\/3558481.3591070"},{"key":"ref56","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-48803-0_13"},{"key":"ref57","doi-asserted-by":"publisher","DOI":"10.1145\/3620665.3640428"},{"key":"ref58","author":"Mognol","journal-title":"Parallelization of the Banded Needleman & Wunsch Algorithm on UPMEM PiM Architecture for Long DNA Sequence Alignment"},{"key":"ref59","doi-asserted-by":"publisher","DOI":"10.1109\/PACT58117.2023.00018"},{"key":"ref60","doi-asserted-by":"publisher","DOI":"10.1145\/3534056.3534946"},{"key":"ref61","article-title":"Processing in Storage Class Memory","author":"Nider","year":"2020","journal-title":"HotStorage"},{"key":"ref62","doi-asserted-by":"publisher","DOI":"10.1109\/ICDEW58674.2023.00035"},{"key":"ref63","doi-asserted-by":"publisher","DOI":"10.1109\/IEEECONF59524.2023.10476816"},{"key":"ref64","article-title":"BLAST on UPMEM","volume-title":"Ph.D. dissertation, INRIA Rennes-Bretagne Atlantique","author":"Lavenier","year":"2016"},{"key":"ref65","article-title":"Accelerating Regular Path Queries over Graph Database with Processing-in-Memory","author":"Ma","year":"2024","journal-title":"arXiv preprint ar Xiv"},{"key":"ref66","article-title":"The Landscape of Compute-near-memory and Compute-in-memory: A Research and Commercial Overview","author":"Khan","year":"2024","journal-title":"arXiv preprint"},{"key":"ref67","doi-asserted-by":"publisher","DOI":"10.1109\/ISSCC42613.2021.9365862"},{"key":"ref68","doi-asserted-by":"publisher","DOI":"10.46506\/jica.2021.2.1.043"},{"key":"ref69","doi-asserted-by":"publisher","DOI":"10.1109\/MM.2021.3097700"},{"key":"ref70","article-title":"A 1ynm 1.25 V 8Gb, 16Gb\/s\/pin GDDR6-based Accelerator-in-Memory Supporting 1 TFLOPS MAC Operation and Various Activation Functions for Deep- Learning Applications","author":"Lee","year":"2022","journal-title":"ISSCC"},{"key":"ref71","article-title":"184QPS\/W 64Mb\/mm 2 3D Logic-to-DRAM Hybrid Bonding with Process-Near-Memory Engine for Recommendation System","author":"Niu","year":"2022","journal-title":"ISSCC"},{"key":"ref72","article-title":"Lecture 20: Graphics Processing Units","volume-title":"video recording available","author":"Mutlu","year":"2020"},{"key":"ref73","year":"2023","journal-title":"UPMEM User Manual. Version 2023.1.0"},{"key":"ref74","volume-title":"UPMEM Software Development Kit (SDK)","year":"2023"},{"key":"ref75","doi-asserted-by":"publisher","DOI":"10.1109\/CGO.2004.1281665"},{"key":"ref76","article-title":"A Pipelined, Shared Resource MIMD Computer","author":"Smith","year":"1986","journal-title":"Advanced computer architecture"},{"key":"ref77","doi-asserted-by":"publisher","DOI":"10.1117\/12.932535"},{"key":"ref78","doi-asserted-by":"publisher","DOI":"10.1109\/LCA.2024.3387472"},{"key":"ref79","article-title":"Playing Atari with Deep Reinforcement Learning","author":"Mnih","year":"2013","journal-title":"ar Xiv preprint"},{"key":"ref80","article-title":"StarCraft II: A New Challenge for Reinforcement Learning","author":"Vinyals","year":"2017","journal-title":"arXiv preprint"},{"key":"ref81","article-title":"MAC-PO: Multi-Agent Experience Replay via Collective Priority Optimization","author":"Mei","year":"2023","journal-title":"AAMAS"},{"key":"ref82","doi-asserted-by":"publisher","DOI":"10.1109\/ASAP57973.2023.00041"},{"key":"ref83","article-title":"Towards Efficient Multi-Agent Learning Systems","author":"Gogineni","year":"2023","journal-title":"MLArchSys, ISCA"},{"key":"ref84","article-title":"Scalability Bottlenecks in Multi-Agent Reinforcement Learning Systems","author":"Gogineni","year":"2023","journal-title":"FastPath, ISPASS"},{"key":"ref85","doi-asserted-by":"publisher","DOI":"10.1016\/j.rser.2020.110618"},{"key":"ref86","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2019.2941229"},{"key":"ref87","doi-asserted-by":"publisher","DOI":"10.1016\/B978-1-55860-335-6.50035-0"},{"key":"ref88","article-title":"Deep Q-Learning With Q-Matrix Transfer Learning for Novel Fire Evacuation Environment","volume":"51","author":"Sharma","year":"2020","journal-title":"IEEE SMC: Systems"},{"key":"ref89","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2019.2961174"},{"key":"ref90","article-title":"Bridging Automated to Au-tonomous Cyber Defense: Foundational Analysis of Tabular Q-Learning","author":"Applebaum","year":"2022","journal-title":"AISec"},{"key":"ref91","article-title":"On the existence of fixed points for q-Iearning and sarsa in partially observable domains","author":"Perkins","year":"2002","journal-title":"ICML"},{"key":"ref92","volume-title":"On-line Q-Learning Using Connectionist Systems. University of Cambridge, Department of Engineering","volume":"37","author":"Rummery","year":"1994"},{"key":"ref93","doi-asserted-by":"publisher","DOI":"10.1145\/3126495"},{"key":"ref94","article-title":"Pythia: A Customizable Hardware Pre fetching Framework Using Online Rein-forcement Learning","author":"Bera","year":"2021","journal-title":"MICRO"},{"key":"ref95","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA.2008.21"},{"key":"ref96","article-title":"A Q-Learning Based Self-Adaptive I\/O Communication for 2.5D Integrated Many-Core Microprocessor and Memory","author":"PD","year":"2015","journal-title":"IEEE TC"},{"key":"ref97","doi-asserted-by":"publisher","DOI":"10.23919\/DATE.2019.8714869"},{"key":"ref98","doi-asserted-by":"publisher","DOI":"10.1145\/3470496.3527442"},{"key":"ref99","article-title":"The Epoch-Greedy Algorithm for Contextual Multi-armed Bandits","volume":"20","author":"Langford","year":"2007","journal-title":"NeurIPS"},{"key":"ref100","author":"Caspi","year":"2017","journal-title":"Reinforcement Learning Coach"},{"key":"ref101","article-title":"Is Independent Learning All You Need in the StarCraft Multi-Agent Challenge?","author":"De Witt","year":"2020","journal-title":"arXiv preprint"},{"key":"ref102","article-title":"SMARTS: Scalable Multi-Agent Reinforcement Learning Training School for Autonomous Driving","author":"Zhou","year":"2021","journal-title":"CoRL"},{"key":"ref103","article-title":"Safe, Multi-Agent, Reinforce-ment Learning for Autonomous Driving","author":"Shalev-Shwartz","year":"2016","journal-title":"arXiv preprint"},{"key":"ref104","article-title":"Multi-Agent Connected Autonomous Driving using Deep Rein-forcement Learning","author":"Palanisamy","year":"2020","journal-title":"IJCNN"},{"key":"ref105","article-title":"Training Cooperative Agents for Multi-Agent Reinforcement Learning","author":"Bhalla","year":"2019","journal-title":"AAMAS"},{"key":"ref106","article-title":"Independent Learning in Stochastic Games","author":"Ozdaglar","year":"2021","journal-title":"International Congress of Mathematicians"},{"key":"ref107","article-title":"Deep reinforcement learning for autonomous driving: A survey","author":"Kiran","year":"2021","journal-title":"IEEE T-ITS"},{"key":"ref108","article-title":"CM3: Cooperative Multi-goal Multi-Stage Multi-Agent Reinforcement Learning","author":"Yang","year":"2018","journal-title":"arXiv preprint"},{"key":"ref109","doi-asserted-by":"publisher","DOI":"10.1017\/S0269888912000057"},{"key":"ref110","article-title":"A Review of Safe Reinforcement Learning: Methods, Theory and Applications","author":"Gu","year":"2022","journal-title":"arXiv preprint"},{"key":"ref111","doi-asserted-by":"publisher","DOI":"10.1016\/B978-1-55860-307-3.50049-6"},{"key":"ref112","doi-asserted-by":"publisher","DOI":"10.1145\/318123.318229"},{"key":"ref113","volume-title":"Xeon\u00ae Silver 4110 Processor","author":"I. S. P."},{"key":"ref114","volume-title":"NVIDIA Ampere Architecture"},{"key":"ref115","article-title":"The Blessing of Heterogeneity in Federated Q- learning: Linear Speedup and Beyond","author":"Woo","year":"2023","journal-title":"arXiv preprint a rXiv"},{"key":"ref116","article-title":"Federated Reinforcement Learning: Tech- niques, Applications, and Open Challenges","author":"Qi","year":"2021","journal-title":"arXiv preprint"},{"key":"ref117","article-title":"Asynchronous Methods for Deep Reinforcement Learning","author":"Mnih","year":"2016","journal-title":"ICML"},{"key":"ref118","doi-asserted-by":"publisher","DOI":"10.1145\/3400302.3415663"},{"key":"ref119","doi-asserted-by":"publisher","DOI":"10.1109\/tc.2018.2876312"},{"key":"ref120","doi-asserted-by":"publisher","DOI":"10.1109\/ASP-DAC47756.2020.9045288"},{"key":"ref121","doi-asserted-by":"publisher","DOI":"10.1109\/ASP-DAC47756.2020.9045192"},{"key":"ref122","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO50266.2020.00040"},{"key":"ref123","doi-asserted-by":"publisher","DOI":"10.1145\/3195970.3196029"},{"key":"ref124","doi-asserted-by":"publisher","DOI":"10.1145\/3093315.3037702"},{"key":"ref125","doi-asserted-by":"publisher","DOI":"10.1145\/3307650.3322237"},{"key":"ref126","doi-asserted-by":"publisher","DOI":"10.1109\/jssc.2022.3200718"},{"key":"ref127","doi-asserted-by":"publisher","DOI":"10.1109\/PACT.2015.22"},{"key":"ref128","doi-asserted-by":"publisher","DOI":"10.1109\/CAHPC.2018.8645905"},{"key":"ref129","article-title":"ORIGAMI: A Heterogeneous Split Architecture for In-Memory Acceleration of Learning","author":"Falahati","year":"2018","journal-title":"arXiv preprint"},{"key":"ref130","doi-asserted-by":"publisher","DOI":"10.1145\/3386263.3407649"},{"key":"ref131","doi-asserted-by":"publisher","DOI":"10.1109\/MDAT.2022.3161126"},{"key":"ref132","doi-asserted-by":"publisher","DOI":"10.1016\/j.sysarc.2022.102561"},{"key":"ref133","article-title":"Offloading embedding lookups to processing-in-memory for deep learning recommender models","volume-title":"Ph.D. dissertation","author":"Zarif","year":"2023"},{"key":"ref134","doi-asserted-by":"publisher","DOI":"10.1145\/3628599"},{"key":"ref135","doi-asserted-by":"publisher","DOI":"10.1109\/BIBM49941.2020.9313351"},{"key":"ref136","doi-asserted-by":"publisher","DOI":"10.1007\/s13222-023-00456-z"},{"key":"ref137","doi-asserted-by":"publisher","DOI":"10.1145\/3639046"},{"key":"ref138","article-title":"GA3C: GPU-based A3C for Deep Reinforcement Learning","author":"Babaeizadeh","year":"2017","journal-title":"ICLR"},{"key":"ref139","doi-asserted-by":"publisher","DOI":"10.1145\/3297858.3304058"},{"key":"ref140","doi-asserted-by":"publisher","DOI":"10.1145\/3307650.3322259"},{"key":"ref141","article-title":"ACME: A Research Framework for Distributed Reinforcement Learning","author":"Hoffman","year":"2020","journal-title":"arXiv preprint"},{"key":"ref142","article-title":"Accelerated Methods for Deep Reinforcement Learning","author":"Stooke","year":"2018","journal-title":"arXiv preprint"},{"key":"ref143","article-title":"Efficient Parallel Methods for Deep Reinforcement Learning","author":"Clemente","year":"2017","journal-title":"arXiv preprint"},{"key":"ref144","doi-asserted-by":"publisher","DOI":"10.1109\/IROS47612.2022.9981441"},{"key":"ref145","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00038"},{"key":"ref146","article-title":"Quantized Neural Networks: Training Neural Networks with Low Precision Weights and Activations","volume":"18","author":"Hubara","year":"2017","journal-title":"JMLR"},{"key":"ref147","doi-asserted-by":"publisher","DOI":"10.1109\/DAC18072.2020.9218516"},{"key":"ref148","article-title":"Improving Neural Network Quantization without Retraining using Outlier Channel Splitting","author":"Zhao","year":"2019","journal-title":"ICML"},{"key":"ref149","article-title":"Mixed Precision Training","author":"Micikevicius","year":"2017","journal-title":"arXiv preprint"},{"key":"ref150","article-title":"Low-Precision Reinforcement Learning: Running Soft Actor-Critic in Half Precision","author":"Bjorck","year":"2021","journal-title":"ICML"},{"key":"ref151","article-title":"QuaRL: Quantization for Fast and Environmentally Sustainable Reinforcement Learning","author":"Krishnan","year":"2022","journal-title":"TMLR"}],"event":{"name":"2024 IEEE International Symposium on Performance Analysis of Systems and Software (ISPASS)","location":"Indianapolis, IN, USA","start":{"date-parts":[[2024,5,5]]},"end":{"date-parts":[[2024,5,7]]}},"container-title":["2024 IEEE International Symposium on Performance Analysis of Systems and Software (ISPASS)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/10589923\/10590014\/10590059.pdf?arnumber=10590059","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,7,19]],"date-time":"2024-07-19T04:55:37Z","timestamp":1721364937000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10590059\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,5,5]]},"references-count":151,"URL":"https:\/\/doi.org\/10.1109\/ispass61541.2024.00029","relation":{},"subject":[],"published":{"date-parts":[[2024,5,5]]}}}