{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,10]],"date-time":"2026-03-10T15:26:16Z","timestamp":1773156376397,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":59,"publisher":"ACM","license":[{"start":{"date-parts":[[2019,6,22]],"date-time":"2019-06-22T00:00:00Z","timestamp":1561161600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2019,6,22]]},"DOI":"10.1145\/3307650.3322259","type":"proceedings-article","created":{"date-parts":[[2019,6,14]],"date-time":"2019-06-14T12:42:33Z","timestamp":1560516153000},"page":"279-291","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":94,"title":["Accelerating distributed reinforcement learning with in-switch computing"],"prefix":"10.1145","author":[{"given":"Youjie","family":"Li","sequence":"first","affiliation":[{"name":"UIUC"}]},{"given":"Iou-Jen","family":"Liu","sequence":"additional","affiliation":[{"name":"UIUC"}]},{"given":"Yifan","family":"Yuan","sequence":"additional","affiliation":[{"name":"UIUC"}]},{"given":"Deming","family":"Chen","sequence":"additional","affiliation":[{"name":"UIUC"}]},{"given":"Alexander","family":"Schwing","sequence":"additional","affiliation":[{"name":"UIUC"}]},{"given":"Jian","family":"Huang","sequence":"additional","affiliation":[{"name":"UIUC"}]}],"member":"320","published-online":{"date-parts":[[2019,6,22]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"Alexey Andreyev. 2014. Introducing Data Center Fabric the Next-Generation Facebook Data Center Network. https:\/\/code.fb.com\/production-engineering\/introducing-data-center-fabric-the-next-generation-\\facebook-data-center-network\/."},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1109\/HOTI.2010.16"},{"key":"e_1_3_2_1_3_1","unstructured":"Atari. 1972. Atari Games https:\/\/www.atari.com\/."},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1145\/1879141.1879175"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1145\/2656877.2656890"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1145\/2063384.2063419"},{"key":"e_1_3_2_1_7_1","volume-title":"Proceedings of the 25th International Conference on Neural Information Processing Systems (NIPS'12)","author":"Dean Jeffrey","unstructured":"Jeffrey Dean, Greg Corrado, Rajat Monga, Kai Chen, Matthieu Devin, Mark Mao, Marc aurelio Ranzato, Andrew Senior, Paul Tucker, Ke Yang, Quoc V. Le, and Andrew Y. Ng. 2012. Large Scale Distributed Deep Networks. In Proceedings of the 25th International Conference on Neural Information Processing Systems (NIPS'12). Lake Tahoe, NV."},{"key":"e_1_3_2_1_8_1","unstructured":"Prafulla Dhariwal Christopher Hesse Oleg Klimov Alex Nichol Matthias Plappert Alec Radford John Schulman Szymon Sidor Yuhuai Wu and Peter Zhokhov. {n.d.}. OpenAI Baselines. https:\/\/github.com\/openai\/baselines."},{"key":"e_1_3_2_1_9_1","unstructured":"Dulat Yerzat. 2018. DQN Adventure https:\/\/github.com\/higgsfield\/RL-Adventure."},{"key":"e_1_3_2_1_10_1","unstructured":"Facebook. 2018. Writing Distributed Applications with PyTorch https:\/\/pytorch.org\/tutorials\/intermediate\/dist_tuto.html."},{"key":"e_1_3_2_1_11_1","volume-title":"Horizon: Facebook's Open Source Applied Reinforcement Learning Platform. arXiv arXiv\/1811.00260","author":"Gauci Jason","year":"2018","unstructured":"Jason Gauci, Edoardo Conti, Yitao Liang, Kittipat Virochsiri, Yuchen He, Zachary Kaden, Vivek Narayanan, and Xiaohui Ye. 2018. Horizon: Facebook's Open Source Applied Reinforcement Learning Platform. arXiv arXiv\/1811.00260 (2018). https:\/\/arxiv.org\/abs\/1811.00260"},{"key":"e_1_3_2_1_12_1","volume-title":"Large Minibatch SGD: Training ImageNet in 1 Hour. arXiv arXiv\/1706.02677","author":"Goyal Priya","year":"2017","unstructured":"Priya Goyal, Piotr Doll\u00e1r, Ross Girshick, Pieter Noordhuis, Lukasz Wesolowski, Aapo Kyrola, Andrew Tulloch, Yangqing Jia, and Kaiming He. 2017. Accurate, Large Minibatch SGD: Training ImageNet in 1 Hour. arXiv arXiv\/1706.02677 (2017). https:\/\/arxiv.org\/abs\/1706.02677"},{"key":"e_1_3_2_1_13_1","unstructured":"Network Working Group. 2001. Requirement for Comments: 3168 https:\/\/tools.ietf.org\/html\/rfc3168."},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.5555\/2999611.2999748"},{"key":"e_1_3_2_1_16_1","unstructured":"Ilya Kostrikov. 2018. DDPG and NAF https:\/\/github.com\/ikostrikov\/pytorch-ddpg-naf."},{"key":"e_1_3_2_1_17_1","unstructured":"Ilya Kostrikov. 2018. Pytorch-A2C-PPO-Acktr https:\/\/github.com\/ikostrikov\/pytorch-a2c-ppo-acktr."},{"key":"e_1_3_2_1_18_1","unstructured":"Intel Corporation. 2017. Intel X540 https:\/\/www.intel.com\/content\/www\/us\/en\/ethernet-products\/converged-network-adapters\/ethernet-x540-t2-brief.html."},{"key":"e_1_3_2_1_19_1","unstructured":"Intel Corporation. 2017. Xeon CPU E5 https:\/\/www.intel.com\/content\/www\/us\/en\/products\/processors\/xeon\/e5-processors.html."},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1145\/3079856.3080246"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.5555\/2984093.2984354"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1006\/jpdc.1996.0033"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.5555\/2968826.2968829"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2018.00023"},{"key":"e_1_3_2_1_25_1","volume-title":"Proceedings of the 32nd Conference on Neural Information Processing Systems (NIPS'18)","author":"Li Youjie","year":"2018","unstructured":"Youjie Li, Mingchao Yu, Songze Li, Salman Avestimehr, Nam Sung Kim, and Alexander Schwing. 2018. Pipe-SGD: A Decentralized Pipelined SGD Framework for Distributed Deep Net Training. In Proceedings of the 32nd Conference on Neural Information Processing Systems (NIPS'18). Montreal, Canada."},{"key":"e_1_3_2_1_26_1","volume-title":"Asynchronous Decentralized Parallel Stochastic Gradient Descent. arXiv arXiv\/1710.06952v3","author":"Lian Xiangru","year":"2017","unstructured":"Xiangru Lian, Wei Zhang, Ce Zhang, and Ji Liu. 2017. Asynchronous Decentralized Parallel Stochastic Gradient Descent. arXiv arXiv\/1710.06952v3 (2017). http:\/\/arxiv.org\/abs\/1710.06952v3"},{"key":"e_1_3_2_1_27_1","volume-title":"Continuous Control with Deep Reinforcement Learning. arXiv abs\/1509.02971","author":"Lillicrap Timothy P.","year":"2015","unstructured":"Timothy P. Lillicrap, Jonathan J. Hunt, Alexander Pritzel, Nicolas Heess, Tom Erez, Yuval Tassa, David Silver, and Daan Wierstra. 2015. Continuous Control with Deep Reinforcement Learning. arXiv abs\/1509.02971 (2015). http:\/\/arxiv.org\/abs\/1509.02971"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2016.7446050"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.5555\/3045390.3045594"},{"key":"e_1_3_2_1_30_1","volume-title":"Riedmiller","author":"Mnih Volodymyr","year":"2013","unstructured":"Volodymyr Mnih, Koray Kavukcuoglu, David Silver, Alex Graves, Ioannis Antonoglou, Daan Wierstra, and Martin A. Riedmiller. 2013. Playing Atari with Deep Reinforcement Learning. arXiv arXiv\/1312.5602 (2013). http:\/\/arxiv.org\/abs\/1312.5602"},{"key":"e_1_3_2_1_31_1","unstructured":"Volodymyr Mnih Koray Kavukcuoglu David Silver Andrei A. Rusu Joel Veness Marc G. Bellemare Alex Graves Martin Riedmiller Andreas K. Fidjeland Georg Ostrovski Stig Petersen Charles Beattie Amir Sadik Ioannis Antonoglou Helen King Dharshan Kumaran Daan Wierstra Shane Legg and Demis Hassabis. 2015. Human-level control through deep reinforcement learning. In Nature."},{"key":"e_1_3_2_1_32_1","volume-title":"Proceedings of the 13th USENIX Symposium on Operating Systems Design and Implementation (OSDI'18)","author":"Moritz Philipp","year":"2018","unstructured":"Philipp Moritz, Robert Nishihara, Stephanie Wang, Alexey Tumanov, Richard Liaw, Eric Liang, William Paul, Michael I. Jordan, and Ion Stoica. 2018. Ray: A Distributed Framework for Emerging AI Applications. In Proceedings of the 13th USENIX Symposium on Operating Systems Design and Implementation (OSDI'18). Carlsbad, CA."},{"key":"e_1_3_2_1_33_1","volume-title":"Vedavyas Panneershelvam, Mustafa Suleyman, Charles Beattie, Stig Petersen, Shane Legg, Volodymyr Mnih, Koray Kavukcuoglu, and David Silver.","author":"Nair Arun","year":"2015","unstructured":"Arun Nair, Praveen Srinivasan, Sam Blackwell, Cagdas Alcicek, Rory Fearon, Alessandro De Maria, Vedavyas Panneershelvam, Mustafa Suleyman, Charles Beattie, Stig Petersen, Shane Legg, Volodymyr Mnih, Koray Kavukcuoglu, and David Silver. 2015. Massively Parallel Methods for Deep Reinforcement Learning. arXiv arXiv\/1507.04296 (2015). http:\/\/arxiv.org\/abs\/1507.04296"},{"key":"e_1_3_2_1_34_1","unstructured":"NetFPGA-SUME. 2014. https:\/\/netfpga.org\/site\/#\/systems\/1netfpga-sume\/details\/."},{"key":"e_1_3_2_1_35_1","unstructured":"NetFPGA SUME Team. 2019. NetFPGA-SUME-Wiki https:\/\/github.com\/NetFPGA\/NetFPGA-SUME-public."},{"key":"e_1_3_2_1_36_1","unstructured":"NETGEAR Corporation. 2017. ProSafe XS712T SWITCH https:\/\/www.netgear.com\/support\/product\/xs712t.aspx."},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1145\/3102980.3102998"},{"key":"e_1_3_2_1_38_1","unstructured":"NVIDIA Corporation. 2018. NVIDIA CUDA C Programming Guide."},{"key":"e_1_3_2_1_39_1","unstructured":"NVIDIA Corporation. 2018. NVIDIA CuDNN https:\/\/developer.nvidia.com\/cudnn."},{"key":"e_1_3_2_1_40_1","unstructured":"NVIDIA Corporation. 2019. NVIDIA TITAN RTX https:\/\/www.nvidia.com\/en-us\/titan\/titan-rtx\/."},{"key":"e_1_3_2_1_41_1","unstructured":"OpenAI. 2017. OpenAI Baselines: ACKTR & A2C. https:\/\/blog.openai.com\/baselines-acktr-a2c\/"},{"key":"e_1_3_2_1_42_1","unstructured":"OpenAI. 2018. OpenAI: GYM https:\/\/gym.openai.com\/."},{"key":"e_1_3_2_1_43_1","unstructured":"OpenMPI Community. 2017. OpenMPI: A High Performance Message Passing Library https:\/\/www.open-mpi.org\/."},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1145\/3123939.3123979"},{"key":"e_1_3_2_1_45_1","volume-title":"Proceedings of the 30th International Conference on Neural Information Processing Systems (NIPS '17)","author":"Paszke Adam","year":"2017","unstructured":"Adam Paszke, Sam Gross, Soumith Chintala, Gregory Chanan, Edward Yang, Zachary DeVito, Zeming Lin, Alban Desmaison, Luca Antiga, and Adam Lerer. 2017. Automatic Differentiation in PyTorch. In Proceedings of the 30th International Conference on Neural Information Processing Systems (NIPS '17)."},{"key":"e_1_3_2_1_46_1","volume-title":"Proceedings of the 24th International Conference on Neural Information Processing Systems (NIPS'11)","author":"Recht Benjamin","year":"2011","unstructured":"Benjamin Recht, Christopher Re, Stephen Wright, and Feng Niu. 2011. Hogwild: A Lock-Free Approach to Parallelizing Stochastic Gradient Descent. In Proceedings of the 24th International Conference on Neural Information Processing Systems (NIPS'11)."},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.1145\/2785956.2787472"},{"key":"e_1_3_2_1_48_1","volume-title":"Proximal Policy Optimization Algorithms. arXiv arXiv\/1707.06347","author":"Schulman John","year":"2017","unstructured":"John Schulman, Filip Wolski, Prafulla Dhariwal, Alec Radford, and Oleg Klimov. 2017. Proximal Policy Optimization Algorithms. arXiv arXiv\/1707.06347 (2017). http:\/\/arxiv.org\/abs\/1707.06347"},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"publisher","DOI":"10.1145\/2934872.2934886"},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"publisher","DOI":"10.1145\/2785956.2787508"},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"publisher","DOI":"10.1177\/1094342005051521"},{"key":"e_1_3_2_1_52_1","volume-title":"MuJoCo: A Physics Engine for Model-Based Control. In IEEE\/RSJ International Conference on Intelligent Robots and Systems","author":"Todorov Emanuel","year":"2015","unstructured":"Emanuel Todorov, Tom Erez, and Yuval Tassa. 2015. MuJoCo: A Physics Engine for Model-Based Control. In IEEE\/RSJ International Conference on Intelligent Robots and Systems. Vilamoura, Portugal."},{"key":"e_1_3_2_1_53_1","volume-title":"Proceedings of the 9th International Symposium on Distributed Autonomous Robotic Systems (DARS)","author":"Varshavskaya Paulina","year":"2008","unstructured":"Paulina Varshavskaya, Leslie Pack Kaelbling, and Daniela Rus. 2008. Efficient Distributed Reinforcement Learning Through Agreement. In Proceedings of the 9th International Symposium on Distributed Autonomous Robotic Systems (DARS). Tsukuba, Japan."},{"key":"e_1_3_2_1_54_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISCAS.2016.7527245"},{"key":"e_1_3_2_1_55_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2016.09.071"},{"key":"e_1_3_2_1_56_1","volume-title":"Proceedings of the 32nd Conference on Neural Information Processing Systems (NIPS '18)","author":"Yu Mingchao","year":"2018","unstructured":"Mingchao Yu, Zhifeng Lin, Krishna Giri Narra, Songze Li, Youjie Li, Nam Sung Kim, Alexander Schwing, Murali Annavaram, and Salman Avestimehr. 2018. GradiVeQ: Vector Quantization for Bandwidth-Efficient Gradient Aggregation in Distributed CNN Training. In Proceedings of the 32nd Conference on Neural Information Processing Systems (NIPS '18). Montreal, Canada."},{"key":"e_1_3_2_1_57_1","doi-asserted-by":"publisher","DOI":"10.1145\/3190508.3190551"},{"key":"e_1_3_2_1_58_1","volume-title":"High-Performance Video Content Recognition with Long-Term Recurrent Convolutional Network for FPGA. In 2017 27th International Conference on Field Programmable Logic and Applications (FPL 17)","author":"Zhang Xiaofan","year":"2017","unstructured":"Xiaofan Zhang, Xinheng Liu, Anand Ramachandran, Chuanhao Zhuge, Shibin Tang, Peng Ouyang, Zuofu Cheng, Kyle Rupnow, and Deming Chen. 2017. High-Performance Video Content Recognition with Long-Term Recurrent Convolutional Network for FPGA. In 2017 27th International Conference on Field Programmable Logic and Applications (FPL 17). Ghent, Belgium."},{"key":"e_1_3_2_1_59_1","doi-asserted-by":"publisher","DOI":"10.1145\/3240765.3240801"}],"event":{"name":"ISCA '19: The 46th Annual International Symposium on Computer Architecture","location":"Phoenix Arizona","acronym":"ISCA '19","sponsor":["SIGARCH ACM Special Interest Group on Computer Architecture","IEEE-CS\\DATC IEEE Computer Society"]},"container-title":["Proceedings of the 46th International Symposium on Computer Architecture"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3307650.3322259","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3307650.3322259","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T23:54:06Z","timestamp":1750204446000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3307650.3322259"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019,6,22]]},"references-count":59,"alternative-id":["10.1145\/3307650.3322259","10.1145\/3307650"],"URL":"https:\/\/doi.org\/10.1145\/3307650.3322259","relation":{},"subject":[],"published":{"date-parts":[[2019,6,22]]},"assertion":[{"value":"2019-06-22","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}