{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,11]],"date-time":"2026-03-11T17:05:01Z","timestamp":1773248701341,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":57,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,11,11]],"date-time":"2023-11-11T00:00:00Z","timestamp":1699660800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,11,12]]},"DOI":"10.1145\/3581784.3607102","type":"proceedings-article","created":{"date-parts":[[2023,10,30]],"date-time":"2023-10-30T20:34:48Z","timestamp":1698698088000},"page":"1-14","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":30,"title":["Calculon: a methodology and tool for high-level co-design of systems and large language models"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-2599-9741","authenticated-orcid":false,"given":"Mikhail","family":"Isaev","sequence":"first","affiliation":[{"name":"Georgia Institute of Technology, Atlanta, United States of America"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-2298-1489","authenticated-orcid":false,"given":"Nic","family":"Mcdonald","sequence":"additional","affiliation":[{"name":"Nvidia, Salt Lake City, United States of America"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5533-1083","authenticated-orcid":false,"given":"Larry","family":"Dennison","sequence":"additional","affiliation":[{"name":"Nvidia, Westford, United States of America"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2178-138X","authenticated-orcid":false,"given":"Richard","family":"Vuduc","sequence":"additional","affiliation":[{"name":"Georgia Institute of Technology, Atlanta, United States of America"}]}],"member":"320","published-online":{"date-parts":[[2023,11,11]]},"reference":[{"key":"e_1_3_2_2_1_1","doi-asserted-by":"publisher","DOI":"10.1109\/SC41404.2022.00051"},{"key":"e_1_3_2_2_2_1","volume-title":"Proceedings of Neuro-N\u00eemes 91","author":"Bottou L\u00e9on","year":"1991","unstructured":"L\u00e9on Bottou. 1991. Stochastic Gradient Learning in Neural Networks. In Proceedings of Neuro-N\u00eemes 91. EC2, Nimes, France. http:\/\/leon.bottou.org\/papers\/bottou-91c"},{"key":"e_1_3_2_2_3_1","doi-asserted-by":"publisher","DOI":"10.5555\/3495724.3495883"},{"key":"e_1_3_2_2_4_1","doi-asserted-by":"publisher","unstructured":"Mark Chen Jerry Tworek Heewoo Jun Qiming Yuan Henrique Ponde de Oliveira Pinto Jared Kaplan Harri Edwards Yuri Burda Nicholas Joseph Greg Brockman Alex Ray Raul Puri Gretchen Krueger Michael Petrov Heidy Khlaaf Girish Sastry Pamela Mishkin Brooke Chan Scott Gray Nick Ryder Mikhail Pavlov Alethea Power Lukasz Kaiser Mohammad Bavarian Clemens Winter Philippe Tillet Felipe Petroski Such Dave Cummings Matthias Plappert Fotios Chantzis Elizabeth Barnes Ariel Herbert-Voss William Hebgen Guss Alex Nichol Alex Paino Nikolas Tezak Jie Tang Igor Babuschkin Suchir Balaji Shantanu Jain William Saunders Christopher Hesse Andrew N. Carr Jan Leike Josh Achiam Vedant Misra Evan Morikawa Alec Radford Matthew Knight Miles Brundage Mira Murati Katie Mayer Peter Welinder Bob McGrew Dario Amodei Sam McCandlish Ilya Sutskever and Wojciech Zaremba. 2021. Evaluating Large Language Models Trained on Code. 10.48550\/ARXIV.2107.03374","DOI":"10.48550\/ARXIV.2107.03374"},{"key":"e_1_3_2_2_5_1","doi-asserted-by":"publisher","unstructured":"Tianqi Chen Bing Xu Chiyuan Zhang and Carlos Guestrin. 2016. Training Deep Nets with Sublinear Memory Cost. 10.48550\/ARXIV.1604.06174","DOI":"10.48550\/ARXIV.1604.06174"},{"key":"e_1_3_2_2_6_1","unstructured":"Aakanksha Chowdhery Sharan Narang Jacob Devlin Maarten Bosma Gaurav Mishra Adam Roberts Paul Barham Hyung Won Chung Charles Sutton Sebastian Gehrmann Parker Schuh Kensen Shi Sasha Tsvyashchenko Joshua Maynez Abhishek Rao Parker Barnes Yi Tay Noam Shazeer Vinodkumar Prabhakaran Emily Reif Nan Du Ben Hutchinson Reiner Pope James Bradbury Jacob Austin Michael Isard Guy Gur-Ari Pengcheng Yin Toju Duke Anselm Levskaya Sanjay Ghemawat Sunipa Dev Henryk Michalewski Xavier Garcia Vedant Misra Kevin Robinson Liam Fedus Denny Zhou Daphne Ippolito David Luan Hyeontaek Lim Barret Zoph Alexander Spiridonov Ryan Sepassi David Dohan Shivani Agrawal Mark Omernick Andrew M. Dai Thanumalayan Sankaranarayana Pillai Marie Pellat Aitor Lewkowycz Erica Moreira Rewon Child Oleksandr Polozov Katherine Lee Zongwei Zhou Xuezhi Wang Brennan Saeta Mark Diaz Orhan Firat Michele Catasta Jason Wei Kathy Meier-Hellstern Douglas Eck Jeff Dean Slav Petrov and Noah Fiedel. 2022. PaLM: Scaling Language Modeling with Pathways. arXiv:2204.02311 [cs.CL]"},{"key":"e_1_3_2_2_7_1","volume-title":"Proceedings of the 25th International Conference on Neural Information Processing Systems -","volume":"1","author":"Dean Jeffrey","unstructured":"Jeffrey Dean, Greg S. Corrado, Rajat Monga, Kai Chen, Matthieu Devin, Quoc V. Le, Mark Z. Mao, Marc'Aurelio Ranzato, Andrew Senior, Paul Tucker, Ke Yang, and Andrew Y. Ng. 2012. Large Scale Distributed Deep Networks. In Proceedings of the 25th International Conference on Neural Information Processing Systems - Volume 1 (Lake Tahoe, Nevada) (NIPS'12). Curran Associates Inc., Red Hook, NY, USA, 1223--1231."},{"key":"e_1_3_2_2_8_1","doi-asserted-by":"publisher","DOI":"10.1145\/3437801.3441593"},{"key":"e_1_3_2_2_9_1","volume-title":"XLA: Optimizing Compiler for TensorFlow. https:\/\/www.tensorflow.org\/xla","author":"LLC.","year":"2022","unstructured":"Google, LLC. 2022. XLA: Optimizing Compiler for TensorFlow. https:\/\/www.tensorflow.org\/xla"},{"key":"e_1_3_2_2_10_1","doi-asserted-by":"publisher","DOI":"10.1145\/347837.347846"},{"key":"e_1_3_2_2_11_1","unstructured":"Jordan Hoffmann Sebastian Borgeaud Arthur Mensch Elena Buchatskaya Trevor Cai Eliza Rutherford Diego de Las Casas Lisa Anne Hendricks Johannes Welbl Aidan Clark Tom Hennigan Eric Noland Katie Millican George van den Driessche Bogdan Damoc Aurelia Guy Simon Osindero Karen Simonyan Erich Elsen Jack W. Rae Oriol Vinyals and Laurent Sifre. 2022. Training Compute-Optimal Large Language Models. arXiv:2203.15556 [cs.CL]"},{"key":"e_1_3_2_2_12_1","doi-asserted-by":"publisher","DOI":"10.1145\/3373376.3378530"},{"key":"e_1_3_2_2_13_1","volume-title":"Dehao Chen, HyoukJoong Lee, Jiquan Ngiam, Quoc V. Le, Yonghui Wu, and Zhifeng Chen.","author":"Huang Yanping","year":"2019","unstructured":"Yanping Huang, Youlong Cheng, Ankur Bapna, Orhan Firat, Mia Xu Chen, Dehao Chen, HyoukJoong Lee, Jiquan Ngiam, Quoc V. Le, Yonghui Wu, and Zhifeng Chen. 2019. GPipe: Efficient Training of Giant Neural Networks Using Pipeline Parallelism. Curran Associates Inc., Red Hook, NY, USA."},{"key":"e_1_3_2_2_14_1","doi-asserted-by":"publisher","DOI":"10.1145\/3545008.3545069"},{"key":"e_1_3_2_2_15_1","volume-title":"Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis","author":"Jain Arpan","year":"2020","unstructured":"Arpan Jain, Ammar Ahmad Awan, Asmaa M. Aljuhani, Jahanzeb Maqbool Hashmi, Quentin G. Anthony, Hari Subramoni, Dhableswar K. Panda, Raghu Machiraju, and Anil Parwani. 2020. GEMS: GPU-enabled memory-Aware Model-Parallelism system for Distributed DNN Training. In Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis (Atlanta, Georgia) (SC '20). IEEE Press, Article 45, 15 pages."},{"key":"e_1_3_2_2_16_1","first-page":"497","article-title":"Checkmate","volume":"2020","author":"Jain Paras","year":"2020","unstructured":"Paras Jain, Ajay Jain, Aniruddha Nrusimha, Amir Gholami, Pieter Abbeel, Joseph Gonzalez, Kurt Keutzer, and Ion Stoica. 2020. Checkmate: Breaking the Memory Wall with Optimal Tensor Rematerialization. In Proceedings of Machine Learning and Systems 2020. 497--511.","journal-title":"Breaking the Memory Wall with Optimal Tensor Rematerialization. In Proceedings of Machine Learning and Systems"},{"key":"e_1_3_2_2_17_1","doi-asserted-by":"publisher","DOI":"10.1145\/3360307"},{"key":"e_1_3_2_2_18_1","doi-asserted-by":"publisher","DOI":"10.1145\/3140659.3080246"},{"key":"e_1_3_2_2_19_1","volume-title":"Kingma and Jimmy Ba","author":"Diederik","year":"2015","unstructured":"Diederik P. Kingma and Jimmy Ba. 2015. Adam: A Method for Stochastic Optimization. In 3rd International Conference on Learning Representations, ICLR 2015, San Diego, CA, USA, May 7--9, 2015, Conference Track Proceedings, Yoshua Bengio and Yann LeCun (Eds.). http:\/\/arxiv.org\/abs\/1412.6980"},{"key":"e_1_3_2_2_20_1","doi-asserted-by":"publisher","unstructured":"Vijay Korthikanti Jared Casper Sangkug Lym Lawrence McAfee Michael Andersch Mohammad Shoeybi and Bryan Catanzaro. 2022. Reducing Activation Recomputation in Large Transformer Models. 10.48550\/ARXIV.2205.05198","DOI":"10.48550\/ARXIV.2205.05198"},{"key":"e_1_3_2_2_21_1","volume-title":"Weinberger (Eds.)","volume":"25","author":"Krizhevsky Alex","year":"2012","unstructured":"Alex Krizhevsky, Ilya Sutskever, and Geoffrey E Hinton. 2012. ImageNet Classification with Deep Convolutional Neural Networks. In Advances in Neural Information Processing Systems, F. Pereira, C.J. Burges, L. Bottou, and K.Q. Weinberger (Eds.), Vol. 25. Curran Associates, Inc. https:\/\/proceedings.neurips.cc\/paper_files\/paper\/2012\/file\/c399862d3b9d6b76c8436e924a68c45b-Paper.pdf"},{"key":"e_1_3_2_2_22_1","volume-title":"CoRR abs\/1909.09756","author":"Kumar Sameer","year":"2019","unstructured":"Sameer Kumar, Victor Bitorff, Dehao Chen, Chiachen Chou, Blake A. Hechtman, HyoukJoong Lee, Naveen Kumar, Peter Mattson, Shibo Wang, Tao Wang, Yuanzhong Xu, and Zongwei Zhou. 2019. Scale MLPerf-0.6 models on Google TPU-v3 Pods. CoRR abs\/1909.09756 (2019). arXiv:1909.09756 http:\/\/arxiv.org\/abs\/1909.09756"},{"key":"e_1_3_2_2_23_1","doi-asserted-by":"publisher","DOI":"10.1109\/HiPC.2017.00030"},{"key":"e_1_3_2_2_24_1","volume-title":"International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=qrwe7XHTmYb","author":"Lepikhin Dmitry","year":"2021","unstructured":"Dmitry Lepikhin, HyoukJoong Lee, Yuanzhong Xu, Dehao Chen, Orhan Firat, Yanping Huang, Maxim Krikun, Noam Shazeer, and Zhifeng Chen. 2021. {GS}hard: Scaling Giant Models with Conditional Computation and Automatic Sharding. In International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=qrwe7XHTmYb"},{"key":"e_1_3_2_2_25_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2017.29"},{"key":"e_1_3_2_2_26_1","unstructured":"Nenad Marku\u0161. 2018. Fusing batch normalization and convolution in runtime. https:\/\/nenadmarkus.com\/p\/fusing-batchnorm-and-conv\/"},{"key":"e_1_3_2_2_27_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISPASS.2018.00017"},{"key":"e_1_3_2_2_28_1","doi-asserted-by":"publisher","DOI":"10.1145\/3341301.3359646"},{"key":"e_1_3_2_2_29_1","doi-asserted-by":"publisher","DOI":"10.1145\/3458817.3476209"},{"key":"e_1_3_2_2_30_1","unstructured":"NVIDIA. 2020. NVIDIA A100 Tensor Core GPU Architecture. https:\/\/resources.nvidia.com\/en-us-tensor-core"},{"key":"e_1_3_2_2_31_1","unstructured":"NVIDIA. 2022. NVIDIA H100 Tensor Core GPU Architecture. https:\/\/images.nvidia.com\/aem-dam\/en-zz\/Solutions\/data-center\/nvidiaampere-architecture-whitepaper.pdf"},{"key":"e_1_3_2_2_32_1","unstructured":"NVIDIA. 2022. NVLink and NVSwitch. https:\/\/www.nvidia.com\/en-us\/data-center\/nvlink\/"},{"key":"e_1_3_2_2_33_1","unstructured":"NVIDIA. 2023. NVIDIA Deep Learning Performance. https:\/\/docs.nvidia.com\/deeplearning\/performance\/dl-performance-matrix-multiplication\/index.html"},{"key":"e_1_3_2_2_34_1","doi-asserted-by":"publisher","unstructured":"David Patterson Joseph Gonzalez Quoc Le Chen Liang Lluis-Miquel Munguia Daniel Rothchild David So Maud Texier and Jeff Dean. 2021. Carbon Emissions and Large Neural Network Training. 10.48550\/ARXIV.2104.10350","DOI":"10.48550\/ARXIV.2104.10350"},{"key":"e_1_3_2_2_35_1","unstructured":"Reiner Pope Sholto Douglas Aakanksha Chowdhery Jacob Devlin James Bradbury Anselm Levskaya Jonathan Heek Kefan Xiao Shivani Agrawal and Jeff Dean. 2022. Efficiently Scaling Transformer Inference. arXiv:2211.05102 [cs.LG]"},{"key":"e_1_3_2_2_36_1","unstructured":"Alec Radford Jeff Wu Rewon Child David Luan Dario Amodei and Ilya Sutskever. 2019. Language Models are Unsupervised Multitask Learners. (2019)."},{"key":"e_1_3_2_2_37_1","volume-title":"Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis","author":"Rajbhandari Samyam","year":"2020","unstructured":"Samyam Rajbhandari, Jeff Rasley, Olatunji Ruwase, and Yuxiong He. 2020. ZeRO: Memory Optimizations toward Training Trillion Parameter Models. In Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis (Atlanta, Georgia) (SC '20). IEEE Press, Article 20, 16 pages."},{"key":"e_1_3_2_2_38_1","doi-asserted-by":"publisher","DOI":"10.1145\/3458817.3476205"},{"key":"e_1_3_2_2_39_1","doi-asserted-by":"publisher","unstructured":"Aditya Ramesh Prafulla Dhariwal Alex Nichol Casey Chu and Mark Chen. 2022. Hierarchical Text-Conditional Image Generation with CLIP Latents. 10.48550\/ARXIV.2204.06125","DOI":"10.48550\/ARXIV.2204.06125"},{"key":"e_1_3_2_2_40_1","volume-title":"Proceedings of the 38th International Conference on Machine Learning (Proceedings of Machine Learning Research","volume":"8831","author":"Ramesh Aditya","year":"2021","unstructured":"Aditya Ramesh, Mikhail Pavlov, Gabriel Goh, Scott Gray, Chelsea Voss, Alec Radford, Mark Chen, and Ilya Sutskever. 2021. Zero-Shot Text-to-Image Generation. In Proceedings of the 38th International Conference on Machine Learning (Proceedings of Machine Learning Research, Vol. 139), Marina Meila and Tong Zhang (Eds.). PMLR, 8821--8831. https:\/\/proceedings.mlr.press\/v139\/ramesh21a.html"},{"key":"e_1_3_2_2_41_1","doi-asserted-by":"publisher","DOI":"10.1145\/3394486.3406703"},{"key":"e_1_3_2_2_42_1","volume-title":"ZeRO-Offload: Democratizing Billion-Scale Model Training. In 2021 USENIX Annual Technical Conference, USENIX ATC 2021","author":"Ren Jie","year":"2021","unstructured":"Jie Ren, Samyam Rajbhandari, Reza Yazdani Aminabadi, Olatunji Ruwase, Shuangyan Yang, Minjia Zhang, Dong Li, and Yuxiong He. 2021. ZeRO-Offload: Democratizing Billion-Scale Model Training. In 2021 USENIX Annual Technical Conference, USENIX ATC 2021, July 14--16, 2021, Irina Calciu and Geoff Kuenning (Eds.). USENIX Association, 551--564. https:\/\/www.usenix.org\/conference\/atc21\/presentation\/ren-jie"},{"key":"e_1_3_2_2_43_1","unstructured":"Noam Shazeer Youlong Cheng Niki Parmar Dustin Tran Ashish Vaswani Penporn Koanantakool Peter Hawkins HyoukJoong Lee Mingsheng Hong Cliff Young Ryan Sepassi and Blake Hechtman. 2018. Mesh-TensorFlow: Deep Learning for Supercomputers. arXiv:1811.02084 [cs.LG]"},{"key":"e_1_3_2_2_44_1","doi-asserted-by":"publisher","unstructured":"Mohammad Shoeybi Mostofa Patwary Raul Puri Patrick LeGresley Jared Casper and Bryan Catanzaro. 2019. Megatron-LM: Training Multi-Billion Parameter Language Models Using Model Parallelism. 10.48550\/ARXIV.1909.08053","DOI":"10.48550\/ARXIV.1909.08053"},{"key":"e_1_3_2_2_45_1","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.2201.11990"},{"key":"e_1_3_2_2_46_1","volume-title":"Proceedings of the International Conference on High Performance Computing, Networking, Storage and Analysis","author":"Sun Xiaoyang","year":"2022","unstructured":"Xiaoyang Sun, Wei Wang, Shenghao Qiu, Renyu Yang, Songfang Huang, Jie Xu, and Zheng Wang. 2022. StrongHold: Fast and Affordable Billion-Scale Deep Learning Model Training. In Proceedings of the International Conference on High Performance Computing, Networking, Storage and Analysis (Dallas, Texas) (SC '22). IEEE Press, Article 71, 17 pages."},{"key":"e_1_3_2_2_47_1","doi-asserted-by":"publisher","DOI":"10.5555\/3495724.3497020"},{"key":"e_1_3_2_2_48_1","unstructured":"Hugo Touvron Thibaut Lavril Gautier Izacard Xavier Martinet Marie-Anne Lachaux Timoth\u00e9e Lacroix Baptiste Rozi\u00e8re Naman Goyal Eric Hambro Faisal Azhar Aurelien Rodriguez Armand Joulin Edouard Grave and Guillaume Lample. 2023. LLaMA: Open and Efficient Foundation Language Models. arXiv:2302.13971 [cs.CL]"},{"key":"e_1_3_2_2_49_1","volume-title":"Attention is All You Need (NIPS'17)","author":"Vaswani Ashish","unstructured":"Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N. Gomez, \u0141ukasz Kaiser, and Illia Polosukhin. 2017. Attention is All You Need (NIPS'17). Curran Associates Inc., Red Hook, NY, USA, 6000--6010."},{"key":"e_1_3_2_2_50_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA51647.2021.00077"},{"key":"e_1_3_2_2_51_1","volume-title":"Proceedings of Machine Learning and Systems, A. Smola, A. Dimakis, and I. Stoica (Eds.)","volume":"3","author":"Wang Guanhua","year":"2021","unstructured":"Guanhua Wang, Kehan Wang, Kenan Jiang, XIANGJUN LI, and Ion Stoica. 2021. Wavelet: Efficient DNN Training with Tick-Tock Scheduling. In Proceedings of Machine Learning and Systems, A. Smola, A. Dimakis, and I. Stoica (Eds.), Vol. 3. 696--710. https:\/\/proceedings.mlsys.org\/paper\/2021\/file\/c81e728d9d4c2f636f067f89cc14862c-Paper.pdf"},{"key":"e_1_3_2_2_52_1","doi-asserted-by":"publisher","DOI":"10.1145\/3567955.3567959"},{"key":"e_1_3_2_2_53_1","volume-title":"Compiler-Assisted Source-to-Source Skeletonization of Application Models for System Simulation","author":"Wilke Jeremiah J.","unstructured":"Jeremiah J. Wilke, Joseph P. Kenny, Samuel Knight, and Sebastien Rumley. 2018. Compiler-Assisted Source-to-Source Skeletonization of Application Models for System Simulation. In High Performance Computing, Rio Yokota, Mich\u00e8le Weiland, David Keyes, and Carsten Trinitis (Eds.). Springer International Publishing, Cham, 123--143."},{"key":"e_1_3_2_2_54_1","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.2105.04663"},{"key":"e_1_3_2_2_55_1","volume-title":"Touretzky (Ed.)","volume":"2","author":"Zhang Xiru","year":"1989","unstructured":"Xiru Zhang, Michael McKenna, Jill Mesirov, and David Waltz. 1989. An Efficient Implementation of the Back-propagation Algorithm on the Connection Machine CM-2. In Advances in Neural Information Processing Systems, D. Touretzky (Ed.), Vol. 2. Morgan-Kaufmann. https:\/\/proceedings.neurips.cc\/paper_files\/paper\/1989\/file\/e3796ae838835da0b6f6ea37bcf8bcb7-Paper.pdf"},{"key":"e_1_3_2_2_56_1","volume-title":"Alpa: Automating Inter- and Intra-Operator Parallelism for Distributed Deep Learning. In 16th USENIX Symposium on Operating Systems Design and Implementation (OSDI 22)","author":"Zheng Lianmin","year":"2022","unstructured":"Lianmin Zheng, Zhuohan Li, Hao Zhang, Yonghao Zhuang, Zhifeng Chen, Yanping Huang, Yida Wang, Yuanzhong Xu, Danyang Zhuo, Eric P. Xing, Joseph E. Gonzalez, and Ion Stoica. 2022. Alpa: Automating Inter- and Intra-Operator Parallelism for Distributed Deep Learning. In 16th USENIX Symposium on Operating Systems Design and Implementation (OSDI 22). USENIX Association, Carlsbad, CA, 559--578. https:\/\/www.usenix.org\/conference\/osdi22\/presentation\/zhenglianmin"},{"key":"e_1_3_2_2_57_1","volume-title":"Proceedings of Machine Learning and Systems, D. Marculescu, Y. Chi, and C. Wu (Eds.)","volume":"4","author":"Zhou Yanqi","year":"2022","unstructured":"Yanqi Zhou, Xuanyi Dong, Tianjian Meng, Mingxing Tan, Berkin Akin, Daiyi Peng, Amir Yazdanbakhsh, Da Huang, Ravi Narayanaswami, and James Laudon. 2022. Towards the Co-design of Neural Networks and Accelerators. In Proceedings of Machine Learning and Systems, D. Marculescu, Y. Chi, and C. Wu (Eds.), Vol. 4. 141--152. https:\/\/proceedings.mlsys.org\/paper\/2022\/file\/31fefc0e570cb3860f2a6d4b38c6490d-Paper.pdf"}],"event":{"name":"SC '23: International Conference for High Performance Computing, Networking, Storage and Analysis","location":"Denver CO USA","acronym":"SC '23","sponsor":["SIGHPC ACM Special Interest Group on High Performance Computing, Special Interest Group on High Performance Computing","IEEE CS"]},"container-title":["Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3581784.3607102","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3581784.3607102","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T16:36:23Z","timestamp":1750178183000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3581784.3607102"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,11,11]]},"references-count":57,"alternative-id":["10.1145\/3581784.3607102","10.1145\/3581784"],"URL":"https:\/\/doi.org\/10.1145\/3581784.3607102","relation":{},"subject":[],"published":{"date-parts":[[2023,11,11]]},"assertion":[{"value":"2023-11-11","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}