{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,17]],"date-time":"2026-01-17T20:09:01Z","timestamp":1768680541538,"version":"3.49.0"},"reference-count":47,"publisher":"IEEE","license":[{"start":{"date-parts":[[2023,4,1]],"date-time":"2023-04-01T00:00:00Z","timestamp":1680307200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2023,4,1]],"date-time":"2023-04-01T00:00:00Z","timestamp":1680307200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023,4]]},"DOI":"10.1109\/cicc57935.2023.10121242","type":"proceedings-article","created":{"date-parts":[[2023,5,11]],"date-time":"2023-05-11T17:23:55Z","timestamp":1683825835000},"page":"1-8","source":"Crossref","is-referenced-by-count":1,"title":["AI SoC Design Challenges in the Foundation Model Era"],"prefix":"10.1109","author":[{"given":"Zhengyu","family":"Chen","sequence":"first","affiliation":[{"name":"SambaNova Systems,Palo Alto,CA"}]},{"given":"Dawei","family":"Huang","sequence":"additional","affiliation":[{"name":"SambaNova Systems,Palo Alto,CA"}]},{"given":"Mingran","family":"Wang","sequence":"additional","affiliation":[{"name":"SambaNova Systems,Palo Alto,CA"}]},{"given":"Bowen","family":"Yang","sequence":"additional","affiliation":[{"name":"SambaNova Systems,Palo Alto,CA"}]},{"given":"Jinuk Luke","family":"Shin","sequence":"additional","affiliation":[{"name":"SambaNova Systems,Palo Alto,CA"}]},{"given":"Changran","family":"Hu","sequence":"additional","affiliation":[{"name":"SambaNova Systems,Palo Alto,CA"}]},{"given":"Bo","family":"Li","sequence":"additional","affiliation":[{"name":"SambaNova Systems,Palo Alto,CA"}]},{"given":"Raghu","family":"Prabhakar","sequence":"additional","affiliation":[{"name":"SambaNova Systems,Palo Alto,CA"}]},{"given":"Gao","family":"Deng","sequence":"additional","affiliation":[{"name":"SambaNova Systems,Palo Alto,CA"}]},{"given":"Yongning","family":"Sheng","sequence":"additional","affiliation":[{"name":"SambaNova Systems,Palo Alto,CA"}]},{"given":"Sihua","family":"Fu","sequence":"additional","affiliation":[{"name":"SambaNova Systems,Palo Alto,CA"}]},{"given":"Lu","family":"Yuan","sequence":"additional","affiliation":[{"name":"SambaNova Systems,Palo Alto,CA"}]},{"given":"Tian","family":"Zhao","sequence":"additional","affiliation":[{"name":"SambaNova Systems,Palo Alto,CA"}]},{"given":"Yun","family":"Du","sequence":"additional","affiliation":[{"name":"SambaNova Systems,Palo Alto,CA"}]},{"given":"Chen","family":"Liu","sequence":"additional","affiliation":[{"name":"SambaNova Systems,Palo Alto,CA"}]},{"given":"Jun","family":"Yang","sequence":"additional","affiliation":[{"name":"SambaNova Systems,Palo Alto,CA"}]},{"given":"Viren","family":"Shah","sequence":"additional","affiliation":[{"name":"SambaNova Systems,Palo Alto,CA"}]},{"given":"Venkat","family":"Srinivasan","sequence":"additional","affiliation":[{"name":"SambaNova Systems,Palo Alto,CA"}]},{"given":"Sumti","family":"Jairath","sequence":"additional","affiliation":[{"name":"SambaNova Systems,Palo Alto,CA"}]}],"member":"263","reference":[{"key":"ref1","first-page":"1877","article-title":"Language models are few-shot learners","volume-title":"Curran Associates, Inc.","volume":"33","author":"Tom"},{"key":"ref2","author":"Alec","year":"2019","journal-title":"Language models are unsupervised multitask learners"},{"key":"ref3","article-title":"Liu","author":"Colin","year":"2019","journal-title":"Exploring the limits of transfer learning with a unified text-to-text transformer. CoRR, abs\/1910.10683"},{"key":"ref4","article-title":"On the opportunities and risks of foundation models","author":"Rishi","year":"2021","journal-title":"CoRR, abs\/2108.07258"},{"key":"ref5","year":"2022","journal-title":"Chatgpt: Optimizing language models for dialogue"},{"key":"ref6","volume":"2022","author":"Jason","journal-title":"Emergent abilities of large language models"},{"key":"ref7","article-title":"Scaling laws for neural language models","author":"Jared","year":"2020","journal-title":"CoRR"},{"key":"ref8","article-title":"The LAMBADA dataset: Word prediction requiring a broad discourse context","author":"Denis","year":"2016","journal-title":"CoRR, abs\/1606.06031"},{"key":"ref9","author":"Jaime","journal-title":"Compute trends across three eras of machine learning, 2022"},{"key":"ref10","article-title":"Kalamkar, et al","author":"Dhiraj","year":"2019","journal-title":"A study of BFLOAT16 for deep learning training. CoRR, abs\/1905.12322"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/ICSCC.2019.8843652"},{"key":"ref12","author":"Ning-Chi","year":"2020","journal-title":"Efficient systolic array based on decomposable MAC for quantized deep neural networks"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/85.194088"},{"key":"ref14","author":"Siddharth","year":"2022","journal-title":"Instruction tuning for few-shot aspect-based sentiment analysis"},{"key":"ref15","article-title":"Hycube: A CGRA with reconfigurable singlecycle multi-hop interconnect","volume-title":"In 2017 54th ACM\/EDAC\/IEEE Design Automation Conference","author":"Manupa"},{"key":"ref16","article-title":"BERT: pre-training of deep bidirectional transformers for language understanding","author":"Jacob","year":"2018","journal-title":"CoRR, abs\/1810.04805"},{"key":"ref17","article-title":"Megatron-lm: Training multi-billion parameter language models using model parallelism","author":"Mohammad","year":"2019","journal-title":"arXiv:1909.08053"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/mm.2021.3058217"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/isscc42614.2022.9731612"},{"issue":"120","key":"ref20","first-page":"1","article-title":"Switch transformers: Scaling to trillion parameter models with simple and efficient sparsity","volume":"23","author":"William","year":"2022","journal-title":"Journal of Machine Learning Research"},{"key":"ref21","author":"Tianqi","year":"2016","journal-title":"Training deep nets with sublinear memory cost."},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/sc41405.2020.00024"},{"key":"ref23","article-title":"Efficient large-scale language model training on gpu clusters using megatron-lm","author":"Deepak","year":"2021","journal-title":"SC 21"},{"key":"ref24","first-page":"1","article-title":"Beyond data and model parallelism for deep neural networks","volume-title":"Proceedings of Machine Learning and Systems","volume":"1","author":"Zhihao"},{"key":"ref25","article-title":"Alpa: Automating inter- and intra-operator parallelism for distributed deep learning","author":"Lianmin","year":"2022","journal-title":"CoRR, abs\/2201.12023"},{"key":"ref26","article-title":"PipeDream: Fast and efficient pipeline parallel DNN training","author":"Aaron","year":"2018","journal-title":"arXiv preprint arXiv:1806.03377"},{"key":"ref27","article-title":"Gpipe: Efficient training of giant neural networks using pipeline parallelism","author":"Yanping","year":"2018","journal-title":"arXiv preprint arXiv:1811.06965"},{"key":"ref28","first-page":"269","article-title":"Pipemare: Asynchronous pipeline parallel DNN training","volume-title":"Proceedings of Machine Learning and Systems","volume":"3","author":"Bowen"},{"key":"ref29","article-title":"Attention is all you need","volume-title":"Advances in Neural Information Processing.Systems, volume 30. Curran Associates, Inc.","author":"Ashish"},{"issue":"2","key":"ref30","first-page":"95","article-title":"Elster and Tor A","volume":"24","author":"Anne","year":"2022","journal-title":"Haugdahl. Nvidia hopper gpu and grace cpu highlights. Computing in Science & Engineering"},{"key":"ref31","author":"Zhen","year":"2022","journal-title":"Mics: Near-linear scaling for training gigantic model on public cloud."},{"key":"ref32","first-page":"511571","article-title":"GenSLMs: Genome-scale language models reveal SARS-CoV-2 evolutionary dynamics","volume":"23","author":"Zvyagin","year":"2022","journal-title":"bioRxiv [Preprint]. 2022 Nov"},{"key":"ref33","author":"Naigang","year":"2018","journal-title":"Training deep neural networks with 8-bit floating point numbers"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/arith.2019.00023"},{"key":"ref35","author":"Paulius","year":"2017","journal-title":"Mixed precision training"},{"key":"ref36","article-title":"Aberger, and Christopher De Sa","author":"Pedram","year":"2020","journal-title":"Revisiting bfloat16 training"},{"key":"ref37","author":"Tri","year":"2022","journal-title":"Flashattention: Fast and memory-efficient exact attention with io-awareness"},{"key":"ref38","author":"Liu","journal-title":"Transformer acceleration with dynamic sparse attention, 2021"},{"key":"ref39","author":"Aojun","journal-title":"Learning n:m fine-grained structured sparse neural networks from scratch, 2021"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.23919\/VLSICircuits52068.2021.9492489"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1109\/vlsicircuits18222.2020.9162829"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1109\/isscc.2019.8662311"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1109\/ISSCC.2019.8662340"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1109\/isscc42613.2021.9366045"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1109\/isscc42613.2021.9365766"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1109\/jssc.2018.2883394"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1109\/isscc42614.2022.9731681"}],"event":{"name":"2023 IEEE Custom Integrated Circuits Conference (CICC)","location":"San Antonio, TX, USA","start":{"date-parts":[[2023,4,23]]},"end":{"date-parts":[[2023,4,26]]}},"container-title":["2023 IEEE Custom Integrated Circuits Conference (CICC)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/10121189\/10121178\/10121242.pdf?arnumber=10121242","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,1,24]],"date-time":"2024-01-24T06:23:18Z","timestamp":1706077398000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10121242\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,4]]},"references-count":47,"URL":"https:\/\/doi.org\/10.1109\/cicc57935.2023.10121242","relation":{},"subject":[],"published":{"date-parts":[[2023,4]]}}}