{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,19]],"date-time":"2026-06-19T16:37:00Z","timestamp":1781887020494,"version":"3.54.5"},"publisher-location":"New York, NY, USA","reference-count":101,"publisher":"ACM","content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,6,21]]},"DOI":"10.1145\/3695053.3731101","type":"proceedings-article","created":{"date-parts":[[2025,6,20]],"date-time":"2025-06-20T16:46:17Z","timestamp":1750437977000},"page":"1-17","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":18,"title":["WSC-LLM: Efficient LLM Service and Architecture Co-exploration for Wafer-scale Chips"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0009-5264-2818","authenticated-orcid":false,"given":"Zheng","family":"Xu","sequence":"first","affiliation":[{"name":"School of Integrated Circuits, BNRist, Tsinghua University, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-1087-307X","authenticated-orcid":false,"given":"Dehao","family":"Kong","sequence":"additional","affiliation":[{"name":"School of Integrated Circuits, BNRist, Tsinghua University, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-9405-8548","authenticated-orcid":false,"given":"Jiaxin","family":"Liu","sequence":"additional","affiliation":[{"name":"School of Integrated Circuits, BNRist, Tsinghua University, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-4841-3798","authenticated-orcid":false,"given":"Jinxi","family":"Li","sequence":"additional","affiliation":[{"name":"School of Integrated Circuits, BNRist, Tsinghua University, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-9504-9611","authenticated-orcid":false,"given":"Jingxiang","family":"Hou","sequence":"additional","affiliation":[{"name":"School of Integrated Circuits, BNRist, Tsinghua University, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-4702-0635","authenticated-orcid":false,"given":"Xu","family":"Dai","sequence":"additional","affiliation":[{"name":"Shanghai AI Laboratory, Shanghai, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6218-4659","authenticated-orcid":false,"given":"Chao","family":"Li","sequence":"additional","affiliation":[{"name":"Shanghai Jiao Tong University, Shanghai, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5117-7920","authenticated-orcid":false,"given":"Shaojun","family":"Wei","sequence":"additional","affiliation":[{"name":"School of Integrated Circuits, BNRist, Tsinghua University, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6942-4395","authenticated-orcid":false,"given":"Yang","family":"Hu","sequence":"additional","affiliation":[{"name":"School of Integrated Circuits, BNRist, Tsinghua University, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8438-8588","authenticated-orcid":false,"given":"Shouyi","family":"Yin","sequence":"additional","affiliation":[{"name":"School of Integrated Circuits, BNRist, Tsinghua University, Beijing, China and Shanghai AI Laboratory, Shanghai, China"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2025,6,20]]},"reference":[{"key":"e_1_3_3_1_2_2","unstructured":"[n. d.]. Azure Public Dataset. [Online]. Available: https:\/\/github.com\/Azure\/AzurePublicDataset\/tree\/master."},{"key":"e_1_3_3_1_3_2","unstructured":"[n. d.]. Claude. [Online]. Available: https:\/\/claude.ai\/."},{"key":"e_1_3_3_1_4_2","unstructured":"[n. d.]. Gemini. [Online]. Available: https:\/\/gemini.google.com\/."},{"key":"e_1_3_3_1_5_2","unstructured":"[n. d.]. Grok. [Online]. Available: https:\/\/x.ai\/blog\/grok-1.5."},{"key":"e_1_3_3_1_6_2","unstructured":"[n. d.]. Mask \/ reticle. [Online]. Available: https:\/\/en.wikichip.org\/wiki\/mask."},{"key":"e_1_3_3_1_7_2","unstructured":"[n. d.]. NVIDIA GB200 NVL72: Powering the new era of computing. [Online]. Available: https:\/\/www.nvidia.com\/en-us\/data-center\/gb200-nvl72\/."},{"key":"e_1_3_3_1_8_2","unstructured":"[n. d.]. Nvidia H100. [Online]. Available: https:\/\/www.nvidia.com\/en-us\/data-center\/h100."},{"key":"e_1_3_3_1_9_2","unstructured":"2023. Bard an experiment by google. [Online]. Available: https:\/\/bard.google.com\/."},{"key":"e_1_3_3_1_10_2","unstructured":"Josh Achiam Steven Adler Sandhini Agarwal Lama Ahmad Ilge Akkaya Florencia Leoni Diogo Almeida Janko Altenschmidt Sam Altman Shyamal Anadkat Red Avila Igor Babuschkin Suchir Balaji Valerie Balcom Paul Baltescu Haiming Bao Mohammad Bavarian Jeff Belgum Irwan Bello Jake Berdine Gabriel Bernadett-Shapiro Christopher Berner Lenny Bogdonoff Oleg Boiko Madelaine Boyd Anna-Luisa Brakman Greg Brockman Tim Brooks Miles Brundage Kevin Button Trevor Cai Rosie Campbell Andrew Cann Brittany Carey Chelsea Carlson Rory Carmichael Brooke Chan Che Chang Fotis Chantzis Derek Chen Sully Chen Ruby Chen Jason Chen Mark Chen Ben Chess Chester Cho Casey Chu Hyung Won Dave Cummings Jeremiah Currier Yunxing Dai Cory Decareaux Thomas Degry Noah Deutsch Damien Deville Arka Dhar David Dohan Steve Dowling Sheila Dunning Adrien Ecoffet Atty Eleti Tyna Eloundou David Farhi Liam Fedus Niko Felix Sim\u00f3n Posada Juston Forte Isabella Fulford Leo Gao Elie Georges Christian Gibson Vik Goel Tarun Gogineni Gabriel Goh Rapha Gontijo-Lopes Jonathan Gordon Morgan Grafstein Scott Gray Ryan Greene Joshua Gross Shixiang Shane Yufei Guo Chris Hallacy Jesse Han Jeff Harris Yuchen He Mike Heaton Johannes Heidecke Chris Hesse Alan Hickey Wade Hickey Peter Hoeschele Brandon Houghton Kenny Hsu Shengli Hu Xin Hu Joost Huizinga Shantanu Jain Shawn Jain Joanne Jang Angela Jiang Roger Jiang Haozhun Jin Denny Jin Shino Jomoto Billie Jonn Heewoo Jun Tomer Kaftan \u0141ukasz Kaiser Ali Kamali Ingmar Kanitscheider Nitish Shirish Tabarak Khan Logan Kilpatrick Jong Wook Christina Kim Yongjik Kim Jan Hendrik Jamie Kiros Matt Knight Daniel Kokotajlo \u0141ukasz Kondraciuk Andrew Kondrich Aris Konstantinidis Kyle Kosic Gretchen Krueger Vishal Kuo Michael Lampe Ikai Lan Teddy Lee Jan Leike Jade Leung Daniel Levy Chak Ming Rachel Lim Molly Lin Stephanie Lin Mateusz Litwin Theresa Lopez Ryan Lowe Patricia Lue Anna Makanju Kim Malfacini Sam Manning Todor Markov Yaniv Markovski Bianca Martin Katie Mayer Andrew Mayne Bob McGrew Scott Mayer Christine McLeavey Paul McMillan Jake McNeil David Medina Aalok Mehta Jacob Menick Luke Metz Andrey Mishchenko Pamela Mishkin Vinnie Monaco Evan Morikawa Daniel Mossing Tong Mu Mira Murati Oleg Murk David M\u00e9ly Ashvin Nair Reiichiro Nakano Rajeev Nayak Arvind Neelakantan Richard Ngo Hyeonwoo Noh Long Ouyang Cullen O\u2019Keefe Jakub Pachocki Alex Paino Joe Palermo Ashley Pantuliano Giambattista Parascandolo Joel Parish Emy Parparita Alex Passos Mikhail Pavlov Andrew Peng Adam Perelman Filipe de Michael Petrov Henrique Ponde Michael (Rai)Pokorny Michelle Pokrass Vitchyr H. Tolly Powell Alethea Power Boris Power Elizabeth Proehl Raul Puri Alec Radford Jack Rae Aditya Ramesh Cameron Raymond Francis Real Kendra Rimbach Carl Ross Bob Rotsted Henri Roussez Nick Ryder Mario Saltarelli Ted Sanders Shibani Santurkar Girish Sastry Heather Schmidt David Schnurr John Schulman Daniel Selsam Kyla Sheppard Toki Sherbakov Jessica Shieh Sarah Shoker Pranav Shyam Szymon Sidor Eric Sigler Maddie Simens Jordan Sitkin Katarina Slama Ian Sohl Benjamin Sokolowsky Yang Song Natalie Staudacher Felipe Petroski Natalie Summers Ilya Sutskever Jie Tang Nikolas Tezak Madeleine B. Phil Tillet Amin Tootoonchian Elizabeth Tseng Preston Tuggle Nick Turley Jerry Tworek Juan Felipe Andrea Vallone Arun Vijayvergiya Chelsea Voss Carroll Wainwright Justin Jay Alvin Wang Ben Wang Jonathan Ward Jason Wei CJ Weinmann Akila Welihinda Peter Welinder Jiayi Weng Lilian Weng Matt Wiethoff Dave Willner Clemens Winter Samuel Wolrich Hannah Wong Lauren Workman Sherwin Wu Jeff Wu Michael Wu Kai Xiao Tao Xu Sarah Yoo Kevin Yu Qiming Yuan Wojciech Zaremba Rowan Zellers Chong Zhang Marvin Zhang Shengjia Zhao Tianhao Zheng Juntang Zhuang William Zhuk and Barret Zoph. 2023. Gpt-4 technical report. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2303.08774 (2023)."},{"key":"e_1_3_3_1_11_2","unstructured":"Amey Agrawal Ashish Panwar Jayashree Mohan Nipun Kwatra Bhargav\u00a0S Gulavani and Ramachandran Ramjee. 2023. Sarathi: Efficient llm inference by piggybacking decodes with chunked prefills. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2308.16369 (2023)."},{"key":"e_1_3_3_1_12_2","unstructured":"Ebtesam Almazrouei Hamza Alobeidli Abdulaziz Alshamsi Alessandro Cappelli Ruxandra Cojocaru M\u00e9rouane Debbah \u00c9tienne Goffinet Daniel Hesslow Julien Launay Quentin Malartic Daniele Mazzotta Badreddine Noune Baptiste Pannier and Guilherme Penedo. 2023. The Falcon Series of Open Language Models. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2311.16867 (2023)."},{"key":"e_1_3_3_1_13_2","unstructured":"Paul Barham Aakanksha Chowdhery Jeff Dean Sanjay Ghemawat Steven Hand Dan Hurt Michael Isard Hyeontaek Lim Ruoming Pang Sudip Roy Brennan Saeta Parker Schuh Ryan Sepassi Laurent El\u00a0Shafey Chandramohan A.\u00a0Thekkath and Yonghui Wu. 2022. Pathways: Asynchronous distributed dataflow for ml. Proceedings of Machine Learning and Systems 4 (2022) 430\u2013449."},{"key":"e_1_3_3_1_14_2","unstructured":"Tom B. Brown Benjamin Mann Nick Ryder Melanie Subbiah Jared Kaplan Prafulla Dhariwal Arvind Neelakantan Pranav Shyam Girish Sastry Amanda Askell Sandhini Agarwal Ariel Herbert-Voss Gretchen Krueger Tom Henighan Rewon Child Aditya Ramesh Daniel M. Ziegler Jeffrey Wu Clemens Winter Christopher Hesse Mark Chen Eric Sigler Mateusz Litwin Scott Gray Benjamin Chess Jack Clark Christopher Berner Sam McCandlish Alec Radford Ilya Sutskever and Dario Amodei. 2020. Language models are few-shot learners. Advances in neural information processing systems 33 (2020) 1877\u20131901."},{"key":"e_1_3_3_1_15_2","doi-asserted-by":"publisher","DOI":"10.1145\/3579371.3589048"},{"key":"e_1_3_3_1_16_2","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA57654.2024.00022"},{"key":"e_1_3_3_1_17_2","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA59077.2024.00025"},{"key":"e_1_3_3_1_18_2","doi-asserted-by":"crossref","unstructured":"Yu-Hsin Chen Tushar Krishna Joel\u00a0S Emer and Vivienne Sze. 2016. Eyeriss: An energy-efficient reconfigurable accelerator for deep convolutional neural networks. IEEE journal of solid-state circuits 52 1 (2016) 127\u2013138.","DOI":"10.1109\/JSSC.2016.2616357"},{"key":"e_1_3_3_1_19_2","unstructured":"Aakanksha Chowdhery Sharan Narang Jacob Devlin Maarten Bosma Gaurav Mishra Adam Roberts Paul Barham Hyung Chung Charles Sutton Sebastian Gehrmann Parker Schuh Kensen Shi Sasha Tsvyashchenko Joshua Maynez Abhishek Rao Parker Barnes Yi Tay Noam Shazeer Vinodkumar Prabhakaran Emily Reif Nan Du Ben Hutchinson Reiner Pope James Bradbury Jacob Austin Michael Isard Guy Gur-Ari Pengcheng Yin Toju Duke Anselm Levskaya Sanjay Ghemawat Sunipa Dev Henryk Michalewski Xavier Garcia Vedant Misra Kevin Robinson Liam Fedus Denny Zhou Daphne Ippolito David Luan Hyeontaek Lim Barret Zoph Alexander Spiridonov Ryan Sepassi David Dohan Shivani Agrawal Mark Omernick Andrew M.\u00a0Dai Thanumalayan Pillai Marie Pellat Aitor Lewkowycz Erica Moreira Rewon Child Oleksandr Polozov Katherine Lee Zongwei Zhou Xuezhi Wang Brennan Saeta Mark Diaz Orhan Firat Michele Catasta Jason Wei Kathy Meier-Hellstern Douglas Eck Jeff Dean Slav Petrov and Noah Fiedel. 2023. Palm: Scaling language modeling with pathways. Journal of Machine Learning Research 24 240 (2023) 1\u2013113."},{"key":"e_1_3_3_1_20_2","doi-asserted-by":"publisher","DOI":"10.1145\/3613424.3614246"},{"key":"e_1_3_3_1_21_2","unstructured":"Abhimanyu Dubey Abhinav Jauhri Abhinav Pandey Abhishek Kadian Ahmad Al-Dahle Aiesha Letman Akhil Mathur Alan Schelten Amy Yang Angela Fan Anirudh Goyal Anthony Hartshorn Aobo Yang Archi Mitra Archie Sravankumar Artem Korenev Arthur Hinsvark Arun Rao Aston Zhang Aurelien Rodriguez Austen Gregerson Ava Spataru Baptiste Roziere Bethany Biron Binh Tang Bobbie Chern Charlotte Caucheteux Chaya Nayak Chloe Bi Chris Marra Chris McConnell Christian Keller Christophe Touret Chunyang Wu Corinne Wong Cristian Canton Cyrus Nikolaidis Damien Allonsius Daniel Song Danielle Pintz Danny Livshits David Esiobu Dhruv Choudhary Dhruv Mahajan Diego Garcia-Olano Diego Perino Dieuwke Hupkes Egor Lakomkin Ehab AlBadawy Elina Lobanova Emily Dinan Eric Michael Filip Radenovic Frank Zhang Gabriel Synnaeve Gabrielle Lee Georgia Lewis Graeme Nail Gregoire Mialon Guan Pang Guillem Cucurell Hailey Nguyen Hannah Korevaar Hu Xu Hugo Touvron Iliyan Zarov Imanol Arrieta Isabel Kloumann Ishan Misra Ivan Evtimov Jade Copet Jaewon Lee Jan Geffert Jana Vranes Jason Park Jay Mahadeokar Jeet Shah Jelmer van Jennifer Billock Jenny Hong Jenya Lee Jeremy Fu Jianfeng Chi Jianyu Huang Jiawen Liu Jie Wang Jiecao Yu Joanna Bitton Joe Spisak Jongsoo Park Joseph Rocca Joshua Johnstun Joshua Saxe Junteng Jia Kalyan Vasuden Kartikeya Upasani Kate Plawiak Ke Li Kenneth Heafield Kevin Stone Khalid El-Arini Krithika Iyer Kshitiz Malik Kuenley Chiu Kunal Bhalla Lauren Rantala-Yeary Laurens van Lawrence Chen Liang Tan Liz Jenkins Louis Martin Lovish Madaan Lubo Malo Lukas Blecher Lukas Landzaat Luke de Madeline Muzzi Mahesh Pasupuleti Mannat Singh Manohar Paluri Marcin Kardas Mathew Oldham Mathieu Rita Maya Pavlova Melanie Kambadur Mike Lewis Min Si Mitesh Kumar Mona Hassan Naman Goyal Narjes Torabi Nikolay Bashlykov Nikolay Bogoychev Niladri Chatterji Olivier Duchenne Onur \u00c7elebi Patrick Alrassy Pengchuan Zhang Pengwei Li Petar Vasic Peter Weng Prajjwal Bhargava Pratik Dubal Praveen Krishnan Punit Singh Puxin Xu Qing He Qingxiao Dong Ragavan Srinivasan Raj Ganapathy Ramon Calderer Ricardo Silveira Robert Stojnic Roberta Raileanu Rohit Girdhar Rohit Patel Romain Sauvestre Ronnie Polidoro Roshan Sumbaly Ross Taylor Ruan Silva Rui Hou Rui Wang Saghar Hosseini Sahana Chennabasappa Sanjay Singh Sean Bell Seohyun Sonia Sergey Edunov Shaoliang Nie Sharan Narang Sharath Raparthy Sheng Shen Shengye Wan Shruti Bhosale Shun Zhang Simon Vandenhende Soumya Batra Spencer Whitman Sten Sootla Stephane Collot Suchin Gururangan Sydney Borodinsky Tamar Herman Tara Fowler Tarek Sheasha Thomas Georgiou Thomas Scialom Tobias Speckbacher Todor Mihaylov Tong Xiao Ujjwal Karn Vedanuj Goswami Vibhor Gupta Vignesh Ramanathan Viktor Kerkez Vincent Gonguet Virginie Do Vish Vogeti Vladan Petrovic Weiwei Chu Wenhan Xiong Wenyin Fu Whitney Meers Xavier Martinet Xiaodong Wang Xiaoqing Ellen Xinfeng Xie Xuchao Jia Xuewei Wang Yaelle Goldschlag Yashesh Gaur Yasmine Babaei Yi Wen Yiwen Song Yuchen Zhang Yue Li Yuning Mao Zacharie Delpierre Zheng Yan Zhengxing Chen Zoe Papakipos Aaditya Singh Aaron Grattafiori Abha Jain Adam Kelsey Adam Shajnfeld Adithya Gangidi Adolfo Victoria Ahuva Goldstand Ajay Menon Ajay Sharma Alex Boesenberg Alex Vaughan Alexei Baevski Allie Feinstein Amanda Kallet Amit Sangani Anam Yunus Andrei Lupu Andres Alvarado Andrew Caples Andrew Gu Andrew Ho Andrew Poulton Andrew Ryan Ankit Ramchandani Annie Franco Aparajita Saraf Arkabandhu Chowdhury Ashley Gabriel Ashwin Bharambe Assaf Eisenman Azadeh Yazdan Beau James Ben Maurer Benjamin Leonhardi Bernie Huang Beth Loyd Beto De Bhargavi Paranjape Bing Liu Bo Wu Boyu Ni Braden Hancock Bram Wasti Brandon Spence Brani Stojkovic Brian Gamido Britt Montalvo Carl Parker Carly Burton Catalina Mejia Changhan Wang Changkyu Kim Chao Zhou Chester Hu Ching-Hsiang Chu Chris Cai Chris Tindal Christoph Feichtenhofer Damon Civin Dana Beaty Daniel Kreymer Daniel Li Danny Wyatt David Adkins David Xu Davide Testuggine Delia David Devi Parikh Diana Liskovich Didem Foss Dingkang Wang Duc Le Dustin Holland Edward Dowling Eissa Jamil Elaine Montgomery Eleonora Presani Emily Hahn Emily Wood Erik Brinkman Esteban Arcaute Evan Dunbar Evan Smothers Fei Sun Felix Kreuk Feng Tian Firat Ozgenel Francesco Caggioni Francisco Guzm\u00e1n Frank Kanayet Frank Seide Gabriela Medina Gabriella Schwarz Gada Badeer Georgia Swee Gil Halpern Govind Thattai Grant Herman Grigory Sizov Guangyi (Jack)Zhang Guna Lakshminarayanan Hamid Shojanazeri Han Zou Hannah Wang Hanwen Zha Haroun Habeeb Harrison Rudolph Helen Suk Henry Aspegren Hunter Goldman Ibrahim Damlaj Igor Molybog Igor Tufanov Irina-Elena Veliche Itai Gat Jake Weissman James Geboski James Kohli Japhet Asher Jean-Baptiste Gaya Jeff Marcus Jeff Tang Jennifer Chan Jenny Zhen Jeremy Reizenstein Jeremy Teboul Jessica Zhong Jian Jin Jingyi Yang Joe Cummings Jon Carvill Jon Shepard Jonathan McPhie Jonathan Torres Josh Ginsburg Junjie Wang Kai Wu Kam Hou Karan Saxena Karthik Prasad Kartikay Khandelwal Katayoun Zand Kathy Matosich Kaushik Veeraraghavan Kelly Michelena Keqian Li Kun Huang Kunal Chawla Kushal Lakhotia Kyle Huang Lailin Chen Lakshya Garg Lavender A Leandro Silva Lee Bell Lei Zhang Liangpeng Guo Licheng Yu Liron Moshkovich Luca Wehrstedt Madian Khabsa Manav Avalani Manish Bhatt Maria Tsimpoukelli Martynas Mankus Matan Hasson Matthew Lennie Matthias Reso Maxim Groshev Maxim Naumov Maya Lathi Meghan Keneally Michael L. Michal Valko Michelle Restrepo Mihir Patel Mik Vyatskov Mikayel Samvelyan Mike Clark Mike Macey Mike Wang Miquel Jubert Mo Metanat Mohammad Rastegari Munish Bansal Nandhini Santhanam Natascha Parks Natasha White Navyata Bawa Nayan Singhal Nick Egebo Nicolas Usunier Nikolay Pavlovich Ning Dong Ning Zhang Norman Cheng Oleg Chernoguz Olivia Hart Omkar Salpekar Ozlem Kalinli Parkin Kent Parth Parekh Paul Saab Pavan Balaji Pedro Rittner Philip Bontrager Pierre Roux Piotr Dollar Polina Zvyagina Prashant Ratanchandani Pritish Yuvraj Qian Liang Rachad Alao Rachel Rodriguez Rafi Ayub Raghotham Murthy Raghu Nayani Rahul Mitra Raymond Li Rebekkah Hogan Robin Battey Rocky Wang Rohan Maheswari Russ Howes Ruty Rinott Sai Jayesh Samyak Datta Sara Chugh Sara Hunt Sargun Dhillon Sasha Sidorov Satadru Pan Saurabh Verma Seiji Yamamoto Sharadh Ramaswamy Shaun Lindsay Shaun Lindsay Sheng Feng Shenghao Lin Shengxin Cindy Shiva Shankar Shuqiang Zhang Shuqiang Zhang Sinong Wang Sneha Agarwal Soji Sajuyigbe Soumith Chintala Stephanie Max Stephen Chen Steve Kehoe Steve Satterfield Sudarshan Govindaprasad Sumit Gupta Sungmin Cho Sunny Virk Suraj Subramanian Sy Choudhury Sydney Goldman Tal Remez Tamar Glaser Tamara Best Thilo Kohler Thomas Robinson Tianhe Li Tianjun Zhang Tim Matthews Timothy Chou Tzook Shaked Varun Vontimitta Victoria Ajayi Victoria Montanez Vijai Mohan Vinay Satish Vishal Mangla V\u00edtor Albiero Vlad Ionescu Vlad Poenaru Vlad Tiberiu Vladimir Ivanov Wei Li Wenchen Wang Wenwen Jiang Wes Bouaziz Will Constable Xiaocheng Tang Xiaofang Wang Xiaojian Wu Xiaolan Wang Xide Xia Xilun Wu Xinbo Gao Yanjun Chen Ye Hu Ye Jia Ye Qi Yenda Li Yilin Zhang Ying Zhang Yossi Adi Youngjin Nam Yu (Sid)Wang Yuchen Hao Yundi Qian Yuzi He Zach Rait Zachary DeVito Zef Rosnbrick Zhaoduo Wen Zhenyu Yang and Zhiwei Zhao. 2024. The llama 3 herd of models. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2407.21783 (2024)."},{"key":"e_1_3_3_1_22_2","doi-asserted-by":"publisher","DOI":"10.1145\/3437801.3441593"},{"key":"e_1_3_3_1_23_2","doi-asserted-by":"crossref","unstructured":"Yinxiao Feng and Kaisheng Ma. 2024. Switch-Less Dragonfly on Wafers: A Scalable Interconnection Architecture based on Wafer-Scale Integration. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2407.10290 (2024).","DOI":"10.1109\/SC41406.2024.00102"},{"key":"e_1_3_3_1_24_2","doi-asserted-by":"publisher","DOI":"10.1145\/3037697.3037702"},{"key":"e_1_3_3_1_25_2","doi-asserted-by":"publisher","DOI":"10.1145\/3297858.3304014"},{"key":"e_1_3_3_1_26_2","unstructured":"Raveesh Garg Hyoukjun Kwon Eric Qin Yu-Hsin Chen Tushar Krishna and Liangzhen Lai. 2024. PipeOrgan: Efficient Inter-operation Pipelining with Flexible Spatial Organization and Interconnects. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2405.01736 (2024)."},{"key":"e_1_3_3_1_27_2","doi-asserted-by":"crossref","unstructured":"Haoming Guo Shengbin Cao Lei Li and Xiaofeng Zhang. 2022. A review on the mainstream through-silicon via etching methods. Materials Science in Semiconductor Processing 137 (2022) 106182.","DOI":"10.1016\/j.mssp.2021.106182"},{"key":"e_1_3_3_1_28_2","doi-asserted-by":"publisher","DOI":"10.1145\/3445814.3446762"},{"key":"e_1_3_3_1_29_2","unstructured":"Connor Holmes Masahiro Tanaka Michael Wyatt Ammar Ahmad Jeff Rasley Samyam Rajbhandari Reza Yazdani Heyang Qin Arash Bakhtiari Lev Kurilenko and Yuxiong He. 2024. Deepspeed-fastgen: High-throughput text generation for llms via mii and deepspeed-inference. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2401.08671 (2024)."},{"key":"e_1_3_3_1_30_2","doi-asserted-by":"crossref","unstructured":"SY Hou W\u00a0Chris Chen Clark Hu Christine Chiu KC Ting TS Lin WH Wei WC Chiou Vic\u00a0JC Lin Victor\u00a0CY Chang C Wang C Wu and D Yu. 2017. Wafer-level integration of an advanced logic-memory system through the second-generation CoWoS technology. IEEE Transactions on Electron Devices 64 10 (2017) 4071\u20134077.","DOI":"10.1109\/TED.2017.2737644"},{"key":"e_1_3_3_1_31_2","unstructured":"Cunchen Hu Heyang Huang Junhao Hu Jiang Xu Xusheng Chen Tao Xie Chenxi Wang Sa Wang Yungang Bao Ninghui Sun and Yizhou Shan. 2024. Memserve: Context caching for disaggregated llm serving with elastic memory pool. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2406.17565 (2024)."},{"key":"e_1_3_3_1_32_2","unstructured":"Cunchen Hu Heyang Huang Liangliang Xu Xusheng Chen Jiang Xu Shuang Chen Hao Feng Chenxi Wang Sa Wang Yungang Bao Ninghui Sun and Yizhou Shan. 2024. Inference without interference: Disaggregate llm inference for mixed downstream workloads. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2401.11181 (2024)."},{"key":"e_1_3_3_1_33_2","first-page":"1022","volume-title":"2023 IEEE 73rd Electronic Components and Technology Conference, ECTC 2023","author":"Hu YuChen","year":"2023","unstructured":"YuChen Hu, YuMin Liang, HsiehPin Hu, ChiaYen Tan, ChihTa Shen, ChienHsun Lee, and SY Hou. 2023. CoWoS architecture evolution for next generation HPC on 2.5 D system in package. In 2023 IEEE 73rd Electronic Components and Technology Conference, ECTC 2023. IEEE, 1022\u20131026."},{"key":"e_1_3_3_1_34_2","doi-asserted-by":"crossref","unstructured":"Yang Hu Xinhan Lin Huizheng Wang Zhen He Xingmao Yu Jiahao Zhang Qize Yang Zheng Xu Sihan Guan Jiahao Fang Haoran Shang Xinru Tang Xu Dai Shaojun Wei and Shouyi Yin. 2024. Wafer-Scale Computing: Advancements Challenges and Future Perspectives. IEEE Circuits and Systems Magazine 24 1 (2024) 52\u201381.","DOI":"10.1109\/MCAS.2024.3349669"},{"key":"e_1_3_3_1_35_2","doi-asserted-by":"publisher","DOI":"10.1109\/ECTC32696.2021.00028"},{"key":"e_1_3_3_1_36_2","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA52012.2021.00050"},{"key":"e_1_3_3_1_37_2","unstructured":"Yanping Huang Youlong Cheng Ankur Bapna Orhan Firat Dehao Chen Mia Chen HyoukJoong Lee Jiquan Ngiam Quoc\u00a0V Le Yonghui Wu and Zhifeng Chen. 2019. Gpipe: Efficient training of giant neural networks using pipeline parallelism. Advances in neural information processing systems 32 (2019)."},{"key":"e_1_3_3_1_38_2","unstructured":"Zhihao Jia James Thomas Todd Warszawski Mingyu Gao Matei Zaharia and Alex Aiken. 2019. Optimizing DNN computation with relaxed graph substitutions. Proceedings of Machine Learning and Systems 1 (2019) 27\u201339."},{"key":"e_1_3_3_1_39_2","unstructured":"Albert\u00a0Q. Jiang Alexandre Sablayrolles Arthur Mensch Chris Bamford Devendra\u00a0Singh Chaplot Diego de\u00a0las Casas Florian Bressand Gianna Lengyel Guillaume Lample Lucile Saulnier L\u00e9lio\u00a0Renard Lavaud Marie-Anne Lachaux Pierre Stock Teven\u00a0Le Scao Thibaut Lavril Thomas Wang Timoth\u00e9e Lacroix and William\u00a0El Sayed. 2023. Mistral 7B. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2310.06825 (2023)."},{"key":"e_1_3_3_1_40_2","doi-asserted-by":"crossref","unstructured":"Bentian Jiang Jingsong Chen Jinwei Liu Lixin Liu Fangzhou Wang Xiaopeng Zhang and Evangeline\u00a0FY Young. 2021. CU. POKer: Placing DNNs on WSE With Optimal Kernel Sizing and Efficient Protocol Optimization. IEEE Transactions on Computer-Aided Design of Integrated Circuits and Systems 41 6 (2021) 1888\u20131901.","DOI":"10.1109\/TCAD.2021.3096458"},{"key":"e_1_3_3_1_41_2","doi-asserted-by":"publisher","DOI":"10.1109\/ISPASS61541.2024.00035"},{"key":"e_1_3_3_1_42_2","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO50266.2020.00058"},{"key":"e_1_3_3_1_43_2","doi-asserted-by":"publisher","DOI":"10.1145\/3400302.3415639"},{"key":"e_1_3_3_1_44_2","doi-asserted-by":"publisher","DOI":"10.1145\/3352460.3358252"},{"key":"e_1_3_3_1_45_2","doi-asserted-by":"crossref","unstructured":"Hyoukjun Kwon Ananda Samajdar and Tushar Krishna. 2018. Maeri: Enabling flexible dataflow mapping over dnn accelerators via reconfigurable interconnects. ACM SIGPLAN Notices 53 2 (2018) 461\u2013475.","DOI":"10.1145\/3296957.3173176"},{"key":"e_1_3_3_1_46_2","doi-asserted-by":"publisher","DOI":"10.1145\/3600006.3613165"},{"key":"e_1_3_3_1_47_2","doi-asserted-by":"crossref","unstructured":"John\u00a0H Lau Gary\u00a0ChangFu Chen Jones\u00a0YuCheng Huang Ricky\u00a0TsunSheng Chou Channing\u00a0ChengLin Yang HsingNing Liu and TzyyJang Tseng. 2021. Hybrid substrate by fan-out RDL-first panel-level packaging. IEEE Transactions on Components Packaging and Manufacturing Technology 11 8 (2021) 1301\u20131309.","DOI":"10.1109\/TCPMT.2021.3096786"},{"key":"e_1_3_3_1_48_2","doi-asserted-by":"crossref","unstructured":"John\u00a0H Lau ChengTa Ko KaiMing Yang ChiaYu Peng Tim Xia Puru\u00a0Bruce Lin JJ Chen Patrick\u00a0PoChun Huang HsingNing Liu TzyyJang Tseng Eagle Lin and Leo Chang. 2020. Panel-level fan-out RDL-first packaging for heterogeneous integration. IEEE Transactions on Components Packaging and Manufacturing Technology 10 7 (2020) 1125\u20131137.","DOI":"10.1109\/TCPMT.2020.2996658"},{"key":"e_1_3_3_1_49_2","unstructured":"Dmitry Lepikhin HyoukJoong Lee Yuanzhong Xu Dehao Chen Orhan Firat Yanping Huang Maxim Krikun Noam Shazeer and Zhifeng Chen. 2020. Gshard: Scaling giant models with conditional computation and automatic sharding. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2006.16668 (2020)."},{"key":"e_1_3_3_1_50_2","first-page":"6543","volume-title":"International Conference on Machine Learning","author":"Li Zhuohan","year":"2021","unstructured":"Zhuohan Li, Siyuan Zhuang, Shiyuan Guo, Danyang Zhuo, Hao Zhang, Dawn Song, and Ion Stoica. 2021. Terapipe: Token-level pipeline parallelism for training large-scale language models. In International Conference on Machine Learning. PMLR, 6543\u20136552."},{"key":"e_1_3_3_1_51_2","doi-asserted-by":"publisher","DOI":"10.1109\/HCS55958.2022.9895479"},{"key":"e_1_3_3_1_52_2","doi-asserted-by":"publisher","DOI":"10.1145\/3489517.3530565"},{"key":"e_1_3_3_1_53_2","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA52012.2021.00062"},{"key":"e_1_3_3_1_54_2","unstructured":"Canhui Luo Zhouxing Su and Zhipeng L\u00fc. 2023. MS-CLS: An Effective Partitioning and Placement Metaheuristic for Wafer-Scale Physics Modeling. IEEE Transactions on Emerging Topics in Computational Intelligence (2023)."},{"key":"e_1_3_3_1_55_2","first-page":"2430","volume-title":"International conference on machine learning","author":"Mirhoseini Azalia","year":"2017","unstructured":"Azalia Mirhoseini, Hieu Pham, Quoc\u00a0V Le, Benoit Steiner, Rasmus Larsen, Yuefeng Zhou, Naveen Kumar, Mohammad Norouzi, Samy Bengio, and Jeff Dean. 2017. Device placement optimization with reinforcement learning. In International conference on machine learning. PMLR, 2430\u20132439."},{"key":"e_1_3_3_1_56_2","doi-asserted-by":"crossref","unstructured":"Gordon\u00a0E Moore. 1998. Cramming more components onto integrated circuits. Proc. IEEE 86 1 (1998) 82\u201385.","DOI":"10.1109\/JPROC.1998.658762"},{"key":"e_1_3_3_1_57_2","doi-asserted-by":"publisher","DOI":"10.1145\/3341301.3359646"},{"key":"e_1_3_3_1_58_2","unstructured":"Marcelo Orenes-Vera Esin Tureci Margaret Martonosi and David Wentzlaff. 2023. DCRA: A distributed chiplet-based reconfigurable architecture for irregular applications. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2311.15443 (2023)."},{"key":"e_1_3_3_1_59_2","first-page":"48","volume-title":"2024 IEEE International Symposium on Performance Analysis of Systems and Software, ISPASS 2024","author":"OrenesVera Marcelo","year":"2024","unstructured":"Marcelo OrenesVera, Esin Tureci, Margaret Martonosi, and David Wentzlaff. 2024. Muchisim: A simulation framework for design exploration of multi-chip manycore systems. In 2024 IEEE International Symposium on Performance Analysis of Systems and Software, ISPASS 2024. IEEE, 48\u201360."},{"key":"e_1_3_3_1_60_2","doi-asserted-by":"publisher","DOI":"10.1145\/3505170.3506730"},{"key":"e_1_3_3_1_61_2","doi-asserted-by":"publisher","DOI":"10.1109\/DAC18074.2021.9586194"},{"key":"e_1_3_3_1_62_2","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2019.00042"},{"key":"e_1_3_3_1_63_2","unstructured":"Pratyush Patel Esha Choukse Chaojie Zhang Aashaka Shah \u00cd\u00f1igo Goiri Saeed Maleki and Ricardo Bianchini. 2023. Splitwise: Efficient generative llm inference using phase splitting. Power 400 700W (2023) 1\u201375."},{"key":"e_1_3_3_1_64_2","unstructured":"Reiner Pope Sholto Douglas Aakanksha Chowdhery Jacob Devlin James Bradbury Jonathan Heek Kefan Xiao Shivani Agrawal and Jeff Dean. 2023. Efficiently scaling transformer inference. Proceedings of Machine Learning and Systems 5 (2023) 606\u2013624."},{"key":"e_1_3_3_1_65_2","doi-asserted-by":"publisher","DOI":"10.1109\/SC41405.2020.00024"},{"key":"e_1_3_3_1_66_2","unstructured":"Saeed Rashidi William Won Sudarshan Srinivasan Puneet Gupta and Tushar Krishna. 2024. FRED: Flexible REduction-Distribution Interconnect and Communication Implementation for Wafer-Scale Distributed Training of DNN Models. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2406.19580 (2024)."},{"key":"e_1_3_3_1_67_2","unstructured":"Baptiste Rozi\u00e8re Jonas Gehring Fabian Gloeckle Sten Sootla Itai Gat Xiaoqing Tan Yossi Adi Jingyu Liu Romain Sauvestre Tal Remez J\u00e9r\u00e9my Rapin Artyom Kozhevnikov Ivan Evtimov Joanna Bitton Manish Bhatt Cristian Ferrer Aaron Grattafiori Wenhan Xiong Alexandre D\u00e9fossez Jade Copet Faisal Azhar Hugo Touvron Louis Martin Nicolas Usunier Thomas Scialom and Gabriel Synnaeve. 2023. Code llama: Open foundation models for code. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2308.12950 (2023)."},{"key":"e_1_3_3_1_68_2","unstructured":"Teven Scao Angela Fan Christopher Akiki Ellie Pavlick Suzana Ili\u0107 Daniel Hesslow Roman Castagn\u00e9 Alexandra Luccioni Fran\u00e7ois Yvon Matthias Gall\u00e9 Jonathan Tow Alexander Rush Stella Biderman Albert Webson Pawan Ammanamanchi Thomas Wang Beno\u00eet Sagot Niklas Muennighoff Albert Moral Olatunji Ruwase Rachel Bawden Stas Bekman Angelina McMillan-Major Iz Beltagy Huu Nguyen Lucile Saulnier Samson Tan Pedro Suarez Victor Sanh Hugo Lauren\u00e7on Yacine Jernite Julien Launay Margaret Mitchell Colin Raffel Aaron Gokaslan Adi Simhi Aitor Soroa Alham Aji Amit Alfassy Anna Rogers Ariel Nitzav Canwen Xu Chenghao Mou Chris Emezue Christopher Klamm Colin Leong Daniel Strien David Adelani Dragomir Radev Eduardo Ponferrada Efrat Levkovizh Ethan Kim Eyal Natan Francesco Toni G\u00e9rard Dupont Germ\u00e1n Kruszewski Giada Pistilli Hady Elsahar Hamza Benyamina Hieu Tran Ian Yu Idris Abdulmumin Isaac Johnson Itziar Gonzalez-Dios Javier Rosa Jenny Chim Jesse Dodge Jian Zhu Jonathan Chang J\u00f6rg Frohberg Joseph Tobing Joydeep Bhattacharjee Khalid Almubarak Kimbo Chen Kyle Lo Leandro Werra Leon Weber Long Phan Loubna allal Ludovic Tanguy Manan Dey Manuel Mu\u00f1oz Maraim Masoud Mar\u00eda Grandury Mario \u0160a\u0161ko Max Huang Maximin Coavoux Mayank Singh Mike Jiang Minh Vu Mohammad Jauhar Mustafa Ghaleb Nishant Subramani Nora Kassner Nurulaqilla Khamis Olivier Nguyen Omar Espejel Ona Gibert Paulo Peter Henderson Pierre Colombo Priscilla Amuok Quentin Lhoest Rheza Harliman Rishi Bommasani Roberto L\u00f3pez Rui Ribeiro Salomey Osei Sampo Pyysalo Sebastian Nagel Shamik Bose Shamsuddeen Muhammad Shanya Sharma Shayne Longpre Somaieh Nikpoor Stanislav Silberberg Suhas Pai Sydney Zink Tiago Torrent Timo Schick Tristan Thrush Valentin Danchev Vassilina Nikoulina Veronika Laippala Violette Lepercq Vrinda Prabhu Zaid Alyafeai Zeerak Talat Arun Raja Benjamin Heinzerling Chenglei Si Davut Ta\u015far Elizabeth Salesky Sabrina Mielke Wilson Lee Abheesht Sharma Andrea Santilli Antoine Chaffin Arnaud Stiegler Debajyoti Datta Eliza Szczechla Gunjan Chhablani Han Wang Harshit Pandey Hendrik Strobelt Jason Fries Jos Rozen Leo Gao Lintang Sutawika M Bari Maged Al-shaibani Matteo Manica Nihal Nayak Ryan Teehan Samuel Albanie Sheng Shen Srulik Ben-David Stephen Bach Taewoon Kim Tali Bers Thibault Fevry Trishala Neeraj Urmish Thakker Vikas Raunak Xiangru Tang Zheng-Xin Yong Zhiqing Sun Shaked Brody Yallow Uri Hadar Tojarieh Adam Roberts Hyung Chung Jaesung Tae Jason Phang Ofir Press Conglong Li Deepak Narayanan Hatim Bourfoune Jared Casper Jeff Rasley Max Ryabinin Mayank Mishra Minjia Zhang Mohammad Shoeybi Myriam Peyrounette Nicolas Patry Nouamane Tazi Omar Sanseviero Patrick Platen Pierre Cornette Pierre Lavall\u00e9e R\u00e9mi Lacroix Samyam Rajbhandari Sanchit Gandhi Shaden Smith St\u00e9phane Requena Suraj Patil Tim Dettmers Ahmed Baruwa Amanpreet Singh Anastasia Cheveleva Anne-Laure Ligozat Arjun Subramonian Aur\u00e9lie N\u00e9v\u00e9ol Charles Lovering Dan Garrette Deepak Tunuguntla Ehud Reiter Ekaterina Taktasheva Ekaterina Voloshina Eli Bogdanov Genta Winata Hailey Schoelkopf Jan-Christoph Kalo Jekaterina Novikova Jessica Forde Jordan Clive Jungo Kasai Ken Kawamura Liam Hazan Marine Carpuat Miruna Clinciu Najoung Kim Newton Cheng Oleg Serikov Omer Antverg Oskar Wal Rui Zhang Ruochen Zhang Sebastian Gehrmann Shachar Mirkin Shani Pais Tatiana Shavrina Thomas Scialom Tian Yun Tomasz Limisiewicz Verena Rieser Vitaly Protasov Vladislav Mikhailov Yada Pruksachatkun Yonatan Belinkov Zachary Bamberger Zden\u011bk Kasner Alice Rueda Amanda Pestana Amir Feizpour Ammar Khan Amy Faranak Ana Santos Anthony Hevia Antigona Unldreaj Arash Aghagol Arezoo Abdollahi Aycha Tammour Azadeh HajiHosseini Bahareh Behroozi Benjamin Ajibade Bharat Saxena Carlos Ferrandis Daniel McDuff Danish Contractor David Lansky Davis David Douwe Kiela Duong Nguyen Edward Tan Emi Baylor Ezinwanne Ozoani Fatima Mirza Frankline Ononiwu Habib Rezanejad Hessie Jones Indrani Bhattacharya Irene Solaiman Irina Sedenko Isar Nejadgholi Jesse Passmore Josh Seltzer Julio Sanz Livia Dutra Mairon Samagaio Maraim Elbadri Margot Mieskes Marissa Gerchick Martha Akinlolu Michael McKenna Mike Qiu Muhammed Ghauri Mykola Burynok Nafis Abrar Nazneen Rajani Nour Elkott Nour Fahmy Olanrewaju Samuel Ran An Rasmus Kromann Ryan Hao Samira Alizadeh Sarmad Shubber Silas Wang Sourav Roy Sylvain Viguier Thanh Le Tobi Oyebade Trieu Le Yoyo Yang Zach Nguyen Abhinav Kashyap Alfredo Palasciano Alison Callahan Anima Shukla Antonio Miranda-Escalada Ayush Singh Benjamin Beilharz Bo Wang Caio Brito Chenxi Zhou Chirag Jain Chuxin Xu Cl\u00e9mentine Fourrier Daniel Peri\u00f1\u00e1n Daniel Molano Dian Yu Enrique Manjavacas Fabio Barth Florian Fuhrimann Gabriel Altay Giyaseddin Bayrak Gully Burns Helena Vrabec Imane Bello Ishani Dash Jihyun Kang John Giorgi Jonas Golde Jose Posada Karthik Sivaraman Lokesh Bulchandani Lu Liu Luisa Shinzato Madeleine Bykhovetz Maiko Takeuchi Marc P\u00e0mies Maria Castillo Marianna Nezhurina Mario S\u00e4nger Matthias Samwald Michael Cullan Michael Weinberg Michiel Wolf Mina Mihaljcic Minna Liu Moritz Freidank Myungsun Kang Natasha Seelam Nathan Dahlberg Nicholas Broad Nikolaus Muellner Pascale Fung Patrick Haller Ramya Chandrasekhar Renata Eisenberg Robert Martin Rodrigo Canalli Rosaline Su Ruisi Su Samuel Cahyawijaya Samuele Garda Shlok Deshmukh Shubhanshu Mishra Sid Kiblawi Simon Ott Sinee Sang-aroonsiri Srishti Kumar Stefan Schweter Sushil Bharati Tanmay Laud Th\u00e9o Gigant Tomoya Kainuma Wojciech Kusa Yanis Labrak Yash Bajaj Yash Venkatraman Yifan Xu Yingxin Xu Yu Xu Zhe Tan Zhongli Xie Zifan Ye Mathilde Bras Younes Belkada and Thomas Wolf. 2023. Bloom: A 176b-parameter open-access multilingual language model. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2211.05100v4 (2023)."},{"key":"e_1_3_3_1_69_2","doi-asserted-by":"publisher","DOI":"10.1109\/ISCAS.2010.5536970"},{"key":"e_1_3_3_1_70_2","doi-asserted-by":"publisher","DOI":"10.1145\/3352460.3358302"},{"key":"e_1_3_3_1_71_2","unstructured":"Noam Shazeer Youlong Cheng Niki Parmar Dustin Tran Ashish Vaswani Penporn Koanantakool Peter Hawkins HyoukJoong Lee Mingsheng Hong Cliff Young R Sepassi and B Hechtman. 2018. Mesh-tensorflow: Deep learning for supercomputers. Advances in neural information processing systems 31 (2018)."},{"key":"e_1_3_3_1_72_2","unstructured":"Mingcong Song Xinru Tang Fengfan Hou Jing Li Wei Wei Yipeng Ma Runqiu Xiao Hongjie Si Dingcheng Jiang Shouyi Yin Yang Hu and Guoping Long. 2024. Tackling the dynamicity in a production llm serving system with sota optimizations via hybrid prefill\/decode\/verify scheduling on efficient meta-kernels. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2412.18106 (2024)."},{"key":"e_1_3_3_1_73_2","unstructured":"Foteini Strati Sara Mcallister Amar Phanishayee Jakub Tarnawski and Ana Klimovic. 2024. D\u00e9j\u00e0Vu: KV-cache Streaming for Fast Fault-tolerant Generative LLM Serving. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2403.01876 (2024)."},{"key":"e_1_3_3_1_74_2","doi-asserted-by":"publisher","DOI":"10.1109\/HCS55958.2022.9895534"},{"key":"e_1_3_3_1_75_2","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA52012.2021.00083"},{"key":"e_1_3_3_1_76_2","unstructured":"Romal Thoppilan Daniel Freitas Jamie Hall Noam Shazeer Apoorv Kulshreshtha HengTze Cheng Alicia Jin Taylor Bos Leslie Baker Yu Du YaGuang Li Hongrae Lee Huaixiu Steven Amin Ghafouri Marcelo Menegali Yanping Huang Maxim Krikun Dmitry Lepikhin James Qin Dehao Chen Yuanzhong Xu Zhifeng Chen Adam Roberts Maarten Bosma Vincent Zhao Yanqi Zhou ChungChing Chang Igor Krivokon Will Rusch Marc Pickett Pranesh Srinivasan Laichee Man Kathleen MeierHellstern Meredith Ringel Tulsee Doshi Renelito Delos Toju Duke Johnny Soraker Ben Zevenbergen Vinodkumar Prabhakaran Mark Diaz Ben Hutchinson Kristen Olson Alejandra Molina Erin HoffmanJohn Josh Lee Lora Aroyo Ravi Rajakumar Alena Butryna Matthew Lamm Viktoriya Kuzmina Joe Fenton Aaron Cohen Rachel Bernstein Ray Kurzweil Blaise AgueraArcas Claire Cui Marian Croak Ed Chi and Quoc Le. 2022. Lamda: Language models for dialog applications. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2201.08239 (2022)."},{"key":"e_1_3_3_1_77_2","unstructured":"Hugo Touvron Thibaut Lavril Gautier Izacard Xavier Martinet MarieAnne Lachaux Timoth\u00e9e Lacroix Baptiste Rozi\u00e8re Naman Goyal Eric Hambro Faisal Azhar A Rodriguez A Joulin E Grave and G Lample. 2023. Llama: Open and efficient foundation language models. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2302.13971 (2023)."},{"key":"e_1_3_3_1_78_2","unstructured":"Hugo Touvron Louis Martin Kevin Stone Peter Albert Amjad Almahairi Yasmine Babaei Nikolay Bashlykov Soumya Batra Prajjwal Bhargava Shruti Bhosale Dan Bikel Lukas Blecher Cristian Ferrer Moya Chen Guillem Cucurull David Esiobu Jude Fernandes Jeremy Fu Wenyin Fu Brian Fuller Cynthia Gao Vedanuj Goswami Naman Goyal Anthony Hartshorn Saghar Hosseini Rui Hou Hakan Inan Marcin Kardas Viktor Kerkez Madian Khabsa Isabel Kloumann Artem Korenev Punit Koura MarieAnne Lachaux Thibaut Lavril Jenya Lee Diana Liskovich Yinghai Lu Yuning Mao Xavier Martinet Todor Mihaylov Pushkar Mishra Igor Molybog Yixin Nie Andrew Poulton Jeremy Reizenstein Rashi Rungta Kalyan Saladi Alan Schelten Ruan Silva Eric Smith Ranjan Subramanian Xiaoqing Ellen Binh Tang Ross Taylor Adina Williams Jian Xiang Puxin Xu Zheng Yan Iliyan Zarov Yuchen Zhang Angela Fan Melanie Kambadur Sharan Narang Aurelien Rodriguez Robert Stojnic Sergey Edunov and Thomas Scialom. 2023. Llama 2: Open foundation and fine-tuned chat models. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2307.09288 (2023)."},{"key":"e_1_3_3_1_79_2","unstructured":"Ashish Vaswani Noam Shazeer Niki Parmar Jakob Uszkoreit Llion Jones Aidan\u00a0N Gomez \u0141ukasz Kaiser and Illia Polosukhin. 2017. Attention is all you need. Advances in neural information processing systems 30 (2017)."},{"key":"e_1_3_3_1_80_2","doi-asserted-by":"publisher","DOI":"10.1145\/3079856.3080244"},{"key":"e_1_3_3_1_81_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICCAD45719.2019.8942127"},{"key":"e_1_3_3_1_82_2","doi-asserted-by":"crossref","unstructured":"Huizheng Wang Qize Yang Taiquan Wei Xingmao Yu Chengran Li Jiahao Fang Guangyang Lu Xu Dai Liang Liu Shenfei Jiang Yang Hu Shouyi Yin and Shaojun Wei. 2024. TMAC: Training-targeted Mapping and Architecture Co-exploration for Wafer-scale Chips. Integrated Circuits and Systems (2024).","DOI":"10.23919\/ICS.2024.3515003"},{"key":"e_1_3_3_1_83_2","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO61859.2024.00068"},{"key":"e_1_3_3_1_84_2","first-page":"283","volume-title":"2023 IEEE International Symposium on Performance Analysis of Systems and Software, ISPASS 2023","author":"Won William","year":"2023","unstructured":"William Won, Taekyung Heo, Saeed Rashidi, Srinivas Sridharan, Sudarshan Srinivasan, and Tushar Krishna. 2023. Astra-sim2. 0: Modeling hierarchical networks and disaggregated systems for large-model training at scale. In 2023 IEEE International Symposium on Performance Analysis of Systems and Software, ISPASS 2023. IEEE, 283\u2013294."},{"key":"e_1_3_3_1_85_2","unstructured":"Bingyang Wu Shengyu Liu Yinmin Zhong Peng Sun Xuanzhe Liu and Xin Jin. 2024. LoongServe: Efficiently Serving Long-context Large Language Models with Elastic Sequence Parallelism. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2404.09526 (2024)."},{"key":"e_1_3_3_1_86_2","unstructured":"Bingyang Wu Yinmin Zhong Zili Zhang Shengyu Liu Fangyue Liu Yuanhang Sun Gang Huang Xuanzhe Liu and Xin Jin. 2023. Fast distributed inference serving for large language models. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2305.05920 (2023)."},{"key":"e_1_3_3_1_87_2","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA52012.2021.00086"},{"key":"e_1_3_3_1_88_2","unstructured":"Yuanzhong Xu HyoukJoong Lee Dehao Chen Hongjun Choi Blake Hechtman and Shibo Wang. 2020. Automatic cross-replica sharding of weight update in data-parallel training. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2004.13336 (2020)."},{"key":"e_1_3_3_1_89_2","unstructured":"Yuanzhong Xu HyoukJoong Lee Dehao Chen Blake Hechtman Yanping Huang Rahul Joshi Maxim Krikun Dmitry Lepikhin Andy Ly Marcello Maggioni Ruoming Pang Noam Shazeer Shibo Wang Tao Wang Yonghui Wu and Zhifeng Chen. 2021. Gspmd: general and scalable parallelization for ml computation graphs. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2105.04663 (2021)."},{"key":"e_1_3_3_1_90_2","doi-asserted-by":"publisher","DOI":"10.1145\/3649329.3655683"},{"key":"e_1_3_3_1_91_2","doi-asserted-by":"publisher","DOI":"10.1145\/3373376.3378514"},{"key":"e_1_3_3_1_92_2","first-page":"521","volume-title":"16th USENIX Symposium on Operating Systems Design and Implementation, OSDI 22","author":"Yu Gyeong-In","year":"2022","unstructured":"Gyeong-In Yu, Joo\u00a0Seong Jeong, Geon-Woo Kim, Soojeong Kim, and Byung-Gon Chun. 2022. Orca: A distributed serving system for Transformer-Based generative models. In 16th USENIX Symposium on Operating Systems Design and Implementation, OSDI 22. 521\u2013538."},{"key":"e_1_3_3_1_93_2","unstructured":"Jinhui Yuan Xinqi Li Cheng Cheng Juncheng Liu Ran Guo Shenghang Cai Chi Yao Fei Yang Xiaodong Yi Chuan Wu H Zhang and Zhao J. 2021. Oneflow: Redesign the distributed deep learning framework from scratch. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2110.15032 (2021)."},{"key":"e_1_3_3_1_94_2","unstructured":"Hao Zhang Yuan Li Zhijie Deng Xiaodan Liang Lawrence Carin and Eric Xing. 2020. Autosync: Learning to synchronize for data-parallel distributed deep learning. Advances in Neural Information Processing Systems 33 (2020) 906\u2013917."},{"key":"e_1_3_3_1_95_2","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA59077.2024.00082"},{"key":"e_1_3_3_1_96_2","doi-asserted-by":"crossref","unstructured":"Min Zhang Fei Qin Si Chen Yanwei Dai Pei Chen and Tong An. 2022. Protrusion of through-silicon-via (TSV) copper with double annealing processes. Journal of Electronic Materials 51 5 (2022) 2433\u20132449.","DOI":"10.1007\/s11664-022-09503-z"},{"key":"e_1_3_3_1_97_2","unstructured":"Susan Zhang Stephen Roller Naman Goyal Mikel Artetxe Moya Chen Shuohui Chen Christopher Dewan Mona Diab Xian Li Xi Lin Todor Mihaylov Myle Ott Sam Shleifer Kurt Shuster Daniel Simig Punit Koura Anjali Sridhar Tianlu Wang and Luke Zettlemoyer. 2022. Opt: Open pre-trained transformer language models. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2205.01068 (2022)."},{"key":"e_1_3_3_1_98_2","unstructured":"Zhen Zhang Shuai Zheng Yida Wang Justin Chiu George Karypis Trishul Chilimbi Mu Li and Xin Jin. 2022. MiCS: near-linear scaling for training gigantic model on public cloud. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2205.00119 (2022)."},{"key":"e_1_3_3_1_99_2","unstructured":"Xiaoyan Zhao Jiale Zhang Junna Zhang Peiyan Yuan Hu Jin and Xiangyang Li. 2023. CooCo: A Collaborative Offloading and Resource Configuration Algorithm in Edge Networks. IEEE Internet of Things Journal (2023)."},{"key":"e_1_3_3_1_100_2","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA53966.2022.00042"},{"key":"e_1_3_3_1_101_2","unstructured":"Yinmin Zhong Shengyu Liu Junda Chen Jianbo Hu Yibo Zhu Xuanzhe Liu Xin Jin and Hao Zhang. 2024. Distserve: Disaggregating prefill and decoding for goodput-optimized large language model serving. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2401.09670 (2024)."},{"key":"e_1_3_3_1_102_2","unstructured":"Jingchen Zhu Chenhao Xue Yiqi Chen Zhao Wang and Guangyu Sun. 2024. Theseus: Towards High-Efficiency Wafer-Scale Chip Design Space Exploration for Large Language Models. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2407.02079 (2024)."}],"event":{"name":"ISCA '25: Proceedings of the 52nd Annual International Symposium on Computer Architecture","location":"Tokyo Japan","acronym":"SIGARCH '25","sponsor":["SIGARCH ACM Special Interest Group on Computer Architecture"]},"container-title":["Proceedings of the 52nd Annual International Symposium on Computer Architecture"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3695053.3731101","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,21]],"date-time":"2025-06-21T11:03:10Z","timestamp":1750503790000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3695053.3731101"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,6,20]]},"references-count":101,"alternative-id":["10.1145\/3695053.3731101","10.1145\/3695053"],"URL":"https:\/\/doi.org\/10.1145\/3695053.3731101","relation":{},"subject":[],"published":{"date-parts":[[2025,6,20]]},"assertion":[{"value":"2025-06-20","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}