{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,15]],"date-time":"2026-03-15T15:31:43Z","timestamp":1773588703679,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":99,"publisher":"ACM","funder":[{"name":"National Key R&D Program of China","award":["2024YFB4505501"],"award-info":[{"award-number":["2024YFB4505501"]}]},{"name":"NSF of China","award":["62341411"],"award-info":[{"award-number":["62341411"]}]},{"name":"NSF of China","award":["62525203"],"award-info":[{"award-number":["62525203"]}]},{"name":"NSF of China","award":["62222214"],"award-info":[{"award-number":["62222214"]}]},{"name":"NSF of China","award":["U22A2028"],"award-info":[{"award-number":["U22A2028"]}]},{"name":"NSF of China","award":["62302478"],"award-info":[{"award-number":["62302478"]}]},{"name":"Strategic Priority Research Program of the Chinese Academy of Sciences","award":["XDB0660200"],"award-info":[{"award-number":["XDB0660200"]}]},{"name":"Strategic Priority Research Program of the Chinese Academy of Sciences","award":["XDB0660201"],"award-info":[{"award-number":["XDB0660201"]}]},{"name":"Strategic Priority Research Program of the Chinese Academy of Sciences","award":["XDB0660202"],"award-info":[{"award-number":["XDB0660202"]}]},{"name":"CAS Project for Young Scientists in Basic Research","award":["YSBR-029"],"award-info":[{"award-number":["YSBR-029"]}]},{"name":"Youth Innovation Promotion Association 
CAS","award":["N\/A"],"award-info":[{"award-number":["N\/A"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2026,3,22]]},"DOI":"10.1145\/3779212.3790169","type":"proceedings-article","created":{"date-parts":[[2026,3,10]],"date-time":"2026-03-10T13:55:26Z","timestamp":1773150926000},"page":"876-895","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Hardwired-Neuron Language Processing Units as General-Purpose Cognitive Substrates"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0000-2285-534X","authenticated-orcid":false,"given":"Yang","family":"Liu","sequence":"first","affiliation":[{"name":"State Key Lab of Processors, Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China and University of Chinese Academy of Sciences, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-8556-8856","authenticated-orcid":false,"given":"Yi","family":"Chen","sequence":"additional","affiliation":[{"name":"State Key Lab of Processors, Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China and University of Science and Technology of China, Hefei, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5503-4457","authenticated-orcid":false,"given":"Yongwei","family":"Zhao","sequence":"additional","affiliation":[{"name":"State Key Lab of Processors, Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-9823-2573","authenticated-orcid":false,"given":"Yifan","family":"Hao","sequence":"additional","affiliation":[{"name":"State Key Lab of Processors, Institute of Computing Technology, Chinese Academy of Sciences, Beijing, 
China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-2510-3034","authenticated-orcid":false,"given":"Zifu","family":"Zheng","sequence":"additional","affiliation":[{"name":"State Key Lab of Processors, Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China and University of Chinese Academy of Sciences, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0000-8555-4958","authenticated-orcid":false,"given":"Weihao","family":"Kong","sequence":"additional","affiliation":[{"name":"State Key Lab of Processors, Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China and University of Chinese Academy of Sciences, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-2862-8434","authenticated-orcid":false,"given":"Zhangmai","family":"Li","sequence":"additional","affiliation":[{"name":"State Key Lab of Processors, Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China and University of Science and Technology of China, Hefei, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-6069-8876","authenticated-orcid":false,"given":"Dongchen","family":"Jiang","sequence":"additional","affiliation":[{"name":"State Key Lab of Processors, Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China and University of Chinese Academy of Sciences, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-4235-0893","authenticated-orcid":false,"given":"Ruiyang","family":"Xia","sequence":"additional","affiliation":[{"name":"State Key Lab of Processors, Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China and University of Chinese Academy of Sciences, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-1568-4687","authenticated-orcid":false,"given":"Zhihong","family":"Ma","sequence":"additional","affiliation":[{"name":"State Key Lab of Processors, Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China and University of Chinese Academy of 
Sciences, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-6701-9578","authenticated-orcid":false,"given":"Zisheng","family":"Liu","sequence":"additional","affiliation":[{"name":"State Key Lab of Processors, Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China and University of Chinese Academy of Sciences, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-2395-1308","authenticated-orcid":false,"given":"Zhaoyong","family":"Wan","sequence":"additional","affiliation":[{"name":"State Key Lab of Processors, Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China and University of Science and Technology of China, Hefei, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-5736-5787","authenticated-orcid":false,"given":"Yunqi","family":"Lu","sequence":"additional","affiliation":[{"name":"State Key Lab of Processors, Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China and University of Chinese Academy of Sciences, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-1457-3672","authenticated-orcid":false,"given":"Ximing","family":"Liu","sequence":"additional","affiliation":[{"name":"State Key Lab of Processors, Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China and University of Chinese Academy of Sciences, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5757-0416","authenticated-orcid":false,"given":"Hongrui","family":"Guo","sequence":"additional","affiliation":[{"name":"State Key Lab of Processors, Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China and University of Chinese Academy of Sciences, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-9282-6431","authenticated-orcid":false,"given":"Zhihao","family":"Yang","sequence":"additional","affiliation":[{"name":"Institute of Software, CAS, Beijing, China and University of Chinese Academy of Sciences, Beijing, 
China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-7990-0055","authenticated-orcid":false,"given":"Zhe","family":"Wang","sequence":"additional","affiliation":[{"name":"State Key Lab of Processors, Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China and University of Chinese Academy of Sciences, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0894-6653","authenticated-orcid":false,"given":"Tianrui","family":"Ma","sequence":"additional","affiliation":[{"name":"State Key Lab of Processors, Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0990-1190","authenticated-orcid":false,"given":"Mo","family":"Zou","sequence":"additional","affiliation":[{"name":"State Key Lab of Processors, Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8691-8549","authenticated-orcid":false,"given":"Rui","family":"Zhang","sequence":"additional","affiliation":[{"name":"State Key Lab of Processors, Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8877-9052","authenticated-orcid":false,"given":"Ling","family":"Li","sequence":"additional","affiliation":[{"name":"Institute of Software, CAS, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9979-0561","authenticated-orcid":false,"given":"Xing","family":"Hu","sequence":"additional","affiliation":[{"name":"State Key Lab of Processors, Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7603-4210","authenticated-orcid":false,"given":"Zidong","family":"Du","sequence":"additional","affiliation":[{"name":"State Key Lab of Processors, Institute of Computing Technology, Chinese Academy of Sciences, Beijing, 
China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1480-7265","authenticated-orcid":false,"given":"Zhiwei","family":"Xu","sequence":"additional","affiliation":[{"name":"State Key Lab of Processors, Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2530-5874","authenticated-orcid":false,"given":"Qi","family":"Guo","sequence":"additional","affiliation":[{"name":"State Key Lab of Processors, Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7601-0753","authenticated-orcid":false,"given":"Tianshi","family":"Chen","sequence":"additional","affiliation":[{"name":"Cambricon Technologies, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3925-5185","authenticated-orcid":false,"given":"Yunji","family":"Chen","sequence":"additional","affiliation":[{"name":"State Key Lab of Processors, Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China and University of Chinese Academy of Sciences, Beijing, China"}]}],"member":"320","published-online":{"date-parts":[[2026,3,22]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1145\/3470496.3527405"},{"key":"e_1_3_2_1_2_1","first-page":"145","article-title":"Think fast: A tensor streaming processor (TSP) for accelerating deep learning workloads. In 2020 ACM\/IEEE 47th Annual Int'l Symp. on Computer Architecture (ISCA)","author":"Abts Dennis","year":"2020","unstructured":"Dennis Abts, Jonathan Ross, Jonathan Sparling, Mark Wong-VanHaren, Max Baker, Tom Hawkins, Andrew Bell, John Thompson, Temesghen Kahsai, Garrin Kimmell, et al., 2020. Think fast: A tensor streaming processor (TSP) for accelerating deep learning workloads. In 2020 ACM\/IEEE 47th Annual Int'l Symp. on Computer Architecture (ISCA). 
IEEE, 145-158.","journal-title":"IEEE"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1038\/scientificamerican0387-88"},{"key":"e_1_3_2_1_4_1","unstructured":"AnySilicon. 2026. The Economics of ASIC. https:\/\/anysilicon.com\/the-economics-of-asic\/."},{"key":"e_1_3_2_1_5_1","unstructured":"Tom Brown Benjamin Mann Nick Ryder Melanie Subbiah Jared D Kaplan Prafulla Dhariwal Arvind Neelakantan Pranav Shyam Girish Sastry Amanda Askell et al. 2020. Language models are few-shot learners. Advances in neural information processing systems Vol. 33 (2020) 1877-1901."},{"key":"e_1_3_2_1_6_1","first-page":"533","article-title":"Soma: Identifying, exploring, and understanding the dram communication scheduling space for dnn accelerators. In 2025 IEEE Int'l Symp. on High Performance Computer Architecture (HPCA)","author":"Cai Jingwei","year":"2025","unstructured":"Jingwei Cai, Xuan Wang, Mingyu Gao, Sen Peng, Zijian Zhu, Yuchen Wei, Zuotong Wu, and Kaisheng Ma. 2025. Soma: Identifying, exploring, and understanding the dram communication scheduling space for dnn accelerators. In 2025 IEEE Int'l Symp. on High Performance Computer Architecture (HPCA). IEEE, 533-548.","journal-title":"IEEE"},{"key":"e_1_3_2_1_7_1","unstructured":"Center for Security and Emerging Technologies (CSET). 2020. Analysts believe that a single TSMC 5nm wafer costs $17 000. https:\/\/cset.georgetown.edu\/article\/analysts-believe-that-a-single-tsmc-5nm-wafer-costs-17000\/."},{"key":"e_1_3_2_1_8_1","unstructured":"Cerebras. 2024a. Cerebras Inference. https:\/\/www.cerebras.ai\/inference"},{"key":"e_1_3_2_1_9_1","unstructured":"Cerebras. 2024b. Cerebras System. https:\/\/www.cerebras.ai\/system"},{"key":"e_1_3_2_1_10_1","unstructured":"Srijan Chakraborty. 2024. HASICs: Investigating hyperspecialized ASICs for neural network inference. M.S. thesis. University of Illinois at Urbana-Champaign. 
https:\/\/hdl.handle.net\/2142\/125737"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1145\/2654822.2541967"},{"key":"e_1_3_2_1_12_1","first-page":"609","article-title":"Dadiannao: A machine-learning supercomputer. In 2014 47th Annual IEEE\/ACM Int'l Symp. on Microarchitecture","author":"Chen Yunji","year":"2014","unstructured":"Yunji Chen, Tao Luo, Shaoli Liu, Shijin Zhang, Liqiang He, Jia Wang, Ling Li, Tianshi Chen, Zhiwei Xu, Ninghui Sun, et al., 2014b. Dadiannao: A machine-learning supercomputer. In 2014 47th Annual IEEE\/ACM Int'l Symp. on Microarchitecture. IEEE, 609-622.","journal-title":"IEEE"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO61859.2024.00047"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1145\/3669940.3707223"},{"key":"e_1_3_2_1_15_1","unstructured":"Clarifai. 2025. NVIDIA B200 Vs. H100: Choosing The Right GPU For Your AI Workloads. https:\/\/www.clarifai.com\/blog\/nvidia-b200-vs-h100"},{"key":"e_1_3_2_1_16_1","unstructured":"Colfax International. 2024. NVIDIA ConnectX-7 InfiniBand Adapter Pricing. https:\/\/www.colfaxdirect.com\/store\/pc\/viewPrd.asp?idproduct=4072."},{"key":"e_1_3_2_1_17_1","volume-title":"Data Center Development Cost Guide","author":"Wakefield Cushman","year":"2025","unstructured":"Cushman & Wakefield. 2025. Data Center Development Cost Guide 2025. https:\/\/cushwake.cld.bz\/Data-Center-Development-Cost-Guide-2025\/8-9\/."},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1145\/3579371.3589351"},{"key":"e_1_3_2_1_19_1","unstructured":"Jacqueline Davis. 2024. Large data centers are mostly more efficient analysis confirms. https:\/\/journal.uptimeinstitute.com\/large-data-centers-are-mostly-more-efficient-analysis-confirms\/."},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1016\/0026-2692(95)98945-N"},{"key":"e_1_3_2_1_21_1","unstructured":"Depend Electronics. 2024. HBM Market Insight. 
https:\/\/depend-ele.com\/hbm-market-insight-2."},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1145\/2749469.2750389"},{"key":"e_1_3_2_1_23_1","unstructured":"Abhimanyu Dubey Abhinav Jauhri Abhinav Pandey Abhishek Kadian Ahmad Al-Dahle Aiesha Letman Akhil Mathur Alan Schelten Amy Yang Angela Fan et al. 2024. The llama 3 herd of models. arXiv e-prints (2024) arXiv-2407."},{"key":"e_1_3_2_1_24_1","unstructured":"EEWorld. 2025. SK Hynix's HBM4 price is revealed! 60% higher than HBM3E. https:\/\/en.eeworld.com.cn\/news\/manufacture\/eic703292.html."},{"key":"e_1_3_2_1_25_1","volume-title":"Proceedings of the 2024 USENIX Conf. on Usenix Annual Technical Conf. (USENIX ATC'24). USENIX Association, Article 45","author":"Feng Yinxiao","year":"2024","unstructured":"Yinxiao Feng, Yuchen Wei, Dong Xiang, and Kaisheng Ma. 2024. Evaluating chiplet-based large-scale interconnection networks via cycle-accurate packet-parallel simulation. In Proceedings of the 2024 USENIX Conf. on Usenix Annual Technical Conf. (USENIX ATC'24). USENIX Association, Article 45, 17 pages."},{"key":"e_1_3_2_1_26_1","unstructured":"FS.com. 2024a. InfiniBand Optical Transceivers. https:\/\/www.fs.com\/products\/354385.html."},{"key":"e_1_3_2_1_27_1","unstructured":"FS.com. 2024b. QM9700\/SN4600-Class InfiniBand Switch. https:\/\/www.fs.com\/products\/194710.html."},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1145\/3695053.3731100"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1145\/3613424.3614312"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1109\/EPTC62800.2024.10909776"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1109\/2.30"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1145\/3676641.3716267"},{"key":"e_1_3_2_1_33_1","volume-title":"Cambricon-U: A Systolic Random Increment Memory Architecture for Unary Computing. In 2023 56th IEEE\/ACM Int'l Symp. on Microarchitecture (MICRO). 
424-437","author":"Guo Hongrui","unstructured":"Hongrui Guo, Yongwei Zhao, Zhangmai Li, Yifan Hao, Chang Liu, Xinkai Song, Xiaqing Li, Zidong Du, Rui Zhang, Qi Guo, Tianshi Chen, and Zhiwei Xu. 2023. Cambricon-U: A Systolic Random Increment Memory Architecture for Unary Computing. In 2023 56th IEEE\/ACM Int'l Symp. on Microarchitecture (MICRO). 424-437."},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1145\/3470496.3527408"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1145\/3676641.3716009"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1145\/3620666.3651380"},{"key":"e_1_3_2_1_37_1","volume-title":"Two Paths to Intelligence. Public Lecture","author":"Hinton Geoffrey","unstructured":"Geoffrey Hinton. 2023. Two Paths to Intelligence. Public Lecture, University of Cambridge (2023)."},{"key":"e_1_3_2_1_38_1","volume-title":"Computer Arithmetic: Principles, Architecture and Design","author":"Hwang Kai","year":"1979","unstructured":"Kai Hwang. 1979. Computer Arithmetic: Principles, Architecture and Design. John Wiley & Sons, Inc., USA."},{"key":"e_1_3_2_1_39_1","unstructured":"Intel. 2020. Intel eASIC N5X Product Brief (PB-006). https:\/\/www.intel.com\/content\/www\/us\/en\/content-details\/633246\/intel-easic-n5x-product-brief-pb-006.html."},{"key":"e_1_3_2_1_40_1","unstructured":"Scotten Jones. 2020. LithoVision - Economics in the 3D Era. SemiWiki. https:\/\/semiwiki.com\/semiconductor-services\/techinsights\/283426-lithovision-economics-in-the-3d-era\/"},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2016.7783722"},{"key":"e_1_3_2_1_42_1","unstructured":"Bryan Kasprowicz. 2017. EUV Mask Technology and Economics: Impact of Mask Costs on Patterning Strategy. Presentation at the 2017 Int'l Workshop on EUV Lithography. 
https:\/\/www.euvlitho.com\/2017\/P33.pdf"},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.51593\/20190014"},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","unstructured":"Sangyeob Kim Sangjin Kim Wooyoung Jo Soyeon Kim Seongyon Hong and Hoi-Jun Yoo. 2024. C-Transformer: A 2.6-18.1uJ\/Token Homogeneous DNN-Transformer\/Spiking-Transformer Processor with Big-Little Network and Implicit Weight Generation for Large Language Models. In 2024 IEEE Int'l Solid-State Circuits Conf. (ISSCC) Vol. 67. 368-370. doi:10.1109\/ISSCC49657.2024.10454330","DOI":"10.1109\/ISSCC49657.2024.10454330"},{"key":"e_1_3_2_1_45_1","volume-title":"A Simple Model for Determining True Total Cost of Ownership for Data Centers. White Paper TUI3011B","author":"Koomey Jonathan","unstructured":"Jonathan Koomey, Kenneth Brill, Pitt Turner, John Stanley, and Bruce Taylor. 2007. A Simple Model for Determining True Total Cost of Ownership for Data Centers. White Paper TUI3011B. Uptime Institute. https:\/\/datacenters.lbl.gov\/sites\/default\/files\/%28TUI3011B%29SimpleModelDetermingTrueTCO.pdf"},{"key":"e_1_3_2_1_46_1","unstructured":"Yudhishthira Kundu Manroop Kaur Tripty Wig Kriti Kumar Pushpanjali Kumari Vivek Puri and Manish Arora. 2025. A Comparison of the Cerebras Wafer-Scale Integration Technology with Nvidia GPU-based Systems for Artificial Intelligence. arXiv:2503.11698 [cs.AR]"},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.1145\/3620665.3640356"},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"publisher","DOI":"10.1109\/MM.2021.3112025"},{"key":"e_1_3_2_1_49_1","unstructured":"Yaniv Leviathan Matan Kalman and Yossi Matias. 2023. Fast Inference from Transformers via Speculative Decoding. arXiv:2211.17192 [cs.LG] https:\/\/arxiv.org\/abs\/2211.17192"},{"key":"e_1_3_2_1_50_1","volume-title":"Ecoserve: Designing carbon-aware ai inference systems. 
arXiv preprint arXiv:2502.05043","author":"Li Yueying","year":"2025","unstructured":"Yueying Li, Zhanqiu Hu, Esha Choukse, Rodrigo Fonseca, G Edward Suh, and Udit Gupta. 2025. Ecoserve: Designing carbon-aware ai inference systems. arXiv preprint arXiv:2502.05043 (2025)."},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"publisher","DOI":"10.1109\/MM.2023.3256384"},{"key":"e_1_3_2_1_52_1","unstructured":"Aixin Liu Bei Feng Bing Xue Bingxuan Wang Bochao Wu Chengda Lu Chenggang Zhao Chengqi Deng Chenyu Zhang Chong Ruan et al. 2024. Deepseek-v3 technical report. arXiv preprint arXiv:2412.19437 (2024)."},{"key":"e_1_3_2_1_53_1","doi-asserted-by":"publisher","DOI":"10.1145\/3695053.3731041"},{"key":"e_1_3_2_1_54_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA.2016.42"},{"key":"e_1_3_2_1_55_1","first-page":"1496","article-title":"VQ-LLM: High-performance Code Generation for Vector Quantization Augmented LLM Inference. In 2025 IEEE Int'l Symp. on High Performance Computer Architecture (HPCA)","author":"Liu Zihan","year":"2025","unstructured":"Zihan Liu, Xinhao Luo, Junxian Guo, Wentao Ni, Yangjie Zhou, Yue Guan, Cong Guo, Weihao Cui, Yu Feng, Minyi Guo, et al., 2025a. VQ-LLM: High-performance Code Generation for Vector Quantization Augmented LLM Inference. In 2025 IEEE Int'l Symp. on High Performance Computer Architecture (HPCA). IEEE, 1496-1509.","journal-title":"IEEE"},{"key":"e_1_3_2_1_56_1","doi-asserted-by":"publisher","DOI":"10.1109\/HOTCHIPS.2008.7476551"},{"key":"e_1_3_2_1_57_1","doi-asserted-by":"publisher","DOI":"10.1145\/3669940.3707215"},{"key":"e_1_3_2_1_58_1","unstructured":"Sebastian Moss. 2025. Cerebras unveils four trillion-transistor giant chip. https:\/\/www.datacenterdynamics.com\/en\/news\/cerebras-unveils-four-trillion-transistor-giant-chip-targets-generative-ai\/"},{"key":"e_1_3_2_1_59_1","first-page":"73","article-title":"Printed machine learning classifiers. In 2020 53rd Annual IEEE\/ACM Int'l Symp. 
on Microarchitecture (MICRO)","author":"Mubarik Muhammad Husnain","year":"2020","unstructured":"Muhammad Husnain Mubarik, Dennis D Weller, Nathaniel Bleier, Matthew Tomei, Jasmin Aghassi-Hagmann, Mehdi B Tahoori, and Rakesh Kumar. 2020. Printed machine learning classifiers. In 2020 53rd Annual IEEE\/ACM Int'l Symp. on Microarchitecture (MICRO). IEEE, 73-87.","journal-title":"IEEE"},{"key":"e_1_3_2_1_60_1","unstructured":"Daniel Nenni. 2025. TSMC N5 Process Technology Wiki. https:\/\/semiwiki.com\/wikis\/industry-wikis\/tsmc-n5-process-technology-5nm-wiki\/."},{"key":"e_1_3_2_1_61_1","unstructured":"NVIDIA. 2024. NVIDIA AI Enterprise Licensing Guide. https:\/\/docs.nvidia.com\/ai-enterprise\/planning-resource\/licensing-guide\/latest\/pricing.html."},{"key":"e_1_3_2_1_62_1","unstructured":"US Department of Energy. 2024. DOE Releases New Report Evaluating Increase in Electricity Demand from Data Centers. (2024). https:\/\/www.energy.gov\/articles\/doe-releases-new-report-evaluating-increase-electricity-demand-data-centers"},{"key":"e_1_3_2_1_63_1","unstructured":"OpenAI. 2024a. OpenAI and NVIDIA Announce Systems Partnership. https:\/\/openai.com\/index\/openai-nvidia-systems-partnership\/."},{"key":"e_1_3_2_1_64_1","unstructured":"OpenAI. 2024b. OpenAI DevDay. https:\/\/openai.com\/devday\/."},{"key":"e_1_3_2_1_65_1","unstructured":"OpenAI. 2025. Introducing GPT-OSS. https:\/\/openai.com\/index\/introducing-gpt-oss\/."},{"key":"e_1_3_2_1_66_1","doi-asserted-by":"publisher","DOI":"10.1038\/s41467-023-36104-z"},{"key":"e_1_3_2_1_67_1","doi-asserted-by":"publisher","DOI":"10.1038\/s41928-020-0437-5"},{"key":"e_1_3_2_1_68_1","doi-asserted-by":"publisher","DOI":"10.4071\/isom-2013-WA11"},{"key":"e_1_3_2_1_69_1","unstructured":"Dylan Patel. 2022. The Dark Side Of The Semiconductor Design Renaissance \u2013 Fixed Costs Soaring Due To Photomask Sets Verification and Validation. 
https:\/\/newsletter.semianalysis.com\/p\/the-dark-side-of-the-semiconductor"},{"key":"e_1_3_2_1_70_1","unstructured":"Dylan Patel and Gerald Wong. 2023. AI Server Cost Analysis - Memory Is The Biggest Loser. https:\/\/newsletter.semianalysis.com\/p\/ai-server-cost-analysis-memory-is."},{"key":"e_1_3_2_1_71_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISSCC49661.2025.10904774"},{"key":"e_1_3_2_1_72_1","volume-title":"n.d","author":"Rasmussen Neil","unstructured":"Neil Rasmussen. n.d.. Determining Total Cost of Ownership for Data Center and Network Room Infrastructure. White Paper 6 (Revision 4). APC by Schneider Electric. https:\/\/www.zones.com\/images\/pdf\/apc_infrastruxure02_wp.pdf"},{"key":"e_1_3_2_1_73_1","unstructured":"Router-Switch.com. 2024. Supermicro GPU Server Solutions. https:\/\/www.router-switch.com\/supermicro-servers.html."},{"key":"e_1_3_2_1_74_1","doi-asserted-by":"publisher","DOI":"10.1145\/3470496.3533040"},{"key":"e_1_3_2_1_75_1","unstructured":"Glen Scheid. 2023. Economics of Mask. https:\/\/www.ebeam.org\/docs\/economics-of-mask.pdf."},{"key":"e_1_3_2_1_76_1","doi-asserted-by":"publisher","DOI":"10.1109\/VLSITechnologyandCir46769.2022.9830194"},{"key":"e_1_3_2_1_77_1","doi-asserted-by":"publisher","DOI":"10.1145\/3676641.3716025"},{"key":"e_1_3_2_1_78_1","unstructured":"Kimi Team Yifan Bai Yiping Bao Guanduo Chen Jiahao Chen Ningxin Chen Ruijue Chen Yanru Chen Yuankun Chen Yutian Chen et al. 2025. Kimi k2: Open agentic intelligence. arXiv preprint arXiv:2507.20534 (2025)."},{"key":"e_1_3_2_1_79_1","unstructured":"Qwen Team. 2025. QwQ-32B: Embracing the Power of Reinforcement Learning. https:\/\/qwenlm.github.io\/blog\/qwq-32b\/."},{"key":"e_1_3_2_1_80_1","unstructured":"Thinkmate. 2024. SuperServer 821GE-TNHR System Specifications. https:\/\/www.thinkmate.com\/system\/superserver-821ge-tnhr."},{"key":"e_1_3_2_1_81_1","unstructured":"U.S. Energy Information Administration. 2024. 
Electric Power Monthly: Average Retail Price of Electricity. https:\/\/www.eia.gov\/electricity\/monthly\/epm_table_grapher.php?t=epmt_5_6_a."},{"key":"e_1_3_2_1_82_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.mejo.2022.105481"},{"key":"e_1_3_2_1_83_1","volume-title":"\u0141ukasz Kaiser, and Illia Polosukhin","author":"Vaswani Ashish","year":"2017","unstructured":"Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N Gomez, \u0141ukasz Kaiser, and Illia Polosukhin. 2017. Attention is all you need. Advances in neural information processing systems, Vol. 30 (2017)."},{"key":"e_1_3_2_1_84_1","first-page":"1247","article-title":"SOFA: A compute-memory optimized sparsity accelerator via cross-stage coordinated tiling. In 2024 57th IEEE\/ACM Int'l Symp. on Microarchitecture (MICRO)","author":"Wang Huizheng","year":"2024","unstructured":"Huizheng Wang, Jiahao Fang, Xinru Tang, Zhiheng Yue, Jinxi Li, Yubin Qin, Sihan Guan, Qinze Yang, Yang Wang, Chao Li, et al., 2024. SOFA: A compute-memory optimized sparsity accelerator via cross-stage coordinated tiling. In 2024 57th IEEE\/ACM Int'l Symp. on Microarchitecture (MICRO). IEEE, 1247-1263.","journal-title":"IEEE"},{"key":"e_1_3_2_1_85_1","unstructured":"James Wang. 2024. Cerebras CS-3: the world's fastest and most scalable AI accelerator. https:\/\/www.cerebras.ai\/blog\/cerebras-cs3"},{"key":"e_1_3_2_1_86_1","first-page":"1","article-title":"A 28nm 27.5 TOPS\/W approximate-computing-based transformer processor with asymptotic sparsity speculating and out-of-order computing. In 2022 IEEE Int'l solid-state circuits Conf. (ISSCC), Vol. 65","author":"Wang Yang","year":"2022","unstructured":"Yang Wang, Yubin Qin, Dazheng Deng, Jingchuan Wei, Yang Zhou, Yuanqi Fan, Tianbao Chen, Hao Sun, Leibo Liu, Shaojun Wei, et al., 2022. A 28nm 27.5 TOPS\/W approximate-computing-based transformer processor with asymptotic sparsity speculating and out-of-order computing. In 2022 IEEE Int'l solid-state circuits Conf. 
(ISSCC), Vol. 65. IEEE, 1-3.","journal-title":"IEEE"},{"key":"e_1_3_2_1_87_1","unstructured":"Jason Wei Yi Tay Rishi Bommasani Colin Raffel Barret Zoph Sebastian Borgeaud Dani Yogatama Maarten Bosma Denny Zhou Donald Metzler et al. 2022. Emergent abilities of large language models. arXiv preprint arXiv:2206.07682 (2022)."},{"key":"e_1_3_2_1_88_1","volume-title":"Automation & Test in Europe Conf. & Exhibition (DATE). IEEE, 914-919","author":"Weller Dennis D","year":"2021","unstructured":"Dennis D Weller, Nathaniel Bleier, Michael Hefenbrock, Jasmin Aghassi-Hagmann, Michael Beigl, Rakesh Kumar, and Mehdi B Tahoori. 2021. Printed stochastic computing neural networks. In 2021 Design, Automation & Test in Europe Conf. & Exhibition (DATE). IEEE, 914-919."},{"key":"e_1_3_2_1_89_1","unstructured":"Wikipedia contributors. 2025. Etched.ai - Sohu (transformer ASIC). https:\/\/en.wikipedia.org\/wiki\/Etched.ai."},{"key":"e_1_3_2_1_90_1","doi-asserted-by":"publisher","DOI":"10.1126\/science.adl1203"},{"key":"e_1_3_2_1_91_1","first-page":"521","volume-title":"16th USENIX Symp. on Operating Systems Design and Implementation (OSDI 22)","author":"Yu Gyeong-In","year":"2022","unstructured":"Gyeong-In Yu, Joo Seong Jeong, Geon-Woo Kim, Soojeong Kim, and Byung-Gon Chun. 2022. Orca: A distributed serving system for Transformer-Based generative models. In 16th USENIX Symp. on Operating Systems Design and Implementation (OSDI 22). 
521-538."},{"key":"e_1_3_2_1_92_1","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO61859.2024.00108"},{"key":"e_1_3_2_1_93_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISSCC49661.2025.10904702"},{"key":"e_1_3_2_1_94_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCD.2003.1240929"},{"key":"e_1_3_2_1_95_1","doi-asserted-by":"publisher","DOI":"10.1145\/3695053.3731412"},{"key":"e_1_3_2_1_96_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11432-021-3596-x"},{"key":"e_1_3_2_1_97_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA52012.2021.00061"},{"key":"e_1_3_2_1_98_1","first-page":"1005","article-title":"Alisa: Accelerating large language model inference via sparsity-aware kv caching. In 2024 ACM\/IEEE 51st Annual Int'l Symp. on Computer Architecture (ISCA)","author":"Zhao Youpeng","year":"2024","unstructured":"Youpeng Zhao, Di Wu, and Jun Wang. 2024a. Alisa: Accelerating large language model inference via sparsity-aware kv caching. In 2024 ACM\/IEEE 51st Annual Int'l Symp. on Computer Architecture (ISCA). 
IEEE, 1005-1017.","journal-title":"IEEE"},{"key":"e_1_3_2_1_99_1","doi-asserted-by":"publisher","DOI":"10.1145\/3649329.3658482"}],"event":{"name":"ASPLOS '26: 31st ACM International Conference on Architectural Support for Programming Languages and Operating Systems","location":"Pittsburgh PA USA","sponsor":["SIGOPS ACM Special Interest Group on Operating Systems","SIGPLAN ACM Special Interest Group on Programming Languages","SIGARCH ACM Special Interest Group on Computer Architecture","SIGBED ACM Special Interest Group on Embedded Systems"]},"container-title":["Proceedings of the 31st ACM International Conference on Architectural Support for Programming Languages and Operating Systems, Volume 2"],"original-title":[],"deposited":{"date-parts":[[2026,3,15]],"date-time":"2026-03-15T14:07:51Z","timestamp":1773583671000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3779212.3790169"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,3,22]]},"references-count":99,"alternative-id":["10.1145\/3779212.3790169","10.1145\/3779212"],"URL":"https:\/\/doi.org\/10.1145\/3779212.3790169","relation":{},"subject":[],"published":{"date-parts":[[2026,3,22]]},"assertion":[{"value":"2026-03-22","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}