{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,9]],"date-time":"2026-04-09T14:33:49Z","timestamp":1775745229965,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":10,"publisher":"ACM","license":[{"start":{"date-parts":[[2025,3,30]],"date-time":"2025-03-30T00:00:00Z","timestamp":1743292800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-sa\/4.0\/"}],"funder":[{"name":"European Union?s Horizon Programme","award":["101047160"],"award-info":[{"award-number":["101047160"]}]},{"DOI":"10.13039\/501100006374","name":"NSF (National Science Foundation)","doi-asserted-by":"publisher","award":["1652294, 190860"],"award-info":[{"award-number":["1652294, 190860"]}],"id":[{"id":"10.13039\/501100006374","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100006374","name":"Semiconductor Research Corporation","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100006374","id-type":"DOI","asserted-by":"publisher"}]},{"name":"AI Chip Center for Emerging Smart Systems Limited (ACCESS)"},{"DOI":"10.13039\/501100006374","name":"Intel Corporation","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100006374","id-type":"DOI","asserted-by":"publisher"}]},{"name":"ETH Future Computing Laboratory (EFCL)"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,3,30]]},"DOI":"10.1145\/3676641.3716267","type":"proceedings-article","created":{"date-parts":[[2025,3,27]],"date-time":"2025-03-27T16:47:32Z","timestamp":1743094052000},"page":"862-881","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":24,"title":["PIM Is All You Need: A CXL-Enabled GPU-Free System for Large Language Model Inference"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-1918-7533","authenticated-orcid":false,"given":"Yufeng","family":"Gu","sequence":"first","affiliation":[{"name":"University of Michigan, Ann Arbor, MI, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7615-5514","authenticated-orcid":false,"given":"Alireza","family":"Khadem","sequence":"additional","affiliation":[{"name":"University of Michigan, Ann Arbor, MI, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1893-2845","authenticated-orcid":false,"given":"Sumanth","family":"Umesh","sequence":"additional","affiliation":[{"name":"University of Michigan, Ann Arbor, MI, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0000-5700-4991","authenticated-orcid":false,"given":"Ning","family":"Liang","sequence":"additional","affiliation":[{"name":"University of Michigan, Ann Arbor, MI, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-3830-7219","authenticated-orcid":false,"given":"Xavier","family":"Servot","sequence":"additional","affiliation":[{"name":"ETH Z\u00fcrich, Z\u00fcrich, Switzerland"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0075-2312","authenticated-orcid":false,"given":"Onur","family":"Mutlu","sequence":"additional","affiliation":[{"name":"ETH Z\u00fcrich, Z\u00fcrich, Switzerland"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5383-9561","authenticated-orcid":false,"given":"Ravi","family":"Iyer","sequence":"additional","affiliation":[{"name":"Google, Mountain View, CA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5894-8342","authenticated-orcid":false,"given":"Reetuparna","family":"Das","sequence":"additional","affiliation":[{"name":"University of Michigan, Ann Arbor, MI, USA"}]}],"member":"320","published-online":{"date-parts":[[2025,3,30]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"Azure pricing calculator. URL: https:\/\/azure.microsoft.com\/en-us\/pricing\/calculator\/."},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1145\/3533737.3535090"},{"key":"e_1_3_2_1_3_1","volume-title":"Gqa: Training generalized multi-query transformer models from multi-head checkpoints","author":"Ainslie Joshua","year":"2023","unstructured":"Joshua Ainslie, James Lee-Thorp, Michiel de Jong, Yury Zemlyanskiy, Federico Lebr\u00f3n, and Sumit Sanghai. Gqa: Training generalized multi-query transformer models from multi-head checkpoints, 2023. URL: https:\/\/arxiv.org\/abs\/2305.13245, arXiv:2305.13245."},{"key":"e_1_3_2_1_4_1","unstructured":"Anthropic. Introducing the next generation of claude. URL: https: \/\/www.anthropic.com\/news\/claude-3-family."},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1145\/3545008.3545054"},{"key":"e_1_3_2_1_6_1","unstructured":"Thomas Atta-fosu. Llama 2 70b: An mlperf inference benchmark for large language models. URL: https:\/\/mlcommons.org\/2024\/03\/mlperf-llama2-70b\/."},{"issue":"3","key":"e_1_3_2_1_7_1","first-page":"8","article-title":"Improving image generation with better captions","volume":"2","author":"Betker James","year":"2023","unstructured":"James Betker, Gabriel Goh, Li Jing, Tim Brooks, Jianfeng Wang, Linjie Li, Long Ouyang, Juntang Zhuang, Joyce Lee, Yufei Guo, Wesam Manassra, Prafulla Dhariwal, Casey Chu, Yunxin Jiao, and Aditya Ramesh. Improving image generation with better captions. Computer Science., 2(3):8, 2023. URL: https:\/\/cdn.openai.com\/papers\/dall-e-3.pdf.","journal-title":"Computer Science."},{"key":"e_1_3_2_1_8_1","unstructured":"Broadcom. 144-lane 72-port pci express gen 5.0 pex89144 express-fabric platform. URL: https:\/\/www.broadcom.com\/products\/pcie-switches-bridges\/expressfabric\/gen5\/pex89144."},{"key":"e_1_3_2_1_9_1","unstructured":"Christopher Celio Krste Asanovic and David Patterson. The berkeley out-of-order machine (boom): An open-source industry-competitive synthesizable parameterized risc-v processor. URL:https:\/\/riscv.org\/wp-content\/uploads\/2016\/01\/Wed1345-RISCV-Workshop-3-BOOM.pdf."},{"key":"e_1_3_2_1_10_1","unstructured":"Christopher Celio Pi-Feng Chiu Borivoje Nikolic David A. Patterson and Krste Asanovic. BOOM v2: an open-source out-of-order RISC-V core. Technical Report UCB\/EECS-2017-157 EECS Department University of California Berkeley Sep 2017. URL: http:\/\/www2.eecs.berkeley.edu\/Pubs\/TechRpts\/2017\/EECS-2017-157.html."}],"event":{"name":"ASPLOS '25: 30th ACM International Conference on Architectural Support for Programming Languages and Operating Systems","location":"Rotterdam Netherlands","acronym":"ASPLOS '25","sponsor":["SIGPLAN ACM Special Interest Group on Programming Languages","SIGOPS ACM Special Interest Group on Operating Systems","SIGARCH ACM Special Interest Group on Computer Architecture"]},"container-title":["Proceedings of the 30th ACM International Conference on Architectural Support for Programming Languages and Operating Systems, Volume 2"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3676641.3716267","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3676641.3716267","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,21]],"date-time":"2025-08-21T11:12:17Z","timestamp":1755774737000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3676641.3716267"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,3,30]]},"references-count":10,"alternative-id":["10.1145\/3676641.3716267","10.1145\/3676641"],"URL":"https:\/\/doi.org\/10.1145\/3676641.3716267","relation":{},"subject":[],"published":{"date-parts":[[2025,3,30]]},"assertion":[{"value":"2025-03-30","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}