{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,3]],"date-time":"2026-04-03T15:43:29Z","timestamp":1775231009318,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":23,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,9,30]],"date-time":"2024-09-30T00:00:00Z","timestamp":1727654400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"name":"US DOE Office of Science","award":["DEAC06-76RL01830"],"award-info":[{"award-number":["DEAC06-76RL01830"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,9,30]]},"DOI":"10.1145\/3695794.3695809","type":"proceedings-article","created":{"date-parts":[[2024,12,12]],"date-time":"2024-12-12T04:06:53Z","timestamp":1733976413000},"page":"172-177","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":8,"title":["Performance Study of CXL Memory Topology"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0009-2283-806X","authenticated-orcid":false,"given":"Jianbo","family":"Wu","sequence":"first","affiliation":[{"name":"University of California, Merced, Merced, CA, United States"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6560-0427","authenticated-orcid":false,"given":"Jie","family":"Liu","sequence":"additional","affiliation":[{"name":"University of California, Merced, Merced, CA, United States"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9105-5634","authenticated-orcid":false,"given":"Gokcen","family":"Kestor","sequence":"additional","affiliation":[{"name":"Pacific Northwest National Laboratory, Richland, WA, United States"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9430-2656","authenticated-orcid":false,"given":"Roberto","family":"Gioiosa","sequence":"additional","affiliation":[{"name":"Pacific Northwest National Laboratory, Richland, WA, United States"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9336-0694","authenticated-orcid":false,"given":"Dong","family":"Li","sequence":"additional","affiliation":[{"name":"University of California, Merced, Merced, CA, United States"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4313-1882","authenticated-orcid":false,"given":"Andres","family":"Marquez","sequence":"additional","affiliation":[{"name":"Pacific Northwest National Laboratory, Richland, WA, USA"}]}],"member":"320","published-online":{"date-parts":[[2024,12,11]]},"reference":[{"key":"e_1_3_3_1_2_2","unstructured":"Intel Analytics. 2023. IPEX-LLM: Accelerate local LLM inference and finetuning on Intel CPU and GPU. https:\/\/github.com\/intel-analytics\/ipex-llm."},{"key":"e_1_3_3_1_3_2","unstructured":"Andi Kleen (SUSE Labs). [n. d.]. NUMA Support for Linux. https:\/\/github.com\/numactl\/numactl."},{"key":"e_1_3_3_1_4_2","unstructured":"Tom Brown Benjamin Mann Nick Ryder Melanie Subbiah Jared\u00a0D Kaplan Prafulla Dhariwal Arvind Neelakantan Pranav Shyam Girish Sastry Amanda Askell et\u00a0al. 2020. Language models are few-shot learners. Advances in neural information processing systems 33 (2020) 1877\u20131901."},{"key":"e_1_3_3_1_5_2","volume-title":"CXL Memory Expansion, Pooling, Sharing, FAM Enablement, and Switching","author":"Do Tam","year":"2023","unstructured":"Tam Do and Sanketh Srinivas. 2023. CXL Memory Expansion, Pooling, Sharing, FAM Enablement, and Switching. Open Compute Project. https:\/\/www.youtube.com\/watch?v=VCYSzBFCBnQ Presented by Tam Do (Microchip) and Sanketh Srinivas (Microchip), Open Compute Project."},{"key":"e_1_3_3_1_6_2","doi-asserted-by":"crossref","unstructured":"Yehonatan Fridman Suprasad\u00a0Mutalik Desai Navneet Singh Thomas Willhalm and Gal Oren. 2023. CXL Memory as Persistent Memory for Disaggregated HPC: A Practical Approach. arxiv:https:\/\/arXiv.org\/abs\/2308.10714\u00a0[cs.DC]","DOI":"10.1145\/3624062.3624175"},{"key":"e_1_3_3_1_7_2","unstructured":"Sunita Jain Nagaradhesh Yeleswarapu Hasan\u00a0Al Maruf and Rita Gupta. 2024. Memory Sharing with CXL: Hardware and Software Design Approaches. arxiv:https:\/\/arXiv.org\/abs\/2404.03245\u00a0[cs.ET]"},{"key":"e_1_3_3_1_8_2","unstructured":"Albert\u00a0Q Jiang Alexandre Sablayrolles Arthur Mensch Chris Bamford Devendra\u00a0Singh Chaplot Diego de\u00a0las Casas Florian Bressand Gianna Lengyel Guillaume Lample Lucile Saulnier et\u00a0al. 2023. Mistral 7B. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2310.06825 (2023)."},{"key":"e_1_3_3_1_9_2","doi-asserted-by":"publisher","DOI":"10.1145\/3575693.3578835"},{"key":"e_1_3_3_1_10_2","unstructured":"Jie Liu Xi Wang Jianbo Wu Shuangyan Yang Jie Ren Bhanu Shankar and Dong Li. 2024. Exploring and Evaluating Real-world CXL: Use Cases and System Adoption. arxiv:https:\/\/arXiv.org\/abs\/2405.14209\u00a0[cs.PF]"},{"key":"e_1_3_3_1_11_2","unstructured":"Meta Llama. 2024. Llama3 Model Card. https:\/\/github.com\/meta-llama\/llama3\/blob\/main\/MODEL_CARD.md. Accessed: 2024-05-31."},{"key":"e_1_3_3_1_12_2","doi-asserted-by":"crossref","unstructured":"J\u00fanior L\u00f6ff Dalvan Griebler Gabriele Mencagli Gabriell Araujo Massimo Torquati Marco Danelutto and Luiz\u00a0Gustavo Fernandes. 2021. The NAS parallel benchmarks for evaluating C++ parallel programming frameworks on shared-memory architectures. Future Generation Computer Systems 125 (2021) 743\u2013757.","DOI":"10.1016\/j.future.2021.07.021"},{"key":"e_1_3_3_1_13_2","volume-title":"STREAM: Sustainable Memory Bandwidth in High Performance Computers","author":"McCalpin John\u00a0D.","year":"1991","unstructured":"John\u00a0D. McCalpin. 1991-2007. STREAM: Sustainable Memory Bandwidth in High Performance Computers. Technical Report. University of Virginia, Charlottesville, Virginia. http:\/\/www.cs.virginia.edu\/stream\/ A continually updated technical report. http:\/\/www.cs.virginia.edu\/stream\/."},{"key":"e_1_3_3_1_14_2","first-page":"551","volume-title":"2021 USENIX Annual Technical Conference (USENIX ATC 21)","author":"Ren Jie","year":"2021","unstructured":"Jie Ren, Samyam Rajbhandari, Reza\u00a0Yazdani Aminabadi, Olatunji Ruwase, Shuangyan Yang, Minjia Zhang, Dong Li, and Yuxiong He. 2021. { Zero-offload} : Democratizing { billion-scale} model training. In 2021 USENIX Annual Technical Conference (USENIX ATC 21). 551\u2013564."},{"key":"e_1_3_3_1_15_2","unstructured":"Debendra\u00a0Das Sharma Robert Blankenship and Daniel\u00a0S. Berger. 2024. An Introduction to the Compute Express Link (CXL) Interconnect. arxiv:https:\/\/arXiv.org\/abs\/2306.11227\u00a0[cs.AR]"},{"key":"e_1_3_3_1_16_2","unstructured":"Richard Solomon. 2023. CXL: How an Accelerator Link Caused a Memory Revolution. Synopsys IP Technical Bulletin (2023). https:\/\/www.synopsys.com\/designware-ip\/technical-bulletin\/cxl-in-memory-solutions.html"},{"key":"e_1_3_3_1_17_2","doi-asserted-by":"publisher","DOI":"10.1145\/3613424.3614256"},{"key":"e_1_3_3_1_18_2","doi-asserted-by":"publisher","DOI":"10.1145\/3627703.3650061"},{"key":"e_1_3_3_1_19_2","unstructured":"Hugo Touvron Thibaut Lavril Gautier Izacard Xavier Martinet Marie-Anne Lachaux Timoth\u00e9e Lacroix Baptiste Rozi\u00e8re Naman Goyal Eric Hambro Faisal Azhar Aurelien Rodriguez Armand Joulin Edouard Grave and Guillaume Lample. 2023. LLaMA: Open and Efficient Foundation Language Models. arxiv:https:\/\/arXiv.org\/abs\/2302.13971\u00a0[cs.CL]"},{"key":"e_1_3_3_1_20_2","unstructured":"Hugo Touvron Louis Martin Kevin Stone Peter Albert Amjad Almahairi Yasmine Babaei Nikolay Bashlykov Soumya Batra Prajjwal Bhargava Shruti Bhosale et\u00a0al. 2023. Llama 2: Open foundation and fine-tuned chat models. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2307.09288 (2023)."},{"key":"e_1_3_3_1_21_2","doi-asserted-by":"publisher","DOI":"10.1109\/MCHPC56545.2022.00007"},{"key":"e_1_3_3_1_22_2","unstructured":"Jianbo Wu Jie Ren Shuangyan Yang Konstantinos Parasyris Giorgis Georgakoudis Ignacio Laguna and Dong Li. [n. d.]. LM-Offload: Performance Model-Guided Generative Inference of Large Language Models with Parallelism Control. ([n. d.])."},{"key":"e_1_3_3_1_23_2","doi-asserted-by":"publisher","DOI":"10.1145\/3575693.3575725"},{"key":"e_1_3_3_1_24_2","unstructured":"Yiwei Yang Pooneh Safayenikoo Jiacheng Ma Tanvir\u00a0Ahmed Khan and Andrew Quinn. 2023. CXLMemSim: A pure software simulated CXL. mem for performance characterization. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2303.06153 (2023)."}],"event":{"name":"MEMSYS '24: The International Symposium on Memory Systems","location":"Washington DC USA","acronym":"MEMSYS '24"},"container-title":["Proceedings of the International Symposium on Memory Systems"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3695794.3695809","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3695794.3695809","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T01:10:06Z","timestamp":1750295406000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3695794.3695809"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,9,30]]},"references-count":23,"alternative-id":["10.1145\/3695794.3695809","10.1145\/3695794"],"URL":"https:\/\/doi.org\/10.1145\/3695794.3695809","relation":{},"subject":[],"published":{"date-parts":[[2024,9,30]]},"assertion":[{"value":"2024-12-11","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}