{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,29]],"date-time":"2026-04-29T11:24:16Z","timestamp":1777461856565,"version":"3.51.4"},"publisher-location":"New York, NY, USA","reference-count":22,"publisher":"ACM","content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2026,4,27]]},"DOI":"10.1145\/3805621.3807654","type":"proceedings-article","created":{"date-parts":[[2026,4,28]],"date-time":"2026-04-28T13:08:45Z","timestamp":1777381725000},"page":"225-231","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Pooling Engram Conditional Memory in Large Language Models using CXL"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0003-9067-9538","authenticated-orcid":false,"given":"Ruiyang","family":"Ma","sequence":"first","affiliation":[{"name":"Peking University, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7104-1526","authenticated-orcid":false,"given":"Teng","family":"Ma","sequence":"additional","affiliation":[{"name":"Alibaba Cloud Computing, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2519-617X","authenticated-orcid":false,"given":"Zhiyuan","family":"Su","sequence":"additional","affiliation":[{"name":"Shandong Yingxin Computer Technology Co., Ltd, Jinan, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-1446-2208","authenticated-orcid":false,"given":"Hantian","family":"Zha","sequence":"additional","affiliation":[{"name":"Renmin University of China, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-7857-5330","authenticated-orcid":false,"given":"Xinpeng","family":"Zhao","sequence":"additional","affiliation":[{"name":"Alibaba Cloud Computing, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-4014-2251","authenticated-orcid":false,"given":"Xuchun","family":"Shang","sequence":"additional","affiliation":[{"name":"Alibaba Cloud Computing, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-0590-2489","authenticated-orcid":false,"given":"Xingrui","family":"Yi","sequence":"additional","affiliation":[{"name":"Alibaba Cloud Computing, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0911-267X","authenticated-orcid":false,"given":"Zheng","family":"Liu","sequence":"additional","affiliation":[{"name":"Alibaba Cloud Computing, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-5125-0193","authenticated-orcid":false,"given":"Zhu","family":"Cao","sequence":"additional","affiliation":[{"name":"Shandong Yingxin Computer Technology Co., Ltd, Jinan, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-0227-2114","authenticated-orcid":false,"given":"An","family":"Wu","sequence":"additional","affiliation":[{"name":"Shandong Yingxin Computer Technology Co., Ltd, Jinan, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-2624-8225","authenticated-orcid":false,"given":"Zhichong","family":"Dou","sequence":"additional","affiliation":[{"name":"Shandong Yingxin Computer Technology Co., Ltd, Jinan, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-0292-7155","authenticated-orcid":false,"given":"Ziqian","family":"Liu","sequence":"additional","affiliation":[{"name":"The University of Hong Kong, Hong Kong, Hong Kong"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-5651-7721","authenticated-orcid":false,"given":"Daikang","family":"Kuang","sequence":"additional","affiliation":[{"name":"Peking University, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4932-3655","authenticated-orcid":false,"given":"Guojie","family":"Luo","sequence":"additional","affiliation":[{"name":"Peking University, Beijing, China"}]}],"member":"320","published-online":{"date-parts":[[2026,4,28]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"KVDirect: Distributed disaggregated LLM inference. arXiv preprint arXiv:2501.14743","author":"Chen Shiyang","year":"2024","unstructured":"Shiyang Chen, Rain Jiang, Dezhi Yu, Jinlai Xu, Mengyuan Chao, Fanlong Meng, Chenyu Jiang, Wei Xu, and Hang Liu. 2024. KVDirect: Distributed disaggregated LLM inference. arXiv preprint arXiv:2501.14743 (2024)."},{"key":"e_1_3_2_1_2_1","unstructured":"Xin Cheng Wangding Zeng Damai Dai Qinyu Chen Bingxuan Wang Zhenda Xie Kezhao Huang Xingkai Yu Zhewen Hao Yukun Li et al. 2026. Conditional memory via scalable lookup: A new axis of sparsity for large language models. arXiv preprint arXiv:2601.07372 (2026)."},{"key":"e_1_3_2_1_3_1","unstructured":"Counterpoint Research. 2026. Memory Prices Soar by 50% in Q4 Rally to Continue in 2026: The Hyper-Bull Phase. Technical Report. Counterpoint Research. https:\/\/www.counterpointresearch.com\/insights\/memory-price-tracker-january-2026\/ Accessed: Feb 2026. Highlights target price of $700 for 64GB RDIMM by March 2026 due to AI-driven supply constraints."},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1145\/3661821"},{"key":"e_1_3_2_1_5_1","volume-title":"Memtunnel: A CXL-based rack-scale host memory pooling architecture for cloud service","author":"Guan Tianchan","year":"2025","unstructured":"Tianchan Guan, Yijin Guan, Zhaoyang Du, Jiacheng Ma, Boyu Tian, Zhao Wang, Teng Ma, Zheng Liu, Yang Kong, Yuan Xie, et al. 2025. Memtunnel: A CXL-based rack-scale host memory pooling architecture for cloud service. IEEE Transactions on Parallel and Distributed Systems (2025)."},{"key":"e_1_3_2_1_6_1","volume-title":"19th USENIX Symposium on Operating Systems Design and Implementation (OSDI 25)","author":"Huang Yibo","year":"2025","unstructured":"Yibo Huang, Haowei Chen, Newton Ni, Yan Sun, Vijay Chidambaram, Dixin Tang, and Emmett Witchel. 2025. Tigon: A Distributed Database for a CXL Pod. In 19th USENIX Symposium on Operating Systems Design and Implementation (OSDI 25). 109\u2013128."},{"key":"e_1_3_2_1_7_1","volume-title":"16th USENIX Symposium on Networked Systems Design and Implementation (NSDI 19)","author":"Kalia Anuj","year":"2019","unstructured":"Anuj Kalia, Michael Kaminsky, and David Andersen. 2019. Datacenter {RPCs} can be general and fast. In 16th USENIX Symposium on Networked Systems Design and Implementation (NSDI 19). 1\u201316."},{"key":"e_1_3_2_1_8_1","volume-title":"Bauhaus: Restructuring Vector Database for LLM Retrieval on CXL-Based Tiered Memory","author":"Kim Kyungbin","year":"2026","unstructured":"Kyungbin Kim, Sungsu Ahn, Wonjung Jeong, Jongmin Kim, Sangun Choi, Minseong Gil, Minseong Kim, Dongha Jung, Yunjay Hong, Haekang Jung, et al. 2026. Bauhaus: Restructuring Vector Database for LLM Retrieval on CXL-Based Tiered Memory. IEEE Trans. Comput. (2026)."},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1145\/3575693.3578835"},{"key":"e_1_3_2_1_10_1","unstructured":"Aixin Liu Bei Feng Bing Xue Bingxuan Wang Bochao Wu Chengda Lu Chenggang Zhao Chengqi Deng Chenyu Zhang Chong Ruan et al. 2024. DeepSeek-V3 technical report. arXiv preprint arXiv:2412.19437 (2024)."},{"key":"e_1_3_2_1_11_1","unstructured":"Aixin Liu Aoxue Mei Bangcai Lin Bing Xue Bingxuan Wang Bingzheng Xu Bochao Wu Bowei Zhang Chaofan Lin Chen Dong et al. 2025. DeepSeek-V3.2: Pushing the frontier of open large language models. arXiv preprint arXiv:2512.02556 (2025)."},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1145\/3651890.3672274"},{"key":"e_1_3_2_1_13_1","volume-title":"23rd USENIX conference on file and storage technologies (FAST 25)","author":"Qin Ruoyu","year":"2025","unstructured":"Ruoyu Qin, Zheming Li, Weiran He, Jialei Cui, Feng Ren, Mingxing Zhang, Yongwei Wu, Weimin Zheng, and Xinran Xu. 2025. Mooncake: Trading more storage for less computation\u2014a KVCache-centric architecture for serving LLM chatbot. In 23rd USENIX conference on file and storage technologies (FAST 25). 155\u2013170."},{"key":"e_1_3_2_1_14_1","volume-title":"Proceedings of the 30th ACM International Conference on Architectural Support for Programming Languages and Operating Systems","volume":"1","author":"Quinn Derrick","year":"2025","unstructured":"Derrick Quinn, Mohammad Nouri, Neel Patel, John Salihu, Alireza Salemi, Sukhan Lee, Hamed Zamani, and Mohammad Alian. 2025. Accelerating retrieval-augmented generation. In Proceedings of the 30th ACM International Conference on Architectural Support for Programming Languages and Operating Systems, Volume 1. 15\u201332."},{"key":"e_1_3_2_1_15_1","unstructured":"Qwen Team. 2025. Qwen3 Technical Report. arXiv preprint arXiv:2505.09388 (2025)."},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1145\/3690825"},{"key":"e_1_3_2_1_17_1","unstructured":"XConn Technologies. 2023. XC50256: World's First Hybrid CXL 2.0 and PCIe Gen5 Switch IC. https:\/\/www.xconn-tech.com\/product."},{"key":"e_1_3_2_1_18_1","volume-title":"Beluga: A CXL-Based Memory Architecture for Scalable and Efficient LLM KVCache Management. arXiv preprint arXiv:2511.20172","author":"Yang Xinjun","year":"2025","unstructured":"Xinjun Yang, Qingda Hu, Junru Li, Feifei Li, Yicong Zhu, Yuqi Zhou, Qiuru Lin, Jian Dai, Yang Kong, Jiayu Zhang, et al. 2025. Beluga: A CXL-Based Memory Architecture for Scalable and Efficient LLM KVCache Management. arXiv preprint arXiv:2511.20172 (2025)."},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1145\/3722212.3724460"},{"key":"e_1_3_2_1_20_1","unstructured":"Dongha Yoon Younghoon Min Hoshik Kim Sam H Noh and Jongryool Kim. 2025. TraCT: Disaggregated LLM Serving with CXL Shared Memory KV Cache at Rack-Scale. arXiv preprint arXiv:2512.18194 (2025)."},{"key":"e_1_3_2_1_21_1","volume-title":"Proceedings of the 18th USENIX Symposium on Operating Systems Design and Implementation (OSDI).","author":"Zheng Lianmin","year":"2024","unstructured":"Lianmin Zheng, Liungsheng Zheng, Ying Sheng, Lianmin Zheng, Cody Hao Yu, Haotong Li, Xuanhe Zhou, Banghua Zhu, Joseph E. Gonzalez, Ion Stoica, Hao Zhang, et al. 2024. SGLang: Efficient Execution of Structured Language Model Programs. In Proceedings of the 18th USENIX Symposium on Operating Systems Design and Implementation (OSDI)."},{"key":"e_1_3_2_1_22_1","volume-title":"18th USENIX Symposium on Operating Systems Design and Implementation (OSDI 24)","author":"Zhong Yuhong","year":"2024","unstructured":"Yuhong Zhong, Daniel S Berger, Carl Waldspurger, Ryan Wee, Ishwar Agarwal, Rajat Agarwal, Frank Hady, Karthik Kumar, Mark D Hill, Mosharaf Chowdhury, et al. 2024. Managing memory tiers with CXL in virtualized environments. In 18th USENIX Symposium on Operating Systems Design and Implementation (OSDI 24). 37\u201356."}],"event":{"name":"EuroSys '26: 21st European Conference on Computer Systems","location":"Edinburgh Scotland Uk","acronym":"EuroMLSys '26","sponsor":["SIGOPS ACM Special Interest Group on Operating Systems"]},"container-title":["Proceedings of the Sixth European Workshop on Machine Learning and Systems"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3805621.3807654","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,4,28]],"date-time":"2026-04-28T13:15:47Z","timestamp":1777382147000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3805621.3807654"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,4,27]]},"references-count":22,"alternative-id":["10.1145\/3805621.3807654","10.1145\/3805621"],"URL":"https:\/\/doi.org\/10.1145\/3805621.3807654","relation":{},"subject":[],"published":{"date-parts":[[2026,4,27]]},"assertion":[{"value":"2026-04-28","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}