{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,18]],"date-time":"2026-03-18T13:22:52Z","timestamp":1773840172629,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":50,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,7,8]],"date-time":"2024-07-08T00:00:00Z","timestamp":1720396800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,7,8]]},"DOI":"10.1145\/3655038.3665953","type":"proceedings-article","created":{"date-parts":[[2024,6,27]],"date-time":"2024-06-27T00:19:48Z","timestamp":1719447588000},"page":"108-115","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":5,"title":["Breaking Barriers: Expanding GPU Memory with Sub-Two Digit Nanosecond Latency CXL Controller"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-4229-0101","authenticated-orcid":false,"given":"Donghyun","family":"Gouk","sequence":"first","affiliation":[{"name":"Panmnesia, Inc."}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4153-7026","authenticated-orcid":false,"given":"Seungkwan","family":"Kang","sequence":"additional","affiliation":[{"name":"Computer Architecture and Memory Systems Laboratory, KAIST"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0946-9986","authenticated-orcid":false,"given":"Hanyeoreum","family":"Bae","sequence":"additional","affiliation":[{"name":"Computer Architecture and Memory Systems Laboratory, KAIST"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-3546-0896","authenticated-orcid":false,"given":"Eojin","family":"Ryu","sequence":"additional","affiliation":[{"name":"Panmnesia, Inc."}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6960-5487","authenticated-orcid":false,"given":"Sangwon","family":"Lee","sequence":"additional","affiliation":[{"name":"Panmnesia, Inc."}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-7951-7269","authenticated-orcid":false,"given":"Dongpyung","family":"Kim","sequence":"additional","affiliation":[{"name":"Panmnesia, Inc."}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3053-396X","authenticated-orcid":false,"given":"Junhyeok","family":"Jang","sequence":"additional","affiliation":[{"name":"Computer Architecture and Memory Systems Laboratory, KAIST"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9832-5801","authenticated-orcid":false,"given":"Myoungsoo","family":"Jung","sequence":"additional","affiliation":[{"name":"Computer Architecture and Memory Systems Laboratory, KAIST and Panmnesia, Inc."}]}],"member":"320","published-online":{"date-parts":[[2024,7,8]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1145\/3470496.3533727"},{"key":"e_1_3_2_1_2_1","volume-title":"M6-t: Exploring sparse expert models and beyond. arXiv preprint arXiv:2105.15082","author":"Yang An","year":"2021","unstructured":"An Yang, Junyang Lin, Rui Men, Chang Zhou, Le Jiang, Xianyan Jia, Ang Wang, Jie Zhang, Jiamang Wang, Yong Li, et al. M6-t: Exploring sparse expert models and beyond. arXiv preprint arXiv:2105.15082, 2021."},{"key":"e_1_3_2_1_3_1","volume-title":"Galactica: A large language model for science. arXiv preprint arXiv:2211.09085","author":"Taylor Ross","year":"2022","unstructured":"Ross Taylor, Marcin Kardas, Guillem Cucurull, Thomas Scialom, Anthony Hartshorn, Elvis Saravia, Andrew Poulton, Viktor Kerkez, and Robert Stojnic. Galactica: A large language model for science. arXiv preprint arXiv:2211.09085, 2022."},{"key":"e_1_3_2_1_4_1","volume-title":"Jamie Hall, Noam Shazeer, Apoorv Kulshreshtha, Heng-Tze Cheng, Alicia Jin, Taylor Bos, Leslie Baker, Yu Du, et al. Lamda: Language models for dialog applications. arXiv preprint arXiv:2201.08239","author":"Thoppilan Romal","year":"2022","unstructured":"Romal Thoppilan, Daniel De Freitas, Jamie Hall, Noam Shazeer, Apoorv Kulshreshtha, Heng-Tze Cheng, Alicia Jin, Taylor Bos, Leslie Baker, Yu Du, et al. Lamda: Language models for dialog applications. arXiv preprint arXiv:2201.08239, 2022."},{"key":"e_1_3_2_1_5_1","volume-title":"Palm 2 technical report. arXiv preprint arXiv:2305.10403","author":"Anil Rohan","year":"2023","unstructured":"Rohan Anil, Andrew M Dai, Orhan Firat, Melvin Johnson, Dmitry Lepikhin, Alexandre Passos, Siamak Shakeri, Emanuel Taropa, Paige Bailey, Zhifeng Chen, et al. Palm 2 technical report. arXiv preprint arXiv:2305.10403, 2023."},{"key":"e_1_3_2_1_6_1","volume-title":"Corey Lynch, Aakanksha Chowdhery, Brian Ichter, Ayzaan Wahid, Jonathan Tompson, Quan Vuong, Tianhe Yu, et al. Palm-e: An embodied multimodal language model. arXiv preprint arXiv:2303.03378","author":"Driess Danny","year":"2023","unstructured":"Danny Driess, Fei Xia, Mehdi SM Sajjadi, Corey Lynch, Aakanksha Chowdhery, Brian Ichter, Ayzaan Wahid, Jonathan Tompson, Quan Vuong, Tianhe Yu, et al. Palm-e: An embodied multimodal language model. arXiv preprint arXiv:2303.03378, 2023."},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1145\/3357384.3358045"},{"key":"e_1_3_2_1_8_1","first-page":"387","volume-title":"19th USENIX Conference on File and Storage Technologies (FAST 21)","author":"Bae Jonghyun","year":"2021","unstructured":"Jonghyun Bae, Jongsung Lee, Yunho Jin, Sam Son, Shine Kim, Hakbeom Jang, Tae Jun Ham, and Jae W Lee. {FlashNeuron}:{SSD-Enabled}{Large-Batch} training of very deep neural networks. In 19th USENIX Conference on File and Storage Technologies (FAST 21), pages 387--401, 2021."},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1145\/3458817.3476205"},{"key":"e_1_3_2_1_10_1","first-page":"412","article-title":"Distributed hierarchical gpu parameter server for massive scale deep learning ads systems","volume":"2","author":"Zhao Weijie","year":"2020","unstructured":"Weijie Zhao, Deping Xie, Ronglai Jia, Yulei Qian, Ruiquan Ding, Mingming Sun, and Ping Li. Distributed hierarchical gpu parameter server for massive scale deep learning ads systems. Proceedings of Machine Learning and Systems, 2:412--428, 2020.","journal-title":"Proceedings of Machine Learning and Systems"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2016.7783721"},{"key":"e_1_3_2_1_12_1","first-page":"13","volume-title":"Automation & Test in Europe Conference & Exhibition (DATE)","author":"Chen Xiaoming","year":"2018","unstructured":"Xiaoming Chen, Danny Z Chen, and Xiaobo Sharon Hu. modnn: Memory optimal dnn training on gpus. In 2018 Design, Automation & Test in Europe Conference & Exhibition (DATE), pages 13--18. IEEE, 2018."},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1145\/3178487.3178491"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1145\/3373376.3378530"},{"key":"e_1_3_2_1_15_1","unstructured":"NVIDIA. Nvidia gpudirect."},{"key":"e_1_3_2_1_16_1","unstructured":"NVIDIA. Cuda toolkit documentation."},{"key":"e_1_3_2_1_17_1","volume-title":"TensorFlow: Large-scale machine learning on heterogeneous systems","author":"Abadi Mart\u00edn","year":"2015","unstructured":"Mart\u00edn Abadi, Ashish Agarwal, Paul Barham, Eugene Brevdo, Zhifeng Chen, Craig Citro, Greg S. Corrado, Andy Davis, Jeffrey Dean, Matthieu Devin, Sanjay Ghemawat, Ian Goodfellow, Andrew Harp, Geoffrey Irving, Michael Isard, Yangqing Jia, Rafal Jozefowicz, Lukasz Kaiser, Manjunath Kudlur, Josh Levenberg, Dandelion Man\u00e9, Rajat Monga, Sherry Moore, Derek Murray, Chris Olah, Mike Schuster, Jonathon Shlens, Benoit Steiner, Ilya Sutskever, Kunal Talwar, Paul Tucker, Vincent Vanhoucke, Vijay Vasudevan, Fernanda Vi\u00e9gas, Oriol Vinyals, Pete Warden, Martin Wattenberg, Martin Wicke, Yuan Yu, and Xiaoqiang Zheng. TensorFlow: Large-scale machine learning on heterogeneous systems, 2015. Software available from tensorflow.org."},{"key":"e_1_3_2_1_18_1","volume-title":"Deep graph library: A graph-centric, highly-performant package for graph neural networks. arXiv preprint arXiv:1909.01315","author":"Wang Minjie","year":"2019","unstructured":"Minjie Wang, Da Zheng, Zihao Ye, Quan Gan, Mufei Li, Xiang Song, Jinjing Zhou, Chao Ma, Lingfan Yu, Yu Gai, Tianjun Xiao, Tong He, George Karypis, Jinyang Li, and Zheng Zhang. Deep graph library: A graph-centric, highly-performant package for graph neural networks. arXiv preprint arXiv:1909.01315, 2019."},{"key":"e_1_3_2_1_19_1","volume-title":"Ddr sdram standard jesd79-4d","author":"JEDEC.","year":"2021","unstructured":"JEDEC. Ddr sdram standard jesd79-4d, 2021."},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1145\/3538643.3539745"},{"key":"e_1_3_2_1_21_1","first-page":"601","volume-title":"2023 USENIX Annual Technical Conference (USENIX ATC 23)","author":"Yang Shao-Peng","year":"2023","unstructured":"Shao-Peng Yang, Minjae Kim, Sanghyun Nam, Juhyung Park, Jinyong Choi, Eyee Hyun Nam, Eunji Lee, Sungjin Lee, and Bryan S Kim. Overcoming the memory wall with {CXL-Enabled}{SSDs}. In 2023 USENIX Annual Technical Conference (USENIX ATC 23), pages 601--617, 2023."},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1145\/3599691.3603406"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1145\/3533737.3535090"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1145\/3575693.3578835"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1109\/MM.2023.3240774"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1145\/3582016.3582063"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1145\/3613424.3614256"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1109\/MM.2023.3237491"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1145\/3466752.3480128"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA.2018.00070"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1145\/3368089.3417050"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1145\/3373376.3378505"},{"key":"e_1_3_2_1_33_1","volume-title":"Nvidia h100 tensor core gpu architecture","author":"NVIDIA.","year":"2022","unstructured":"NVIDIA. Nvidia h100 tensor core gpu architecture, 2022."},{"key":"e_1_3_2_1_34_1","volume-title":"Pipedream: Fast and efficient pipeline parallel dnn training. arXiv preprint arXiv:1806.03377","author":"Harlap Aaron","year":"2018","unstructured":"Aaron Harlap, Deepak Narayanan, Amar Phanishayee, Vivek Seshadri, Nikhil Devanur, Greg Ganger, and Phil Gibbons. Pipedream: Fast and efficient pipeline parallel dnn training. arXiv preprint arXiv:1806.03377, 2018."},{"key":"e_1_3_2_1_35_1","volume-title":"et al. Gpipe: Efficient training of giant neural networks using pipeline parallelism. Advances in neural information processing systems, 32","author":"Huang Yanping","year":"2019","unstructured":"Yanping Huang, Youlong Cheng, Ankur Bapna, Orhan Firat, Dehao Chen, Mia Chen, HyoukJoong Lee, Jiquan Ngiam, Quoc V Le, Yonghui Wu, et al. Gpipe: Efficient training of giant neural networks using pipeline parallelism. Advances in neural information processing systems, 32, 2019."},{"key":"e_1_3_2_1_36_1","volume-title":"Megatron-lm: Training multibillion parameter language models using model parallelism. arXiv preprint arXiv:1909.08053","author":"Shoeybi Mohammad","year":"2019","unstructured":"Mohammad Shoeybi, Mostofa Patwary, Raul Puri, Patrick LeGresley, Jared Casper, and Bryan Catanzaro. Megatron-lm: Training multibillion parameter language models using model parallelism. arXiv preprint arXiv:1909.08053, 2019."},{"key":"e_1_3_2_1_37_1","first-page":"551","volume-title":"2021 USENIX Annual Technical Conference (USENIX ATC 21)","author":"Ren Jie","year":"2021","unstructured":"Jie Ren, Samyam Rajbhandari, Reza Yazdani Aminabadi, Olatunji Ruwase, Shuangyan Yang, Minjia Zhang, Dong Li, and Yuxiong He. {Zero-offload}: Democratizing {billion-scale} model training. In 2021 USENIX Annual Technical Conference (USENIX ATC 21), pages 551--564, 2021."},{"key":"e_1_3_2_1_38_1","unstructured":"AMD. Rocm software 6.0.2."},{"key":"e_1_3_2_1_39_1","volume-title":"Intel oneapi","year":"2022","unstructured":"Intel. Intel oneapi, 2022."},{"key":"e_1_3_2_1_40_1","volume-title":"Compute express link specification revision 3.1","author":"Compute Express Link Consortium","year":"2023","unstructured":"Compute Express Link Consortium. Compute express link specification revision 3.1, 2023."},{"key":"e_1_3_2_1_41_1","volume-title":"Samsung develops industry's first cxl dram supporting cxl 2.0","year":"2023","unstructured":"Samsung. Samsung develops industry's first cxl dram supporting cxl 2.0, 2023."},{"key":"e_1_3_2_1_42_1","unstructured":"SK Hynix. Sk hynix develops ddr5 dram cxl memory to expand the cxl memory ecosystem."},{"key":"e_1_3_2_1_43_1","unstructured":"Micron. Micron cz120 memory expansion module."},{"key":"e_1_3_2_1_44_1","volume-title":"Phy interface for the pci express, sata, usb 3.2, displayport, and usb4 architectures, revision 6.2.1","year":"2023","unstructured":"Intel. Phy interface for the pci express, sata, usb 3.2, displayport, and usb4 architectures, revision 6.2.1, 2023."},{"key":"e_1_3_2_1_45_1","unstructured":"Intel. Migration from direct-attached intel optane persistent memory to cxl-attached memory."},{"key":"e_1_3_2_1_46_1","unstructured":"Samsung. Cmm-h (cxl memory module h: Hybrid)."},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.1109\/LCA.2020.2973991"},{"key":"e_1_3_2_1_48_1","unstructured":"Pawan Harish Vibhav Vineet and PJ Narayanan. Large graph algorithms for massively multithreaded architectures. International Institute of Information Technology Hyderabad Tech. Rep. IIIT\/TR\/2009\/74 2009."},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"publisher","DOI":"10.1145\/1735688.1735702"},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISSCC.2018.8310322"}],"event":{"name":"HOTSTORAGE '24: 16th ACM Workshop on Hot Topics in Storage and File Systems","location":"Santa Clara CA USA","acronym":"HOTSTORAGE '24","sponsor":["SIGOPS ACM Special Interest Group on Operating Systems"]},"container-title":["Proceedings of the 16th ACM Workshop on Hot Topics in Storage and File Systems"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3655038.3665953","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3655038.3665953","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,23]],"date-time":"2025-08-23T02:10:20Z","timestamp":1755915020000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3655038.3665953"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,7,8]]},"references-count":50,"alternative-id":["10.1145\/3655038.3665953","10.1145\/3655038"],"URL":"https:\/\/doi.org\/10.1145\/3655038.3665953","relation":{},"subject":[],"published":{"date-parts":[[2024,7,8]]},"assertion":[{"value":"2024-07-08","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}