{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,23]],"date-time":"2026-04-23T07:56:19Z","timestamp":1776930979473,"version":"3.51.2"},"publisher-location":"New York, NY, USA","reference-count":93,"publisher":"ACM","license":[{"start":{"date-parts":[[2025,11,15]],"date-time":"2025-11-15T00:00:00Z","timestamp":1763164800000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"U.S. National Science Foundation","award":["2104116, 2316202, 2348350"],"award-info":[{"award-number":["2104116, 2316202, 2348350"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,11,16]]},"DOI":"10.1145\/3712285.3759816","type":"proceedings-article","created":{"date-parts":[[2025,11,12]],"date-time":"2025-11-12T16:04:47Z","timestamp":1762963487000},"page":"2216-2232","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":1,"title":["cMPI: Using CXL Memory Sharing for MPI One-Sided and Two-Sided Inter-Node Communications"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0001-6251-8177","authenticated-orcid":false,"given":"Xi","family":"Wang","sequence":"first","affiliation":[{"name":"University of California, Merced, Merced, CA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-4287-8946","authenticated-orcid":false,"given":"Bin","family":"Ma","sequence":"additional","affiliation":[{"name":"University of California, Merced, Merced, CA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-5938-6878","authenticated-orcid":false,"given":"Jongryool","family":"Kim","sequence":"additional","affiliation":[{"name":"SK hynix, San Jose, CA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1110-9971","authenticated-orcid":false,"given":"Byungil","family":"Koh","sequence":"additional","affiliation":[{"name":"SK hynix, San Jose, CA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4017-8124","authenticated-orcid":false,"given":"Hoshik","family":"Kim","sequence":"additional","affiliation":[{"name":"SK hynix, San Jose, CA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9336-0694","authenticated-orcid":false,"given":"Dong","family":"Li","sequence":"additional","affiliation":[{"name":"University of California, Merced, Merced, CA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2025,11,15]]},"reference":[{"key":"e_1_3_3_1_2_2","doi-asserted-by":"publisher","DOI":"10.1145\/3297858.3304061"},{"key":"e_1_3_3_1_3_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-73370-3_6"},{"key":"e_1_3_3_1_4_2","doi-asserted-by":"publisher","DOI":"10.1109\/BigData62323.2024.10825804"},{"key":"e_1_3_3_1_5_2","doi-asserted-by":"publisher","DOI":"10.1145\/3533737.3535090"},{"key":"e_1_3_3_1_6_2","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPSW.2015.82"},{"key":"e_1_3_3_1_7_2","doi-asserted-by":"publisher","DOI":"10.1109\/HiPCW63042.2024.00022"},{"key":"e_1_3_3_1_8_2","doi-asserted-by":"crossref","unstructured":"D.H. Bailey E. Barszcz J.T. Barton D.S. Browning R.L. Carter L. Dagum R.A. Fatoohi P.O. Frederickson T.A. Lasinski R.S. Schreiber H.D. Simon V. Venkatakrishnan and S.K. Weeratunga. 1991. The Nas Parallel Benchmarks. The International Journal of Supercomputing Applications (1991).","DOI":"10.1177\/109434209100500306"},{"key":"e_1_3_3_1_9_2","doi-asserted-by":"publisher","DOI":"10.1145\/3520263.3534650"},{"key":"e_1_3_3_1_10_2","doi-asserted-by":"publisher","DOI":"10.1109\/INFCOM.2001.916641"},{"key":"e_1_3_3_1_11_2","doi-asserted-by":"publisher","DOI":"10.5555\/320176.320181"},{"key":"e_1_3_3_1_12_2","doi-asserted-by":"crossref","unstructured":"Henri Casanova Arnaud Giersch Arnaud Legrand Martin Quinson and Fr\u00e9d\u00e9ric Suter. 2025. Lowering Entry Barriers to Developing Custom Simulators of Distributed Applications and Platforms with SimGrid. Parallel Comput. 123 (2025) 103\u2013125.","DOI":"10.1016\/j.parco.2025.103125"},{"key":"e_1_3_3_1_13_2","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2014.20"},{"key":"e_1_3_3_1_14_2","first-page":"799","volume-title":"2020 USENIX Annual Technical Conference (USENIX ATC 20)","author":"Chen Zhangyu","year":"2020","unstructured":"Zhangyu Chen, Yu Hua, Bo Ding, and Pengfei Zuo. 2020. Lock-free concurrent level hashing for persistent memory. In 2020 USENIX Annual Technical Conference (USENIX ATC 20). 799\u2013812."},{"key":"e_1_3_3_1_15_2","doi-asserted-by":"publisher","DOI":"10.1145\/3127024.3127035"},{"key":"e_1_3_3_1_16_2","unstructured":"Jungmin Choi. [n. d.]. Exploring CXL Memory Disaggregation: Use Cases and System Benefits. https:\/\/memverge.com\/wp-content\/uploads\/Memory-Fabric-Forum-at-OCP-Global-Summit-2024-%E2%80%93-SK-hynix_Exploring-CXL-Memory-Disaggregation.pdf"},{"key":"e_1_3_3_1_17_2","unstructured":"CXL Micron Research Kit. 2024. Famfs Shared Memory Filesystem Framework. https:\/\/github.com\/cxl-micron-reskit\/famfs. Accessed: 2025-04-05."},{"key":"e_1_3_3_1_18_2","unstructured":"Open Fabrics\u00a0Enterprise Distribution. [n. d.]. perftest. https:\/\/github.com\/linux-rdma\/perftest."},{"key":"e_1_3_3_1_19_2","first-page":"401","volume-title":"11th USENIX Symposium on Networked Systems Design and Implementation (NSDI 14)","author":"Dragojevi\u0107 Aleksandar","year":"2014","unstructured":"Aleksandar Dragojevi\u0107, Dushyanth Narayanan, Miguel Castro, and Orion Hodson. 2014. { FaRM} : Fast remote memory. In 11th USENIX Symposium on Networked Systems Design and Implementation (NSDI 14). 401\u2013414."},{"key":"e_1_3_3_1_20_2","doi-asserted-by":"crossref","unstructured":"Subramanya\u00a0R. Dulloor Amitabha Roy Zheguang Zhao Narayanan Sundaram Nadathur Satish Rajesh Sankaran Jeffrey\u00a0R. Jackson and Karsten Schwan. 2016. Data tiering in heterogeneous memory systems. Proceedings of the Eleventh European Conference on Computer Systems (2016). https:\/\/api.semanticscholar.org\/CorpusID:8681081","DOI":"10.1145\/2901318.2901344"},{"key":"e_1_3_3_1_21_2","doi-asserted-by":"publisher","DOI":"10.1145\/3502181.3531468"},{"key":"e_1_3_3_1_22_2","doi-asserted-by":"publisher","DOI":"10.1145\/3624062.3624175"},{"key":"e_1_3_3_1_23_2","doi-asserted-by":"publisher","DOI":"10.1109\/HiPCW63042.2024.00026"},{"key":"e_1_3_3_1_24_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-540-87475-1_21"},{"key":"e_1_3_3_1_25_2","doi-asserted-by":"publisher","DOI":"10.1007\/3-540-45825-5_5"},{"key":"e_1_3_3_1_26_2","first-page":"649","volume-title":"14th USENIX Symposium on Networked Systems Design and Implementation (NSDI 17)","author":"Gu Juncheng","year":"2017","unstructured":"Juncheng Gu, Youngmoon Lee, Yiwen Zhang, Mosharaf Chowdhury, and Kang\u00a0G Shin. 2017. Efficient memory disaggregation with infiniswap. In 14th USENIX Symposium on Networked Systems Design and Implementation (NSDI 17). 649\u2013667."},{"key":"e_1_3_3_1_27_2","unstructured":"Part Guide. 2011. Intel\u00ae 64 and ia-32 architectures software developer\u2019s manual. Volume 3B: system programming guide Part 2 11 (2011) 0\u201340."},{"key":"e_1_3_3_1_28_2","doi-asserted-by":"crossref","unstructured":"Mark Hildebrand Jawad\u00a0Ali Khan Sanjeev\u00a0N. Trika Jason Lowe-Power and Venkatesh Akella. 2020. AutoTM: Automatic Tensor Movement in Heterogeneous Memory Systems using Integer Linear Programming. Proceedings of the Twenty-Fifth International Conference on Architectural Support for Programming Languages and Operating Systems (2020). https:\/\/api.semanticscholar.org\/CorpusID:212641763","DOI":"10.1145\/3373376.3378465"},{"key":"e_1_3_3_1_29_2","first-page":"306","volume-title":"International workshop on languages and compilers for parallel computing","author":"Huang Chao","year":"2003","unstructured":"Chao Huang, Orion Lawlor, and Laxmikant\u00a0V Kale. 2003. Adaptive mpi. In International workshop on languages and compilers for parallel computing. Springer, 306\u2013322."},{"key":"e_1_3_3_1_30_2","doi-asserted-by":"publisher","DOI":"10.1109\/CLUSTR.2008.4663761"},{"key":"e_1_3_3_1_31_2","doi-asserted-by":"publisher","DOI":"10.1109\/CLUSTER.2017.42"},{"key":"e_1_3_3_1_32_2","unstructured":"Intel Corporation. 2019. Intel Memory Latency Checker v3.5. https:\/\/software.intel.com\/en-us\/articles\/intelr-memory-latency-checker"},{"key":"e_1_3_3_1_33_2","unstructured":"Sunita Jain Nagaradhesh Yeleswarapu Hasan\u00a0Al Maruf and Rita Gupta. 2024. Memory Sharing with CXL: Hardware and Software Design Approaches. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2404.03245 (2024)."},{"key":"e_1_3_3_1_34_2","first-page":"585","volume-title":"2023 USENIX Annual Technical Conference (USENIX ATC 23)","author":"Jang Junhyeok","year":"2023","unstructured":"Junhyeok Jang, Hanjin Choi, Hanyeoreum Bae, Seungjun Lee, Miryeong Kwon, and Myoungsoo Jung. 2023. CXL-ANNS: Software-Hardware Collaborative Memory Disaggregation and Computation for Billion-Scale Approximate Nearest Neighbor Search. In 2023 USENIX Annual Technical Conference (USENIX ATC 23). USENIX Association, Boston, MA, 585\u2013600. https:\/\/www.usenix.org\/conference\/atc23\/presentation\/jang"},{"key":"e_1_3_3_1_35_2","doi-asserted-by":"crossref","unstructured":"Sudarsun Kannan Ada Gavrilovska Vishal Gupta and Karsten Schwan. 2017. HeteroOS \u2014 OS design for heterogeneous memory management in datacenter. 2017 ACM\/IEEE 44th Annual International Symposium on Computer Architecture (ISCA) (2017) 521\u2013534. https:\/\/api.semanticscholar.org\/CorpusID:19189083","DOI":"10.1145\/3079856.3080245"},{"key":"e_1_3_3_1_36_2","volume-title":"USENIX Annual Technical Conference","author":"Kim Jonghyeon","year":"2021","unstructured":"Jonghyeon Kim, Wonkyo Choe, and Jeongseob Ahn. 2021. Exploring the Design Space of Page Management for Multi-Tiered Memory Systems. In USENIX Annual Technical Conference. https:\/\/api.semanticscholar.org\/CorpusID:236992513"},{"key":"e_1_3_3_1_37_2","doi-asserted-by":"crossref","unstructured":"Sandeep Kumar Aravinda Prasad Smruti\u00a0Ranjan Sarangi and Sreenivas Subramoney. 2021. Radiant: efficient page table management for tiered memory systems. Proceedings of the 2021 ACM SIGPLAN International Symposium on Memory Management (2021). https:\/\/api.semanticscholar.org\/CorpusID:235463147","DOI":"10.1145\/3459898.3463907"},{"key":"e_1_3_3_1_38_2","unstructured":"Argonne\u00a0National Lab. [n. d.]. MPICH | High-Performance Portable MPI. https:\/\/www.mpich.org\/"},{"key":"e_1_3_3_1_39_2","unstructured":"Lawrence Berkeley\u00a0National Laboratory. 2025. iPerf - The TCP UDP and SCTP network bandwidth measurement tool. https:\/\/iperf.fr\/"},{"key":"e_1_3_3_1_40_2","unstructured":"Network-Based\u00a0Computing Laboratory. [n. d.]. OSU Micro-Benchmarks.https:\/\/mvapich.cse.ohio-state.edu\/benchmarks\/"},{"key":"e_1_3_3_1_41_2","doi-asserted-by":"publisher","DOI":"10.1145\/3575693.3578835"},{"key":"e_1_3_3_1_42_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-32041-5_16"},{"key":"e_1_3_3_1_43_2","volume-title":"International Symposium on High-Performance Parallel and Distributed Computing","author":"Li Shigang","year":"2018","unstructured":"Shigang Li, Torsten Hoefler, and Marc Snir. 2018. NUMA-Aware Shared-Memory Collective Communication for MPI. In International Symposium on High-Performance Parallel and Distributed Computing."},{"key":"e_1_3_3_1_44_2","doi-asserted-by":"crossref","unstructured":"Zhe Li and Mingyu Wu. 2022. Transparent and lightweight object placement for managed workloads atop hybrid memories. Proceedings of the 18th ACM SIGPLAN\/SIGOPS International Conference on Virtual Execution Environments (2022). https:\/\/api.semanticscholar.org\/CorpusID:247108266","DOI":"10.1145\/3516807.3516822"},{"key":"e_1_3_3_1_45_2","unstructured":"Jinshu Liu Hamid Hadian Hanchen Xu Daniel\u00a0S. Berger and Huaicheng Li. 2024. Dissecting CXL Memory Performance at Scale: Analysis Modeling and Optimization. arxiv:https:\/\/arXiv.org\/abs\/2409.14317\u00a0[cs.OS] https:\/\/arxiv.org\/abs\/2409.14317"},{"key":"e_1_3_3_1_46_2","doi-asserted-by":"publisher","DOI":"10.1145\/3447818.3460355"},{"key":"e_1_3_3_1_47_2","doi-asserted-by":"publisher","DOI":"10.1145\/3437801.3441581"},{"key":"e_1_3_3_1_48_2","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2018.00059"},{"key":"e_1_3_3_1_49_2","volume-title":"Proceedings of Supercomputing \u201920 (Posters)","author":"Luo Jiaolin","year":"2020","unstructured":"Jiaolin Luo, Luanzheng Guo, Jie Ren, Kai Wu, and Dong Li. 2020. Enabling Faster NGS Analysis on Optane-based Heterogeneous Memory. In Proceedings of Supercomputing \u201920 (Posters)."},{"key":"e_1_3_3_1_50_2","volume-title":"International Symposium on High Performance Computer Architecture (HPCA)","author":"Ma Bin","year":"2025","unstructured":"Bin Ma, Jie Ren, Shuangyan Yang, Benjamin Francis, Ehsan Ardestani, Min Si, and Dong Li. 2025. Machine Learning-Guided Memory Optimization for DLRM Inference on Tiered Memory. In International Symposium on High Performance Computer Architecture (HPCA)."},{"key":"e_1_3_3_1_51_2","volume-title":"USENIX Annual Technical Conference","author":"Ma Teng","year":"2024","unstructured":"Teng Ma, Zheng Liu, Chengkun Wei, Jialiang Huang, Youwei Zhuo, Haoyu Li, Ning Zhang, Yijin Guan, Dimin Niu, Mingxing Zhang, et\u00a0al. 2024. { HydraRPC} :{ RPC} in the { CXL} Era. In USENIX Annual Technical Conference."},{"key":"e_1_3_3_1_52_2","doi-asserted-by":"crossref","unstructured":"Adnan Maruf Ashikee Ghosh Janki Bhimani Daniela Campello Andy Rudoff and Raju Rangaswami. 2022. MULTI-CLOCK: Dynamic Tiering for Hybrid Memory Systems. 2022 IEEE International Symposium on High-Performance Computer Architecture (HPCA) (2022) 925\u2013937. https:\/\/api.semanticscholar.org\/CorpusID:248865268","DOI":"10.1109\/HPCA53966.2022.00072"},{"key":"e_1_3_3_1_53_2","unstructured":"NVIDIA. [n. d.]. NVIDIA ConnectX-6 InfiniBand\/Ethernet Adapter Cards User Manual. https:\/\/docs.nvidia.com\/networking\/display\/connectx6vpi\/introduction"},{"key":"e_1_3_3_1_54_2","volume-title":"Proceedings of the International Symposium on Memory Systems","author":"Peng Ivy","year":"2022","unstructured":"Ivy Peng, Ian Karlin, Maya Gokhale, Kathleen Shoga, Matthew Legendre, and Todd Gamblin. 2022. A Holistic View of Memory Utilization on HPC Systems: Current and Future Trends. In Proceedings of the International Symposium on Memory Systems."},{"key":"e_1_3_3_1_55_2","unstructured":"Amanda Raybuck Tim Stamler Wei Zhang Mattan Erez and Simon Peter. 2021. HeMem: Scalable Tiered Memory Management for Big Data Applications and Real NVM. Proceedings of the ACM SIGOPS 28th Symposium on Operating Systems Principles (2021). https:\/\/api.semanticscholar.org\/CorpusID:239029009"},{"key":"e_1_3_3_1_56_2","doi-asserted-by":"publisher","DOI":"10.1145\/3447818.3460356"},{"key":"e_1_3_3_1_57_2","doi-asserted-by":"crossref","unstructured":"Jie Ren Jiaolin Luo Kai Wu Minjia Zhang and Hyeran Jeon. 2021. Sentinel: Efficient Tensor Migration and Allocation on Heterogeneous Memory Systems for Deep Learning. 2021 IEEE International Symposium on High-Performance Computer Architecture (HPCA) (2021) 598\u2013611. https:\/\/api.semanticscholar.org\/CorpusID:231620477","DOI":"10.1109\/HPCA51647.2021.00057"},{"key":"e_1_3_3_1_58_2","doi-asserted-by":"publisher","DOI":"10.1145\/3627703.3650075"},{"key":"e_1_3_3_1_59_2","volume-title":"Conference on Neural Information Processing Systems (NeurIPS)","author":"Ren Jie","year":"2020","unstructured":"Jie Ren, Minjia Zhang, and Dong Li. 2020. HM-ANN: Efficient Billion-Point Nearest Neighbor Search on Heterogeneous Memory. In Conference on Neural Information Processing Systems (NeurIPS)."},{"key":"e_1_3_3_1_60_2","doi-asserted-by":"crossref","unstructured":"Niklas Schelten Fritjof Steinert Justin Knapheide Anton Schulte and Benno Stabernack. 2022. A High-Throughput Resource-Efficient Implementation of the RoCEv2 Remote DMA Protocol and its Application. ACM Transactions on Reconfigurable Technologies and Systems Article 5 (2022).","DOI":"10.1145\/3543176"},{"key":"e_1_3_3_1_61_2","doi-asserted-by":"publisher","DOI":"10.1109\/HOTI55740.2022.00017"},{"key":"e_1_3_3_1_62_2","unstructured":"Debendra\u00a0Das Sharma Robert Blankenship and Daniel\u00a0S. Berger. 2023. An Introduction to the Compute Express Link (CXL) Interconnect. arxiv:https:\/\/arXiv.org\/abs\/2306.11227\u00a0[cs.AR]"},{"key":"e_1_3_3_1_63_2","doi-asserted-by":"publisher","DOI":"10.1145\/1542275.1542320"},{"key":"e_1_3_3_1_64_2","doi-asserted-by":"publisher","unstructured":"Nawrin Sultana Martin R\u00fcfenacht Anthony Skjellum Purushotham Bangalore Ignacio Laguna and Kathryn Mohror. [n. d.]. Understanding the use of message passing interface in exascale proxy applications. Concurrency and Computation: Practice and Experience 33 14 ([n. d.]). 10.1002\/cpe.5901","DOI":"10.1002\/cpe.5901"},{"key":"e_1_3_3_1_65_2","doi-asserted-by":"publisher","DOI":"10.1145\/3613424.3614256"},{"key":"e_1_3_3_1_66_2","doi-asserted-by":"publisher","DOI":"10.1145\/3627703.3650061"},{"key":"e_1_3_3_1_67_2","unstructured":"Mellanox Technologies. [n. d.]. HP and Mellanox Benchmarking Report for Ultra Low Latency 10 and 40Gb\/s Ethernet Interconnect.https:\/\/network.nvidia.com\/related-docs\/whitepapers\/HP_Mellanox_FSI%20Benchmarking%20Report%20for%2010%20%26%2040GbE.pdf"},{"key":"e_1_3_3_1_68_2","doi-asserted-by":"publisher","DOI":"10.1145\/3626203.3670533"},{"key":"e_1_3_3_1_69_2","doi-asserted-by":"publisher","DOI":"10.1109\/HOTI.2019.00017"},{"key":"e_1_3_3_1_70_2","doi-asserted-by":"publisher","DOI":"10.1109\/CLUSTER.2015.129"},{"key":"e_1_3_3_1_71_2","doi-asserted-by":"publisher","DOI":"10.1109\/MCHPC56545.2022.00007"},{"key":"e_1_3_3_1_72_2","doi-asserted-by":"publisher","DOI":"10.1145\/3581784.3607108"},{"key":"e_1_3_3_1_73_2","doi-asserted-by":"publisher","DOI":"10.1145\/3581784.3607108"},{"key":"e_1_3_3_1_74_2","doi-asserted-by":"publisher","DOI":"10.1109\/HiPCW63042.2024.00039"},{"key":"e_1_3_3_1_75_2","doi-asserted-by":"crossref","unstructured":"Chenxi Wang Huimin Cui Ting Cao John\u00a0N. Zigman Haris Volos Onur Mutlu Fang Lv Xiaobing Feng and Guoqing\u00a0Harry Xu. 2019. Panthera: holistic memory management for big data processing over hybrid memories. Proceedings of the 40th ACM SIGPLAN Conference on Programming Language Design and Implementation (2019). https:\/\/api.semanticscholar.org\/CorpusID:150372592","DOI":"10.1145\/3314221.3314650"},{"key":"e_1_3_3_1_76_2","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS64566.2025.00097"},{"key":"e_1_3_3_1_77_2","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO50266.2020.00049"},{"key":"e_1_3_3_1_78_2","doi-asserted-by":"crossref","unstructured":"Johannes Weiner Niket Agarwal Dan Schatzberg Leon Yang Hao Wang Blaise Sanouillet Bikash Sharma Tejun Heo M. Jain Chunqiang Tang and Dimitrios Skarlatos. 2022. TMO: transparent memory offloading in datacenters. Proceedings of the 27th ACM International Conference on Architectural Support for Programming Languages and Operating Systems (2022). https:\/\/api.semanticscholar.org\/CorpusID:247026540","DOI":"10.1145\/3503222.3507731"},{"key":"e_1_3_3_1_79_2","doi-asserted-by":"crossref","unstructured":"Sam White and Laxmikant\u00a0V Kale. 2020. Optimizing Point-to-Point Communication between Adaptive MPI Endpoints in Shared Memory. Concurrency and Computation: Practice and Experience 32 3 (2020).","DOI":"10.1002\/cpe.4467"},{"key":"e_1_3_3_1_80_2","doi-asserted-by":"publisher","DOI":"10.1145\/3695794.3695809"},{"key":"e_1_3_3_1_81_2","doi-asserted-by":"publisher","DOI":"10.1145\/3126908.3126923"},{"key":"e_1_3_3_1_82_2","doi-asserted-by":"publisher","DOI":"10.1109\/SC.2018.00034"},{"key":"e_1_3_3_1_83_2","doi-asserted-by":"publisher","DOI":"10.1145\/2907294.2907321"},{"key":"e_1_3_3_1_84_2","doi-asserted-by":"publisher","DOI":"10.1145\/3492321.3519556"},{"key":"e_1_3_3_1_85_2","unstructured":"Rui Xie Asad\u00a0Ul Haq Linsen Ma Krystal Sun Sanchari Sen Swagath Venkataramani Liu Liu and Tong Zhang. 2024. SmartQuant: CXL-based AI Model Store in Support of Runtime Configurable Weight Quantization. arxiv:https:\/\/arXiv.org\/abs\/2407.15866\u00a0[cs.LG] https:\/\/arxiv.org\/abs\/2407.15866"},{"key":"e_1_3_3_1_86_2","doi-asserted-by":"publisher","DOI":"10.1145\/3572848.3577497"},{"key":"e_1_3_3_1_87_2","doi-asserted-by":"crossref","unstructured":"Zi-Wei Xiong De-Jun Jiang Jin Xiong and Ren Ren. 2023. Dalea: A persistent multi-level extendible hashing with improved tail performance. Journal of Computer Science and Technology 38 5 (2023) 1051\u20131073.","DOI":"10.1007\/s11390-023-2957-8"},{"key":"e_1_3_3_1_88_2","volume-title":"30th USENIX Annual Technical Conference (ATC)","author":"Xu Dong","year":"2024","unstructured":"Dong Xu, Junhee Ryu, Jinho Baek, Kwangsik Shin, Pengfei Su, and Dong Li. 2024. FlexMem: Adaptive Page Profiling and Migration for Tiered Memory. In 30th USENIX Annual Technical Conference (ATC)."},{"key":"e_1_3_3_1_89_2","doi-asserted-by":"publisher","DOI":"10.1145\/3575693.3575725"},{"key":"e_1_3_3_1_90_2","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA61900.2025.00083"},{"key":"e_1_3_3_1_91_2","unstructured":"Yujie Yang Lingfeng Xiang Peiran Du Zhen Lin Weishu Deng Ren Wang Andrey Kudryavtsev Louis Ko Hui Lu and Jia Rao. 2025. Architectural and System Implications of CXL-enabled Tiered Memory. arxiv:https:\/\/arXiv.org\/abs\/2503.17864\u00a0[cs.AR] https:\/\/arxiv.org\/abs\/2503.17864"},{"key":"e_1_3_3_1_92_2","doi-asserted-by":"publisher","DOI":"10.1145\/3600006.3613135"},{"key":"e_1_3_3_1_93_2","first-page":"461","volume-title":"13th { USENIX} Symposium on Operating Systems Design and Implementation ({ OSDI} 18)","author":"Zuo Pengfei","year":"2018","unstructured":"Pengfei Zuo, Yu Hua, and Jie Wu. 2018. Write-optimized and high-performance hashing index scheme for persistent memory. In 13th { USENIX} Symposium on Operating Systems Design and Implementation ({ OSDI} 18). 461\u2013476."},{"key":"e_1_3_3_1_94_2","doi-asserted-by":"crossref","unstructured":"Pengfei Zuo Yu Hua and Jie Wu. 2019. Level hashing: A high-performance and flexible-resizing persistent hashing index structure. ACM Transactions on Storage (TOS) 15 2 (2019) 1\u201330.","DOI":"10.1145\/3322096"}],"event":{"name":"SC '25: The International Conference for High Performance Computing, Networking, Storage and Analysis","location":"St. Louis MO USA","acronym":"SC '25","sponsor":["SIGHPC ACM Special Interest Group on High Performance Computing, Special Interest Group on High Performance Computing"]},"container-title":["Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/abs\/10.1145\/3712285.3759816","content-type":"text\/html","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3712285.3759816","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3712285.3759816","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,3,11]],"date-time":"2026-03-11T18:23:45Z","timestamp":1773253425000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3712285.3759816"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,11,15]]},"references-count":93,"alternative-id":["10.1145\/3712285.3759816","10.1145\/3712285"],"URL":"https:\/\/doi.org\/10.1145\/3712285.3759816","relation":{},"subject":[],"published":{"date-parts":[[2025,11,15]]},"assertion":[{"value":"2025-11-15","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}