{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,9]],"date-time":"2026-01-09T11:45:42Z","timestamp":1767959142301,"version":"3.49.0"},"publisher-location":"New York, NY, USA","reference-count":50,"publisher":"ACM","funder":[{"name":"Plasma-PEPSC","award":["101093261"],"award-info":[{"award-number":["101093261"]}]},{"name":"SEANERGYS","award":["101177590"],"award-info":[{"award-number":["101177590"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2026,1,26]]},"DOI":"10.1145\/3773656.3773661","type":"proceedings-article","created":{"date-parts":[[2026,1,9]],"date-time":"2026-01-09T10:22:11Z","timestamp":1767954131000},"page":"258-270","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Modeling the Potential of Message-Free Communication via CXL.mem"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0008-4120-9472","authenticated-orcid":false,"given":"Stepan","family":"Vanecek","sequence":"first","affiliation":[{"name":"Technical University of Munich, Munich, Germany"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8529-6709","authenticated-orcid":false,"given":"Matthew","family":"Turner","sequence":"additional","affiliation":[{"name":"Hewlett Packard Enterprise, Houston, TX, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-6589-4209","authenticated-orcid":false,"given":"Manisha","family":"Gajbe","sequence":"additional","affiliation":[{"name":"Self, Folsom, CA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8393-4436","authenticated-orcid":false,"given":"Matthew","family":"Wolf","sequence":"additional","affiliation":[{"name":"Self, Portland, OR, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9013-435X","authenticated-orcid":false,"given":"Martin","family":"Schulz","sequence":"additional","affiliation":[{"name":"Technical University of Munich, Munich, Germany"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2026,1,25]]},"reference":[{"key":"e_1_3_3_2_2_2","volume-title":"AMD uProf User Guide","author":"Inc Advanced Micro Devices","year":"2022","unstructured":"Advanced Micro Devices Inc. 2022. AMD uProf User Guide. AMD."},{"key":"e_1_3_3_2_3_2","doi-asserted-by":"publisher","DOI":"10.1109\/BigData62323.2024.10825804"},{"key":"e_1_3_3_2_4_2","doi-asserted-by":"crossref","unstructured":"Soramichi Akiyama and Takahiro Hirofuchi. 2017. Quantitative evaluation of intel pebs overhead for online system-noise analysis(ROSS \u201917). 1\u20138.","DOI":"10.1145\/3095770.3095773"},{"key":"e_1_3_3_2_5_2","volume-title":"Intl. Symp. on Microarchitecture, Davis, CA","author":"Black Bryan","year":"2013","unstructured":"Bryan Black. 2013. Die stacking is happening. In Intl. Symp. on Microarchitecture, Davis, CA."},{"key":"e_1_3_3_2_6_2","doi-asserted-by":"publisher","DOI":"10.1109\/PDP.2010.67"},{"key":"e_1_3_3_2_7_2","unstructured":"Barcelona\u00a0Supercomputing Center. Accessed: 20.3.2024. Extrae: Performance Analysis Tool. https:\/\/tools.bsc.es\/extrae."},{"key":"e_1_3_3_2_8_2","unstructured":"CXL Consortium. 2024. CXL Specification Revision 3.2. https:\/\/computeexpresslink.org\/cxl-specification. Accessed: 2025-04-18."},{"key":"e_1_3_3_2_9_2","doi-asserted-by":"publisher","DOI":"10.1145\/2901318.2901344"},{"key":"e_1_3_3_2_10_2","doi-asserted-by":"crossref","unstructured":"Eichenberger Alexandre E et al.2013. OMPT: An OpenMP Tools Application Programming Interface for Performance Analysis Vol.\u00a08122. doi:10.1007\/978-3-642-40698-0_13","DOI":"10.1007\/978-3-642-40698-0_13"},{"key":"e_1_3_3_2_11_2","doi-asserted-by":"crossref","unstructured":"Cl\u00e9ment Foyer Brice Goglin and Andr\u00e8s\u00a0Rubio Proa\u00f1o. 2023. A survey of software techniques to emulate heterogeneous memory systems in high-performance computing. Parallel Comput. 116 (2023) 103023.","DOI":"10.1016\/j.parco.2023.103023"},{"key":"e_1_3_3_2_12_2","doi-asserted-by":"publisher","DOI":"10.1145\/3624062.3624175"},{"key":"e_1_3_3_2_13_2","doi-asserted-by":"crossref","unstructured":"Markus Geimer Felix Wolf Brian\u00a0JN Wylie Erika \u00c1brah\u00e1m Daniel Becker and Bernd Mohr. 2010. The Scalasca performance toolset architecture. Concurrency and computation: Practice and experience 22 6 (2010) 702\u2013719.","DOI":"10.1002\/cpe.1556"},{"key":"e_1_3_3_2_14_2","doi-asserted-by":"publisher","DOI":"10.1109\/SC.2014.19"},{"key":"e_1_3_3_2_15_2","unstructured":"Alfredo Gim\u00e9nez Benafsh Husain David B\u00f6hme Todd Gamblin and Martin Schulz. 2015. Mitos: A Simple Interface for Complex Hardware Sampling and Attribution."},{"key":"e_1_3_3_2_16_2","unstructured":"glibc [n. d.]. The GNU C Library (glibc). https:\/\/sourceware.org\/glibc\/."},{"key":"e_1_3_3_2_17_2","doi-asserted-by":"publisher","DOI":"10.1145\/3466752.3480058"},{"key":"e_1_3_3_2_18_2","volume-title":"Presented on SC19 Conference, Denver, CO","author":"Gruber Thomas","year":"2019","unstructured":"Thomas Gruber, Jan Eitzinger, Georg Hager, and Gerhard Wellein. 2019. likwid 5: Lightweight performance tools. In Presented on SC19 Conference, Denver, CO."},{"key":"e_1_3_3_2_19_2","doi-asserted-by":"crossref","unstructured":"F.T. Hady A. Foong B. Veal and D. Williams. 2017. Platform storage performance with 3D XPoint technology. Proc. IEEE 105 9 (2017) 1822\u20131833.","DOI":"10.1109\/JPROC.2017.2731776"},{"key":"e_1_3_3_2_20_2","doi-asserted-by":"publisher","DOI":"10.2172\/1089988"},{"key":"e_1_3_3_2_21_2","doi-asserted-by":"crossref","unstructured":"Roger\u00a0W Hockney. 1994. The communication challenge for MPP: Intel Paragon and Meiko CS-2. Parallel computing 20 3 (1994) 389\u2013398.","DOI":"10.1016\/S0167-8191(06)80021-9"},{"key":"e_1_3_3_2_22_2","doi-asserted-by":"publisher","DOI":"10.1145\/1851476.1851564"},{"key":"e_1_3_3_2_23_2","unstructured":"Intel Corporation. 2024. Intel\u00ae VTune\u2122 Profiler. https:\/\/www.intel.com\/content\/www\/us\/en\/developer\/tools\/oneapi\/vtune-profiler.html Accessed: 2024-12-05."},{"key":"e_1_3_3_2_24_2","volume-title":"IEEE Cluster 2022-2022 IEEE International Conference on Cluster Computing","author":"Klinkenberg Jannis","year":"2022","unstructured":"Jannis Klinkenberg, Anara Kozhokanova, Christian Terboven, Cl\u00e9ment Foyer, Brice Goglin, and Emmanuel Jeannot. 2022. H2M: Towards Heuristics for Heterogeneous Memory. In IEEE Cluster 2022-2022 IEEE International Conference on Cluster Computing."},{"key":"e_1_3_3_2_25_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-540-68564-7_9"},{"key":"e_1_3_3_2_26_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-31476-6_7"},{"key":"e_1_3_3_2_27_2","unstructured":"Jinshu Liu Hamid Hadian Hanchen Xu Daniel\u00a0S Berger and Huaicheng Li. 2024. Dissecting cxl memory performance at scale: Analysis modeling and optimization. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2409.14317 (2024)."},{"key":"e_1_3_3_2_28_2","doi-asserted-by":"crossref","unstructured":"Xu Liu and John Mellor-Crummey. 2014. A tool to analyze the performance of multithreaded programs on NUMA architectures. ACM Sigplan Notices 49 8 (2014) 259\u2013272.","DOI":"10.1145\/2692916.2555271"},{"key":"e_1_3_3_2_29_2","doi-asserted-by":"crossref","unstructured":"Chi-Keung Luk Robert Cohn Robert Muth Harish Patil Artur Klauser Geoff Lowney Steven Wallace Vijay\u00a0Janapa Reddi and Kim Hazelwood. 2005. Pin: building customized program analysis tools with dynamic instrumentation. Acm sigplan notices 40 6 (2005) 190\u2013200.","DOI":"10.1145\/1064978.1065034"},{"key":"e_1_3_3_2_30_2","volume-title":"MPI: A Message-Passing Interface Standard Version 4.1","author":"Forum Message Passing Interface","year":"2023","unstructured":"Message Passing Interface Forum. 2023. MPI: A Message-Passing Interface Standard Version 4.1. https:\/\/www.mpi-forum.org\/docs\/mpi-4.1\/mpi41-report.pdf"},{"key":"e_1_3_3_2_31_2","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2015.7056027"},{"key":"e_1_3_3_2_32_2","volume-title":"Proceedings of the department of defense HPCMP users group conference","volume":"710","author":"Mucci Philip\u00a0J","year":"1999","unstructured":"Philip\u00a0J Mucci, Shirley Browne, Christine Deane, and George Ho. 1999. PAPI: A portable interface to hardware performance counters. In Proceedings of the department of defense HPCMP users group conference , Vol.\u00a0710."},{"key":"e_1_3_3_2_33_2","unstructured":"Network-Based Computing Laboratory. 2023. OSU Micro-Benchmarks. http:\/\/mvapich.cse.ohio-state.edu\/benchmarks\/. Accessed: 2025-09-09."},{"key":"e_1_3_3_2_34_2","doi-asserted-by":"publisher","DOI":"10.1145\/3286475.3286477"},{"key":"e_1_3_3_2_35_2","doi-asserted-by":"crossref","unstructured":"Ivy\u00a0Bo Peng Roberto Gioiosa Gokcen Kestor Pietro Cicotti Erwin Laure and Stefano Markidis. 2017. Rthms: A tool for data placement on hybrid memory system. ACM SIGPLAN Notices 52 9 (2017) 82\u201391.","DOI":"10.1145\/3156685.3092273"},{"key":"e_1_3_3_2_36_2","first-page":"17","volume-title":"Proceedings of WoTUG-18: transputer and occam developments","volume":"44","author":"Pillet Vincent","year":"1995","unstructured":"Vincent Pillet, Jes\u00fas Labarta, Toni Cortes, and Sergi Girona. 1995. Paraver: A tool to visualize and analyze parallel code. In Proceedings of WoTUG-18: transputer and occam developments , Vol.\u00a044. 17\u201331."},{"key":"e_1_3_3_2_37_2","volume-title":"QEMU \u2013 Compute Express Link (CXL)","author":"Developers QEMU Project","year":"2025","unstructured":"QEMU Project Developers. 2025. QEMU \u2013 Compute Express Link (CXL). https:\/\/www.qemu.org\/docs\/master\/system\/devices\/cxl.html Accessed: Sept. 17, 2025."},{"key":"e_1_3_3_2_38_2","doi-asserted-by":"crossref","unstructured":"Milan Radulovic Rommel S\u00e1nchez\u00a0Verdejo Paul Carpenter Petar Radojkovi\u0107 Bruce Jacob and Eduard Ayguad\u00e9. 2019. PROFET: Modeling system performance and energy without simulating the CPU. Proceedings of the ACM on Measurement and Analysis of Computing Systems 3 2 (2019) 1\u201333.","DOI":"10.1145\/3341617.3326149"},{"key":"e_1_3_3_2_39_2","unstructured":"Giridhar Ravipati Andrew\u00a0R Bernat Nate Rosenblum Barton\u00a0P Miller and Jeffrey\u00a0K Hollingsworth. 2007. Toward the deconstruction of Dyninst. Univ. of Wisconsin technical report 32 (2007)."},{"key":"e_1_3_3_2_40_2","doi-asserted-by":"crossref","unstructured":"Debendra\u00a0Das Sharma. 2022. Compute express link (cxl): Enabling heterogeneous data-centric computing with heterogeneous memory hierarchy. IEEE Micro 43 2 (2022) 99\u2013109.","DOI":"10.1109\/MM.2022.3228561"},{"key":"e_1_3_3_2_41_2","doi-asserted-by":"crossref","unstructured":"Sameer\u00a0S Shende and Allen\u00a0D Malony. 2006. The TAU parallel performance system. The International Journal of High Performance Computing Applications 20 2 (2006) 287\u2013311.","DOI":"10.1177\/1094342006064482"},{"key":"e_1_3_3_2_42_2","doi-asserted-by":"publisher","DOI":"10.1145\/3613424.3614256"},{"key":"e_1_3_3_2_43_2","doi-asserted-by":"publisher","DOI":"10.1145\/3626203.3670533"},{"key":"e_1_3_3_2_44_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-31476-6_3"},{"key":"e_1_3_3_2_45_2","volume-title":"SC \u201923: The International Conference for High Performance Computing, Networking, Storage and Analysis","author":"Vanecek Stepan","year":"2023","unstructured":"Stepan Vanecek and Martin Schulz. 2023. sys-sage: A Fresh View on Dynamic Topologies & Attributes of HPC Systems. In SC \u201923: The International Conference for High Performance Computing, Networking, Storage and Analysis. ACM, Denver, CO, USA. https:\/\/sc23.supercomputing.org\/proceedings\/tech_poster\/poster_files\/rpost114s3-file3.pdf Extended abstract for a research poster."},{"key":"e_1_3_3_2_46_2","doi-asserted-by":"publisher","DOI":"10.1145\/3650200.3656627"},{"key":"e_1_3_3_2_47_2","doi-asserted-by":"publisher","DOI":"10.1109\/MCHPC56545.2022.00007"},{"key":"e_1_3_3_2_48_2","unstructured":"Xi Wang Jie Liu Jianbo Wu Shuangyan Yang Jie Ren Bhanu Shankar and Dong Li. 2024. Exploring and evaluating real-world cxl: Use cases and system adoption. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2405.14209 (2024)."},{"key":"e_1_3_3_2_49_2","unstructured":"Vincent\u00a0M Weaver et\u00a0al. 2016. Advanced hardware profiling and sampling (PEBS IBS etc.): creating a new PAPI sampling interface. Technical Report UMAINE-VMWTR-PEBS-IBS-SAMPLING-2016-08. University of Maine Tech. Rep. (2016)."},{"key":"e_1_3_3_2_50_2","doi-asserted-by":"crossref","unstructured":"Wm\u00a0A Wulf and Sally\u00a0A McKee. 1995. Hitting the memory wall: Implications of the obvious. ACM SIGARCH computer architecture news 23 1 (1995).","DOI":"10.1145\/216585.216588"},{"key":"e_1_3_3_2_51_2","doi-asserted-by":"publisher","DOI":"10.1145\/3409963.3410490"}],"event":{"name":"SCA\/HPCAsia 2026: Supercomputing Asia and International Conference on High Performance Computing in Asia Pacific Region","location":"Osaka Japan","acronym":"SCA\/HPCAsia 2026"},"container-title":["Proceedings of the Supercomputing Asia and International Conference on High Performance Computing in Asia Pacific Region"],"original-title":[],"deposited":{"date-parts":[[2026,1,9]],"date-time":"2026-01-09T10:23:05Z","timestamp":1767954185000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3773656.3773661"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,1,25]]},"references-count":50,"alternative-id":["10.1145\/3773656.3773661","10.1145\/3773656"],"URL":"https:\/\/doi.org\/10.1145\/3773656.3773661","relation":{},"subject":[],"published":{"date-parts":[[2026,1,25]]},"assertion":[{"value":"2026-01-25","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}