{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,16]],"date-time":"2026-03-16T00:04:23Z","timestamp":1773619463278,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":108,"publisher":"ACM","funder":[{"DOI":"10.13039\/100000001","name":"NSF (National Science Foundation)","doi-asserted-by":"publisher","award":["CNS-2212192"],"award-info":[{"award-number":["CNS-2212192"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,9,8]]},"DOI":"10.1145\/3718958.3750479","type":"proceedings-article","created":{"date-parts":[[2025,8,27]],"date-time":"2025-08-27T16:54:11Z","timestamp":1756313651000},"page":"758-779","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":3,"title":["Understanding and Profiling CXL.mem Using PathFinder"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0006-6202-3728","authenticated-orcid":false,"given":"Xiao","family":"Li","sequence":"first","affiliation":[{"name":"University of Wisconsin-Madison, Madison, USA"},{"name":"Beihang University, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0000-9719-0397","authenticated-orcid":false,"given":"Zerui","family":"Guo","sequence":"additional","affiliation":[{"name":"University of Wisconsin-Madison, Madison, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2544-3989","authenticated-orcid":false,"given":"Yuebin","family":"Bai","sequence":"additional","affiliation":[{"name":"Beihang University, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8884-5010","authenticated-orcid":false,"given":"Mahesh","family":"Ketkar","sequence":"additional","affiliation":[{"name":"Intel, Folsom, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8086-6343","authenticated-orcid":false,"given":"Hugh","family":"Wilkinson","sequence":"additional","affiliation":[{"name":"Intel, Hudson, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6509-9449","authenticated-orcid":false,"given":"Ming","family":"Liu","sequence":"additional","affiliation":[{"name":"University of Wisconsin-Madison, Madison, USA"}]}],"member":"320","published-online":{"date-parts":[[2025,8,27]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"d.]. Intel VTune Profiler. https:\/\/software.intel.com\/content\/www\/us\/en\/develop\/tools\/oneapi\/components\/vtune-profiler.html, year =","year":"2025","unstructured":"[n. d.]. Intel VTune Profiler. https:\/\/software.intel.com\/content\/www\/us\/en\/develop\/tools\/oneapi\/components\/vtune-profiler.html, year = 2025,. ([n. d.])."},{"key":"e_1_3_2_1_2_1","unstructured":"2024. NVIDIA Nsight. https:\/\/developer.nvidia.com\/tools-overview. (2024)."},{"key":"e_1_3_2_1_3_1","unstructured":"2025. AMD uProf. https:\/\/www.amd.com\/en\/developer\/uprof.html. (2025)."},{"key":"e_1_3_2_1_4_1","unstructured":"2025. Hardware LLC prefetch feature on 4th Gen Intel Xeon Scalable Processor. https:\/\/www.intel.com\/content\/www\/us\/en\/content-details\/780991\/hardware-llc-prefetch-feature-on-4th-gen-intel-xeon-scalable-processor-codename-sapphire-rapids.html. (2025)."},{"key":"e_1_3_2_1_5_1","unstructured":"2025. InfluxData: InfluxDB Time Series Data Platform. https:\/\/www.influxdata.com. (2025)."},{"key":"e_1_3_2_1_6_1","unstructured":"2025. Intel Compute Express Link (CXL) FPGA IP. https:\/\/www.intel.com\/content\/www\/us\/en\/products\/details\/fpga\/intellectual-property\/interface-protocols\/cxl-ip.html. (2025)."},{"key":"e_1_3_2_1_7_1","unstructured":"2025. Intel Memory Latency Checker. https:\/\/www.intel.com\/content\/www\/us\/en\/developer\/articles\/tool\/intelr-memory-latency-checker.html. (2025)."},{"key":"e_1_3_2_1_8_1","unstructured":"2025. Intel VTune Profiler. https:\/\/www.intel.com\/content\/www\/us\/en\/developer\/tools\/oneapi\/vtune-profiler.html. (2025)."},{"key":"e_1_3_2_1_9_1","unstructured":"2025. Intel Xeon Processor Scalable Family Technical Overview. https:\/\/www.intel.com\/content\/www\/us\/en\/developer\/articles\/technical\/xeon-processor-scalable-family-technical-overview.html. (2025)."},{"key":"e_1_3_2_1_10_1","unstructured":"2025. IntelliProp's Omega Fabric. https:\/\/www.intelliprop.com\/products-page. (2025)."},{"key":"e_1_3_2_1_11_1","unstructured":"2025. INTERNET CONTROL MESSAGE PROTOCOL. https:\/\/datatracker.ietf.org\/doc\/html\/rfc792. (2025)."},{"key":"e_1_3_2_1_12_1","unstructured":"2025. \"Memory directories\" in Intel processors. https:\/\/sites.utexas.edu\/jdm4372\/2023\/08\/28\/memory-directories-in-intel-processors\/. (2025)."},{"key":"e_1_3_2_1_13_1","unstructured":"2025. Multichase: A Pointer Chaser Benchmark. https:\/\/www.amd.com\/en\/developer\/uprof.html. (2025)."},{"key":"e_1_3_2_1_14_1","unstructured":"2025. Redis. https:\/\/redis.io\/. (2025)."},{"key":"e_1_3_2_1_15_1","unstructured":"2025. Samsung CXL Memory Module - Box (CMM-B). https:\/\/semiconductor.samsung.com\/news-events\/tech-blog\/cxl-memory-module-box-cmm-b\/. (2025)."},{"key":"e_1_3_2_1_16_1","unstructured":"2025. SMART CXL Memory Modules. https:\/\/www.smartm.com\/product\/list\/cxl-memory?utm_source=CXL&utm_medium=Website&utm_term=CXL-Website-TR&utm_content=CXL-Website-Link&utm_campaign=CXL-Website. (2025)."},{"key":"e_1_3_2_1_17_1","volume-title":"SPEC CPU 2017","year":"2017","unstructured":"2025. SPEC CPU 2017. https:\/\/www.spec.org\/cpu2017\/. (2025)."},{"key":"e_1_3_2_1_18_1","unstructured":"2025. The CXL Specification. https:\/\/www.computeexpresslink.org\/download-the-specification. (2025)."},{"key":"e_1_3_2_1_19_1","unstructured":"2025. The Falcon C5022. https:\/\/www.h3platform.com\/product-detail\/overview\/35. (2025)."},{"key":"e_1_3_2_1_20_1","unstructured":"2025. The GigaIO FabreX Platform. https:\/\/gigaio.com\/products\/fabrex-system-overview\/. (2025)."},{"key":"e_1_3_2_1_21_1","unstructured":"2025. The Leo CXL\u2122 Memory Connectivity Platform. https:\/\/www.asteralabs.com\/products\/cxl-memory-platform\/leo-cxl-memory-connectivity-platform\/. (2025)."},{"key":"e_1_3_2_1_22_1","unstructured":"2025. The PCI Express (PCIe) Specification. https:\/\/pcisig.com\/specifications\/pciexpress\/. (2025)."},{"key":"e_1_3_2_1_23_1","unstructured":"2025. Traceroute Wikipedia. https:\/\/en.wikipedia.org\/wiki\/Traceroute. (2025)."},{"key":"e_1_3_2_1_24_1","unstructured":"2025. UnifabriX MAX. https:\/\/www.unifabrix.com\/technology. (2025)."},{"key":"e_1_3_2_1_25_1","unstructured":"2025. XConn Titan Evaluation Kit. https:\/\/www.xconn-tech.com\/products. (2025)."},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1145\/3563766.3564110"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1145\/3563766.3564110"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1145\/3603269.3604878"},{"key":"e_1_3_2_1_29_1","volume-title":"Ali Seyed Shirkhorshidi, and Teh Ying Wah","author":"Aghabozorgi Saeed","year":"2015","unstructured":"Saeed Aghabozorgi, Ali Seyed Shirkhorshidi, and Teh Ying Wah. 2015. Time-Series Clustering-a Decade Review. Information systems 53 (2015), 16\u201338."},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1145\/1402958.1402967"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1145\/3676641.3716007"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1145\/1015467.1015499"},{"key":"e_1_3_2_1_33_1","volume-title":"Patterson","author":"Beamer Scott","year":"2015","unstructured":"Scott Beamer, Krste Asanovi\u0107, and David A. Patterson. 2015. The GAP Benchmark Suite. ArXiv abs\/1508.03619 (2015). https:\/\/api.semanticscholar.org\/CorpusID:11503794"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1109\/MM.2023.3241586"},{"key":"e_1_3_2_1_35_1","volume-title":"2008 International Conference on Parallel Architectures and Compilation Techniques (PACT). 72\u201381","author":"Bienia Christian","year":"2008","unstructured":"Christian Bienia, Sanjeev Kumar, Jaswinder Pal Singh, and Kai Li. 2008. The PAR-SEC benchmark suite: Characterization and architectural implications. In 2008 International Conference on Parallel Architectures and Compilation Techniques (PACT). 72\u201381."},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1145\/3359989.3365408"},{"key":"e_1_3_2_1_37_1","volume-title":"2024 IEEE 32nd International Conference on Network Protocols (ICNP'24)","author":"Chen Xuzheng","year":"2024","unstructured":"Xuzheng Chen, Jie Zhang, Ting Fu, Yifan Shen, Shu Ma, Kun Qian, Lingjun Zhu, Chao Shi, Yin Zhang, Ming Liu, et al. 2024. Demystifying datapath accelerator enhanced off-path smartnic. In 2024 IEEE 32nd International Conference on Network Protocols (ICNP'24). 1\u201312."},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1002\/j.1538-7305.1953.tb01433.x"},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1145\/1807128.1807152"},{"key":"e_1_3_2_1_40_1","volume-title":"Slides from Linux Kongress","author":"De Melo Arnaldo Carvalho","unstructured":"Arnaldo Carvalho De Melo. 2010. The new linux'perf'tools. In Slides from Linux Kongress, Vol. 18. 1\u201342."},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1145\/781131.781159"},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1145\/3582016.3582031"},{"key":"e_1_3_2_1_43_1","volume-title":"Simulation and Application Profiling. In 2024 57th IEEE\/ACM International Symposium on Microarchitecture (MICRO'24)","author":"Esmaili-Dokht Pouya","year":"2024","unstructured":"Pouya Esmaili-Dokht, Francesco Sgherzi, Val\u00e9ria Soldera Girelli, Isaac Boixaderas, Mariana Carmin, Alireza Monemi, Adri\u00e0 Armejach, Estanislao Mercadal, Germ\u00e1n Llort, Petar Radojkovi\u0107, Miquel Moreto, Judit Gim\u00e9nez, Xavier Martorell, Eduard Ayguad\u00e9, Jesus Labarta, Emanuele Confalonieri, Rishabh Dubey, and Jason Adlard. 2024. A Mess of Memory System Benchmarking, Simulation and Application Profiling. In 2024 57th IEEE\/ACM International Symposium on Microarchitecture (MICRO'24). 136\u2013152."},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"crossref","first-page":"57","DOI":"10.1145\/2063176.2063196","article-title":"Bufferbloat: dark buffers in the internet","volume":"55","author":"Nichols Jim","year":"2012","unstructured":"Gettys, Jim and Nichols, Kathleen. 2012. Bufferbloat: dark buffers in the internet. Commun. ACM 55, 1 (2012), 57\u201365.","journal-title":"Commun. ACM"},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1109\/MM.2023.3237491"},{"key":"e_1_3_2_1_46_1","volume-title":"High-Performance Memory Disaggregation with DirectCXL. In 2022 USENIX Annual Technical Conference (ATC'22)","author":"Gouk Donghyun","year":"2022","unstructured":"Donghyun Gouk, Sangwon Lee, Miryeong Kwon, and Myoungsoo Jung. 2022. Direct Access, High-Performance Memory Disaggregation with DirectCXL. In 2022 USENIX Annual Technical Conference (ATC'22)."},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.1145\/872726.806987"},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"publisher","DOI":"10.1145\/3613424.3614291"},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"publisher","DOI":"10.1145\/3503222.3507762"},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"publisher","DOI":"10.1145\/3603269.3604880"},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"publisher","DOI":"10.1145\/1669112.1669165"},{"key":"e_1_3_2_1_52_1","volume-title":"19th USENIX Symposium on Networked Systems Design and Implementation (NSDI 22)","author":"Haecki Roni","year":"2022","unstructured":"Roni Haecki, Radhika Niranjan Mysore, Lalith Suresh, Gerd Zellweger, Bo Gan, Timothy Merrifield, Sujata Banerjee, and Timothy Roscoe. 2022. How to diagnose nanosecond network latencies in rich end-host stacks. In 19th USENIX Symposium on Networked Systems Design and Implementation (NSDI 22). Renton, WA, 861\u2013877."},{"key":"e_1_3_2_1_53_1","doi-asserted-by":"crossref","unstructured":"James D Hamilton. 2020. Time series analysis.","DOI":"10.2307\/j.ctv14jx6sm"},{"key":"e_1_3_2_1_54_1","volume-title":"Proceedings of the 28th ACM International Conference on Architectural Support for Programming Languages and Operating Systems (ASPLOS'23)","volume":"2","author":"He Yongchao","year":"2023","unstructured":"Yongchao He, Wenfei Wu, Yanfang Le, Ming Liu, and ChonLam Lao. 2023. A Generic Service to Provide In-Network Aggregation for Key-Value Streams. In Proceedings of the 28th ACM International Conference on Architectural Support for Programming Languages and Operating Systems (ASPLOS'23), Volume 2. 33\u201347."},{"key":"e_1_3_2_1_55_1","volume-title":"Understanding Routable PCIe Performance for Composable Infrastructures. In 21st USENIX Symposium on Networked Systems Design and Implementation (NSDI'24)","author":"Hou Wentao","year":"2024","unstructured":"Wentao Hou, Jie Zhang, Zeke Wang, and Ming Liu. 2024. Understanding Routable PCIe Performance for Composable Infrastructures. In 21st USENIX Symposium on Networked Systems Design and Implementation (NSDI'24). 297\u2013312."},{"key":"e_1_3_2_1_56_1","volume-title":"17th USENIX Symposium on Networked Systems Design and Implementation (NSDI 20)","author":"Hwang Jaehyun","year":"2020","unstructured":"Jaehyun Hwang, Qizhe Cai, Ao Tang, and Rachit Agarwal. 2020. TCP \u2248 RDMA: CPU-efficient Remote Storage Access with i10. In 17th USENIX Symposium on Networked Systems Design and Implementation (NSDI 20). 127\u2013140."},{"key":"e_1_3_2_1_57_1","volume-title":"22nd USENIX Symposium on Networked Systems Design and Implementation (NSDI'25)","author":"Jiang Sheng","year":"2025","unstructured":"Sheng Jiang and Ming Liu. 2025. Building an Elastic Block Storage over EBOFs Using Shadow Views. In 22nd USENIX Symposium on Networked Systems Design and Implementation (NSDI'25). 1137\u20131153."},{"key":"e_1_3_2_1_58_1","doi-asserted-by":"publisher","DOI":"10.1145\/2749469.2750392"},{"key":"e_1_3_2_1_59_1","volume-title":"22nd USENIX Symposium on Networked Systems Design and Implementation (NSDI'25)","author":"Kang Yuyuan","year":"2025","unstructured":"Yuyuan Kang and Ming Liu. 2025. Understanding and Profiling NVMe-over-TCP Using ntprof. In 22nd USENIX Symposium on Networked Systems Design and Implementation (NSDI'25). 1117\u20131136."},{"key":"e_1_3_2_1_60_1","volume-title":"7th USENIX Symposium on Networked Systems Design and Implementation (NSDI'10)","author":"Katz-Bassett Ethan","year":"2010","unstructured":"Ethan Katz-Bassett, Harsha V. Madhyastha, Vijay Kumar Adhikari, Colin Scott, Justine Sherry, Peter van Wesep, Thomas Anderson, and Arvind Krishnamurthy. 2010. Reverse traceroute. In 7th USENIX Symposium on Networked Systems Design and Implementation (NSDI'10)."},{"key":"e_1_3_2_1_61_1","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2010.49"},{"key":"e_1_3_2_1_62_1","doi-asserted-by":"publisher","DOI":"10.1145\/2901318.2901337"},{"key":"e_1_3_2_1_63_1","volume-title":"Proceedings of the Twenty-Second International Conference on Architectural Support for Programming Languages and Operating Systems (ASPLOS'17)","author":"Klimovic Ana","year":"2017","unstructured":"Ana Klimovic, Heiner Litz, and Christos Kozyrakis. 2017. ReFlex: Remote Flash \u2248 Local Flash. In Proceedings of the Twenty-Second International Conference on Architectural Support for Programming Languages and Operating Systems (ASPLOS'17). 345\u2013359."},{"key":"e_1_3_2_1_64_1","volume-title":"20th USENIX Conference on File and Storage Technologies (FAST 22)","author":"Lee Youngmoon","year":"2022","unstructured":"Youngmoon Lee, Hasan Al Maruf, Mosharaf Chowdhury, Asaf Cidon, and Kang G Shin. 2022. Hydra: Resilient and highly available remote memory. In 20th USENIX Conference on File and Storage Technologies (FAST 22). 181\u2013198."},{"key":"e_1_3_2_1_65_1","doi-asserted-by":"publisher","DOI":"10.1145\/3626111.3628195"},{"key":"e_1_3_2_1_66_1","doi-asserted-by":"publisher","DOI":"10.1145\/3575693.3578835"},{"key":"e_1_3_2_1_67_1","volume-title":"21st USENIX Conference on File and Storage Technologies (FAST 23)","author":"Li Pengfei","year":"2023","unstructured":"Pengfei Li, Yu Hua, Pengfei Zuo, Zhangyu Chen, and Jiajie Sheng. 2023. {ROLEX}: A Scalable {RDMA-oriented} Learned {Key-Value} Store for Disaggregated Memory Systems. In 21st USENIX Conference on File and Storage Technologies (FAST 23). 99\u2013114."},{"key":"e_1_3_2_1_68_1","doi-asserted-by":"publisher","DOI":"10.1145\/3676641.3715987"},{"key":"e_1_3_2_1_69_1","volume-title":"Hostping: Diagnosing Intra-host Network Bottlenecks in RDMA Servers. In 20th USENIX Symposium on Networked Systems Design and Implementation (NSDI'23)","author":"Liu Kefei","year":"2023","unstructured":"Kefei Liu, Zhuo Jiang, Jiao Zhang, Haoran Wei, Xiaolong Zhong, Lizhuang Tan, Tian Pan, and Tao Huang. 2023. Hostping: Diagnosing Intra-host Network Bottlenecks in RDMA Servers. In 20th USENIX Symposium on Networked Systems Design and Implementation (NSDI'23). 15\u201329."},{"key":"e_1_3_2_1_70_1","volume-title":"Hostping: Diagnosing Intra-host Network Bottlenecks in RDMA Servers. In 20th USENIX Symposium on Networked Systems Design and Implementation (NSDI 23)","author":"Liu Kefei","year":"2023","unstructured":"Kefei Liu, Zhuo Jiang, Jiao Zhang, Haoran Wei, Xiaolong Zhong, Lizhuang Tan, Tian Pan, and Tao Huang. 2023. Hostping: Diagnosing Intra-host Network Bottlenecks in RDMA Servers. In 20th USENIX Symposium on Networked Systems Design and Implementation (NSDI 23). Boston, MA, 15\u201329."},{"key":"e_1_3_2_1_71_1","volume-title":"Building Distributed Systems Using Programmable Networks","author":"Liu Ming","unstructured":"Ming Liu. 2020. Building Distributed Systems Using Programmable Networks. University of Washington."},{"key":"e_1_3_2_1_72_1","volume-title":"Fabric-Centric Computing. In Proceedings of the 19th Workshop on Hot Topics in Operating Systems. 118\u2013126","author":"Liu Ming","year":"2023","unstructured":"Ming Liu. 2023. Fabric-Centric Computing. In Proceedings of the 19th Workshop on Hot Topics in Operating Systems. 118\u2013126."},{"key":"e_1_3_2_1_73_1","doi-asserted-by":"publisher","DOI":"10.1145\/3341302.3342079"},{"key":"e_1_3_2_1_74_1","volume-title":"17th USENIX Symposium on Networked Systems Design and Implementation (NSDI'20)","author":"Liu Ming","year":"2020","unstructured":"Ming Liu, Arvind Krishnamurthy, Harsha V. Madhyastha, Rishi Bhardwaj, Karan Gupta, Chinmay Kamat, Huapeng Yuan, Aditya Jaltade, Roger Liao, Pavan Konka, and Anoop Jawahar. 2020. Fine-Grained Replicated State Machines for a Cluster Storage System. In 17th USENIX Symposium on Networked Systems Design and Implementation (NSDI'20). 305\u2013323."},{"key":"e_1_3_2_1_75_1","volume-title":"Proceedings of the Twenty-Second International Conference on Architectural Support for Programming Languages and Operating Systems (ASPLOS'17)","author":"Liu Ming","year":"2017","unstructured":"Ming Liu, Liang Luo, Jacob Nelson, Luis Ceze, Arvind Krishnamurthy, and Kishore Atreya. 2017. IncBricks: Toward In-Network Computation with an In-Network Cache. In Proceedings of the Twenty-Second International Conference on Architectural Support for Programming Languages and Operating Systems (ASPLOS'17). 795\u2013809."},{"key":"e_1_3_2_1_76_1","volume-title":"2019 USENIX Annual Technical Conference (USENIX ATC'19)","author":"Liu Ming","year":"2019","unstructured":"Ming Liu, Simon Peter, Arvind Krishnamurthy, and Phitchaya Mangpo Phothilimthana. 2019. E3: Energy-Efficient Microservices on SmartNIC-Accelerated Servers. In 2019 USENIX Annual Technical Conference (USENIX ATC'19). 363\u2013378."},{"key":"e_1_3_2_1_77_1","volume-title":"Workshop on Approximate Computing Across the Stack.","author":"Luo Liang","year":"2017","unstructured":"Liang Luo, Ming Liu, Jacob Nelson, Luis Ceze, Amar Phanishayee, and Arvind Krishnamurthy. 2017. Motivating in-network aggregation for distributed deep neural network training. In Workshop on Approximate Computing Across the Stack."},{"key":"e_1_3_2_1_78_1","doi-asserted-by":"publisher","DOI":"10.5555\/3691938.3691942"},{"key":"e_1_3_2_1_79_1","doi-asserted-by":"publisher","DOI":"10.1145\/3582016.3582063"},{"key":"e_1_3_2_1_80_1","volume-title":"Stream benchmark. Link: www.cs.virginia.edu\/stream\/ref.html# what 22, 7","author":"McCalpin John D","year":"1995","unstructured":"John D McCalpin. 1995. Stream benchmark. Link: www.cs.virginia.edu\/stream\/ref.html# what 22, 7 (1995)."},{"key":"e_1_3_2_1_81_1","volume-title":"Hwan Doh, and Arvind Krishnamurthy. 2021. Gimbal: enabling multi-tenant storage disaggregation on SmartNIC JBOFs. In Proceedings of the 2021 ACM SIGCOMM 2021 Conference (SIGCOMM'21)","author":"Min Jaehong","unstructured":"Jaehong Min, Ming Liu, Tapan Chugh, Chenxingyu Zhao, Andrew Wei, In Hwan Doh, and Arvind Krishnamurthy. 2021. Gimbal: enabling multi-tenant storage disaggregation on SmartNIC JBOFs. In Proceedings of the 2021 ACM SIGCOMM 2021 Conference (SIGCOMM'21). 106\u2013122."},{"key":"e_1_3_2_1_82_1","volume-title":"17th USENIX Symposium on Operating Systems Design and Implementation (OSDI'23)","author":"Min Jaehong","year":"2023","unstructured":"Jaehong Min, Chenxingyu Zhao, Ming Liu, and Arvind Krishnamurthy. 2023. eZNS: An Elastic Zoned Namespace for Commodity ZNS SSDs. In 17th USENIX Symposium on Operating Systems Design and Implementation (OSDI'23). 461\u2013477."},{"key":"e_1_3_2_1_83_1","article-title":"eZNS: Elastic Zoned Namespace for Enhanced Performance Isolation and Device Utilization","volume":"20","author":"Min Jaehong","year":"2024","unstructured":"Jaehong Min, Chenxingyu Zhao, Ming Liu, and Arvind Krishnamurthy. 2024. eZNS: Elastic Zoned Namespace for Enhanced Performance Isolation and Device Utilization. ACM Trans. Storage 20, 3, Article 16 (June 2024), 41 pages.","journal-title":"ACM Trans. Storage"},{"key":"e_1_3_2_1_84_1","volume-title":"2024 IEEE International Solid-State Circuits Conference (ISSCC'24)","volume":"67","author":"Munch Ashley O.","year":"2024","unstructured":"Ashley O. Munch, Nevine Nassif, Carleton L. Molnar, Jason Crop, Rich Gammack, Chinmay P. Joshi, Goran Zelic, Kambiz Munshi, Min Huang, Charles R. Morganti, Sireesha Kandula, and Arijit Biswas. 2024. 2.3 Emerald Rapids: 5th-Generation Intel\u00ae Xeon\u00ae Scalable Processors. In 2024 IEEE International Solid-State Circuits Conference (ISSCC'24), Vol. 67. 40\u201342."},{"key":"e_1_3_2_1_85_1","volume-title":"Sapphire Rapids: The Next-Generation Intel Xeon Scalable Processor. In 2022 IEEE International Solid-State Circuits Conference (ISSCC'22)","volume":"65","author":"Nassif Nevine","year":"2022","unstructured":"Nevine Nassif, Ashley O. Munch, Carleton L. Molnar, Gerald Pasdast, Sitaraman V. Lyer, Zibing Yang, Oscar Mendoza, Mark Huddart, Srikrishnan Venkataraman, Sireesha Kandula, Rafi Marom, Alexandra M. Kern, Bill Bowhill, David R. Mulvihill, Srikanth Nimmagadda, Varma Kalidindi, Jonathan Krause, Mohammad M. Haq, Roopali Sharma, and Kevin Duda. 2022. Sapphire Rapids: The Next-Generation Intel Xeon Scalable Processor. In 2022 IEEE International Solid-State Circuits Conference (ISSCC'22), Vol. 65. 44\u201346."},{"key":"e_1_3_2_1_86_1","volume-title":"Proceedings of the 2018 Conference of the ACM Special Interest Group on Data Communication (SIGCOMM'18)","author":"Neugebauer Rolf","unstructured":"Rolf Neugebauer, Gianni Antichi, Jos\u00e9 Fernando Zazo, Yury Audzevich, Sergio L\u00f3pez-Buedo, and Andrew W. Moore. 2018. Understanding PCIe performance for end host networking. In Proceedings of the 2018 Conference of the ACM Special Interest Group on Data Communication (SIGCOMM'18). 327\u2013341."},{"key":"e_1_3_2_1_87_1","doi-asserted-by":"crossref","unstructured":"Gianna Paulin Paul Scheffler Thomas Benz Matheus A. Cavalcante Tim Fischer Manuel Eggimann Yichao Zhang Nils Wistoff Luca Bertaccini Luca Colagrande Gianmarco Ottavi Frank K. G\u00fcrkaynak Davide Rossi and Luca Benini. 2024. Occamy: A 432-Core 28.1 DP-GFLOP\/s\/W 83% FPU Utilization Dual-Chiplet Dual-HBM2E RISC-V-Based Accelerator for Stencil and Sparse Linear Algebra Computations with 8-to-64-bit Floating-Point Support in 12nm FinFET. In IEEE Symposium on VLSI Technology and Circuits 2024 Honolulu HI USA June 16\u201320 2024. 1\u20132.","DOI":"10.1109\/VLSITechnologyandCir46783.2024.10631529"},{"key":"e_1_3_2_1_88_1","volume-title":"Floem: A Programming System for NIC-Accelerated Network Applications. In 13th USENIX Symposium on Operating Systems Design and Implementation (OSDI'18)","author":"Phothilimthana Phitchaya Mangpo","year":"2018","unstructured":"Phitchaya Mangpo Phothilimthana, Ming Liu, Antoine Kaufmann, Simon Peter, Rastislav Bodik, and Thomas Anderson. 2018. Floem: A Programming System for NIC-Accelerated Network Applications. In 13th USENIX Symposium on Operating Systems Design and Implementation (OSDI'18). 663\u2013679."},{"key":"e_1_3_2_1_89_1","volume-title":"Proceedings of the 19th ACM Workshop on Hot Topics in Networks (HotNets'20)","author":"Qiu Yiming","year":"2020","unstructured":"Yiming Qiu, Qiao Kang, Ming Liu, and Ang Chen. 2020. Clara: Performance Clarity for SmartNIC Offloading. In Proceedings of the 19th ACM Workshop on Hot Topics in Networks (HotNets'20). 16\u201322."},{"key":"e_1_3_2_1_90_1","doi-asserted-by":"publisher","DOI":"10.1145\/3477132.3483583"},{"key":"e_1_3_2_1_91_1","volume-title":"14th USENIX Symposium on Operating Systems Design and Implementation (OSDI 20)","author":"Ruan Zhenyuan","year":"2020","unstructured":"Zhenyuan Ruan, Malte Schwarzkopf, Marcos K Aguilera, and Adam Belay. 2020. {AIFM}:{High-Performance},{Application-Integrated} far memory. In 14th USENIX Symposium on Operating Systems Design and Implementation (OSDI 20). 315\u2013332."},{"key":"e_1_3_2_1_92_1","doi-asserted-by":"publisher","DOI":"10.1145\/3477132.3483555"},{"key":"e_1_3_2_1_93_1","volume-title":"Approximating Fair Queueing on Reconfigurable Switches. In 15th USENIX Symposium on Networked Systems Design and Implementation (NSDI'18)","author":"Sharma Naveen Kr.","year":"2018","unstructured":"Naveen Kr. Sharma, Ming Liu, Kishore Atreya, and Arvind Krishnamurthy. 2018. Approximating Fair Queueing on Reconfigurable Switches. In 15th USENIX Symposium on Networked Systems Design and Implementation (NSDI'18). 1\u201316."},{"key":"e_1_3_2_1_94_1","volume-title":"Programmable Calendar Queues for High-speed Packet Scheduling. In 17th USENIX Symposium on Networked Systems Design and Implementation (NSDI'20)","author":"Sharma Naveen Kr.","year":"2020","unstructured":"Naveen Kr. Sharma, Chenxingyu Zhao, Ming Liu, Pravein G Kannan, Changhoon Kim, Arvind Krishnamurthy, and Anirudh Sivaraman. 2020. Programmable Calendar Queues for High-speed Packet Scheduling. In 17th USENIX Symposium on Networked Systems Design and Implementation (NSDI'20). 685\u2013699."},{"key":"e_1_3_2_1_95_1","doi-asserted-by":"publisher","DOI":"10.1145\/3613424.3614256"},{"key":"e_1_3_2_1_96_1","doi-asserted-by":"publisher","DOI":"10.1145\/3627703.3650061"},{"key":"e_1_3_2_1_97_1","doi-asserted-by":"publisher","DOI":"10.1145\/3694715.3695968"},{"key":"e_1_3_2_1_98_1","doi-asserted-by":"publisher","DOI":"10.1145\/3651890.3672271"},{"key":"e_1_3_2_1_99_1","volume-title":"14th USENIX Symposium on Operating Systems Design and Implementation (OSDI 20)","author":"Wang Chenxi","year":"2020","unstructured":"Chenxi Wang, Haoran Ma, Shi Liu, Yuanqi Li, Zhenyuan Ruan, Khanh Nguyen, Michael D Bond, Ravi Netravali, Miryung Kim, and Guoqing Harry Xu. 2020. Semeru: A {Memory-Disaggregated} managed runtime. In 14th USENIX Symposium on Operating Systems Design and Implementation (OSDI 20). 261\u2013280."},{"key":"e_1_3_2_1_100_1","volume-title":"18th USENIX Symposium on Operating Systems Design and Implementation (OSDI'24)","author":"Xiang Lingfeng","year":"2024","unstructured":"Lingfeng Xiang, Zhen Lin, Weishu Deng, Hui Lu, Jia Rao, Yifan Yuan, and Ren Wang. 2024. Nomad:{Non-Exclusive} Memory Tiering via Transactional Page Migration. In 18th USENIX Symposium on Operating Systems Design and Implementation (OSDI'24). 19\u201335."},{"key":"e_1_3_2_1_101_1","volume-title":"22nd USENIX Symposium on Networked Systems Design and Implementation (NSDI'25)","author":"Xie Xincheng","year":"2025","unstructured":"Xincheng Xie, Wentao Hou, Zerui Guo, and Ming Liu. 2025. Building Massive MIMO Baseband Processing on a Single-Node Supercomputer. In 22nd USENIX Symposium on Networked Systems Design and Implementation (NSDI'25). 1221\u20131242."},{"key":"e_1_3_2_1_102_1","doi-asserted-by":"publisher","DOI":"10.1145\/3600006.3613147"},{"key":"e_1_3_2_1_103_1","doi-asserted-by":"publisher","DOI":"10.1145\/3230543.3230552"},{"key":"e_1_3_2_1_104_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISPASS.2014.6844459"},{"key":"e_1_3_2_1_105_1","doi-asserted-by":"publisher","DOI":"10.1145\/3053277.3053279"},{"key":"e_1_3_2_1_106_1","volume-title":"Proc. ACM Meas. Anal. Comput. Syst. 6, 2, Article 37 (June","author":"Zhao Chenxingyu","year":"2022","unstructured":"Chenxingyu Zhao, Tapan Chugh, Jaehong Min, Ming Liu, and Arvind Krishnamurthy. 2022. Dremel: Adaptive Configuration Tuning of RocksDB KV-Store. Proc. ACM Meas. Anal. Comput. Syst. 6, 2, Article 37 (June 2022), 30 pages."},{"key":"e_1_3_2_1_107_1","volume-title":"White-Boxing RDMA with Packet-Granular Software Control. In 22nd USENIX Symposium on Networked Systems Design and Implementation (NSDI'25)","author":"Zhao Chenxingyu","year":"2025","unstructured":"Chenxingyu Zhao, Jaehong Min, Ming Liu, and Arvind Krishnamurthy. 2025. White-Boxing RDMA with Packet-Granular Software Control. In 22nd USENIX Symposium on Networked Systems Design and Implementation (NSDI'25). 427\u2013449."},{"key":"e_1_3_2_1_108_1","volume-title":"Managing Memory Tiers with CXL in Virtualized Environments. In 18th USENIX Symposium on Operating Systems Design and Implementation (OSDI'24)","author":"Zhong Yuhong","year":"2024","unstructured":"Yuhong Zhong, Daniel S. Berger, Carl Waldspurger, Ryan Wee, Ishwar Agarwal, Rajat Agarwal, Frank Hady, Karthik Kumar, Mark D. Hill, Mosharaf Chowdhury, and Asaf Cidon. 2024. Managing Memory Tiers with CXL in Virtualized Environments. In 18th USENIX Symposium on Operating Systems Design and Implementation (OSDI'24). 37\u201356."}],"event":{"name":"SIGCOMM '25: ACM SIGCOMM 2025 Conference","location":"S\u00e3o Francisco Convent Coimbra Portugal","acronym":"SIGCOMM '25","sponsor":["SIGCOMM ACM Special Interest Group on Data Communication"]},"container-title":["Proceedings of the ACM SIGCOMM 2025 Conference"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3718958.3750479","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,27]],"date-time":"2025-08-27T16:57:52Z","timestamp":1756313872000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3718958.3750479"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,8,27]]},"references-count":108,"alternative-id":["10.1145\/3718958.3750479","10.1145\/3718958"],"URL":"https:\/\/doi.org\/10.1145\/3718958.3750479","relation":{},"subject":[],"published":{"date-parts":[[2025,8,27]]},"assertion":[{"value":"2025-08-27","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}