{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,10]],"date-time":"2026-01-10T08:26:49Z","timestamp":1768033609900,"version":"3.49.0"},"publisher-location":"New York, NY, USA","reference-count":29,"publisher":"ACM","content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,5,4]]},"DOI":"10.1145\/3757348.3757358","type":"proceedings-article","created":{"date-parts":[[2025,11,11]],"date-time":"2025-11-11T10:07:33Z","timestamp":1762855653000},"page":"79-92","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":1,"title":["Towards Empirical Roofline Modeling of Distributed Data Services: Mapping the Boundaries of RPC Throughput"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-3963-9923","authenticated-orcid":false,"given":"Philip","family":"Carns","sequence":"first","affiliation":[{"name":"Argonne National Laboratory, Lemont, IL, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9293-2021","authenticated-orcid":false,"given":"Matthieu","family":"Dorier","sequence":"additional","affiliation":[{"name":"Argonne National Laboratory, Lemont, IL, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5285-6375","authenticated-orcid":false,"given":"Rob","family":"Latham","sequence":"additional","affiliation":[{"name":"Argonne National Laboratory, Lemont, IL, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-5973-4195","authenticated-orcid":false,"given":"Shane","family":"Snyder","sequence":"additional","affiliation":[{"name":"Argonne National Laboratory, Lemont, IL, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-4830-3139","authenticated-orcid":false,"given":"Amal","family":"Gueroudji","sequence":"additional","affiliation":[{"name":"Argonne National Laboratory, Lemont, IL, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5692-9359","authenticated-orcid":false,"given":"Seth","family":"Ockerman","sequence":"additional","affiliation":[{"name":"University of Wisconsin-Madison, Lemont, IL, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5480-1669","authenticated-orcid":false,"given":"Jerome","family":"Soumagne","sequence":"additional","affiliation":[{"name":"HPE, Spring, TX, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4078-8149","authenticated-orcid":false,"given":"Dong","family":"Dai","sequence":"additional","affiliation":[{"name":"University of Delaware, Newark, DE, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5435-5857","authenticated-orcid":false,"given":"Robert","family":"Ross","sequence":"additional","affiliation":[{"name":"Argonne National Laboratory, Lemont, IL, USA"}]}],"member":"320","published-online":{"date-parts":[[2025,11,11]]},"reference":[{"key":"e_1_3_3_2_2_2","doi-asserted-by":"publisher","unstructured":"L. Adhianto S. Banerjee M. Fagan M. Krentel G. Marin J. Mellor-Crummey and N.\u00a0R. Tallent. 2010. HPCTOOLKIT: tools for performance analysis of optimized parallel programs. Concurrency and Computation: Practice and Experience 22 6 (2010) 685\u2013701. 10.1002\/cpe.1553 arXiv:https:\/\/onlinelibrary.wiley.com\/doi\/pdf\/10.1002\/cpe.1553","DOI":"10.1002\/cpe.1553"},{"key":"e_1_3_3_2_3_2","doi-asserted-by":"publisher","DOI":"10.1145\/3581784.3607089"},{"key":"e_1_3_3_2_4_2","doi-asserted-by":"publisher","DOI":"10.1109\/PDP.2010.67"},{"key":"e_1_3_3_2_5_2","doi-asserted-by":"publisher","unstructured":"Philip Carns Matthieu Dorier Rob Latham Robert\u00a0B. Ross Shane Snyder and Jerome Soumagne. 2023. Mochi: A Case Study in Translational Computer Science for High-Performance Computing Data Management. Computing in Science & Engineering 25 04 (July 2023) 35\u201341. 10.1109\/MCSE.2023.3326436","DOI":"10.1109\/MCSE.2023.3326436"},{"key":"e_1_3_3_2_6_2","doi-asserted-by":"publisher","unstructured":"Philip Carns Kevin Harms William Allcock Charles Bacon Samuel Lang Robert Latham and Robert Ross. 2011. Understanding and Improving Computational Science Storage Access through Continuous Characterization. ACM Trans. Storage 7 3 Article 8 (Oct. 2011) 26\u00a0pages. 10.1145\/2027066.2027068","DOI":"10.1145\/2027066.2027068"},{"key":"e_1_3_3_2_7_2","doi-asserted-by":"publisher","DOI":"10.1109\/PDSW51947.2020.00006"},{"key":"e_1_3_3_2_8_2","volume-title":"4th Workshop on Interactions of NVM\/Flash with Operating Systems and Workloads (INFLOW 16)","author":"Carns Philip","year":"2016","unstructured":"Philip Carns, John Jenkins, Sangmin Seo, Shane Snyder, Robert\u00a0B Ross, Charles\u00a0D Cranor, Scott Atchley, and Torsten Hoefler. 2016. Enabling NVM for Data-Intensive Scientific Services. In 4th Workshop on Interactions of NVM\/Flash with Operating Systems and Workloads (INFLOW 16)."},{"key":"e_1_3_3_2_9_2","unstructured":"Fabio Checconi Jesmin\u00a0Jahan Tithi and Fabrizio Petrini. 2022. Ridgeline: A 2D roofline model for distributed systems. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2209.01368 (2022)."},{"key":"e_1_3_3_2_10_2","doi-asserted-by":"publisher","DOI":"10.1109\/SC41406.2024.00071"},{"key":"e_1_3_3_2_11_2","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPSW63119.2024.00091"},{"key":"e_1_3_3_2_12_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-58667-0_21"},{"key":"e_1_3_3_2_13_2","doi-asserted-by":"publisher","DOI":"10.1145\/2464996.2465020"},{"key":"e_1_3_3_2_14_2","doi-asserted-by":"publisher","unstructured":"Yanfei Guo Ken Raffenetti Hui Zhou Pavan Balaji Min Si Abdelhalim Amer Shintaro Iwasaki Sangmin Seo Giuseppe Congiu Robert Latham Lena Oden Thomas Gillis Rohit Zambre Kaiming Ouyang Charles Archer Wesley Bland Jithin Jose Sayantan Sur Hajime Fujita Dmitry Durnov Michael Chuvelev Gengbin Zheng Alex Brooks Sagar Thapaliya Taru Doodi Maria Garazan Steve Oyanagi Marc Snir and Rajeev Thakur. 0. Preparing MPICH for exascale. The International Journal of High Performance Computing Applications 0 0 (0) 10943420241311608. 10.1177\/10943420241311608 arXiv:10.1177\/10943420241311608","DOI":"10.1177\/10943420241311608"},{"key":"e_1_3_3_2_15_2","doi-asserted-by":"crossref","unstructured":"Aleksandar Ilic Frederico Pratas and Leonel Sousa. 2013. Cache-aware roofline model: Upgrading the loft. IEEE Computer Architecture Letters 13 1 (2013) 21\u201324.","DOI":"10.1109\/L-CA.2013.6"},{"key":"e_1_3_3_2_16_2","unstructured":"J Kunkel John Bent Jay Lofstead and George\u00a0S Markomanolis. 2016. Establishing the io-500 benchmark. White Paper (2016)."},{"key":"e_1_3_3_2_17_2","doi-asserted-by":"publisher","DOI":"10.1145\/1654059.1654100"},{"key":"e_1_3_3_2_18_2","doi-asserted-by":"publisher","DOI":"10.1109\/SCW63240.2024.00171"},{"key":"e_1_3_3_2_19_2","doi-asserted-by":"publisher","DOI":"10.1109\/SCW63240.2024.00173"},{"key":"e_1_3_3_2_20_2","doi-asserted-by":"publisher","DOI":"10.1109\/PDSW54622.2021.00011"},{"key":"e_1_3_3_2_21_2","doi-asserted-by":"crossref","unstructured":"Robert\u00a0B Ross George Amvrosiadis Philip Carns Charles\u00a0D Cranor Matthieu Dorier Kevin Harms Greg Ganger Garth Gibson Samuel\u00a0K Gutierrez Robert Latham et\u00a0al. 2020. Mochi: Composing data services for high-performance computing environments. Journal of Computer Science and Technology (2020).","DOI":"10.1007\/s11390-020-9802-0"},{"key":"e_1_3_3_2_22_2","unstructured":"Sangmin Seo Abdelhalim Amer Pavan Balaji Cyril Bordage George Bosilca Alex Brooks Philip Carns Adrian Castello Damien Genet Thomas Herault et\u00a0al. 2017. Argobots: A Lightweight Low-Level Threading and Tasking Framework. IEEE Transactions on Parallel and Distributed Systems (2017)."},{"key":"e_1_3_3_2_23_2","doi-asserted-by":"publisher","DOI":"10.1109\/SC.2008.5222721"},{"key":"e_1_3_3_2_24_2","doi-asserted-by":"publisher","DOI":"10.1109\/CLUSTER.2013.6702617"},{"key":"e_1_3_3_2_25_2","doi-asserted-by":"publisher","unstructured":"Samuel Williams Andrew Waterman and David Patterson. 2009. Roofline: an insightful visual performance model for multicore architectures. Commun. ACM 52 4 (April 2009) 65\u201376. 10.1145\/1498765.1498785","DOI":"10.1145\/1498765.1498785"},{"key":"e_1_3_3_2_26_2","doi-asserted-by":"publisher","DOI":"10.1109\/P3HPC.2018.00005"},{"key":"e_1_3_3_2_27_2","doi-asserted-by":"crossref","unstructured":"Charlene Yang Thorsten Kurth and Samuel Williams. 2020. Hierarchical Roofline analysis for GPUs: Accelerating performance optimization for the NERSC-9 Perlmutter system. Concurrency and Computation: Practice and Experience 32 20 (2020) e5547.","DOI":"10.1002\/cpe.5547"},{"key":"e_1_3_3_2_28_2","unstructured":"Zhihang Yuan Yuzhang Shang Yang Zhou Zhen Dong Zhe Zhou Chenhao Xue Bingzhe Wu Zhikai Li Qingyi Gu and Yong\u00a0Jae Lee. 2024. LLM inference unveiled: Survey and roofline model insights. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2402.16363 (2024)."},{"key":"e_1_3_3_2_29_2","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPSW59300.2023.00106"},{"key":"e_1_3_3_2_30_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICPADS60453.2023.00112"}],"event":{"name":"CUG 2025: Cray User Group","location":"Jersey City USA","acronym":"CUG '25"},"container-title":["Proceedings of the Cray User Group"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3757348.3757358","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,11,11]],"date-time":"2025-11-11T10:28:40Z","timestamp":1762856920000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3757348.3757358"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,5,4]]},"references-count":29,"alternative-id":["10.1145\/3757348.3757358","10.1145\/3757348"],"URL":"https:\/\/doi.org\/10.1145\/3757348.3757358","relation":{},"subject":[],"published":{"date-parts":[[2025,5,4]]},"assertion":[{"value":"2025-11-11","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}