{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,9,11]],"date-time":"2025-09-11T20:59:36Z","timestamp":1757624376813,"version":"3.44.0"},"publisher-location":"New York, NY, USA","reference-count":52,"publisher":"ACM","content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,7,20]]},"DOI":"10.1145\/3731545.3731585","type":"proceedings-article","created":{"date-parts":[[2025,9,9]],"date-time":"2025-09-09T12:46:16Z","timestamp":1757421976000},"page":"1-14","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Grudon: A System for Deploying Graph Workloads on Disaggregated Architectures with Near-Data Processing"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-2460-3768","authenticated-orcid":false,"given":"Vishal","family":"Rao","sequence":"first","affiliation":[{"name":"Georgia Institute of Technology, Atlanta, GA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8631-3080","authenticated-orcid":false,"given":"Nikhil Ram","family":"Shashidhar","sequence":"additional","affiliation":[{"name":"Georgia Institute of Technology, Atlanta, GA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5526-6127","authenticated-orcid":false,"given":"Suyeon","family":"Lee","sequence":"additional","affiliation":[{"name":"Georgia Institute of Technology, Atlanta, GA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4199-2512","authenticated-orcid":false,"given":"Ada","family":"Gavrilovska","sequence":"additional","affiliation":[{"name":"Georgia Institute of Technology, Atlanta, GA, USA"}]}],"member":"320","published-online":{"date-parts":[[2025,9,9]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"[n. d.]. AMD EPYC\u2122 9005 Series Processors \u2014 amd.com. https:\/\/www.amd.com\/en\/products\/processors\/server\/epyc\/9005-series.html. [Accessed 08-12-2024]."},{"key":"e_1_3_2_1_2_1","unstructured":"[n. d.]. CXL Use Case - MemVerge \u2014 memverge.com. https:\/\/memverge.com\/cxl-use-case-slash-memory-costs-and-expand-capacity\/. [Accessed 24-12-2024]."},{"key":"e_1_3_2_1_3_1","unstructured":"[n. d.]. Intel\u00ae Xeon\u00ae Platinum 8480+ Processor (105M Cache 2.00 GHz) - Product Specifications | Intel \u2014 intel.com. https:\/\/www.intel.com\/content\/www\/us\/en\/products\/sku\/231746\/intel-xeon-platinum-8480-processor-105m-cache-2-00-ghz\/specifications.html. [Accessed 08-12-2024]."},{"key":"e_1_3_2_1_4_1","unstructured":"[n.d.]. Marvell Structera. https:\/\/www.marvell.com\/content\/dam\/marvell\/en\/public-collateral\/assets\/marvell-structera-a-2504-near-memory-accelerator-product-brief.pdf. [Accessed 05-02-2025]."},{"key":"e_1_3_2_1_5_1","unstructured":"[n.d.]. MICRON\u00ae DDR5 128GB RDIMM \u2014 micron.com. https:\/\/www.micron.com\/content\/dam\/micron\/global\/public\/documents\/products\/product-flyer\/128gb-ddr5-rdimm-product-brief.pdf. [Accessed 08-12-2024]."},{"key":"e_1_3_2_1_6_1","volume-title":"d.]. Samsung PIM\/PNM for Transformer based AI. Samsung Semiconductor. Retrieved","author":"Samsung Semiconductor","year":"2024","unstructured":"Samsung Semiconductor [n. d.]. Samsung PIM\/PNM for Transformer based AI. Samsung Semiconductor. Retrieved Jan 17, 2024 from https:\/\/hc2023.hotchips.org\/assets\/program\/conference\/day1\/PIM\/23_HC35_PIM_PNM_Samsung_final.pdf"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1145\/2749469.2750386"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPDS.2018.2858250"},{"key":"e_1_3_2_1_9_1","volume-title":"Applications and challenges in large-scale graph analysis. Georgia Tech, Computational Science and Engineering","author":"Bader David","year":"2013","unstructured":"David Bader, Jason Riedy, and Henning Meyerhenke. 2013. Applications and challenges in large-scale graph analysis. Georgia Tech, Computational Science and Engineering (2013), 1\u201320."},{"key":"e_1_3_2_1_10_1","volume-title":"Octopus: Scalable Low-Cost CXL Memory Pooling. arXiv preprint arXiv:2501.09020","author":"Berger Daniel S","year":"2025","unstructured":"Daniel S Berger, Yuhong Zhong, Pantea Zardoshti, Shuwei Teng, Fiodar Kazhamiaka, and Rodrigo Fonseca. 2025. Octopus: Scalable Low-Cost CXL Memory Pooling. arXiv preprint arXiv:2501.09020 (2025)."},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1145\/3298989"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1145\/3192366.3192404"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1145\/2049662.2049663"},{"key":"e_1_3_2_1_14_1","volume-title":"10th USENIX symposium on operating systems design and implementation (OSDI 12)","author":"Gonzalez Joseph E","year":"2012","unstructured":"Joseph E Gonzalez, Yucheng Low, Haijie Gu, Danny Bickson, and Carlos Guestrin. 2012. {PowerGraph}: Distributed {Graph-Parallel} computation on natural graphs. In 10th USENIX symposium on operating systems design and implementation (OSDI 12). 17\u201330."},{"key":"e_1_3_2_1_15_1","volume-title":"11th USENIX symposium on operating systems design and implementation (OSDI 14)","author":"Gonzalez Joseph E","year":"2014","unstructured":"Joseph E Gonzalez, Reynold S Xin, Ankur Dave, Daniel Crankshaw, Michael J Franklin, and Ion Stoica. 2014. {GraphX} : Graph processing in a distributed dataflow framework. In 11th USENIX symposium on operating systems design and implementation (OSDI 14). 599\u2013613."},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1109\/COMHPC.2016.006"},{"key":"e_1_3_2_1_17_1","volume-title":"CMS: A Computational Memory Solution for High-Performance and Power-Efficient Recommendation System. In 2022 IEEE 4th International Conference on Artificial Intelligence Circuits and Systems (AICAS)","author":"Ha Minho","year":"2022","unstructured":"Minho Ha, Joonseop Sim, Donguk Moon, Myunghyun Rhee, Jungmin Choi, Byungil Koh, Euicheol Lim, and Kyoung Park. 2022. CMS: A Computational Memory Solution for High-Performance and Power-Efficient Recommendation System. In 2022 IEEE 4th International Conference on Artificial Intelligence Circuits and Systems (AICAS). IEEE, 491\u2013494."},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO61859.2024.00051"},{"key":"e_1_3_2_1_19_1","volume-title":"Graphicionado: A high-performance and energy-efficient accelerator for graph analytics. In 2016 49th annual IEEE\/ACM international symposium on microarchitecture (MICRO)","author":"Ham Tae Jun","year":"2016","unstructured":"Tae Jun Ham, Lisa Wu, Narayanan Sundaram, Nadathur Satish, and Margaret Martonosi. 2016. Graphicionado: A high-performance and energy-efficient accelerator for graph analytics. In 2016 49th annual IEEE\/ACM international symposium on microarchitecture (MICRO). IEEE, 1\u201313."},{"key":"e_1_3_2_1_20_1","volume-title":"Udon: A case for offloading to general purpose compute on cxl memory. arXiv preprint arXiv:2404.02868","author":"Hermes Jon","year":"2024","unstructured":"Jon Hermes, Josh Minor, Minjun Wu, Adarsh Patil, and Eric Van Hensbergen. 2024. Udon: A case for offloading to general purpose compute on cxl memory. arXiv preprint arXiv:2404.02868 (2024)."},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISPASS.2013.6557149"},{"key":"e_1_3_2_1_22_1","volume-title":"Proceedings of the 2024 ACM Symposium on Cloud Computing. 737\u2013754","author":"Khadirsharbiyani Soheil","year":"2024","unstructured":"Soheil Khadirsharbiyani, Nima Elyasi, Armin Haj Aboutalebi, Chun-Yi Liu, Changho Choi, and Mahmut Taylan Kandemir. 2024. SmartGraph: A Framework for Graph Processing in Computational Storage. In Proceedings of the 2024 ACM Symposium on Cloud Computing. 737\u2013754."},{"key":"e_1_3_2_1_23_1","volume-title":"Ramulator: A fast and extensible DRAM simulator","author":"Kim Yoongu","year":"2015","unstructured":"Yoongu Kim, Weikun Yang, and Onur Mutlu. 2015. Ramulator: A fast and extensible DRAM simulator. IEEE Computer architecture letters 15, 1 (2015), 45\u201349."},{"key":"e_1_3_2_1_24_1","volume-title":"2014 43rd International Conference on Parallel Processing Workshops. IEEE, 95\u2013103","author":"Kumar Dinesh","year":"2014","unstructured":"Dinesh Kumar, Arun Raj, Deepankar Patra, and Dharanipragada Janakiram. 2014. Graphive: Heterogeneity-aware adaptive graph partitioning in graphlab. In 2014 43rd International Conference on Parallel Processing Workshops. IEEE, 95\u2013103."},{"key":"e_1_3_2_1_25_1","volume-title":"Proceedings 16th International Parallel and Distributed Processing Symposium. IEEE, 7\u2013pp.","author":"Kumar Shailendra","year":"2002","unstructured":"Shailendra Kumar, Sajal K Das, and Rupak Biswas. 2002. Graph partitioning for parallel applications in heterogeneous grid environments. In Proceedings 16th International Parallel and Distributed Processing Symposium. IEEE, 7\u2013pp."},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1145\/3575693.3578835"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1145\/3581784.3607070"},{"key":"e_1_3_2_1_28_1","volume-title":"Proceedings of the 29th ACM International Conference on Architectural Support for Programming Languages and Operating Systems","volume":"2","author":"Liu Yushi","year":"2024","unstructured":"Yushi Liu, Shixuan Sun, Zijun Li, Quan Chen, Sen Gao, Bingsheng He, Chao Li, and Minyi Guo. 2024. FaaSGraph: Enabling Scalable, Efficient, and Cost-Effective Graph Processing with Serverless Computing. In Proceedings of the 29th ACM International Conference on Architectural Support for Programming Languages and Operating Systems, Volume 2. 385\u2013400."},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1145\/1807167.1807184"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1145\/2818185"},{"key":"e_1_3_2_1_31_1","unstructured":"Neo4j. [n. d.]. Neo4j Awards Technology Grant to Syracuse University for Mapping Misinformation Trends in 2024 U.S. Elections with Knowledge Graphs \u2014 prnewswire.com. https:\/\/www.prnewswire.com\/news-releases\/neo4j-awards-technology-grant-to-syracuse-university-for-mapping-misinformation-trends-in-2024-us-elections-with-knowledge-graphs-302137848.html. [Accessed 07-02-2025]."},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1145\/2517349.2522739"},{"key":"e_1_3_2_1_33_1","volume-title":"2021 USENIX Annual Technical Conference (USENIX ATC 21)","author":"Nider Joel","year":"2021","unstructured":"Joel Nider, Craig Mustard, Andrada Zoltan, John Ramsden, Larry Liu, Jacob Grossbard, Mohammad Dashti, Romaric Jodin, Alexandre Ghiti, Jordi Chauzi, and Alexandra Fedorova. 2021. A Case Study of Processing-in-Memory in off-the-Shelf Systems. In 2021 USENIX Annual Technical Conference (USENIX ATC 21). USENIX Association, 117\u2013130."},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"crossref","unstructured":"Sang-Soo Park KyungSoo Kim Jinin So Jin Jung Jonggeon Lee Kyoungwan Woo Nayeon Kim Younghyun Lee Hyungyo Kim Yongsuk Kwon et al. 2024. An LPDDR-based CXL-PNM Platform for TCO-efficient Inference of Transformer-based Large Language Models. In 2024 IEEE International Symposium on High-Performance Computer Architecture (HPCA). IEEE 970\u2013982.","DOI":"10.1109\/HPCA57654.2024.00078"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1145\/2815400.2815408"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1145\/3434642"},{"key":"e_1_3_2_1_37_1","volume-title":"18th USENIX Symposium on Networked Systems Design and Implementation (NSDI 21)","author":"Sapio Amedeo","year":"2021","unstructured":"Amedeo Sapio, Marco Canini, Chen-Yu Ho, Jacob Nelson, Panos Kalnis, Changhoon Kim, Arvind Krishnamurthy, Masoud Moshref, Dan Ports, and Peter Richt\u00e1rik. 2021. Scaling distributed machine learning with {In-Network} aggregation. In 18th USENIX Symposium on Networked Systems Design and Implementation (NSDI 21). 785\u2013808."},{"key":"e_1_3_2_1_38_1","unstructured":"Samsung Semiconductor. [n. d.]. Samsung CXL Memory Module - Box. https:\/\/download.semiconductor.samsung.com\/resources\/white-paper\/CMM-B_whitepaper-V2.pdf. [Accessed 30-04-2025]."},{"key":"e_1_3_2_1_39_1","volume-title":"TEGRA-Scaling Up Terascale Graph Processing with Disaggregated Computing. arXiv preprint arXiv:2404.03155","author":"Shaddix William","year":"2024","unstructured":"William Shaddix, Mahyar Samani, Marjan Fariborz, SJ Yoo, Jason Lowe-Power, and Venkatesh Akella. 2024. TEGRA-Scaling Up Terascale Graph Processing with Disaggregated Computing. arXiv preprint arXiv:2404.03155 (2024)."},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1145\/3128571"},{"key":"e_1_3_2_1_41_1","volume-title":"Resource-aware scientific computation on a heterogeneous cluster. Computing in science & engineering 7, 2","author":"Teresco James D","year":"2005","unstructured":"James D Teresco, J Fair, and Joseph E Flaherty. 2005. Resource-aware scientific computation on a heterogeneous cluster. Computing in science & engineering 7, 2 (2005), 40\u201350."},{"key":"e_1_3_2_1_42_1","volume-title":"2019 18th International Symposium on Parallel and Distributed Computing (ISPDC). IEEE, 66\u201373","author":"Toader Lucian","year":"2019","unstructured":"Lucian Toader, Alexandru Uta, Ahmed Musaafir, and Alexandru Iosup. 2019. Graphless: Toward serverless graph processing. In 2019 18th International Symposium on Parallel and Distributed Computing (ISPDC). IEEE, 66\u201373."},{"key":"e_1_3_2_1_43_1","volume-title":"Proceedings of the 16th Annual Middleware Conference. 37\u201349","author":"Volos Haris","year":"2015","unstructured":"Haris Volos, Guilherme Magalhaes, Ludmila Cherkasova, and Jun Li. 2015. Quartz: A lightweight performance emulator for persistent memory software. In Proceedings of the 16th Annual Middleware Conference. 37\u201349."},{"key":"e_1_3_2_1_44_1","volume-title":"2019 USENIX Annual Technical Conference (USENIX ATC 19)","author":"Vora Keval","year":"2019","unstructured":"Keval Vora. 2019. {LUMOS}:{Dependency-Driven} disk-based graph processing. In 2019 USENIX Annual Technical Conference (USENIX ATC 19). 429\u2013442."},{"key":"e_1_3_2_1_45_1","volume-title":"2022 IEEE International Parallel and Distributed Processing Symposium (IPDPS). IEEE, 1029\u20131039","author":"Wang Jing","year":"2022","unstructured":"Jing Wang, Chao Li, Taolei Wang, Lu Zhang, Pengyu Wang, Junyi Mei, and Minyi Guo. 2022. Excavating the potential of graph workload on rdma-based far memory architecture. In 2022 IEEE International Parallel and Distributed Processing Symposium (IPDPS). IEEE, 1029\u20131039."},{"key":"e_1_3_2_1_46_1","volume-title":"2022 IEEE International Parallel and Distributed Processing Symposium (IPDPS). IEEE, 81\u201392","author":"Zahka Daniel","year":"2022","unstructured":"Daniel Zahka and Ada Gavrilovska. 2022. FAM-Graph: Graph analytics on disaggregated memory. In 2022 IEEE International Parallel and Distributed Processing Symposium (IPDPS). IEEE, 81\u201392."},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2018.00053"},{"key":"e_1_3_2_1_48_1","volume-title":"DFabric: Scaling Out Data Parallel Applications with CXL-Ethernet Hybrid Interconnects. arXiv preprint arXiv:2409.05404","author":"Zhang Xu","year":"2024","unstructured":"Xu Zhang, Ke Liu, Yisong Chang, Ke Zhang, and Mingyu Chen. 2024. DFabric: Scaling Out Data Parallel Applications with CXL-Ethernet Hybrid Interconnects. arXiv preprint arXiv:2409.05404 (2024)."},{"key":"e_1_3_2_1_49_1","volume-title":"Load Balanced PIM-Based Graph Processing. ACM Transactions on Design Automation of Electronic Systems","author":"Zhao Xiang","year":"2024","unstructured":"Xiang Zhao, Song Chen, and Yi Kang. 2024. Load Balanced PIM-Based Graph Processing. ACM Transactions on Design Automation of Electronic Systems (2024)."},{"key":"e_1_3_2_1_50_1","volume-title":"13th USENIX Conference on File and Storage Technologies (FAST 15)","author":"Zheng Da","year":"2015","unstructured":"Da Zheng, Disa Mhembere, Randal Burns, Joshua Vogelstein, Carey E Priebe, and Alexander S Szalay. 2015. {FlashGraph}: Processing {Billion-Node} graphs on an array of commodity {SSDs}. In 13th USENIX Conference on File and Storage Technologies (FAST 15). 45\u201358."},{"key":"e_1_3_2_1_51_1","volume-title":"12th USENIX Symposium on Operating Systems Design and Implementation (OSDI 16)","author":"Zhu Xiaowei","year":"2016","unstructured":"Xiaowei Zhu, Wenguang Chen, Weimin Zheng, and Xiaosong Ma. 2016. Gemini: A { Computation-Centric} Distributed Graph Processing System. In 12th USENIX Symposium on Operating Systems Design and Implementation (OSDI 16). 301\u2013316."},{"key":"e_1_3_2_1_52_1","doi-asserted-by":"publisher","DOI":"10.1145\/3352460.3358256"}],"event":{"name":"HPDC '25: 34th International Symposium on High-Performance Parallel and Distributed Computing","location":"University of Notre Dame Conference Facilities Notre Dame IN USA","acronym":"HPDC '25","sponsor":["SIGHPC ACM Special Interest Group on High Performance Computing, Special Interest Group on High Performance Computing","SIGARCH ACM Special Interest Group on Computer Architecture"]},"container-title":["Proceedings of the 34th International Symposium on High-Performance Parallel and Distributed Computing"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3731545.3731585","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,9]],"date-time":"2025-09-09T12:47:55Z","timestamp":1757422075000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3731545.3731585"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,7,20]]},"references-count":52,"alternative-id":["10.1145\/3731545.3731585","10.1145\/3731545"],"URL":"https:\/\/doi.org\/10.1145\/3731545.3731585","relation":{},"subject":[],"published":{"date-parts":[[2025,7,20]]},"assertion":[{"value":"2025-09-09","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}