{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,21]],"date-time":"2025-06-21T11:40:08Z","timestamp":1750506008059,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":134,"publisher":"ACM","license":[{"start":{"date-parts":[[2025,6,20]],"date-time":"2025-06-20T00:00:00Z","timestamp":1750377600000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/100000001","name":"NSF (National Science Foundation)","doi-asserted-by":"publisher","award":["1908601"],"award-info":[{"award-number":["1908601"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]},{"name":"JST PRESTO","award":["JPMJPR22P7"],"award-info":[{"award-number":["JPMJPR22P7"]}]},{"DOI":"10.13039\/100006168","name":"National Nuclear Security Administration","doi-asserted-by":"publisher","award":["89233218CNA000001"],"award-info":[{"award-number":["89233218CNA000001"]}],"id":[{"id":"10.13039\/100006168","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,6,21]]},"DOI":"10.1145\/3695053.3731015","type":"proceedings-article","created":{"date-parts":[[2025,6,20]],"date-time":"2025-06-20T16:43:11Z","timestamp":1750437791000},"page":"1641-1658","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["DX100: Programmable Data Access Accelerator for Indirection"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-7615-5514","authenticated-orcid":false,"given":"Alireza","family":"Khadem","sequence":"first","affiliation":[{"name":"Computer Science and Engineering, University of Michigan, Ann Arbor, Michigan, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7282-5165","authenticated-orcid":false,"given":"Kamalavasan","family":"Kamalakkannan","sequence":"additional","affiliation":[{"name":"Los Alamos National Laboratory, Los Alamos, New Mexico, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-6826-0095","authenticated-orcid":false,"given":"Zhenyan","family":"Zhu","sequence":"additional","affiliation":[{"name":"Computer Science and Engineering, University of Michigan, Ann Arbor, Michigan, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-1251-9210","authenticated-orcid":false,"given":"Akash","family":"Poptani","sequence":"additional","affiliation":[{"name":"Computer Science and Engineering, University of Michigan, Ann Arbor, Michigan, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1918-7533","authenticated-orcid":false,"given":"Yufeng","family":"Gu","sequence":"additional","affiliation":[{"name":"Computer Science and Engineering, University of Michigan, Ann Arbor, Michigan, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7021-3077","authenticated-orcid":false,"given":"Jered Benjamin","family":"Dominguez-Trujillo","sequence":"additional","affiliation":[{"name":"Los Alamos National Laboratory, Los Alamos, New Mexico, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2457-4119","authenticated-orcid":false,"given":"Nishil","family":"Talati","sequence":"additional","affiliation":[{"name":"Computer Science and Engineering, University of Michigan, Ann Arbor, Michigan, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7949-0417","authenticated-orcid":false,"given":"Daichi","family":"Fujiki","sequence":"additional","affiliation":[{"name":"Institute of Science Tokyo, Meguro, Tokyo, Japan"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0438-0616","authenticated-orcid":false,"given":"Scott","family":"Mahlke","sequence":"additional","affiliation":[{"name":"Computer Science and Engineering, University of Michigan, Ann Arbor, Michigan, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6297-2145","authenticated-orcid":false,"given":"Galen","family":"Shipman","sequence":"additional","affiliation":[{"name":"Los Alamos National Laboratory, Los Alamos, New Mexico, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5894-8342","authenticated-orcid":false,"given":"Reetuparna","family":"Das","sequence":"additional","affiliation":[{"name":"Computer Science and Engineering, University of Michigan, Ann Arbor, Michigan, USA"}]}],"member":"320","published-online":{"date-parts":[[2025,6,20]]},"reference":[{"key":"e_1_3_3_3_2_2","doi-asserted-by":"publisher","DOI":"10.1145\/2749469.2750386"},{"key":"e_1_3_3_3_3_2","doi-asserted-by":"publisher","DOI":"10.1109\/CGO.2017.7863749"},{"key":"e_1_3_3_3_4_2","doi-asserted-by":"crossref","unstructured":"Sam Ainsworth and Timothy\u00a0M Jones. 2018. An event-triggered programmable prefetcher for irregular workloads. ACM Sigplan Notices 53 2 (2018) 578\u2013592.","DOI":"10.1145\/3296957.3173189"},{"key":"e_1_3_3_3_5_2","doi-asserted-by":"publisher","DOI":"10.1145\/3470496.3527385"},{"key":"e_1_3_3_3_6_2","doi-asserted-by":"publisher","DOI":"10.1109\/PACT52795.2021.00016"},{"key":"e_1_3_3_3_7_2","doi-asserted-by":"publisher","DOI":"10.1145\/125826.125932"},{"key":"e_1_3_3_3_8_2","doi-asserted-by":"publisher","DOI":"10.1145\/125826.125925"},{"key":"e_1_3_3_3_9_2","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2018.00021"},{"key":"e_1_3_3_3_10_2","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2019.00053"},{"key":"e_1_3_3_3_11_2","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA53966.2022.00047"},{"key":"e_1_3_3_3_12_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICDE.2013.6544839"},{"key":"e_1_3_3_3_13_2","doi-asserted-by":"crossref","unstructured":"Ronald Barber Guy Lohman Ippokratis Pandis Vijayshankar Raman Richard Sidle Gopi Attaluri Naresh Chainani Sam Lightstone and David Sharpe. 2014. Memory-efficient hash joins. Proceedings of the VLDB Endowment 8 4 (2014) 353\u2013364.","DOI":"10.14778\/2735496.2735499"},{"key":"e_1_3_3_3_14_2","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2019.00051"},{"key":"e_1_3_3_3_15_2","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2019.00051"},{"key":"e_1_3_3_3_16_2","doi-asserted-by":"publisher","DOI":"10.1109\/SC.2012.50"},{"key":"e_1_3_3_3_17_2","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS.2017.112"},{"key":"e_1_3_3_3_18_2","unstructured":"Scott Beamer Krste Asanovi\u0107 and David Patterson. 2017. The GAP Benchmark Suite. arxiv:https:\/\/arXiv.org\/abs\/1508.03619\u00a0[cs.DC] https:\/\/arxiv.org\/abs\/1508.03619"},{"key":"e_1_3_3_3_19_2","doi-asserted-by":"crossref","unstructured":"Michael Bekerman Stephan Jourdan Ronny Ronen Gilad Kirshenboim Lihu Rappoport Adi Yoaz and Uri Weiser. 1999. Correlated load-address predictors. ACM SIGARCH Computer Architecture News 27 2 (1999) 54\u201363.","DOI":"10.1145\/307338.300984"},{"key":"e_1_3_3_3_20_2","doi-asserted-by":"publisher","DOI":"10.1145\/3352460.3358325"},{"key":"e_1_3_3_3_21_2","doi-asserted-by":"publisher","unstructured":"Marsha\u00a0J Berger and Joseph Oliger. 1984. Adaptive mesh refinement for hyperbolic partial differential equations. J. Comput. Phys. 53 3 (1984) 484\u2013512. 10.1016\/0021-9991(84)90073-1","DOI":"10.1016\/0021-9991(84)90073-1"},{"key":"e_1_3_3_3_22_2","doi-asserted-by":"publisher","unstructured":"Nathan Binkert Bradford Beckmann Gabriel Black Steven\u00a0K. Reinhardt Ali Saidi Arkaprava Basu Joel Hestness Derek\u00a0R. Hower Tushar Krishna Somayeh Sardashti Rathijit Sen Korey Sewell Muhammad Shoaib Nilay Vaish Mark\u00a0D. Hill and David\u00a0A. Wood. 2011. The gem5 simulator. SIGARCH Comput. Archit. News 39 2 (Aug. 2011) 1\u20137. 10.1145\/2024716.2024718","DOI":"10.1145\/2024716.2024718"},{"key":"e_1_3_3_3_23_2","doi-asserted-by":"publisher","unstructured":"Ulrik Brandes. 2001. A faster algorithm for betweenness centrality*. The Journal of Mathematical Sociology 25 2 (2001) 163\u2013177. 10.1080\/0022250X.2001.9990249","DOI":"10.1080\/0022250X.2001.9990249"},{"key":"e_1_3_3_3_24_2","volume-title":"A gem5 experimental repo in order to explore Data-dependent Access (DDA).","author":"CAG\u00a0group Institute of AI & Robotics of Xi\u2019an Jiaotong\u00a0Univiersity","year":"2024","unstructured":"Institute of AI & Robotics of Xi\u2019an Jiaotong\u00a0Univiersity CAG\u00a0group. 2024. A gem5 experimental repo in order to explore Data-dependent Access (DDA).https:\/\/github.com\/xjtuiair-cag\/gem5_dda\/tree\/dmp-paper"},{"key":"e_1_3_3_3_25_2","doi-asserted-by":"publisher","DOI":"10.23919\/DATE56975.2023.10137240"},{"key":"e_1_3_3_3_26_2","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2014.58"},{"key":"e_1_3_3_3_27_2","doi-asserted-by":"publisher","unstructured":"Yu-Hsin Chen Tushar Krishna Joel\u00a0S. Emer and Vivienne Sze. 2017. Eyeriss: An Energy-Efficient Reconfigurable Accelerator for Deep Convolutional Neural Networks. IEEE Journal of Solid-State Circuits 52 1 (2017) 127\u2013138. 10.1109\/JSSC.2016.2616357","DOI":"10.1109\/JSSC.2016.2616357"},{"key":"e_1_3_3_3_28_2","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2007.23"},{"key":"e_1_3_3_3_29_2","doi-asserted-by":"publisher","DOI":"10.1145\/3352460.3358276"},{"key":"e_1_3_3_3_30_2","doi-asserted-by":"publisher","DOI":"10.1145\/263580.263597"},{"key":"e_1_3_3_3_31_2","unstructured":"R\u00a0D Falgout J\u00a0E Jones and U\u00a0M Yang. 2004. The Design and Implementation of hypre a Library of Parallel High Performance Preconditioners. Lecture Notes in Computational Science and Engineering 51 (7 2004). https:\/\/www.osti.gov\/biblio\/875356"},{"key":"e_1_3_3_3_32_2","doi-asserted-by":"publisher","DOI":"10.1145\/3470496.3527395"},{"key":"e_1_3_3_3_33_2","doi-asserted-by":"publisher","DOI":"10.1109\/VLSIC.2018.8502276"},{"key":"e_1_3_3_3_34_2","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA57654.2024.00040"},{"key":"e_1_3_3_3_35_2","doi-asserted-by":"publisher","unstructured":"Michael Gittings Robert Weaver Michael Clover Thomas Betlach Nelson Byrne Robert Coker Edward Dendy Robert Hueckstaedt Kim New W\u00a0Rob Oakes Dale Ranta and Ryan Stefan. 2008. The RAGE radiation-hydrodynamic code. Computational Science & Discovery 1 1 (Nov. 2008) 015005. 10.1088\/1749-4699\/1\/1\/015005","DOI":"10.1088\/1749-4699\/1\/1\/015005"},{"key":"e_1_3_3_3_36_2","doi-asserted-by":"publisher","unstructured":"John\u00a0W. Grove. 2019. Eulerian Applications Project - xRage Introduction and Overview. (5 2019). 10.2172\/1532688","DOI":"10.2172\/1532688"},{"key":"e_1_3_3_3_37_2","doi-asserted-by":"publisher","DOI":"10.1145\/3579371.3589060"},{"key":"e_1_3_3_3_38_2","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2016.7783759"},{"key":"e_1_3_3_3_39_2","doi-asserted-by":"crossref","unstructured":"Song Han Xingyu Liu Huizi Mao Jing Pu Ardavan Pedram Mark\u00a0A Horowitz and William\u00a0J Dally. 2016. EIE: Efficient inference engine on compressed deep neural network. ACM SIGARCH Computer Architecture News 44 3 (2016) 243\u2013254.","DOI":"10.1145\/3007787.3001163"},{"key":"e_1_3_3_3_40_2","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA.2016.46"},{"key":"e_1_3_3_3_41_2","doi-asserted-by":"publisher","DOI":"10.1145\/3392717.3392751"},{"key":"e_1_3_3_3_42_2","doi-asserted-by":"publisher","DOI":"10.1145\/3352460.3358275"},{"key":"e_1_3_3_3_43_2","doi-asserted-by":"publisher","unstructured":"Paul Henning and USDOE National Nuclear\u00a0Security Administration. 2023. Ume: Unstructured Mesh Explorations. 10.11578\/dc.20230602.5","DOI":"10.11578\/dc.20230602.5"},{"key":"e_1_3_3_3_44_2","doi-asserted-by":"publisher","DOI":"10.2172\/1113870"},{"key":"e_1_3_3_3_45_2","doi-asserted-by":"publisher","DOI":"10.1145\/2967938.2967958"},{"key":"e_1_3_3_3_46_2","doi-asserted-by":"publisher","DOI":"10.1109\/ISSCC.2014.6757323"},{"key":"e_1_3_3_3_47_2","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2003.1183549"},{"key":"e_1_3_3_3_48_2","doi-asserted-by":"crossref","first-page":"343","DOI":"10.1109\/MICRO.2004.4","volume-title":"37th International Symposium on Microarchitecture (MICRO-37\u201904)","author":"Hur Ibrahim","year":"2004","unstructured":"Ibrahim Hur and Calvin Lin. 2004. Adaptive history-based memory schedulers. In 37th International Symposium on Microarchitecture (MICRO-37\u201904). IEEE, 343\u2013354."},{"volume-title":"Intel\u00ae 64 and IA-32 Architectures Software Developer Manuals","year":"2025","key":"e_1_3_3_3_49_2","unstructured":"Intel. 2025. Intel\u00ae 64 and IA-32 Architectures Software Developer Manuals. https:\/\/www.intel.com\/content\/www\/us\/en\/developer\/articles\/technical\/intel-sdm.html"},{"key":"e_1_3_3_3_50_2","doi-asserted-by":"crossref","unstructured":"Engin Ipek Onur Mutlu Jos\u00e9\u00a0F Mart\u00ednez and Rich Caruana. 2008. Self-optimizing memory controllers: A reinforcement learning approach. ACM SIGARCH Computer Architecture News 36 3 (2008) 39\u201350.","DOI":"10.1145\/1394608.1382172"},{"key":"e_1_3_3_3_51_2","doi-asserted-by":"publisher","DOI":"10.1145\/2540708.2540730"},{"key":"e_1_3_3_3_52_2","doi-asserted-by":"publisher","DOI":"10.1145\/3492321.3519583"},{"key":"e_1_3_3_3_53_2","doi-asserted-by":"publisher","unstructured":"Supreet Jeloka Naveen\u00a0Bharathwaj Akesh Dennis Sylvester and David Blaauw. 2016. A 28 nm Configurable Memory (TCAM\/BCAM\/SRAM) Using Push-Rule 6T Bit Cell Enabling Logic-in-Memory. IEEE Journal of Solid-State Circuits 51 4 (2016) 1009\u20131021. 10.1109\/JSSC.2016.2515510","DOI":"10.1109\/JSSC.2016.2515510"},{"key":"e_1_3_3_3_54_2","doi-asserted-by":"publisher","DOI":"10.1145\/264107.264207"},{"key":"e_1_3_3_3_55_2","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA45697.2020.00070"},{"key":"e_1_3_3_3_56_2","doi-asserted-by":"publisher","DOI":"10.1109\/SAMOS.2014.6893192"},{"key":"e_1_3_3_3_57_2","doi-asserted-by":"publisher","unstructured":"Changkyu Kim Tim Kaldewey Victor\u00a0W. Lee Eric Sedlar Anthony\u00a0D. Nguyen Nadathur Satish Jatin Chhugani Andrea Di\u00a0Blas and Pradeep Dubey. 2009. Sort vs. Hash revisited: fast join implementation on modern multi-core CPUs. Proc. VLDB Endow. 2 2 (Aug. 2009) 1378\u20131389. 10.14778\/1687553.1687564","DOI":"10.14778\/1687553.1687564"},{"key":"e_1_3_3_3_58_2","doi-asserted-by":"publisher","DOI":"10.1145\/2967938.2967948"},{"key":"e_1_3_3_3_59_2","doi-asserted-by":"publisher","DOI":"10.1145\/2540708.2540748"},{"key":"e_1_3_3_3_60_2","volume-title":"IEEE Hot Chips Symposium (HCS)","author":"Kumar Akhilesh","year":"2017","unstructured":"Akhilesh Kumar, Don Soltis, Irma Esmer, Adi Yoaz, and Sailesh Kottapalli. 2017. The new Intel Xeon scalable processor (formerly skylake-SP). In IEEE Hot Chips Symposium (HCS)."},{"key":"e_1_3_3_3_61_2","doi-asserted-by":"publisher","DOI":"10.1145\/2628071.2628118"},{"key":"e_1_3_3_3_62_2","doi-asserted-by":"publisher","DOI":"10.5555\/3026877.3026931"},{"key":"e_1_3_3_3_63_2","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2014.6835970"},{"key":"e_1_3_3_3_64_2","doi-asserted-by":"publisher","DOI":"10.1109\/CGO51591.2021.9370308"},{"key":"e_1_3_3_3_65_2","doi-asserted-by":"publisher","DOI":"10.1145\/3422575.3422794"},{"key":"e_1_3_3_3_66_2","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO61859.2024.00092"},{"key":"e_1_3_3_3_67_2","doi-asserted-by":"publisher","unstructured":"Ruipeng Li and Ulrike\u00a0M. Yang. 2023. AMG2023. [Computer Software] 10.11578\/dc.20230413.1. 10.11578\/dc.20230413.1","DOI":"10.11578\/dc.20230413.1"},{"key":"e_1_3_3_3_68_2","doi-asserted-by":"publisher","DOI":"10.1145\/3343737.3343745"},{"key":"e_1_3_3_3_69_2","doi-asserted-by":"publisher","DOI":"10.5555\/225160.225197"},{"key":"e_1_3_3_3_70_2","doi-asserted-by":"crossref","unstructured":"MTP Liska Koushik Chatterjee D Issa Doosoo Yoon N Kaaz A Tchekhovskoy D Van\u00a0Eijnatten G Musoke C Hesp V Rohoza et\u00a0al. 2022. H-AMR: A New GPU-accelerated GRMHD Code for Exascale Computing with 3D Adaptive Mesh Refinement and Local Adaptive Time Stepping. The Astrophysical Journal Supplement Series 263 2 (2022) 26.","DOI":"10.3847\/1538-4365\/ac9966"},{"key":"e_1_3_3_3_71_2","unstructured":"Los Alamos National Laboratory (LANL). 2024. ATS-5: The Fifth Advanced Technology System in the Advanced Simulation and Computing Program. https:\/\/mission.lanl.gov\/advanced-simulation-and-computing\/platforms\/ats-5\/. Accessed: 2024-11-15."},{"key":"e_1_3_3_3_72_2","unstructured":"Haocong Luo Yahya\u00a0Can Tu\u011frul F.\u00a0Nisa Bostanc\u0131 Ataberk Olgun A.\u00a0Giray Ya\u011fl\u0131k\u00e7\u0131 and Onur Mutlu. 2023. Ramulator 2.0: A Modern Modular and Extensible DRAM Simulator."},{"key":"e_1_3_3_3_73_2","doi-asserted-by":"publisher","unstructured":"S. Manegold P. Boncz and M. Kersten. 2002. Optimizing main-memory join on modern hardware. IEEE Transactions on Knowledge and Data Engineering 14 4 (2002) 709\u2013730. 10.1109\/TKDE.2002.1019210","DOI":"10.1109\/TKDE.2002.1019210"},{"key":"e_1_3_3_3_74_2","doi-asserted-by":"crossref","unstructured":"Sally\u00a0A McKee William\u00a0A Wulf James\u00a0H Aylor Robert\u00a0H Klenke Maximo\u00a0H Salinas Sung\u00a0I Hong and Dee\u00a0AB Weikle. 2000. Dynamic access ordering for streamed computations. IEEE Trans. Comput. 49 11 (2000) 1255\u20131271.","DOI":"10.1109\/12.895941"},{"key":"e_1_3_3_3_75_2","doi-asserted-by":"publisher","unstructured":"U. Meyer and P. Sanders. 2003. \u0394 -stepping: a parallelizable shortest path algorithm. J. Algorithms 49 1 (Oct. 2003) 114\u2013152. 10.1016\/S0196-6774(03)00076-2","DOI":"10.1016\/S0196-6774(03)00076-2"},{"key":"e_1_3_3_3_76_2","doi-asserted-by":"publisher","DOI":"10.1145\/3319647.3325839"},{"key":"e_1_3_3_3_77_2","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2016.7446087"},{"key":"e_1_3_3_3_78_2","doi-asserted-by":"publisher","DOI":"10.1109\/PACT52795.2021.00011"},{"key":"e_1_3_3_3_79_2","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA.2005.42"},{"key":"e_1_3_3_3_80_2","doi-asserted-by":"crossref","unstructured":"Todd Mowry and Anoop Gupta. 1991. Tolerating latency through software-controlled prefetching in shared-memory multiprocessors. Journal of parallel and Distributed Computing 12 2 (1991) 87\u2013106.","DOI":"10.1016\/0743-7315(91)90014-Z"},{"key":"e_1_3_3_3_81_2","doi-asserted-by":"publisher","DOI":"10.1145\/143365.143488"},{"key":"e_1_3_3_3_82_2","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2018.00010"},{"key":"e_1_3_3_3_83_2","doi-asserted-by":"publisher","DOI":"10.1145\/3352460.3358254"},{"key":"e_1_3_3_3_84_2","doi-asserted-by":"publisher","DOI":"10.1145\/3582016.3582069"},{"key":"e_1_3_3_3_85_2","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA.2005.49"},{"key":"e_1_3_3_3_86_2","doi-asserted-by":"crossref","unstructured":"Onur Mutlu and Thomas Moscibroda. 2008. Parallelism-aware batch scheduling: Enhancing both performance and fairness of shared DRAM systems. ACM SIGARCH Computer Architecture News 36 3 (2008) 63\u201374.","DOI":"10.1145\/1394608.1382128"},{"key":"e_1_3_3_3_87_2","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2003.1183532"},{"key":"e_1_3_3_3_88_2","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA52012.2021.00024"},{"key":"e_1_3_3_3_89_2","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA47549.2020.00040"},{"key":"e_1_3_3_3_90_2","doi-asserted-by":"publisher","DOI":"10.1145\/3613424.3614255"},{"key":"e_1_3_3_3_91_2","doi-asserted-by":"crossref","unstructured":"Ajeya Naithani Jaime Roelandts Sam Ainsworth Timothy\u00a0M Jones and Lieven Eeckhout. 2024. Decoupled Vector Runahead for Prefetching Nested Memory-Access Chains. IEEE Micro (2024).","DOI":"10.1109\/MM.2024.3406891"},{"key":"e_1_3_3_3_92_2","doi-asserted-by":"publisher","DOI":"10.1145\/2611354.2611365"},{"key":"e_1_3_3_3_93_2","doi-asserted-by":"publisher","DOI":"10.1109\/ISSCC42614.2022.9731694"},{"key":"e_1_3_3_3_94_2","doi-asserted-by":"publisher","DOI":"10.1145\/3470496.3527400"},{"key":"e_1_3_3_3_95_2","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2018.00067"},{"key":"e_1_3_3_3_96_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICDM59182.2024.00097"},{"key":"e_1_3_3_3_97_2","doi-asserted-by":"publisher","unstructured":"Yunjie Pan Jiecao Yu Andrew Lukefahr Reetuparna Das and Scott Mahlke. 2023. BitSET: Bit-Serial Early Termination for Computation Reduction in Convolutional Neural Networks. ACM Trans. Embed. Comput. Syst. 22 5s Article 98 (Sept. 2023) 24\u00a0pages. 10.1145\/3609093","DOI":"10.1145\/3609093"},{"key":"e_1_3_3_3_98_2","doi-asserted-by":"publisher","unstructured":"Ashish Panwar Aravinda Prasad and K. Gopinath. 2018. Making Huge Pages Actually Useful. SIGPLAN Not. 53 2 (March 2018) 679\u2013692. 10.1145\/3296957.3173203","DOI":"10.1145\/3296957.3173203"},{"key":"e_1_3_3_3_99_2","doi-asserted-by":"crossref","unstructured":"Irma\u00a0Esmer Papazian Sailesh Kottapalli Jeff Baxter Jeff Chamberlain Geetha Vedaraman and Brian Morris. 2015. Ivy Bridge server: A converged design. IEEE Micro 35 2 (2015) 16\u201325.","DOI":"10.1109\/MM.2015.33"},{"key":"e_1_3_3_3_100_2","doi-asserted-by":"crossref","unstructured":"Angshuman Parashar Minsoo Rhu Anurag Mukkara Antonio Puglielli Rangharajan Venkatesan Brucek Khailany Joel Emer Stephen\u00a0W Keckler and William\u00a0J Dally. 2017. SCNN: An accelerator for compressed-sparse convolutional neural networks. ACM SIGARCH computer architecture news 45 2 (2017) 27\u201340.","DOI":"10.1145\/3140659.3080254"},{"key":"e_1_3_3_3_101_2","doi-asserted-by":"publisher","DOI":"10.1145\/2749469.2749473"},{"key":"e_1_3_3_3_102_2","doi-asserted-by":"crossref","unstructured":"Yue Peng Bailin Deng Juyong Zhang Fanyu Geng Wenjie Qin and Ligang Liu. 2018. Anderson acceleration for geometry optimization and physics simulation. ACM Transactions on Graphics (TOG) 37 4 (2018) 1\u201314.","DOI":"10.1145\/3197517.3201290"},{"key":"e_1_3_3_3_103_2","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2004.22"},{"key":"e_1_3_3_3_104_2","doi-asserted-by":"publisher","DOI":"10.1145\/339647.339668"},{"key":"e_1_3_3_3_105_2","doi-asserted-by":"crossref","unstructured":"Jaime Roelandts Ajeya Naithani Sam Ainsworth Timothy\u00a0M Jones and Lieven Eeckhout. 2024. Scalar Vector Runahead. (2024).","DOI":"10.1109\/MICRO61859.2024.00101"},{"key":"e_1_3_3_3_106_2","doi-asserted-by":"publisher","DOI":"10.1145\/3466752.3480047"},{"key":"e_1_3_3_3_107_2","doi-asserted-by":"publisher","DOI":"10.1145\/3470496.3527379"},{"key":"e_1_3_3_3_108_2","doi-asserted-by":"publisher","DOI":"10.1145\/2830772.2830820"},{"key":"e_1_3_3_3_109_2","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2007.346206"},{"key":"e_1_3_3_3_110_2","doi-asserted-by":"publisher","DOI":"10.1145\/3695794.3695816"},{"key":"e_1_3_3_3_111_2","doi-asserted-by":"publisher","DOI":"10.1145\/2830772.2830793"},{"key":"e_1_3_3_3_112_2","doi-asserted-by":"publisher","unstructured":"Yossi Shiloach and Uzi Vishkin. 1982. An O(logn) parallel connectivity algorithm. Journal of Algorithms 3 1 (1982) 57\u201367. 10.1016\/0196-6774(82)90008-6","DOI":"10.1016\/0196-6774(82)90008-6"},{"key":"e_1_3_3_3_113_2","doi-asserted-by":"publisher","DOI":"10.1109\/MCHPC56545.2022.00009"},{"key":"e_1_3_3_3_114_2","doi-asserted-by":"crossref","unstructured":"Galen\u00a0M Shipman Jason Pruet David Daniel Josh Dolence Gary Grider Brian\u00a0M Haines Aimee Hungerford Stephen Poole Tim Randles Sriram Swaminarayan et\u00a0al. 2022. The future of HPC in nuclear security. IEEE Internet Computing 27 1 (2022) 16\u201323.","DOI":"10.1109\/MIC.2022.3229037"},{"key":"e_1_3_3_3_115_2","doi-asserted-by":"publisher","DOI":"10.1145\/3613424.3614284"},{"key":"e_1_3_3_3_116_2","doi-asserted-by":"publisher","DOI":"10.5555\/800048.801719"},{"key":"e_1_3_3_3_117_2","doi-asserted-by":"publisher","DOI":"10.5555\/2028905"},{"key":"e_1_3_3_3_118_2","doi-asserted-by":"crossref","unstructured":"Sriseshan Srikanth Anirudh Jain Thomas\u00a0M Conte Erik\u00a0P Debenedictis and Jeanine Cook. 2021. SortCache: intelligent cache management for accelerating sparse data workloads. ACM Transactions on Architecture and Code Optimization (TACO) 18 4 (2021) 1\u201324.","DOI":"10.1145\/3473332"},{"key":"e_1_3_3_3_119_2","doi-asserted-by":"publisher","unstructured":"Aaron Stillmaker and Bevan Baas. 2017. Scaling equations for the accurate prediction of CMOS device performance from 180nm to 7nm. Integration 58 (2017) 74\u201381. 10.1016\/j.vlsi.2017.02.002","DOI":"10.1016\/j.vlsi.2017.02.002"},{"key":"e_1_3_3_3_120_2","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA51647.2021.00061"},{"key":"e_1_3_3_3_121_2","doi-asserted-by":"publisher","DOI":"10.1109\/ISSCC.2018.8310170"},{"key":"e_1_3_3_3_122_2","doi-asserted-by":"publisher","DOI":"10.1109\/HCS59251.2023.10254726"},{"key":"e_1_3_3_3_123_2","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA52012.2021.00039"},{"key":"e_1_3_3_3_124_2","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA57654.2024.00083"},{"key":"e_1_3_3_3_125_2","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2009.4798239"},{"volume-title":"Skylake (client) - Microarchitectures - Intel","year":"2025","key":"e_1_3_3_3_126_2","unstructured":"WikiChip. 2025. Skylake (client) - Microarchitectures - Intel. https:\/\/en.wikichip.org\/wiki\/intel\/microarchitectures\/skylake_(client)"},{"key":"e_1_3_3_3_127_2","doi-asserted-by":"publisher","DOI":"10.1145\/3352460.3358300"},{"key":"e_1_3_3_3_128_2","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA51647.2021.00055"},{"key":"e_1_3_3_3_129_2","doi-asserted-by":"publisher","DOI":"10.1109\/DNSR.2004.1344743"},{"key":"e_1_3_3_3_130_2","doi-asserted-by":"publisher","DOI":"10.1145\/3352460.3358318"},{"key":"e_1_3_3_3_131_2","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA52012.2021.00087"},{"key":"e_1_3_3_3_132_2","doi-asserted-by":"publisher","DOI":"10.1145\/2830772.2830807"},{"key":"e_1_3_3_3_133_2","doi-asserted-by":"crossref","unstructured":"Chao Zhang Maximilian Bremer Cy Chan John Shalf and Xiaochen Guo. 2022. ASA: A ccelerating S parse A ccumulation in Column-wise SpGEMM. ACM Transactions on Architecture and Code Optimization (TACO) 19 4 (2022) 1\u201324.","DOI":"10.1145\/3543068"},{"key":"e_1_3_3_3_134_2","doi-asserted-by":"publisher","DOI":"10.1145\/3352460.3358272"},{"key":"e_1_3_3_3_135_2","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2016.7783723"}],"event":{"name":"ISCA '25: Proceedings of the 52nd Annual International Symposium on Computer Architecture","sponsor":["SIGARCH ACM Special Interest Group on Computer Architecture"],"location":"Tokyo Japan","acronym":"SIGARCH '25"},"container-title":["Proceedings of the 52nd Annual International Symposium on Computer Architecture"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3695053.3731015","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3695053.3731015","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,21]],"date-time":"2025-06-21T11:03:09Z","timestamp":1750503789000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3695053.3731015"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,6,20]]},"references-count":134,"alternative-id":["10.1145\/3695053.3731015","10.1145\/3695053"],"URL":"https:\/\/doi.org\/10.1145\/3695053.3731015","relation":{},"subject":[],"published":{"date-parts":[[2025,6,20]]},"assertion":[{"value":"2025-06-20","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}