{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,8,23]],"date-time":"2025-08-23T00:07:24Z","timestamp":1755907644639,"version":"3.44.0"},"publisher-location":"New York, NY, USA","reference-count":71,"publisher":"ACM","content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,6,8]]},"DOI":"10.1145\/3721145.3734530","type":"proceedings-article","created":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T12:57:17Z","timestamp":1755867437000},"page":"1005-1019","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["EDAN: Towards Understanding Memory Parallelism and Latency Sensitivity in HPC"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0003-0061-5072","authenticated-orcid":false,"given":"Siyuan","family":"Shen","sequence":"first","affiliation":[{"name":"Department of Computer Science, ETH Z\u00fcrich, Switzerland, Z\u00fcrich, Switzerland"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0862-4662","authenticated-orcid":false,"given":"Mikhail","family":"Khalilov","sequence":"additional","affiliation":[{"name":"Department of Computer Science, ETH Z\u00fcrich, Switzerland, Z\u00fcrich, Switzerland"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5975-4526","authenticated-orcid":false,"given":"Lukas","family":"Gianinazzi","sequence":"additional","affiliation":[{"name":"Department of Computer Science, ETH Z\u00fcrich, Switzerland, Z\u00fcrich, Switzerland"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4884-3934","authenticated-orcid":false,"given":"Timo","family":"Schneider","sequence":"additional","affiliation":[{"name":"Department of Computer Science, ETH Z\u00fcrich, Switzerland, Z\u00fcrich, Switzerland"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-7654-6038","authenticated-orcid":false,"given":"Marcin","family":"Chrapek","sequence":"additional","affiliation":[{"name":"Department of Computer Science, ETH Z\u00fcrich, Switzerland, Z\u00fcrich, Switzerland"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1715-364X","authenticated-orcid":false,"given":"Jai","family":"Dayal","sequence":"additional","affiliation":[{"name":"Cerebras Systems, Sunnyvale, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-6589-4209","authenticated-orcid":false,"given":"Manisha","family":"Gajbe","sequence":"additional","affiliation":[{"name":"N\/A, Folsom, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-5063-8004","authenticated-orcid":false,"given":"Robert","family":"Wisniewski","sequence":"additional","affiliation":[{"name":"Hewlett Packard Enterprise, Ossining, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1333-9797","authenticated-orcid":false,"given":"Torsten","family":"Hoefler","sequence":"additional","affiliation":[{"name":"Department of Computer Science, ETH Z\u00fcrich, Switzerland, Z\u00fcrich, Switzerland"}]}],"member":"320","published-online":{"date-parts":[[2025,8,22]]},"reference":[{"key":"e_1_3_3_1_2_2","doi-asserted-by":"crossref","unstructured":"Tiago A.\u00a0O. Alves Leandro A.\u00a0J. Marzulo Sandip Kundu and Felipe M.\u00a0G. Fran\u00e7a. 2021. Concurrency Analysis in Dynamic Dataflow Graphs. IEEE Transactions on Emerging Topics in Computing 9 1 (2021) 44\u201354. https:\/\/doi.org\/10.1109\/TETC.2018.2799078","DOI":"10.1109\/TETC.2018.2799078"},{"key":"e_1_3_3_1_3_2","volume-title":"Instruction Sets Should Be Free: The Case For RISC-V","author":"Asanovi\u0107 Krste","year":"2014","unstructured":"Krste Asanovi\u0107 and David\u00a0A. Patterson. 2014. Instruction Sets Should Be Free: The Case For RISC-V. Technical Report UCB\/EECS-2014-146. EECS Department, University of California, Berkeley. http:\/\/www2.eecs.berkeley.edu\/Pubs\/TechRpts\/2014\/EECS-2014-146.html"},{"key":"e_1_3_3_1_4_2","doi-asserted-by":"crossref","unstructured":"MohammadHossein AskariHemmat Theo Dupuis Yoan Fournier Nizar\u00a0El Zarif Matheus Cavalcante Matteo Perotti Frank Gurkaynak Luca Benini Francois Leduc-Primeau Yvon Savaria and Jean-Pierre David. 2023. Quark: An Integer RISC-V Vector Processor for Sub-Byte Quantized DNN Inference. arxiv:https:\/\/arXiv.org\/abs\/2302.05996\u00a0[cs.AR]","DOI":"10.1109\/ISCAS46773.2023.10181985"},{"key":"e_1_3_3_1_5_2","doi-asserted-by":"publisher","DOI":"10.5555\/1247360.1247401"},{"key":"e_1_3_3_1_6_2","unstructured":"Alex Benn\u00e9e Peter Maydell Paolo Bonzini and Christoph M\u00fcllner. 2022. qemu. https:\/\/github.com\/qemu\/qemu\/blob\/v7.2.0\/docs\/devel\/tcg-plugins.rst."},{"key":"e_1_3_3_1_7_2","doi-asserted-by":"crossref","unstructured":"Nathan Binkert Bradford Beckmann Gabriel Black Steven\u00a0K. Reinhardt Ali Saidi Arkaprava Basu Joel Hestness Derek\u00a0R. Hower Tushar Krishna Somayeh Sardashti Rathijit Sen Korey Sewell Muhammad Shoaib Nilay Vaish Mark\u00a0D. Hill and David\u00a0A. Wood. 2011. The Gem5 Simulator. SIGARCH Comput. Archit. News 39 2 (aug 2011) 1\u20137. https:\/\/doi.org\/10.1145\/2024716.2024718","DOI":"10.1145\/2024716.2024718"},{"key":"e_1_3_3_1_8_2","doi-asserted-by":"crossref","unstructured":"Guy\u00a0E Blelloch. 1996. Programming parallel algorithms. Commun. ACM 39 3 (1996) 85\u201397.","DOI":"10.1145\/227234.227246"},{"key":"e_1_3_3_1_9_2","doi-asserted-by":"publisher","DOI":"10.1145\/139669.139729"},{"key":"e_1_3_3_1_10_2","doi-asserted-by":"crossref","unstructured":"Emanuele Borgonovo. 2008. Sensitivity Analysis of Model Output with Input Constraints: A Generalized Rationale for Local Methods. Risk Analysis 28 3 (2008) 667\u2013680. https:\/\/doi.org\/10.1111\/j.1539-6924.2008.01052.x arXiv:https:\/\/onlinelibrary.wiley.com\/doi\/pdf\/10.1111\/j.1539-6924.2008.01052.x","DOI":"10.1111\/j.1539-6924.2008.01052.x"},{"key":"e_1_3_3_1_11_2","doi-asserted-by":"crossref","unstructured":"Emanuele Borgonovo and Elmar Plischke. 2016. Sensitivity analysis: A review of recent advances. European Journal of Operational Research 248 3 (2016) 869\u2013887. https:\/\/doi.org\/10.1016\/j.ejor.2015.06.032","DOI":"10.1016\/j.ejor.2015.06.032"},{"key":"e_1_3_3_1_12_2","doi-asserted-by":"crossref","unstructured":"Richard\u00a0P. Brent. 1974. The Parallel Evaluation of General Arithmetic Expressions. J. ACM 21 2 (apr 1974) 201\u2013206. https:\/\/doi.org\/10.1145\/321812.321815","DOI":"10.1145\/321812.321815"},{"key":"e_1_3_3_1_13_2","doi-asserted-by":"publisher","DOI":"10.1145\/3302424.3303988"},{"key":"e_1_3_3_1_14_2","doi-asserted-by":"publisher","DOI":"10.1109\/IISWC.2014.6983061"},{"key":"e_1_3_3_1_15_2","doi-asserted-by":"crossref","unstructured":"Gregory\u00a0J. Chaitin Marc\u00a0A. Auslander Ashok\u00a0K. Chandra John Cocke Martin\u00a0E. Hopkins and Peter\u00a0W. Markstein. 1981. Register allocation via coloring. Computer Languages 6 1 (1981) 47\u201357. https:\/\/doi.org\/10.1016\/0096-0551(81)90048-5","DOI":"10.1016\/0096-0551(81)90048-5"},{"key":"e_1_3_3_1_16_2","doi-asserted-by":"publisher","DOI":"10.1109\/IISWC.2015.32"},{"key":"e_1_3_3_1_17_2","unstructured":"M. Clark. 2017. rv 8 : a high performance RISC-V to x 86 binary translator."},{"key":"e_1_3_3_1_18_2","unstructured":"Marcin Copik Marcin Chrapek Alexandru Calotoiu and Torsten Hoefler. 2022. Software Resource Disaggregation for HPC with Serverless Computing. https:\/\/htor.inf.ethz.ch\/publications\/img\/2022_copik_serverless_hpc_report.pdf"},{"key":"e_1_3_3_1_19_2","volume-title":"Introduction to Algorithms (2nd ed.)","author":"Cormen Thomas\u00a0H.","year":"2001","unstructured":"Thomas\u00a0H. Cormen, Clifford Stein, Ronald\u00a0L. Rivest, and Charles\u00a0E. Leiserson. 2001. Introduction to Algorithms (2nd ed.). McGraw-Hill Higher Education."},{"key":"e_1_3_3_1_20_2","unstructured":"Kent Czechowski Casey Battaglino Chris McClanahan Aparna Chandramowlishwaran and Richard\u00a0W Vuduc. 2011. Balance Principles for Algorithm-Architecture Co-Design. HotPar 11 (2011) 9\u20139."},{"key":"e_1_3_3_1_21_2","doi-asserted-by":"crossref","unstructured":"Jens Domke Emil Vatai Balazs Gerofi Yuetsu Kodama Mohamed Wahib Artur Podobas Sparsh Mittal Miquel Peric\u00e0s Lingqi Zhang Peng Chen Aleksandr Drozd and Satoshi Matsuoka. 2022. At the Locus of Performance: A Case Study in Enhancing CPUs with Copious 3D-Stacked Cache. https:\/\/doi.org\/10.48550\/ARXIV.2204.02235","DOI":"10.1145\/3629520"},{"key":"e_1_3_3_1_22_2","doi-asserted-by":"publisher","DOI":"10.1145\/3457388.3458657"},{"key":"e_1_3_3_1_23_2","unstructured":"John D\u2019Ambrosia. 2022. IEEE P802.3df\u2122 Defines Architecture Holistically to Achieve 800 Gb\/s and 1.6 Tb\/s Ethernet. IEEE Standards Association (2022). https:\/\/standards.ieee.org\/beyond-standards\/ieee-p802-3df-defines-a-holistic-architectural-approach\/"},{"key":"e_1_3_3_1_24_2","unstructured":"Louis-No\u00ebl\u00a0Pouchet et al.2012. Polybench: The polyhedral benchmark suite. https:\/\/web.cse.ohio-state.edu\/\u00a0pouchet.2\/software\/polybench\/polybench.html"},{"key":"e_1_3_3_1_25_2","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA.2002.1003561"},{"key":"e_1_3_3_1_26_2","doi-asserted-by":"publisher","DOI":"10.5555\/3026877.3026897"},{"key":"e_1_3_3_1_27_2","volume-title":"Hardware Support for Hiding Cache Latency","author":"Golden Michael","year":"1993","unstructured":"Michael Golden and Trevor\u00a0N. Mudge. 1993. Hardware Support for Hiding Cache Latency. https:\/\/citeseerx.ist.psu.edu\/document?repid=rep1&type=pdf&doi=9ccb22c1e276804247c1f581a78b9716d66c7a73"},{"key":"e_1_3_3_1_28_2","first-page":"182","volume-title":"Brent\u2019s Theorem","author":"Gustafson John\u00a0L.","year":"2011","unstructured":"John\u00a0L. Gustafson. 2011. Brent\u2019s Theorem. Springer US, Boston, MA, 182\u2013185. https:\/\/doi.org\/10.1007\/978-0-387-09766-480"},{"key":"e_1_3_3_1_29_2","doi-asserted-by":"crossref","unstructured":"Michael\u00a0Allen Heroux and Jack. Dongarra. 2013. Toward a new metric for ranking high performance computing systems. (6 2013). https:\/\/doi.org\/10.2172\/1089988","DOI":"10.2172\/1089988"},{"key":"e_1_3_3_1_30_2","unstructured":"Torsten Hoefler Duncan Roweth Keith Underwood Bob Alverson Mark Griswold Vahid Tabatabaee Mohan Kalkunte Surendra Anubolu Siyan Shen Abdul Kabbani Moray McLaren and Steve Scott. 2023. Datacenter Ethernet and RDMA: Issues at Hyperscale. arxiv:https:\/\/arXiv.org\/abs\/2302.03337\u00a0[cs.NI]"},{"key":"e_1_3_3_1_31_2","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2018.00034"},{"key":"e_1_3_3_1_32_2","doi-asserted-by":"publisher","DOI":"10.1145\/800076.802486"},{"key":"e_1_3_3_1_33_2","doi-asserted-by":"crossref","unstructured":"I Karlin. 2012. LULESH Programming Model and Performance Ports Overview. (12 2012). https:\/\/doi.org\/10.2172\/1059462","DOI":"10.2172\/1059462"},{"key":"e_1_3_3_1_34_2","doi-asserted-by":"publisher","DOI":"10.2172\/1090032"},{"key":"e_1_3_3_1_35_2","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2010.51"},{"key":"e_1_3_3_1_36_2","doi-asserted-by":"publisher","DOI":"10.1145\/3409964.3461796"},{"key":"e_1_3_3_1_37_2","doi-asserted-by":"crossref","unstructured":"Grzegorz Kwasniewski Marko Kabi\u0107 Maciej Besta Joost VandeVondele Raffaele Solc\u00e0 and Torsten Hoefler. 2019. Red-blue pebbling revisited: near optimal parallel matrix-matrix multiplication. arxiv:https:\/\/arXiv.org\/abs\/1908.09606\u00a0[cs.CC]","DOI":"10.1145\/3295500.3356181"},{"key":"e_1_3_3_1_38_2","doi-asserted-by":"publisher","DOI":"10.1145\/3488423.3519329"},{"key":"e_1_3_3_1_39_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICPP.2013.119"},{"key":"e_1_3_3_1_40_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-540-49051-7_10"},{"key":"e_1_3_3_1_41_2","doi-asserted-by":"crossref","unstructured":"Kevin Lim Jichuan Chang Trevor Mudge Parthasarathy Ranganathan Steven\u00a0K. Reinhardt and Thomas\u00a0F. Wenisch. 2009. Disaggregated Memory for Expansion and Sharing in Blade Servers. SIGARCH Comput. Archit. News 37 3 (jun 2009) 267\u2013278. https:\/\/doi.org\/10.1145\/1555815.1555789","DOI":"10.1145\/1555815.1555789"},{"key":"e_1_3_3_1_42_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICDCS.2019.00165"},{"key":"e_1_3_3_1_43_2","doi-asserted-by":"crossref","unstructured":"Chi-Keung Luk Robert Cohn Robert Muth Harish Patil Artur Klauser Geoff Lowney Steven Wallace Vijay\u00a0Janapa Reddi and Kim Hazelwood. 2005. Pin: building customized program analysis tools with dynamic instrumentation. Acm sigplan notices 40 6 (2005) 190\u2013200.","DOI":"10.1145\/1064978.1065034"},{"key":"e_1_3_3_1_44_2","doi-asserted-by":"crossref","unstructured":"George Michelogiannakis Benjamin Klenk Brandon Cook Min\u00a0Yee Teh Madeleine Glick Larry Dennison Keren Bergman and John Shalf. 2022. A Case For Intra-Rack Resource Disaggregation in HPC. ACM Trans. Archit. Code Optim. 19 2 Article 29 (mar 2022) 26\u00a0pages. https:\/\/doi.org\/10.1145\/3514245","DOI":"10.1145\/3514245"},{"key":"e_1_3_3_1_45_2","series-title":"Leibniz International Proceedings in Informatics (LIPIcs)","first-page":"554","volume-title":"31st International Symposium on Theoretical Aspects of Computer Science (STACS 2014)","volume":"25","author":"Mitchell John\u00a0C.","year":"2014","unstructured":"John\u00a0C. Mitchell and Joe Zimmerman. 2014. Data-Oblivious Data Structures. In 31st International Symposium on Theoretical Aspects of Computer Science (STACS 2014)(Leibniz International Proceedings in Informatics (LIPIcs), Vol.\u00a025), Ernst\u00a0W. Mayr and Natacha Portier (Eds.). Schloss Dagstuhl\u2013Leibniz-Zentrum fuer Informatik, Dagstuhl, Germany, 554\u2013565. https:\/\/doi.org\/10.4230\/LIPIcs.STACS.2014.554"},{"key":"e_1_3_3_1_46_2","doi-asserted-by":"publisher","DOI":"10.1109\/IISWC.2007.4362179"},{"key":"e_1_3_3_1_47_2","unstructured":"Onur Mutlu. 2021. Out-of-Order Execution. https:\/\/safari.ethz.ch\/digitaltechnik\/spring2021\/lib\/exe\/fetch.php?media=onur-digitaldesign_comparch-2021-lecture16-out-of-order-execution-beforelecture.pdf"},{"key":"e_1_3_3_1_48_2","doi-asserted-by":"crossref","unstructured":"Crist\u00f3bal\u00a0A. Navarro Nancy Hitschfeld-Kahler and Luis Mateu. 2014. A Survey on Parallel Computing and its Applications in Data-Parallel Problems Using GPU Architectures. Communications in Computational Physics 15 2 (2014) 285\u2013329. https:\/\/doi.org\/10.4208\/cicp.110113.010813a","DOI":"10.4208\/cicp.110113.010813a"},{"key":"e_1_3_3_1_49_2","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPSW55747.2022.00210"},{"key":"e_1_3_3_1_50_2","doi-asserted-by":"publisher","DOI":"10.1109\/SBAC-PAD49847.2020.00034"},{"key":"e_1_3_3_1_51_2","doi-asserted-by":"crossref","unstructured":"Francesca Pianosi Keith Beven Jim Freer Jim\u00a0W. Hall Jonathan Rougier David\u00a0B. Stephenson and Thorsten Wagener. 2016. Sensitivity analysis of environmental models: A systematic review with practical workflow. Environmental Modelling & Software 79 (2016) 214\u2013232. https:\/\/doi.org\/10.1016\/j.envsoft.2016.02.008","DOI":"10.1016\/j.envsoft.2016.02.008"},{"key":"e_1_3_3_1_52_2","unstructured":"Brian\u00a0Paul Railing. 2015. Collecting and representing parallel programs with high performance instrumentation. Ph.\u00a0D. Dissertation. Georgia Institute of Technology."},{"key":"e_1_3_3_1_53_2","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2002.995707"},{"key":"e_1_3_3_1_54_2","volume-title":"Addressing Uncertainty in Multisector Dynamics Research","author":"Reed Patrick\u00a0M.","year":"2022","unstructured":"Patrick\u00a0M. Reed, Antonia Hadjimichael, Keyvan Malek, Tina Karimi, Chris\u00a0R. Vernon, Vivek Srikrishnan, Rohini\u00a0S. Gupta, David\u00a0F. Gold, Ben Lee, Klaus Keller, Travis\u00a0B. Thurber, and Jennie\u00a0S. Rice. 2022. Addressing Uncertainty in Multisector Dynamics Research. Zenodo. https:\/\/doi.org\/10.5281\/zenodo.6110623"},{"key":"e_1_3_3_1_55_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICCAD51958.2021.9643546"},{"key":"e_1_3_3_1_56_2","unstructured":"RISC-V Collaborative Project. Accessed 2023. RISC-V GNU Toolchain. https:\/\/github.com\/riscv-collab\/riscv-gnu-toolchain."},{"key":"e_1_3_3_1_57_2","doi-asserted-by":"crossref","unstructured":"Arun\u00a0F Rodrigues K\u00a0Scott Hemmert Brian\u00a0W Barrett Chad Kersey Ron Oldfield Marlo Weston Rolf Risen Jeanine Cook Paul Rosenfeld Elliot Cooper-Balis et\u00a0al. 2011. The structural simulation toolkit. ACM SIGMETRICS Performance Evaluation Review 38 4 (2011) 37\u201342.","DOI":"10.1145\/1964218.1964225"},{"key":"e_1_3_3_1_58_2","doi-asserted-by":"crossref","unstructured":"Andrea Saltelli. 1999. Sensitivity analysis: Could better methods be used? Journal of Geophysical Research: Atmospheres 104 D3 (1999) 3789\u20133793. https:\/\/doi.org\/10.1029\/1998JD100042 arXiv:https:\/\/agupubs.onlinelibrary.wiley.com\/doi\/pdf\/10.1029\/1998JD100042","DOI":"10.1029\/1998JD100042"},{"key":"e_1_3_3_1_59_2","unstructured":"Andrea Saltelli Ksenia Aleksankina William Becker Pamela Fennell Federico Ferretti Niels Holst Sushan Li and Qiongli Wu. 2017. Why So Many Published Sensitivity Analyses Are False. A Systematic Review of Sensitivity Analysis Practices. arxiv:https:\/\/arXiv.org\/abs\/1711.11359\u00a0[stat.AP]"},{"key":"e_1_3_3_1_60_2","volume-title":"Sensitivity Analysis in Practice: A Guide to Assessing Scientific Models","author":"Saltelli A.","year":"2004","unstructured":"A. Saltelli, S. Tarantola, F. Campolongo, and M. Ratto. 2004. Sensitivity Analysis in Practice: A Guide to Assessing Scientific Models. Wiley. https:\/\/books.google.ch\/books?id=NsAVmohPNpQC"},{"key":"e_1_3_3_1_61_2","doi-asserted-by":"crossref","unstructured":"Daniel Sanchez and Christos Kozyrakis. 2013. ZSim: Fast and accurate microarchitectural simulation of thousand-core systems. ACM SIGARCH Computer architecture news 41 3 (2013) 475\u2013486.","DOI":"10.1145\/2508148.2485963"},{"key":"e_1_3_3_1_62_2","doi-asserted-by":"crossref","unstructured":"Paul Scheffler Florian Zaruba Fabian Schuiki Torsten Hoefler and Luca Benini. 2020. Indirection Stream Semantic Register Architecture for Efficient Sparse-Dense Linear Algebra. arxiv:https:\/\/arXiv.org\/abs\/2011.08070\u00a0[cs.AR]","DOI":"10.23919\/DATE51398.2021.9474230"},{"key":"e_1_3_3_1_63_2","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA.2014.6853196"},{"key":"e_1_3_3_1_64_2","unstructured":"Anup Sharma and Davidlohr Bueso. 2023. Linux kernel profiling with perf. https:\/\/perf.wiki.kernel.org\/index.php\/Tutorial. Accessed: March 23 2023."},{"key":"e_1_3_3_1_65_2","doi-asserted-by":"publisher","DOI":"10.1145\/379240.379258"},{"key":"e_1_3_3_1_66_2","unstructured":"Richard Stallman Roland Pesch and Stan Shebs. 2010. Debugging with gdb. https:\/\/www.eecs.umich.edu\/courses\/eecs373\/readings\/Debugger.pdf"},{"key":"e_1_3_3_1_67_2","doi-asserted-by":"publisher","DOI":"10.1016\/B978-0-12-420158-3.00004-6"},{"key":"e_1_3_3_1_68_2","doi-asserted-by":"publisher","DOI":"10.1109\/PACT.2002.1106008"},{"key":"e_1_3_3_1_69_2","volume-title":"The RISC-V Instruction Set Manual, Volume I: User-Level ISA, Version 2.0","author":"Waterman Andrew","year":"2014","unstructured":"Andrew Waterman, Yunsup Lee, David\u00a0A. Patterson, and Krste Asanovi\u0107. 2014. The RISC-V Instruction Set Manual, Volume I: User-Level ISA, Version 2.0. Technical Report UCB\/EECS-2014-54. EECS Department, University of California, Berkeley. http:\/\/www2.eecs.berkeley.edu\/Pubs\/TechRpts\/2014\/EECS-2014-54.html"},{"key":"e_1_3_3_1_70_2","doi-asserted-by":"crossref","unstructured":"Samuel Williams Andrew Waterman and David Patterson. 2009. Roofline: an insightful visual performance model for multicore architectures. Commun. ACM 52 4 (2009) 65\u201376.","DOI":"10.1145\/1498765.1498785"},{"key":"e_1_3_3_1_71_2","unstructured":"Xiaoyang Zhang Junmin Xiao and Guangming Tan. 2020. I\/O Lower Bounds for Auto-tuning of Convolutions in CNNs. arxiv:https:\/\/arXiv.org\/abs\/2012.15667\u00a0[cs.LG]"},{"key":"e_1_3_3_1_72_2","doi-asserted-by":"publisher","DOI":"10.1109\/RTSS49844.2020.00022"}],"event":{"name":"ICS '25: 2025 International Conference on Supercomputing","location":"Salt Lake City USA","acronym":"ICS '25","sponsor":["SIGARCH ACM Special Interest Group on Computer Architecture"]},"container-title":["Proceedings of the 39th ACM International Conference on Supercomputing"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3721145.3734530","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T13:04:40Z","timestamp":1755867880000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3721145.3734530"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,6,8]]},"references-count":71,"alternative-id":["10.1145\/3721145.3734530","10.1145\/3721145"],"URL":"https:\/\/doi.org\/10.1145\/3721145.3734530","relation":{},"subject":[],"published":{"date-parts":[[2025,6,8]]},"assertion":[{"value":"2025-08-22","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}