{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,5]],"date-time":"2026-03-05T15:46:47Z","timestamp":1772725607233,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":43,"publisher":"ACM","content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,6,21]]},"DOI":"10.1145\/3695053.3731111","type":"proceedings-article","created":{"date-parts":[[2025,6,20]],"date-time":"2025-06-20T16:43:11Z","timestamp":1750437791000},"page":"1819-1833","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":1,"title":["Folded Banks: 3D-Stacked HBM Design for Fine-Grained Random-Access Bandwidth"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-7011-7082","authenticated-orcid":false,"given":"Vignesh","family":"Adhinarayanan","sequence":"first","affiliation":[{"name":"AMD Research and Advanced Development, Advanced Micro Devices (AMD), Inc., Austin, Texas, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5444-6521","authenticated-orcid":false,"given":"Bradford M.","family":"Beckmann","sequence":"additional","affiliation":[{"name":"AMD Research and Advanced Development, Advanced Micro Devices (AMD), Inc., Bellevue, Washington, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8288-393X","authenticated-orcid":false,"given":"Wantong","family":"Li","sequence":"additional","affiliation":[{"name":"Department of Electrical and Computer Engineering, University of California, Riverside, Riverside, California, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4277-9713","authenticated-orcid":false,"given":"Mohammad","family":"Seyedzadeh","sequence":"additional","affiliation":[{"name":"Microsoft Corporation, Redmond, Washington, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-7412-5922","authenticated-orcid":false,"given":"Sergey","family":"Blagodurov","sequence":"additional","affiliation":[{"name":"AMD Research and Advanced Development, Advanced Micro Devices (AMD), Inc., Bellevue, Washington, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2242-8564","authenticated-orcid":false,"given":"Derrick","family":"Aguren","sequence":"additional","affiliation":[{"name":"AMD Research and Advanced Development, Advanced Micro Devices (AMD), Inc., Austin, Texas, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-0164-9412","authenticated-orcid":false,"given":"Hayden Hyungdong","family":"Lee","sequence":"additional","affiliation":[{"name":"Central Engineering, Advanced Micro Devices (AMD), Inc., Austin, Texas, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2025,6,20]]},"reference":[{"key":"e_1_3_3_2_2_2","unstructured":"[n. d.]. CORAL-2 Benchmarks. https:\/\/asc.llnl.gov\/coral-2-benchmarks"},{"key":"e_1_3_3_2_3_2","unstructured":"[n. d.]. High Bandwidth Memory DRAM (HBM3). https:\/\/www.jedec.org\/system\/files\/docs\/JESD238.pdf"},{"key":"e_1_3_3_2_4_2","unstructured":"Sriram Aananthakrishnan Nesreen\u00a0K. Ahmed Vincent Cave Marcelo Cintra Yigit Demir Kristof\u00a0Du Bois Stijn Eyerman Joshua\u00a0B. Fryman Ivan Ganev Wim Heirman Hans-Christian Hoppe Jason Howard Ibrahim Hur MidhunChandra Kodiyath Samkit Jain Daniel\u00a0S. Klowden Marek\u00a0M. Landowski Laurent Montigny Ankit More Przemyslaw Ossowski Robert Pawlowski Nick Pepperling Fabrizio Petrini Mariusz Sikora Balasubramanian Seshasayee Shaden Smith Sebastian Szkoda Sanjaya Tayal Jesmin\u00a0Jahan Tithi Yves Vandriessche and Izajasz\u00a0P. Wrosz. 2020. PIUMA: Programmable Integrated Unified Memory Architecture. https:\/\/arxiv.org\/abs\/2010.06277"},{"key":"e_1_3_3_2_5_2","doi-asserted-by":"publisher","DOI":"10.1145\/2749469.2750397"},{"key":"e_1_3_3_2_6_2","volume-title":"2017 IEEE International Symposium on High Performance Computer Architecture (HPCA)","author":"Chatterjee Niladrish","unstructured":"Niladrish Chatterjee, Mike O\u2019Connor, Donghyuk Lee, Daniel\u00a0R. Johnson, Stephen\u00a0W. Keckler, Minsoo Rhu, and William\u00a0J. Dally. [n. d.]. Architecting an Energy-Efficient DRAM System for GPUs. In 2017 IEEE International Symposium on High Performance Computer Architecture (HPCA)."},{"key":"e_1_3_3_2_7_2","doi-asserted-by":"crossref","unstructured":"Pai-Yu Chen Xiaochen Peng and Shimeng Yu. 2018. NeuroSim: A circuit-level macro model for benchmarking neuro-inspired architectures in online learning. IEEE Transactions on Computer-Aided Design of Integrated Circuits and Systems 37 12 (2018) 3067\u20133080.","DOI":"10.1109\/TCAD.2018.2789723"},{"key":"e_1_3_3_2_8_2","doi-asserted-by":"crossref","unstructured":"Elliott Cooper-Balis and Bruce Jacob. 2010. Fine-Grained Activation for Power Reduction in DRAM. IEEE Micro 30 (2010).","DOI":"10.1109\/MM.2010.43"},{"key":"e_1_3_3_2_9_2","unstructured":"Jacob Devlin Ming-Wei Chang Kenton Lee and Kristina Toutanova. 2018. BERT: Pre-training of Deep Bi-directional Transformers for Language Understanding. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1810.04805 (2018)."},{"key":"e_1_3_3_2_10_2","volume-title":"2016 6th Workshop on Irregular Applications: Architecture and Algorithms (IA3)","author":"Dysart Timothy","unstructured":"Timothy Dysart, Peter Kogge, Martin Deneroff, Eric Bovell, Preston Briggs, Jay Brockman, Kenneth Jacobsen, Yujen Juan, Shannon Kuntz, Richard Lethin, Janice McMahon, Chandra Pawar, Martin Perrigo, Sarah Rucker, John Ruttenberg, Max Ruttenberg, and Steve Stein. [n. d.]. Highly Scalable Near Memory Processing with Migrating Threads on the Emu System Architecture. In 2016 6th Workshop on Irregular Applications: Architecture and Algorithms (IA3)."},{"key":"e_1_3_3_2_11_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-07312-0_3"},{"key":"e_1_3_3_2_12_2","doi-asserted-by":"publisher","DOI":"10.1109\/IEDM13553.2020.9372039"},{"key":"e_1_3_3_2_13_2","doi-asserted-by":"crossref","unstructured":"Oded Green James Fox Jeffrey Young Jun Shirako and David Bader. 2019. Performance Impact of Memory Channels on Sparse and Irregular Algorithms.","DOI":"10.1109\/IA349570.2019.00016"},{"key":"e_1_3_3_2_14_2","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2016.7783730"},{"key":"e_1_3_3_2_15_2","doi-asserted-by":"crossref","unstructured":"Eric Hein Srinivas Eswar Abdurrahman Yasar Jiajia Li Jeffrey\u00a0S. Young Thomas\u00a0M. Conte Umit\u00a0V. Catalyurek Rich Vuduc Jason Riedy and Bora Ucar. 2019. Programming Strategies for Irregular Algorithms on the Emu Chick.","DOI":"10.1145\/3418077"},{"key":"e_1_3_3_2_16_2","doi-asserted-by":"crossref","unstructured":"Michihiro Inoue TOSHIO Yamada HISAKAZU Kotani HIROYUKI Yamauchi ATSUSHI Fujiwara JUNK Matsushima HIRONORI Akamatsu MASANORI Fukumoto MASAFUMI Kubota ICHIRO Nakao et\u00a0al. 1988. A 16-Mbit DRAM with a Relaxed Sense-Amplifier-Pitch Open-Bit-Line Architecture. IEEE journal of solid-state circuits 23 5 (1988) 1104\u20131112.","DOI":"10.1109\/4.5931"},{"key":"e_1_3_3_2_17_2","volume-title":"High Bandwidth Memory DRAM (HBM3)","author":"Association JEDEC Solid State Technology","year":"2023","unstructured":"JEDEC Solid State Technology Association. 2023. High Bandwidth Memory DRAM (HBM3). JEDEC Standard JESD238A. JEDEC Solid State Technology Association. Revision of JESD238, January 2022."},{"key":"e_1_3_3_2_18_2","doi-asserted-by":"publisher","DOI":"10.1109\/ECTC51906.2022.00057"},{"key":"e_1_3_3_2_19_2","doi-asserted-by":"crossref","unstructured":"Joohee Kim Jun\u00a0So Pak Jonghyun Cho Eakhwan Song Jeonghyeon Cho Heegon Kim Taigon Song Junho Lee Hyungdong Lee Kunwoo Park et\u00a0al. 2011. High-frequency scalable electrical model and analysis of a through silicon via (TSV). IEEE Transactions on Components Packaging and Manufacturing Technology 1 2 (2011) 181\u2013195.","DOI":"10.1109\/TCPMT.2010.2101890"},{"key":"e_1_3_3_2_20_2","doi-asserted-by":"crossref","unstructured":"Suk\u00a0Min Kim Byungkyu Song and Seong-Ook Jung. 2021. Imbalance-Tolerant Bit-Line Sense Amplifier for Dummy-less Open Bit-line Scheme in DRAM. IEEE Transactions on Circuits and Systems I: Regular Papers 68 6 (2021) 2546\u20132554.","DOI":"10.1109\/TCSI.2021.3063183"},{"key":"e_1_3_3_2_21_2","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA.2012.6237032"},{"key":"e_1_3_3_2_22_2","doi-asserted-by":"crossref","unstructured":"Yoongu Kim Weikun Yang and Onur Mutlu. 2015. Ramulator: A Fast and Extensible DRAM Simulator. IEEE Computer architecture letters 15 1 (2015) 45\u201349.","DOI":"10.1109\/LCA.2015.2414456"},{"key":"e_1_3_3_2_23_2","doi-asserted-by":"crossref","unstructured":"John\u00a0H Lau. 2022. Recent Advances and Trends in Advanced Packaging. IEEE Transactions on Components Packaging and Manufacturing Technology 12 2 (2022) 228\u2013252.","DOI":"10.1109\/TCPMT.2022.3144461"},{"key":"e_1_3_3_2_24_2","unstructured":"Donghyuk Lee Samira Khan Lavanya Subramanian Saugata Ghose Rachata Ausavarungnirun Gennady Pekhimenko Vivek Seshadri and Onur Mutlu. 2016. Understanding and Exploiting Design-Induced Latency Variation in Modern DRAM Chips. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1610.09604 (2016)."},{"key":"e_1_3_3_2_25_2","doi-asserted-by":"publisher","DOI":"10.1109\/CICC.2015.7338357"},{"key":"e_1_3_3_2_26_2","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2017.35"},{"key":"e_1_3_3_2_27_2","volume-title":"2018 ACM\/IEEE 45th Annual International Symposium on Computer Architecture (ISCA)","author":"Liu Yuxi","unstructured":"Yuxi Liu, Xia Zhao, Magnus Jahre, Zhenlin Wang, Xiaolin Wang, Yingwei Luo, and Lieven Eeckhout. [n. d.]. Get Out of the Valley: Power-Efficient Address Mapping for GPUs. In 2018 ACM\/IEEE 45th Annual International Symposium on Computer Architecture (ISCA)."},{"key":"e_1_3_3_2_28_2","volume-title":"2008 International Symposium on Computer Architecture","author":"Loh Gabriel\u00a0H.","unstructured":"Gabriel\u00a0H. Loh. [n. d.]. 3D-Stacked Memory Architectures for Multi-core Processors. In 2008 International Symposium on Computer Architecture."},{"key":"e_1_3_3_2_29_2","doi-asserted-by":"publisher","DOI":"10.1109\/SAMOS.2015.7363670"},{"key":"e_1_3_3_2_30_2","unstructured":"Maxim Naumov Dheevatsa Mudigere Hao-Jun\u00a0Michael Shi Jianyu Huang Narayanan Sundaraman Jongsoo Park Xiaodong Wang Udit Gupta Carole-Jean Wu Alisson\u00a0G Azzolini et\u00a0al. 2019. Deep Learning Recommendation Model for Personalization and Recommendation Systems. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1906.00091 (2019)."},{"key":"e_1_3_3_2_31_2","doi-asserted-by":"publisher","DOI":"10.1109\/ISSCC42614.2022.9731694"},{"key":"e_1_3_3_2_32_2","doi-asserted-by":"publisher","DOI":"10.5555\/AAI29605208"},{"key":"e_1_3_3_2_33_2","unstructured":"Ataberk Olgun F.\u00a0Nisa Bostanci Geraldo\u00a0F. Oliveira Yahya\u00a0Can Tugrul Rahul Bera A.\u00a0Giray Yaglikci Hasan Hassan Oguz Ergin and Onur Mutlu. 2022. Sectored DRAM: An Energy-Efficient High-Throughput and Practical Fine-Grained DRAM Architecture."},{"key":"e_1_3_3_2_34_2","doi-asserted-by":"publisher","DOI":"10.1145\/3123939.3124545"},{"key":"e_1_3_3_2_35_2","unstructured":"Subhankar Pal Swagath Venkataramani Viji Srinivasan and Kailash Gopalakrishnan. 2022. OnSRAM: Efficient Inter-Node On-Chip Scratchpad Management in Deep Learning Accelerators. ACM Trans. Embed. Comput. Syst. Article 86 (oct 2022)."},{"key":"e_1_3_3_2_36_2","doi-asserted-by":"crossref","unstructured":"Myeong-Jae Park Jinhyung Lee Kyungjun Cho Jihwan Park Junil Moon Sung-Hak Lee Tae-Kyun Kim Sanghoon Oh Seokwoo Choi Yongsuk Choi et\u00a0al. 2022. A 192-Gb 12-high 896-GB\/s HBM3 DRAM with a TSV auto-calibration scheme and machine-learning-based layout optimization. IEEE Journal of Solid-State Circuits 58 1 (2022) 256\u2013269.","DOI":"10.1109\/JSSC.2022.3193354"},{"key":"e_1_3_3_2_37_2","doi-asserted-by":"publisher","DOI":"10.1145\/2830772.2830820"},{"key":"e_1_3_3_2_38_2","doi-asserted-by":"crossref","unstructured":"Tsugio Takahashi Tomonori Sekiguchi Riichiro Takemura Seiji Narui Hiroki Fujisawa Shinichi Miyatake Makoto Morino Koji Arai Satoru Yamada Shoji Shukuri et\u00a0al. 2001. A Multigigabit DRAM Technology with 6F2 Open-Bitline Cell Distributed Overdriven Sensing and Stacked-Flash Fuse. IEEE Journal of Solid-State Circuits 36 11 (2001) 1721\u20131727.","DOI":"10.1109\/4.962294"},{"key":"e_1_3_3_2_39_2","doi-asserted-by":"publisher","DOI":"10.1145\/1815961.1815983"},{"key":"e_1_3_3_2_40_2","volume-title":"2019 49th Annual IEEE\/IFIP International Conference on Dependable Systems and Networks (DSN)","author":"Wang Haonan","unstructured":"Haonan Wang and Adwait Jog. [n. d.]. Exploiting Latency and Error Tolerance of GPGPU Applications for an Energy-Efficient DRAM. In 2019 49th Annual IEEE\/IFIP International Conference on Dependable Systems and Networks (DSN)."},{"key":"e_1_3_3_2_41_2","doi-asserted-by":"publisher","DOI":"10.1145\/3337821.3337867"},{"key":"e_1_3_3_2_42_2","doi-asserted-by":"crossref","unstructured":"Yaohua Wang Lois Orosa Xiangjun Peng Yang Guo Saugata Ghose Minesh Patel Jeremie\u00a0S. Kim Juan\u00a0G\u00f3mez Luna Mohammad Sadrosadati Nika\u00a0Mansouri Ghiasi and Onur Mutlu. 2020. FIGARO: Improving System Performance via Fine-Grained In-DRAM Data Relocation and Caching.","DOI":"10.1109\/MICRO50266.2020.00036"},{"key":"e_1_3_3_2_43_2","volume-title":"2014 ACM\/IEEE 41st International Symposium on Computer Architecture (ISCA)","author":"Zhang Tao","unstructured":"Tao Zhang, Ke Chen, Cong Xu, Guangyu Sun, Tao Wang, and Yuan Xie. [n. d.]. Half-DRAM: A High-Bandwidth and Low-Power DRAM Architecture from the Rethinking of Fine-Grained Activation. In 2014 ACM\/IEEE 41st International Symposium on Computer Architecture (ISCA)."},{"key":"e_1_3_3_2_44_2","doi-asserted-by":"crossref","unstructured":"Ting Zheng and Muhannad\u00a0S Bakir. 2022. Benchmarking frequency-dependent parasitics of fine-pitch off-chip I\/Os for 2.5 D and 3D heterogeneous integration. IEEE Transactions on Components Packaging and Manufacturing Technology 12 12 (2022) 2002\u20132012.","DOI":"10.1109\/TCPMT.2022.3223966"}],"event":{"name":"ISCA '25: Proceedings of the 52nd Annual International Symposium on Computer Architecture","location":"Tokyo Japan","acronym":"SIGARCH '25","sponsor":["SIGARCH ACM Special Interest Group on Computer Architecture"]},"container-title":["Proceedings of the 52nd Annual International Symposium on Computer Architecture"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3695053.3731111","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,21]],"date-time":"2025-06-21T11:09:40Z","timestamp":1750504180000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3695053.3731111"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,6,20]]},"references-count":43,"alternative-id":["10.1145\/3695053.3731111","10.1145\/3695053"],"URL":"https:\/\/doi.org\/10.1145\/3695053.3731111","relation":{},"subject":[],"published":{"date-parts":[[2025,6,20]]},"assertion":[{"value":"2025-06-20","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}