{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T04:20:04Z","timestamp":1750220404677,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":101,"publisher":"ACM","license":[{"start":{"date-parts":[[2021,10,17]],"date-time":"2021-10-17T00:00:00Z","timestamp":1634428800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"name":"National Science Foundation","award":["CNS-1940048"],"award-info":[{"award-number":["CNS-1940048"]}]},{"DOI":"10.13039\/100007602","name":"University of California, Riverside","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100007602","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Broadcom Inc."},{"DOI":"10.13039\/100000001","name":"NSF (National Science Foundation)","doi-asserted-by":"publisher","award":["CNS-2007124"],"award-info":[{"award-number":["CNS-2007124"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2021,10,18]]},"DOI":"10.1145\/3466752.3480122","type":"proceedings-article","created":{"date-parts":[[2021,10,17]],"date-time":"2021-10-17T19:12:05Z","timestamp":1634497925000},"page":"28-45","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":3,"title":["NDS: N-Dimensional Storage"],"prefix":"10.1145","author":[{"given":"Yu-Chia","family":"Liu","sequence":"first","affiliation":[{"name":"University of California, Riverside"}]},{"given":"Hung-Wei","family":"Tseng","sequence":"additional","affiliation":[{"name":"University of California, Riverside, United States of America"}]}],"member":"320","published-online":{"date-parts":[[2021,10,17]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1145\/1376616.1376712"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA45697.2020.00023"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1145\/291069.291026"},{"key":"e_1_3_2_1_4_1","unstructured":"Advanced Micro Devices Inc.2014. FirePro DirectGMA Technical Overview. http:\/\/developer.amd.com\/tools-and-sdks\/graphics-development\/firepro-sdk\/firepro-directgma-sdk\/.  Advanced Micro Devices Inc.2014. FirePro DirectGMA Technical Overview. http:\/\/developer.amd.com\/tools-and-sdks\/graphics-development\/firepro-sdk\/firepro-directgma-sdk\/."},{"key":"e_1_3_2_1_5_1","unstructured":"Francesc Alted Martin Durant Stephan Hoyer John Kirkham Alistair Miles Mamy Ratsimbazafy Matthew Rocklin Vincent Schut Anthony Scopatz and Prakhar Goel. [n. d.]. Zarr. https:\/\/github.com\/zarr-developers\/zarr-python  Francesc Alted Martin Durant Stephan Hoyer John Kirkham Alistair Miles Mamy Ratsimbazafy Matthew Rocklin Vincent Schut Anthony Scopatz and Prakhar Goel. [n. d.]. Zarr. https:\/\/github.com\/zarr-developers\/zarr-python"},{"key":"e_1_3_2_1_6_1","unstructured":"Amber Huffman. 2012. NVM Express Revision 1.1. http:\/\/nvmexpress.org\/wp-content\/uploads\/2013\/05\/NVM_Express_1_1.pdf.  Amber Huffman. 2012. NVM Express Revision 1.1. http:\/\/nvmexpress.org\/wp-content\/uploads\/2013\/05\/NVM_Express_1_1.pdf."},{"key":"e_1_3_2_1_7_1","unstructured":"Apache Software Foundation. [n. d.]. Apache Avro(TM) 1.10.2 Documentation. https:\/\/avro.apache.org\/docs\/current\/  Apache Software Foundation. [n. d.]. Apache Avro(TM) 1.10.2 Documentation. https:\/\/avro.apache.org\/docs\/current\/"},{"key":"e_1_3_2_1_8_1","unstructured":"Apache Software Foundation. [n. d.]. Apache Parquet. https:\/\/parquet.apache.org\/  Apache Software Foundation. [n. d.]. Apache Parquet. https:\/\/parquet.apache.org\/"},{"key":"e_1_3_2_1_9_1","unstructured":"Apache Software Foundation. [n. d.]. The smallest fastest columnar storage for Hadoop workloads.https:\/\/orc.apache.org\/  Apache Software Foundation. [n. d.]. The smallest fastest columnar storage for Hadoop workloads.https:\/\/orc.apache.org\/"},{"volume-title":"Parallel tensor compression for large-scale scientific data. In 2016 IEEE international parallel and distributed processing symposium (IPDPS)","author":"Austin Woody","key":"e_1_3_2_1_10_1","unstructured":"Woody Austin , Grey Ballard , and Tamara\u00a0 G Kolda . 2016. Parallel tensor compression for large-scale scientific data. In 2016 IEEE international parallel and distributed processing symposium (IPDPS) . IEEE , 912\u2013922. Woody Austin, Grey Ballard, and Tamara\u00a0G Kolda. 2016. Parallel tensor compression for large-scale scientific data. In 2016 IEEE international parallel and distributed processing symposium (IPDPS). IEEE, 912\u2013922."},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1137\/060676489"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1090\/conm\/588"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPEC.2017.8091026"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1109\/JPROC.2004.840311"},{"key":"e_1_3_2_1_15_1","volume-title":"Proceedings of the 15th Usenix Conference on File and Storage Technologies (Santa clara, CA, USA) (FAST\u201917)","author":"Bj\u00f8rling Matias","year":"2017","unstructured":"Matias Bj\u00f8rling , Javier Gonz\u00e1lez , and Philippe Bonnet . 2017 . LightNVM: The Linux Open-Channel SSD Subsystem . In Proceedings of the 15th Usenix Conference on File and Storage Technologies (Santa clara, CA, USA) (FAST\u201917) . USENIX Association, USA, 359\u2013373. Matias Bj\u00f8rling, Javier Gonz\u00e1lez, and Philippe Bonnet. 2017. LightNVM: The Linux Open-Channel SSD Subsystem. In Proceedings of the 15th Usenix Conference on File and Storage Technologies (Santa clara, CA, USA) (FAST\u201917). USENIX Association, USA, 359\u2013373."},{"key":"e_1_3_2_1_16_1","unstructured":"Broadcom Inc.2019. White Paper: NVMe over Fabrics Performance Stingray(TM)-Based Storage Appliance. https:\/\/docs.broadcom.com\/doc\/broadcom-stingray-100G-NVMe-oF-performance.  Broadcom Inc.2019. White Paper: NVMe over Fabrics Performance Stingray(TM)-Based Storage Appliance. https:\/\/docs.broadcom.com\/doc\/broadcom-stingray-100G-NVMe-oF-performance."},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS.2008.4536313"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1145\/1583991.1584053"},{"key":"e_1_3_2_1_19_1","volume-title":"Persona: A High-Performance Bioinformatics Framework. In 2017 USENIX Annual Technical Conference (USENIX ATC 17)","author":"Byma Stuart","year":"2017","unstructured":"Stuart Byma , Sam Whitlock , Laura Flueratoru , Ethan Tseng , Christos Kozyrakis , Edouard Bugnion , and James Larus . 2017 . Persona: A High-Performance Bioinformatics Framework. In 2017 USENIX Annual Technical Conference (USENIX ATC 17) . USENIX Association, Santa Clara, CA, 153\u2013165. https:\/\/www.usenix.org\/conference\/atc17\/technical-sessions\/presentation\/byma Stuart Byma, Sam Whitlock, Laura Flueratoru, Ethan Tseng, Christos Kozyrakis, Edouard Bugnion, and James Larus. 2017. Persona: A High-Performance Bioinformatics Framework. In 2017 USENIX Annual Technical Conference (USENIX ATC 17). USENIX Association, Santa Clara, CA, 153\u2013165. https:\/\/www.usenix.org\/conference\/atc17\/technical-sessions\/presentation\/byma"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPEC.2015.7322458"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.1999.744334"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2010.33"},{"key":"e_1_3_2_1_23_1","unstructured":"C Cecka. 2018. Pro Tip: cuBLAS Strided Batched Matrix Multiply. https:\/\/developer.nvidia.com\/blog\/cublas-strided-batched-matrix-multiply\/  C Cecka. 2018. Pro Tip: cuBLAS Strided Batched Matrix Multiply. https:\/\/developer.nvidia.com\/blog\/cublas-strided-batched-matrix-multiply\/"},{"key":"e_1_3_2_1_24_1","volume-title":"13th USENIX Symposium on Operating Systems Design and Implementation (OSDI 18)","author":"Chen Tianqi","year":"2018","unstructured":"Tianqi Chen , Thierry Moreau , Ziheng Jiang , Lianmin Zheng , Eddie Yan , Haichen Shen , Meghan Cowan , Leyuan Wang , Yuwei Hu , Luis Ceze , 2018 . TVM: An automated end-to-end optimizing compiler for deep learning . In 13th USENIX Symposium on Operating Systems Design and Implementation (OSDI 18) . 578\u2013594. Tianqi Chen, Thierry Moreau, Ziheng Jiang, Lianmin Zheng, Eddie Yan, Haichen Shen, Meghan Cowan, Leyuan Wang, Yuwei Hu, Luis Ceze, 2018. TVM: An automated end-to-end optimizing compiler for deep learning. In 13th USENIX Symposium on Operating Systems Design and Implementation (OSDI 18). 578\u2013594."},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1145\/3276493"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1145\/2463676.2465295"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICIP.2010.5654017"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO50266.2020.00079"},{"key":"e_1_3_2_1_29_1","unstructured":"Serban Giuroiu. 2012. CUDA K-Means Data Clustering. https:\/\/github.com\/serban\/kmeans  Serban Giuroiu. 2012. CUDA K-Means Data Clustering. https:\/\/github.com\/serban\/kmeans"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1145\/3352460.3358291"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1145\/1669112.1669118"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1145\/3007787.3001154"},{"volume-title":"2020 ACM\/IEEE 47th Annual International Symposium on Computer Architecture (ISCA). 1050\u20131063","author":"Hajinazar N.","key":"e_1_3_2_1_33_1","unstructured":"N. Hajinazar , P. Patel , M. Patel , K. Kanellopoulos , S. Ghose , R. Ausavarungnirun , G.\u00a0 F. Oliveira , J. Appavoo , V. Seshadri , and O. Mutlu . 2020. The Virtual Block Interface: A Flexible Alternative to the Conventional Virtual Memory Framework . In 2020 ACM\/IEEE 47th Annual International Symposium on Computer Architecture (ISCA). 1050\u20131063 . N. Hajinazar, P. Patel, M. Patel, K. Kanellopoulos, S. Ghose, R. Ausavarungnirun, G.\u00a0F. Oliveira, J. Appavoo, V. Seshadri, and O. Mutlu. 2020. The Virtual Block Interface: A Flexible Alternative to the Conventional Virtual Memory Framework. In 2020 ACM\/IEEE 47th Annual International Symposium on Computer Architecture (ISCA). 1050\u20131063."},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1145\/3352460.3358275"},{"key":"e_1_3_2_1_35_1","volume-title":"Dynamic Multi-Resolution Data Storage. In 52th Annual IEEE\/ACM International Symposium on Microarchitecture(MICRO","author":"Hu Yu-Ching","year":"2019","unstructured":"Yu-Ching Hu , Murtuza\u00a0Taher Lokhandwala , Te I, and Hung-Wei Tseng . 2019 . Dynamic Multi-Resolution Data Storage. In 52th Annual IEEE\/ACM International Symposium on Microarchitecture(MICRO 2019). Yu-Ching Hu, Murtuza\u00a0Taher Lokhandwala, Te I, and Hung-Wei Tseng. 2019. Dynamic Multi-Resolution Data Storage. In 52th Annual IEEE\/ACM International Symposium on Microarchitecture(MICRO 2019)."},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1109\/TVLSI.2006.876103"},{"key":"e_1_3_2_1_37_1","volume-title":"Workshop on Profile and Feedback-Directed Compilation, Vol.\u00a0139","author":"Im Eun-Jin","year":"1998","unstructured":"Eun-Jin Im and Katherine Yelick . 1998 . Model-based memory hierarchy optimizations for sparse matrices . In Workshop on Profile and Feedback-Directed Compilation, Vol.\u00a0139 . Eun-Jin Im and Katherine Yelick. 1998. Model-based memory hierarchy optimizations for sparse matrices. In Workshop on Profile and Feedback-Directed Compilation, Vol.\u00a0139."},{"key":"e_1_3_2_1_38_1","unstructured":"Intel. 2015. Storage Performance Development Kit. https:\/\/spdk.io\/doc\/  Intel. 2015. Storage Performance Development Kit. https:\/\/spdk.io\/doc\/"},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1145\/3079856.3080246"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1145\/2749469.2750412"},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"crossref","unstructured":"Yangwook Kang Yang-Suk Kee Ethan\u00a0L. Miller and Chanik Park. 2013. Enabling cost-effective data processing with smart SSD. In Mass Storage Systems and Technologies (MSST).  Yangwook Kang Yang-Suk Kee Ethan\u00a0L. Miller and Chanik Park. 2013. Enabling cost-effective data processing with smart SSD. In Mass Storage Systems and Technologies (MSST).","DOI":"10.1109\/MSST.2013.6558444"},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICPP.2016.19"},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.5555\/3314872.3314894"},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1145\/3133901"},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.1145\/3123939.3124553"},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.1109\/SC.2016.70"},{"key":"e_1_3_2_1_48_1","volume-title":"10th USENIX Symposium on Operating Systems Design and Implementation (OSDI 12)","author":"Kyrola Aapo","year":"2012","unstructured":"Aapo Kyrola , Guy Blelloch , and Carlos Guestrin . 2012 . GraphChi: Large-scale graph computation on just a PC . In 10th USENIX Symposium on Operating Systems Design and Implementation (OSDI 12) . 31\u201346. Aapo Kyrola, Guy Blelloch, and Carlos Guestrin. 2012. GraphChi: Large-scale graph computation on just a PC. In 10th USENIX Symposium on Operating Systems Design and Implementation (OSDI 12). 31\u201346."},{"key":"e_1_3_2_1_49_1","volume-title":"12th USENIX Workshop on Hot Topics in Storage and File Systems, HotStorage 2020","author":"Lee Sangwon","year":"2020","unstructured":"Sangwon Lee , Gyuyoung Park , and Myoungsoo Jung . 2020 . TensorPRAM: Designing a Scalable Heterogeneous Deep Learning Accelerator with Byte-addressable PRAMs . In 12th USENIX Workshop on Hot Topics in Storage and File Systems, HotStorage 2020 , July 13-14, 2020, Anirudh Badam and Vijay Chidambaram (Eds.). USENIX Association. https:\/\/www.usenix.org\/conference\/hotstorage20\/presentation\/lee Sangwon Lee, Gyuyoung Park, and Myoungsoo Jung. 2020. TensorPRAM: Designing a Scalable Heterogeneous Deep Learning Accelerator with Byte-addressable PRAMs. In 12th USENIX Workshop on Hot Topics in Storage and File Systems, HotStorage 2020, July 13-14, 2020, Anirudh Badam and Vijay Chidambaram (Eds.). USENIX Association. https:\/\/www.usenix.org\/conference\/hotstorage20\/presentation\/lee"},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"publisher","DOI":"10.1109\/MC.1982.1653971"},{"key":"e_1_3_2_1_51_1","volume-title":"Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis. 633\u2013644","author":"Li C.","year":"2016","unstructured":"C. Li , Y. Yang , M. Feng , S. Chakradhar , and H. Zhou . 2016. Optimizing Memory Efficiency for Deep Convolutional Neural Networks on GPUs. In SC \u201916 : Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis. 633\u2013644 . https:\/\/doi.org\/10.1109\/SC. 2016 .53 10.1109\/SC.2016.53 C. Li, Y. Yang, M. Feng, S. Chakradhar, and H. Zhou. 2016. Optimizing Memory Efficiency for Deep Convolutional Neural Networks on GPUs. In SC \u201916: Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis. 633\u2013644. https:\/\/doi.org\/10.1109\/SC.2016.53"},{"key":"e_1_3_2_1_52_1","doi-asserted-by":"publisher","DOI":"10.1145\/2807591.2807671"},{"key":"e_1_3_2_1_53_1","doi-asserted-by":"publisher","DOI":"10.1109\/TC.2018.2868368"},{"key":"e_1_3_2_1_54_1","unstructured":"Wei-Kang Liao. 2003. Parallel K-Means Data Clustering. http:\/\/users.eecs.northwestern.edu\/~wkliao\/Kmeans\/index.html  Wei-Kang Liao. 2003. Parallel K-Means Data Clustering. http:\/\/users.eecs.northwestern.edu\/~wkliao\/Kmeans\/index.html"},{"key":"e_1_3_2_1_55_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCD.2016.7753307"},{"key":"e_1_3_2_1_56_1","volume-title":"Robert\u00a0A Van\u00a0de Geijn, and FLAME\u00a0Working Note","author":"Low Tze\u00a0Meng","year":"2004","unstructured":"Tze\u00a0Meng Low , Robert\u00a0A Van\u00a0de Geijn, and FLAME\u00a0Working Note . 2004 . An API for manipulating matrices stored by blocks. Computer Science Department, University of Texas at Austin. Tze\u00a0Meng Low, Robert\u00a0A Van\u00a0de Geijn, and FLAME\u00a0Working Note. 2004. An API for manipulating matrices stored by blocks. Computer Science Department, University of Texas at Austin."},{"key":"e_1_3_2_1_57_1","doi-asserted-by":"publisher","DOI":"10.1145\/2892208.2892210"},{"key":"e_1_3_2_1_58_1","doi-asserted-by":"publisher","DOI":"10.1145\/3307650.3322275"},{"key":"e_1_3_2_1_59_1","doi-asserted-by":"publisher","DOI":"10.1137\/16M108968X"},{"key":"e_1_3_2_1_60_1","unstructured":"Microchip Technology Inc.2020. Flashtec NVMe Controllers. https:\/\/www.microsemi.com\/product-directory\/storage\/3687-flashtec-nvme-controllers.  Microchip Technology Inc.2020. Flashtec NVMe Controllers. https:\/\/www.microsemi.com\/product-directory\/storage\/3687-flashtec-nvme-controllers."},{"key":"e_1_3_2_1_61_1","unstructured":"Micron. 2015. MT29F32G08CBADAWP Datasheet. https:\/\/www.micron.com\/parts\/nand-flash\/mass-storage\/mt29f32g08cbadawp?pc=%7B80EFFAAD-26CB-4D06-84BC-0E3274B960A9%7D  Micron. 2015. MT29F32G08CBADAWP Datasheet. https:\/\/www.micron.com\/parts\/nand-flash\/mass-storage\/mt29f32g08cbadawp?pc=%7B80EFFAAD-26CB-4D06-84BC-0E3274B960A9%7D"},{"key":"e_1_3_2_1_62_1","unstructured":"National Institute of Standards and Technology. 2021. Cryptographic Standards and Guidelines. https:\/\/csrc.nist.gov\/projects\/cryptographic-standards-and-guidelines\/archived-crypto-projects\/aes-development.  National Institute of Standards and Technology. 2021. Cryptographic Standards and Guidelines. https:\/\/csrc.nist.gov\/projects\/cryptographic-standards-and-guidelines\/archived-crypto-projects\/aes-development."},{"key":"e_1_3_2_1_63_1","unstructured":"NVIDIA. 2019. cuBLAS. https:\/\/docs.nvidia.com\/cuda\/cublas\/index.html.  NVIDIA. 2019. cuBLAS. https:\/\/docs.nvidia.com\/cuda\/cublas\/index.html."},{"key":"e_1_3_2_1_64_1","unstructured":"NVIDIA Corporation. 2014. CUDA C Programming Guide v6.0. http:\/\/docs.nvidia.com\/cuda\/pdf\/CUDA_C_Programming_Guide.pdf.  NVIDIA Corporation. 2014. CUDA C Programming Guide v6.0. http:\/\/docs.nvidia.com\/cuda\/pdf\/CUDA_C_Programming_Guide.pdf."},{"key":"e_1_3_2_1_65_1","unstructured":"NVIDIA Corporation. 2014. Developing a Linux Kernel Module Using RDMA for GPUDirect. http:\/\/docs.nvidia.com\/cuda\/pdf\/GPUDirect_RDMA.pdf.  NVIDIA Corporation. 2014. Developing a Linux Kernel Module Using RDMA for GPUDirect. http:\/\/docs.nvidia.com\/cuda\/pdf\/GPUDirect_RDMA.pdf."},{"key":"e_1_3_2_1_66_1","doi-asserted-by":"publisher","DOI":"10.1145\/3140659.3080254"},{"key":"e_1_3_2_1_67_1","volume-title":"Image convolution with CUDA","author":"Podlozhnyuk Victor","year":"2097","unstructured":"Victor Podlozhnyuk . 2007. Image convolution with CUDA . NVIDIA Corporation white paper, June 2097 , 3 (2007). Victor Podlozhnyuk. 2007. Image convolution with CUDA. NVIDIA Corporation white paper, June 2097, 3 (2007)."},{"key":"e_1_3_2_1_68_1","doi-asserted-by":"publisher","DOI":"10.1109\/SST.2016.7765670"},{"key":"e_1_3_2_1_69_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA47549.2020.00015"},{"key":"e_1_3_2_1_70_1","volume-title":"JSOI: A JSON-Based Interchange Format for Efficient Model Management. In 2019 ACM\/IEEE 22nd International Conference on Model Driven Engineering Languages and Systems Companion (MODELS-C). 259\u2013266","author":"Rodriguez Horacio\u00a0Hoyos","year":"2019","unstructured":"Horacio\u00a0Hoyos Rodriguez and Beatriz\u00a0Sanchez Pi\u00f1a . 2019 . JSOI: A JSON-Based Interchange Format for Efficient Model Management. In 2019 ACM\/IEEE 22nd International Conference on Model Driven Engineering Languages and Systems Companion (MODELS-C). 259\u2013266 . https:\/\/doi.org\/10.1109\/MODELS-C.2019.00041 10.1109\/MODELS-C.2019.00041 Horacio\u00a0Hoyos Rodriguez and Beatriz\u00a0Sanchez Pi\u00f1a. 2019. JSOI: A JSON-Based Interchange Format for Efficient Model Management. In 2019 ACM\/IEEE 22nd International Conference on Model Driven Engineering Languages and Systems Companion (MODELS-C). 259\u2013266. https:\/\/doi.org\/10.1109\/MODELS-C.2019.00041"},{"volume-title":"Proceedings of the IEEE International Symposium on Workload Characterization(IISWC \u201909)","author":"Che Boyer","key":"e_1_3_2_1_71_1","unstructured":"M.\u00a0 Boyer S.\u00a0 Che , J. Meng , D. Tarjan , J.\u00a0 W. Sheaffer , S.-H. Lee , and K. Skadron . 2009. Rodinia: A Benchmark Suite for Heterogeneous Computing . In Proceedings of the IEEE International Symposium on Workload Characterization(IISWC \u201909) . 44\u201354. M.\u00a0Boyer S.\u00a0Che, J. Meng, D. Tarjan, J.\u00a0W. Sheaffer, S.-H. Lee, and K. Skadron. 2009. Rodinia: A Benchmark Suite for Heterogeneous Computing. In Proceedings of the IEEE International Symposium on Workload Characterization(IISWC \u201909). 44\u201354."},{"key":"e_1_3_2_1_72_1","doi-asserted-by":"crossref","unstructured":"Yousef Saad. 2003. Iterative methods for sparse linear systems. SIAM.  Yousef Saad. 2003. Iterative methods for sparse linear systems. SIAM.","DOI":"10.1137\/1.9780898718003"},{"key":"e_1_3_2_1_73_1","unstructured":"Stephan Saalfeld. [n. d.]. N5. https:\/\/github.com\/saalfeldlab\/n5  Stephan Saalfeld. [n. d.]. N5. https:\/\/github.com\/saalfeldlab\/n5"},{"key":"e_1_3_2_1_74_1","volume-title":"Willow: A User-Programmable SSD. In 11th USENIX Symposium on Operating Systems Design and Implementation (OSDI 14)","author":"Seshadri Sudharsan","year":"2014","unstructured":"Sudharsan Seshadri , Mark Gahagan , Sundaram Bhaskaran , Trevor Bunker , Arup De , Yanqin Jin , Yang Liu , and Steven Swanson . 2014 . Willow: A User-Programmable SSD. In 11th USENIX Symposium on Operating Systems Design and Implementation (OSDI 14) . USENIX Association, Broomfield, CO, 67\u201380. https:\/\/www.usenix.org\/conference\/osdi14\/technical-sessions\/presentation\/seshadri Sudharsan Seshadri, Mark Gahagan, Sundaram Bhaskaran, Trevor Bunker, Arup De, Yanqin Jin, Yang Liu, and Steven Swanson. 2014. Willow: A User-Programmable SSD. In 11th USENIX Symposium on Operating Systems Design and Implementation (OSDI 14). USENIX Association, Broomfield, CO, 67\u201380. https:\/\/www.usenix.org\/conference\/osdi14\/technical-sessions\/presentation\/seshadri"},{"key":"e_1_3_2_1_75_1","doi-asserted-by":"publisher","DOI":"10.1145\/2830772.2830820"},{"key":"e_1_3_2_1_76_1","volume-title":"PTL: PCM Translation Layer. In 2012 IEEE Computer Society Annual Symposium on VLSI. 380\u2013385","author":"Shao Z.","year":"2012","unstructured":"Z. Shao , N. Chang , and N. Dutt . 2012 . PTL: PCM Translation Layer. In 2012 IEEE Computer Society Annual Symposium on VLSI. 380\u2013385 . https:\/\/doi.org\/10.1109\/ISVLSI. 2012 .75 10.1109\/ISVLSI.2012.75 Z. Shao, N. Chang, and N. Dutt. 2012. PTL: PCM Translation Layer. In 2012 IEEE Computer Society Annual Symposium on VLSI. 380\u2013385. https:\/\/doi.org\/10.1109\/ISVLSI.2012.75"},{"key":"e_1_3_2_1_77_1","volume-title":"Tensor Contractions with Extended BLAS Kernels on CPU and GPU. In 2016 IEEE 23rd International Conference on High Performance Computing (HiPC). 193\u2013202","author":"Shi Y.","year":"2016","unstructured":"Y. Shi , U.\u00a0 N. Niranjan , A. Anandkumar , and C. Cecka . 2016 . Tensor Contractions with Extended BLAS Kernels on CPU and GPU. In 2016 IEEE 23rd International Conference on High Performance Computing (HiPC). 193\u2013202 . https:\/\/doi.org\/10.1109\/HiPC. 2016 .031 10.1109\/HiPC.2016.031 Y. Shi, U.\u00a0N. Niranjan, A. Anandkumar, and C. Cecka. 2016. Tensor Contractions with Extended BLAS Kernels on CPU and GPU. In 2016 IEEE 23rd International Conference on High Performance Computing (HiPC). 193\u2013202. https:\/\/doi.org\/10.1109\/HiPC.2016.031"},{"key":"e_1_3_2_1_78_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.jpdc.2014.06.002"},{"key":"e_1_3_2_1_79_1","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1145\/3157733","article-title":"Design of a high-performance GEMM-like tensor\u2013tensor multiplication","volume":"44","author":"Springer Paul","year":"2018","unstructured":"Paul Springer and Paolo Bientinesi . 2018 . Design of a high-performance GEMM-like tensor\u2013tensor multiplication . ACM Transactions on Mathematical Software (TOMS) 44 , 3 (2018), 1 \u2013 29 . Paul Springer and Paolo Bientinesi. 2018. Design of a high-performance GEMM-like tensor\u2013tensor multiplication. ACM Transactions on Mathematical Software (TOMS) 44, 3 (2018), 1\u201329.","journal-title":"ACM Transactions on Mathematical Software (TOMS)"},{"key":"e_1_3_2_1_80_1","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO50266.2020.00068"},{"key":"e_1_3_2_1_81_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA47549.2020.00062"},{"key":"e_1_3_2_1_82_1","unstructured":"Qian Sun. 2018. Parallel Implementation of Bellman Ford Algorithm. https:\/\/github.com\/sunnlo\/BellmanFord  Qian Sun. 2018. Parallel Implementation of Bellman Ford Algorithm. https:\/\/github.com\/sunnlo\/BellmanFord"},{"key":"e_1_3_2_1_83_1","unstructured":"The Linux Foundation. [n. d.]. Open Neural Network Exchange \u2013 The open standard for machine learning interoperability. https:\/\/onnx.ai\/  The Linux Foundation. [n. d.]. Open Neural Network Exchange \u2013 The open standard for machine learning interoperability. https:\/\/onnx.ai\/"},{"key":"e_1_3_2_1_84_1","volume-title":"Albis: High-Performance File Format for Big Data Systems. In USENIX Annual Technical Conference.","author":"Trivedi Animesh","year":"2018","unstructured":"Animesh Trivedi , Patrick Stuedi , Jonas Pfefferle , Adrian Sch\u00fcpbach , and Bernard Metzler . 2018 . Albis: High-Performance File Format for Big Data Systems. In USENIX Annual Technical Conference. Animesh Trivedi, Patrick Stuedi, Jonas Pfefferle, Adrian Sch\u00fcpbach, and Bernard Metzler. 2018. Albis: High-Performance File Format for Big Data Systems. In USENIX Annual Technical Conference."},{"key":"e_1_3_2_1_85_1","volume-title":"Morpheus: Creating Application Objects Efficiently for Heterogeneous Computing. In 2016 ACM\/IEEE 43rd Annual International Symposium on Computer Architecture (ISCA). 53\u201365","author":"Tseng Hung-Wei","year":"2016","unstructured":"Hung-Wei Tseng , Qianchen Zhao , Yuxiao Zhou , Mark Gahagan , and Steven Swanson . 2016 . Morpheus: Creating Application Objects Efficiently for Heterogeneous Computing. In 2016 ACM\/IEEE 43rd Annual International Symposium on Computer Architecture (ISCA). 53\u201365 . https:\/\/doi.org\/10.1109\/ISCA.2016.15 10.1109\/ISCA.2016.15 Hung-Wei Tseng, Qianchen Zhao, Yuxiao Zhou, Mark Gahagan, and Steven Swanson. 2016. Morpheus: Creating Application Objects Efficiently for Heterogeneous Computing. In 2016 ACM\/IEEE 43rd Annual International Symposium on Computer Architecture (ISCA). 53\u201365. https:\/\/doi.org\/10.1109\/ISCA.2016.15"},{"key":"e_1_3_2_1_86_1","volume-title":"Enrique\u00a0S Quintana-Orti, and Gregorio Quintana-Orti.","author":"Van\u00a0Zee G","year":"2009","unstructured":"Field\u00a0 G Van\u00a0Zee , Ernie Chan , Robert\u00a0A Van\u00a0de Geijn , Enrique\u00a0S Quintana-Orti, and Gregorio Quintana-Orti. 2009 . The libflame library for dense matrix computations. Computing in science & engineering 11, 6 (2009), 56\u201363. Field\u00a0G Van\u00a0Zee, Ernie Chan, Robert\u00a0A Van\u00a0de Geijn, Enrique\u00a0S Quintana-Orti, and Gregorio Quintana-Orti. 2009. The libflame library for dense matrix computations. Computing in science & engineering 11, 6 (2009), 56\u201363."},{"key":"e_1_3_2_1_87_1","volume-title":"Protocol Buffers: Google\u2019s Data Interchange Format. Technical Report","author":"Varda Kenton","year":"2008","unstructured":"Kenton Varda . 2008 . Protocol Buffers: Google\u2019s Data Interchange Format. Technical Report . http:\/\/google-opensource.blogspot.com\/2008\/07\/protocol-buffers-googles-data.html Kenton Varda. 2008. Protocol Buffers: Google\u2019s Data Interchange Format. Technical Report. http:\/\/google-opensource.blogspot.com\/2008\/07\/protocol-buffers-googles-data.html"},{"key":"e_1_3_2_1_88_1","unstructured":"Michel\u00a0Barlaud Vincent\u00a0Garcia \u00c9ric\u00a0Debreuve. 2018. kNN-CUDA. https:\/\/github.com\/vincentfpgarcia\/kNN-CUDA  Michel\u00a0Barlaud Vincent\u00a0Garcia \u00c9ric\u00a0Debreuve. 2018. kNN-CUDA. https:\/\/github.com\/vincentfpgarcia\/kNN-CUDA"},{"key":"e_1_3_2_1_89_1","doi-asserted-by":"publisher","DOI":"10.14778\/2732967.2732972"},{"key":"e_1_3_2_1_90_1","doi-asserted-by":"publisher","DOI":"10.5555\/3357062.3357066"},{"key":"e_1_3_2_1_91_1","unstructured":"Carl Yang Aydin Bulu\u00e7 and John\u00a0D. Owens. 2019. GraphBLAST: A High-Performance Linear Algebra-based Graph Framework on the GPU. CoRR abs\/1908.01407(2019). arxiv:1908.01407http:\/\/arxiv.org\/abs\/1908.01407  Carl Yang Aydin Bulu\u00e7 and John\u00a0D. Owens. 2019. GraphBLAST: A High-Performance Linear Algebra-based Graph Framework on the GPU. CoRR abs\/1908.01407(2019). arxiv:1908.01407http:\/\/arxiv.org\/abs\/1908.01407"},{"key":"e_1_3_2_1_92_1","doi-asserted-by":"publisher","DOI":"10.14778\/3067421.3067424"},{"key":"e_1_3_2_1_93_1","unstructured":"Yu-Chia Liu and Hung-Wei Tseng. 2021. N-Dimensional Storage. https:\/\/anonymous.4open.science\/r\/NDS-CA87\/.  Yu-Chia Liu and Hung-Wei Tseng. 2021. N-Dimensional Storage. https:\/\/anonymous.4open.science\/r\/NDS-CA87\/."},{"key":"e_1_3_2_1_94_1","unstructured":"Zach Zimmerman. 2016. MSplitGEMM: Large matrix multiplication in CUDA. https:\/\/github.com\/zpzim\/MSplitGEMM.  Zach Zimmerman. 2016. MSplitGEMM: Large matrix multiplication in CUDA. https:\/\/github.com\/zpzim\/MSplitGEMM."},{"key":"e_1_3_2_1_95_1","volume-title":"NVMMU: A Non-Volatile Memory Management Unit for Heterogeneous GPU-SSD Architectures. In The 24th International Conference on Parallel Architectures and Compilation Techniques(PACT","author":"Zhang Jie","year":"2015","unstructured":"Jie Zhang , David Donofrio , John Shalf , Mahmut Kandemir , and Myoungsoo Jung . 2015 . NVMMU: A Non-Volatile Memory Management Unit for Heterogeneous GPU-SSD Architectures. In The 24th International Conference on Parallel Architectures and Compilation Techniques(PACT 2015). Jie Zhang, David Donofrio, John Shalf, Mahmut Kandemir, and Myoungsoo Jung. 2015. NVMMU: A Non-Volatile Memory Management Unit for Heterogeneous GPU-SSD Architectures. In The 24th International Conference on Parallel Architectures and Compilation Techniques(PACT 2015)."},{"key":"e_1_3_2_1_96_1","doi-asserted-by":"publisher","DOI":"10.1145\/3190508.3190544"},{"key":"e_1_3_2_1_97_1","volume-title":"DRAM-Less: Hardware Acceleration of Data Processing with New Memory. In 2020 IEEE International Symposium on High Performance Computer Architecture (HPCA). 287\u2013302","author":"Zhang J.","year":"2020","unstructured":"J. Zhang , G. Park , D. Donofrio , J. Shalf , and M. Jung . 2020 . DRAM-Less: Hardware Acceleration of Data Processing with New Memory. In 2020 IEEE International Symposium on High Performance Computer Architecture (HPCA). 287\u2013302 . https:\/\/doi.org\/10.1109\/HPCA47549. 2020 .00032 10.1109\/HPCA47549.2020.00032 J. Zhang, G. Park, D. Donofrio, J. Shalf, and M. Jung. 2020. DRAM-Less: Hardware Acceleration of Data Processing with New Memory. In 2020 IEEE International Symposium on High Performance Computer Architecture (HPCA). 287\u2013302. https:\/\/doi.org\/10.1109\/HPCA47549.2020.00032"},{"key":"e_1_3_2_1_98_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA47549.2020.00030"},{"key":"e_1_3_2_1_99_1","doi-asserted-by":"publisher","DOI":"10.1145\/1555754.1555759"},{"key":"e_1_3_2_1_100_1","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2018.00011"},{"key":"e_1_3_2_1_101_1","doi-asserted-by":"publisher","DOI":"10.1145\/3352460.3358269"},{"key":"e_1_3_2_1_102_1","doi-asserted-by":"publisher","DOI":"10.1145\/1457150.1457160"}],"event":{"name":"MICRO '21: 54th Annual IEEE\/ACM International Symposium on Microarchitecture","sponsor":["SIGMICRO ACM Special Interest Group on Microarchitectural Research and Processing"],"location":"Virtual Event Greece","acronym":"MICRO '21"},"container-title":["MICRO-54: 54th Annual IEEE\/ACM International Symposium on Microarchitecture"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3466752.3480122","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/abs\/10.1145\/3466752.3480122","content-type":"text\/html","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3466752.3480122","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3466752.3480122","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T20:18:57Z","timestamp":1750191537000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3466752.3480122"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,10,17]]},"references-count":101,"alternative-id":["10.1145\/3466752.3480122","10.1145\/3466752"],"URL":"https:\/\/doi.org\/10.1145\/3466752.3480122","relation":{},"subject":[],"published":{"date-parts":[[2021,10,17]]},"assertion":[{"value":"2021-10-17","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}