{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,11]],"date-time":"2026-04-11T06:47:32Z","timestamp":1775890052104,"version":"3.50.1"},"reference-count":452,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","license":[{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/legalcode"}],"funder":[{"DOI":"10.13039\/100000028","name":"SAFARI Research Group\u2019s industrial partners through ASML, Facebook, Google, Huawei, Intel, Microsoft, VMware, and Semiconductor Research Corporation","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100000028","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Access"],"published-print":{"date-parts":[[2021]]},"DOI":"10.1109\/access.2021.3110993","type":"journal-article","created":{"date-parts":[[2021,9,8]],"date-time":"2021-09-08T20:06:20Z","timestamp":1631131580000},"page":"134457-134502","source":"Crossref","is-referenced-by-count":64,"title":["DAMOV: A New Methodology and Benchmark Suite for Evaluating Data Movement Bottlenecks"],"prefix":"10.1109","volume":"9","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-1557-4819","authenticated-orcid":false,"given":"Geraldo F.","family":"Oliveira","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6514-1571","authenticated-orcid":false,"given":"Juan","family":"Gomez-Luna","sequence":"additional","affiliation":[]},{"given":"Lois","family":"Orosa","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9138-0613","authenticated-orcid":false,"given":"Saugata","family":"Ghose","sequence":"additional","affiliation":[]},{"given":"Nandita","family":"Vijaykumar","sequence":"additional","affiliation":[]},{"given":"Ivan","family":"Fernandez","sequence":"additional","affiliation":[]},{"given":"Mohammad","family":"Sadrosadati","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0075-2312","authenticated-orcid":false,"given":"Onur","family":"Mutlu","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref275","first-page":"484","article-title":"CODIC: A low-cost substrate for enabling custom in-DRAM functionalities and optimizations","author":"orosa","year":"2021","journal-title":"Proc ACM\/IEEE 48th Annu Int Symp Comput Archit (ISCA)"},{"key":"ref274","doi-asserted-by":"publisher","DOI":"10.1109\/VLSIC.1998.687990"},{"key":"ref277","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA.2012.6237036"},{"key":"ref276","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA.2014.6853230"},{"key":"ref271","doi-asserted-by":"publisher","DOI":"10.1145\/2872887.2750402"},{"key":"ref270","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA45697.2020.00061"},{"key":"ref170","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-67630-2_9"},{"key":"ref273","year":"2021","journal-title":"RLDRAM 2 and 3 Specifications"},{"key":"ref272","doi-asserted-by":"publisher","DOI":"10.1145\/2485922.2485955"},{"key":"ref172","doi-asserted-by":"publisher","DOI":"10.1145\/122576.122580"},{"key":"ref171","doi-asserted-by":"publisher","DOI":"10.1109\/MC.2015.230"},{"key":"ref174","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-46079-6_24"},{"key":"ref173","doi-asserted-by":"publisher","DOI":"10.1016\/S0065-2458(08)60235-1"},{"key":"ref176","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA.2008.7"},{"key":"ref175","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2007.21"},{"key":"ref178","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA.1994.288164"},{"key":"ref177","doi-asserted-by":"publisher","DOI":"10.1145\/1735971.1736058"},{"key":"ref168","doi-asserted-by":"publisher","DOI":"10.1145\/1552309.1552310"},{"key":"ref169","doi-asserted-by":"publisher","DOI":"10.1145\/1542431.1542446"},{"key":"ref267","article-title":"Understanding and improving the latency of DRAM-based memory systems","author":"chang","year":"2017"},{"key":"ref268","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2017.62"},{"key":"ref269","article-title":"Reducing DRAM latency at low cost by exploiting heterogeneity","author":"lee","year":"2016"},{"key":"ref288","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2014.6835945"},{"key":"ref287","first-page":"343","article-title":"Adaptive history-based memory schedulers","author":"hur","year":"2004","journal-title":"Proc Int Symp Microarchit (MICRO)"},{"key":"ref286","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA.2008.21"},{"key":"ref285","doi-asserted-by":"publisher","DOI":"10.1145\/2155620.2155663"},{"key":"ref181","article-title":"MLP Yes! ILP No!","volume":"98","author":"glew","year":"1998","journal-title":"InASPLOS Wild and Crazy Idea Session"},{"key":"ref284","doi-asserted-by":"publisher","DOI":"10.1145\/1400751.1400799"},{"key":"ref180","doi-asserted-by":"publisher","DOI":"10.1109\/PDP.2016.55"},{"key":"ref283","article-title":"Controller for a synchronous DRAM that maximizes throughput by allowing memory requests and commands to be issued out of order","author":"zuravleff","year":"1997"},{"key":"ref282","doi-asserted-by":"publisher","DOI":"10.1145\/342001.339668"},{"key":"ref281","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2004.22"},{"key":"ref185","doi-asserted-by":"publisher","DOI":"10.1145\/123465.123475"},{"key":"ref280","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2013.6522356"},{"key":"ref184","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA.2005.49"},{"key":"ref183","doi-asserted-by":"publisher","DOI":"10.1109\/MM.2006.10"},{"key":"ref182","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA.2006.5"},{"key":"ref189","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2007.346185"},{"key":"ref188","doi-asserted-by":"publisher","DOI":"10.1145\/800015.808204"},{"key":"ref187","doi-asserted-by":"publisher","DOI":"10.1109\/SBAC-PAD.2014.31"},{"key":"ref186","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2007.33"},{"key":"ref179","doi-asserted-by":"publisher","DOI":"10.1145\/3316781.3317867"},{"key":"ref278","doi-asserted-by":"publisher","DOI":"10.1109\/ICCD.2014.6974655"},{"key":"ref279","doi-asserted-by":"publisher","DOI":"10.1145\/2830772.2830803"},{"key":"ref293","doi-asserted-by":"publisher","DOI":"10.1145\/1669112.1669150"},{"key":"ref292","doi-asserted-by":"publisher","DOI":"10.1145\/1555754.1555781"},{"key":"ref295","doi-asserted-by":"publisher","DOI":"10.1145\/2342356.2342436"},{"key":"ref294","first-page":"106","article-title":"A&#x00E9;rgia: Exploiting packet latency slack in on-chip networks","author":"das","year":"2010","journal-title":"Proc Ann Int Symp Comput Archit (ISCA)"},{"key":"ref297","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2011.5749724"},{"key":"ref296","doi-asserted-by":"publisher","DOI":"10.1145\/3296957.3177158"},{"key":"ref299","doi-asserted-by":"publisher","DOI":"10.1109\/DATE.2002.998307"},{"key":"ref298","first-page":"401","article-title":"Kilo-NOC: A heterogeneous network-on-chip architecture for scalability and service guarantees","author":"grot","year":"2011","journal-title":"2011 38th Annual International Symposium on Computer Architecture (ISCA) ISCA"},{"key":"ref154","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2010.51"},{"key":"ref153","first-page":"1","article-title":"ATLAS: A scalable and high-performance scheduling algorithm for multiple memory controllers","author":"kim","year":"2010","journal-title":"Proc Int Symp High-Perform Comput Arch"},{"key":"ref156","doi-asserted-by":"publisher","DOI":"10.1109\/TPDS.2016.2526003"},{"key":"ref155","doi-asserted-by":"publisher","DOI":"10.1145\/2155620.2155664"},{"key":"ref150","article-title":"Performance evaluation and feasibility study of near-data processing on DRAM modules (DIMM-NDP) for scientific applications","author":"gries","year":"2019"},{"key":"ref152","doi-asserted-by":"publisher","DOI":"10.1109\/DSN.2015.33"},{"key":"ref291","doi-asserted-by":"publisher","DOI":"10.1145\/605397.605420"},{"key":"ref151","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA.2016.46"},{"key":"ref290","doi-asserted-by":"publisher","DOI":"10.1145\/2628071.2628096"},{"key":"ref146","doi-asserted-by":"publisher","DOI":"10.1145\/2818950.2818982"},{"key":"ref147","doi-asserted-by":"publisher","DOI":"10.1145\/3155920"},{"key":"ref148","doi-asserted-by":"publisher","DOI":"10.1145\/3232521"},{"key":"ref149","doi-asserted-by":"publisher","DOI":"10.1145\/2967938.2967958"},{"key":"ref289","doi-asserted-by":"publisher","DOI":"10.1145\/1669112.1669119"},{"key":"ref167","doi-asserted-by":"publisher","DOI":"10.1109\/ISPASS.2013.6557175"},{"key":"ref166","doi-asserted-by":"publisher","DOI":"10.1109\/SC.2005.59"},{"key":"ref165","doi-asserted-by":"publisher","DOI":"10.1145\/286860.286864"},{"key":"ref164","doi-asserted-by":"publisher","DOI":"10.1109\/2.67193"},{"key":"ref163","year":"2021","journal-title":"Intel vtune profiler user guide"},{"key":"ref162","doi-asserted-by":"publisher","DOI":"10.1109\/ISPASS.2014.6844459"},{"key":"ref161","year":"2021","journal-title":"Intel vtune profiler user guide"},{"key":"ref160","doi-asserted-by":"publisher","DOI":"10.1109\/LCA.2015.2414456"},{"key":"ref159","doi-asserted-by":"publisher","DOI":"10.1145\/2485922.2485963"},{"key":"ref157","doi-asserted-by":"publisher","DOI":"10.1145\/2847255"},{"key":"ref158","year":"2021","journal-title":"DAMOV Benchmark Suite and Simulation Framework"},{"key":"ref197","year":"2011","journal-title":"Intel Xeon Processor E3-1240"},{"key":"ref198","year":"2018","journal-title":"Understanding How General Exploration Works in Intel VTune Amplifier"},{"key":"ref199","doi-asserted-by":"publisher","DOI":"10.1109\/ISPASS.2017.7975269"},{"key":"ref193","doi-asserted-by":"publisher","DOI":"10.1145\/3373376.3378497"},{"key":"ref194","doi-asserted-by":"publisher","DOI":"10.1145\/3203217.3203280"},{"key":"ref195","doi-asserted-by":"publisher","DOI":"10.1145\/3076113.3076116"},{"key":"ref196","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPSW.2018.00050"},{"key":"ref190","doi-asserted-by":"publisher","DOI":"10.1109\/PACT.2015.22"},{"key":"ref191","doi-asserted-by":"publisher","DOI":"10.1109\/PACT.2013.6618805"},{"key":"ref192","doi-asserted-by":"publisher","DOI":"10.1147\/JRD.2015.2409732"},{"key":"ref200","year":"2014","journal-title":"CORAL Benchmark Codes"},{"key":"ref101","first-page":"263","article-title":"SynCron: Efficient synchronization support for near-data-processing architectures","author":"giannoula","year":"2021","journal-title":"Proc IEEE Int Symp High-Perform Comput Archit (HPCA)"},{"key":"ref100","first-page":"329","article-title":"SIMDRAM: A framework for bit-serial SIMD processing using DRAM","author":"hajinazar","year":"2021","journal-title":"Proc 26th ACM Int Conf Architectural Program Lang Operating Syst"},{"key":"ref209","doi-asserted-by":"publisher","DOI":"10.1109\/TKDE.2014.2313874"},{"key":"ref409","doi-asserted-by":"publisher","DOI":"10.1109\/MDAT.2015.2504899"},{"key":"ref407","first-page":"1","article-title":"Employing classification-based algorithms for general-purpose approximate computing","author":"oliveira","year":"2018","journal-title":"Proc 55th ACM\/ESDA\/IEEE Design Autom Conf (DAC)"},{"key":"ref408","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA.2018.00027"},{"key":"ref203","doi-asserted-by":"publisher","DOI":"10.1109\/IISWC.2009.5306797"},{"key":"ref405","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA.2016.36"},{"key":"ref204","doi-asserted-by":"publisher","DOI":"10.1109\/IISWC.2009.5306794"},{"key":"ref406","first-page":"50","article-title":"Doppelg&#x00E4;nger: A cache for approximate computing","author":"miguel","year":"2015","journal-title":"Proc ISCA"},{"key":"ref201","article-title":"Parboil: A revised benchmark suite for scientific and commercial throughput computing","author":"stratton","year":"2012"},{"key":"ref403","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA.2005.6"},{"key":"ref202","first-page":"72","article-title":"The PARSEC benchmark suite: Characterization and architectural implications","author":"bienia","year":"2008","journal-title":"Proc 17th Int Conf Parallel Archit Compilation Techn (PACT)"},{"key":"ref404","doi-asserted-by":"publisher","DOI":"10.1145\/2749469.2750399"},{"key":"ref207","article-title":"HPCG benchmark: A new metric for ranking high performance computing systems","author":"dongarra","year":"2015"},{"key":"ref401","doi-asserted-by":"publisher","DOI":"10.1109\/PACT.2009.29"},{"key":"ref208","doi-asserted-by":"publisher","DOI":"10.1109\/BIBM.2016.7822731"},{"key":"ref402","doi-asserted-by":"publisher","DOI":"10.1145\/2678373.2665696"},{"key":"ref205","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA.1995.524546"},{"key":"ref206","doi-asserted-by":"publisher","DOI":"10.1145\/1188455.1188677"},{"key":"ref400","doi-asserted-by":"publisher","DOI":"10.1145\/800255.810669"},{"key":"ref211","doi-asserted-by":"publisher","DOI":"10.14778\/2809974.2809983"},{"key":"ref210","doi-asserted-by":"publisher","DOI":"10.1109\/ICPP.2015.30"},{"key":"ref418","doi-asserted-by":"publisher","DOI":"10.1109\/SAMOS.2014.6893198"},{"key":"ref419","doi-asserted-by":"publisher","DOI":"10.1145\/2830772.2830786"},{"key":"ref212","doi-asserted-by":"publisher","DOI":"10.1145\/2442516.2442530"},{"key":"ref414","doi-asserted-by":"publisher","DOI":"10.1109\/MM.2017.3211105"},{"key":"ref213","doi-asserted-by":"publisher","DOI":"10.1109\/IISWC.2009.5306783"},{"key":"ref415","doi-asserted-by":"publisher","DOI":"10.1145\/3297858.3304006"},{"key":"ref214","first-page":"1097","article-title":"ImageNet classification with deep convolutional neural networks","author":"krizhevsky","year":"2012","journal-title":"Proc NIPS"},{"key":"ref416","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA.2018.00074"},{"key":"ref215","author":"redmon","year":"0","journal-title":"Darknet Open source neural networks in c"},{"key":"ref417","first-page":"318","article-title":"Intelligent architectures for intelligent computing systems","author":"mutlu","year":"2021","journal-title":"Proc Design Autom Test Eur Conf Exhibition (DATE)"},{"key":"ref216","doi-asserted-by":"publisher","DOI":"10.1109\/IISWC.2017.8167757"},{"key":"ref410","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2018.00025"},{"key":"ref217","doi-asserted-by":"publisher","DOI":"10.1137\/1.9781611972740.43"},{"key":"ref411","doi-asserted-by":"publisher","DOI":"10.1145\/2818950.2818953"},{"key":"ref218","year":"2014","journal-title":"9th DIMACS Implementation Challenge"},{"key":"ref412","doi-asserted-by":"publisher","DOI":"10.1145\/2742854.2742863"},{"key":"ref219","doi-asserted-by":"publisher","DOI":"10.1093\/bioinformatics\/btx342"},{"key":"ref413","doi-asserted-by":"publisher","DOI":"10.1145\/2492408.2492418"},{"key":"ref420","doi-asserted-by":"publisher","DOI":"10.1109\/IISWC.2015.9"},{"key":"ref220","doi-asserted-by":"publisher","DOI":"10.1109\/MM.2020.3013728"},{"key":"ref222","article-title":"Memory hierarchy design for next generation scalable many-core platforms","author":"azarkhish","year":"2016"},{"key":"ref221","doi-asserted-by":"publisher","DOI":"10.2307\/2346830"},{"key":"ref229","doi-asserted-by":"publisher","DOI":"10.1145\/2086696.2086707"},{"key":"ref228","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2007.346201"},{"key":"ref227","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-540-92990-1_10"},{"key":"ref226","doi-asserted-by":"publisher","DOI":"10.1109\/DATE.2011.5763155"},{"key":"ref225","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2006.44"},{"key":"ref224","doi-asserted-by":"publisher","DOI":"10.1145\/1028176.1006708"},{"key":"ref223","doi-asserted-by":"publisher","DOI":"10.1109\/MM.2014.54"},{"key":"ref127","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2016.7446095"},{"key":"ref126","doi-asserted-by":"publisher","DOI":"10.1145\/3352460.3358256"},{"key":"ref125","doi-asserted-by":"publisher","DOI":"10.1145\/3299874.3317984"},{"key":"ref124","doi-asserted-by":"publisher","DOI":"10.1145\/3307650.3322275"},{"key":"ref129","doi-asserted-by":"publisher","DOI":"10.1109\/LCA.2020.2990599"},{"key":"ref128","doi-asserted-by":"publisher","DOI":"10.1145\/2897937.2898064"},{"key":"ref130","doi-asserted-by":"publisher","DOI":"10.1109\/LCA.2015.2434872"},{"key":"ref133","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA45697.2020.00070"},{"key":"ref134","doi-asserted-by":"publisher","DOI":"10.1007\/s11265-019-01505-1"},{"key":"ref131","doi-asserted-by":"publisher","DOI":"10.1145\/1498765.1498785"},{"key":"ref132","doi-asserted-by":"publisher","DOI":"10.1109\/TPDS.2017.2752706"},{"key":"ref232","first-page":"1","article-title":"Gretch: A hardware prefetcher for graph analytics","author":"kaushik","year":"2021","journal-title":"Proc TACO"},{"key":"ref233","doi-asserted-by":"publisher","DOI":"10.1145\/2925426.2926254"},{"key":"ref230","article-title":"Thermal feasibility of die-stacked processing in memory","author":"eckert","year":"2014","journal-title":"Proc WoNDP"},{"key":"ref231","doi-asserted-by":"publisher","DOI":"10.1145\/2611354.2611365"},{"key":"ref239","doi-asserted-by":"publisher","DOI":"10.1145\/263580.263597"},{"key":"ref238","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2001.991128"},{"key":"ref235","doi-asserted-by":"publisher","DOI":"10.1145\/3352460.3358318"},{"key":"ref234","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2019.00051"},{"key":"ref237","doi-asserted-by":"publisher","DOI":"10.1145\/379240.379251"},{"key":"ref236","doi-asserted-by":"publisher","DOI":"10.1145\/1024393.1024407"},{"key":"ref136","doi-asserted-by":"publisher","DOI":"10.1109\/FPL.2019.00035"},{"key":"ref135","article-title":"How much computation power do you need for near-data processing in cloud?","author":"kim","year":"2017","journal-title":"Proc ASBD"},{"key":"ref138","doi-asserted-by":"publisher","DOI":"10.1109\/TCAD.2020.3021336"},{"key":"ref137","doi-asserted-by":"publisher","DOI":"10.23919\/DATE.2019.8715108"},{"key":"ref139","doi-asserted-by":"publisher","DOI":"10.1109\/FPL50879.2020.00014"},{"key":"ref140","article-title":"Benchmarking a new paradigm: An experimental analysis of a real processing-in-memory architecture","author":"g\u00f3mez-luna","year":"2021","journal-title":"arXiv 2105 03814"},{"key":"ref141","doi-asserted-by":"publisher","DOI":"10.1145\/2818950.2818955"},{"key":"ref142","doi-asserted-by":"publisher","DOI":"10.1109\/TPDS.2021.3065448"},{"key":"ref143","doi-asserted-by":"publisher","DOI":"10.1007\/s11227-021-03661-3"},{"key":"ref144","first-page":"908","article-title":"FAFNIR: Accelerating sparse gathering by using efficient near-memory intelligent reduction","author":"asgari","year":"2021","journal-title":"Proc IEEE Int Symp High-Perform Comput Archit (HPCA)"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/IMW.2013.6582088"},{"key":"ref145","doi-asserted-by":"publisher","DOI":"10.1145\/2818950.2818985"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1145\/3173162.3173177"},{"key":"ref241","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2008.4658635"},{"key":"ref242","doi-asserted-by":"crossref","first-page":"443","DOI":"10.1145\/1854273.1854328","article-title":"Efficient Runahead Threads","author":"ramirez","year":"2010","journal-title":"Proceedings of the 19th International Conference on Parallel Architectures and Compilation Techniques (PACT)"},{"key":"ref243","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2007.346187"},{"key":"ref244","doi-asserted-by":"publisher","DOI":"10.1145\/605397.605427"},{"key":"ref240","doi-asserted-by":"publisher","DOI":"10.1109\/MM.2003.1261383"},{"key":"ref248","author":"mutlu","year":"2015","journal-title":"Lecture Notes for 18&#x2013;447 Computer Architecture&#x2013;Lecture 17 Memory Hierarchy and Caches"},{"key":"ref247","doi-asserted-by":"publisher","DOI":"10.1145\/2485922.2485930"},{"key":"ref246","doi-asserted-by":"publisher","DOI":"10.23919\/DATE.2018.8342135"},{"key":"ref245","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA.1999.765944"},{"key":"ref249","doi-asserted-by":"publisher","DOI":"10.1109\/12.817393"},{"key":"ref109","article-title":"Buddy-RAM: Improving the performance and efficiency of bulk bitwise operations using DRAM","author":"seshadri","year":"2016","journal-title":"arXiv 1611 09988"},{"key":"ref108","article-title":"PLUTo: In-DRAM lookup tables to enable massively parallel general-purpose computation","author":"dinis ferreira","year":"2021","journal-title":"arXiv 2104 07699"},{"key":"ref107","article-title":"SISA: Set-centric instruction set architecture for graph mining on processing-in-memory systems","author":"besta","year":"2021","journal-title":"arXiv 2104 07582"},{"key":"ref106","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2018.00026"},{"key":"ref105","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2019.00011"},{"key":"ref104","doi-asserted-by":"publisher","DOI":"10.1109\/SAMOS.2017.8344611"},{"key":"ref103","article-title":"Mitigating edge machine learning inference bottlenecks: An empirical study on accelerating Google edge models","author":"boroumand","year":"2021","journal-title":"arXiv 2103 00768"},{"key":"ref102","article-title":"Polynesia: Enabling effective hybrid transactional\/analytical databases with specialized hardware\/software co-design","author":"boroumand","year":"2021","journal-title":"arXiv 2103 00798"},{"key":"ref111","article-title":"GRIM-filter: Fast seed filtering in read mapping using emerging memory technologies","author":"kim","year":"2017","journal-title":"arXiv 1708 04329"},{"key":"ref112","article-title":"Enabling the adoption of Processing-in-memory: Challenges, mechanisms, future research directions","author":"ghose","year":"2018","journal-title":"arXiv 1802 00320"},{"key":"ref110","article-title":"LazyPIM: Efficient support for cache coherence in Processing-in-Memory architectures","author":"boroumand","year":"2017","journal-title":"arXiv 1706 03162"},{"key":"ref250","doi-asserted-by":"publisher","DOI":"10.1145\/2678373.2665694"},{"key":"ref251","doi-asserted-by":"publisher","DOI":"10.1145\/3079856.3080214"},{"key":"ref254","doi-asserted-by":"publisher","DOI":"10.1145\/264107.264213"},{"key":"ref255","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.1995.476814"},{"key":"ref252","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA.2014.6853204"},{"key":"ref253","doi-asserted-by":"publisher","DOI":"10.1145\/2677956"},{"key":"ref257","doi-asserted-by":"publisher","DOI":"10.1109\/TC.2007.70816"},{"key":"ref256","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2003.1183548"},{"key":"ref259","doi-asserted-by":"publisher","DOI":"10.1145\/2370816.2370862"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.23919\/DATE.2017.7927156"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1145\/3219617.3219661"},{"key":"ref258","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS.2013.16"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/TCAD.2019.2915318"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/SP40000.2020.00090"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1145\/2485922.2485928"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1145\/2366231.2337161"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1145\/3079856.3080242"},{"key":"ref118","doi-asserted-by":"publisher","DOI":"10.1109\/JPROC.2015.2435018"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/DSN.2015.58"},{"key":"ref117","doi-asserted-by":"publisher","DOI":"10.1109\/MM.2014.55"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1147\/rd.462.0187"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1145\/2591971.2592000"},{"key":"ref119","first-page":"1","article-title":"A case for near memory computation inside the smart memory cube","author":"azarkhish","year":"2016","journal-title":"Proc EMS"},{"key":"ref114","article-title":"A workload and programming ease driven perspective of processing-in-memory","author":"ghose","year":"2019","journal-title":"arXiv 1907 12947"},{"key":"ref113","article-title":"RowClone: Accelerating data movement and initialization using DRAM","author":"seshadri","year":"2018","journal-title":"arXiv 1805 03502"},{"key":"ref116","doi-asserted-by":"publisher","DOI":"10.1109\/2.375174"},{"key":"ref115","first-page":"1","article-title":"QUAC-TRNG: High-throughput true random number generation using quadruple row activation in commodity DRAM chips","author":"olgun","year":"2021","journal-title":"Proc ACM\/IEEE 48th Annu Int Symp Comput Archit (ISCA)"},{"key":"ref120","first-page":"19","article-title":"Memory bandwidth and machine balance in current high performance computers","volume":"2","author":"mccalpin","year":"1995","journal-title":"IEEE Comput Soc Tech Committee Comput Archit (TCCA) Newslett"},{"key":"ref121","doi-asserted-by":"publisher","DOI":"10.1109\/PACT.2015.51"},{"key":"ref122","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2018.00053"},{"key":"ref123","doi-asserted-by":"publisher","DOI":"10.23919\/DATE.2019.8715270"},{"key":"ref260","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS.2016.30"},{"key":"ref261","first-page":"615","article-title":"Tiered-latency DRAM: A low latency and low cost DRAM architecture","author":"lee","year":"2013","journal-title":"Proc IEEE 19th Int Symp High Perform Comput Archit (HPCA)"},{"key":"ref262","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2016.7446096"},{"key":"ref263","doi-asserted-by":"publisher","DOI":"10.1145\/3307650.3322231"},{"key":"ref264","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2018.00032"},{"key":"ref265","doi-asserted-by":"publisher","DOI":"10.1109\/ICCD.2018.00051"},{"key":"ref266","doi-asserted-by":"publisher","DOI":"10.1145\/3195970.3196136"},{"key":"ref365","doi-asserted-by":"publisher","DOI":"10.1145\/1669112.1669154"},{"key":"ref364","doi-asserted-by":"publisher","DOI":"10.1145\/2000064.2000081"},{"key":"ref363","doi-asserted-by":"publisher","DOI":"10.1145\/264107.264207"},{"key":"ref362","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2005.9"},{"key":"ref361","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2014.29"},{"key":"ref360","doi-asserted-by":"publisher","DOI":"10.1145\/1138035.1138038"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1109\/IISWC.2014.6983056"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2016.7783735"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1145\/3316781.3323476"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2014.6835958"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1145\/2150976.2150982"},{"key":"ref359","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.1995.476815"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1145\/2408776.2408794"},{"key":"ref37","article-title":"A modern primer on processing in memory","author":"mutlu","year":"2021","journal-title":"Emerging Computing From Devices to Systems&#x2014;Looking Beyond Moore and Von Neumann"},{"key":"ref357","doi-asserted-by":"publisher","DOI":"10.1145\/2485922.2485951"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1147\/JRD.2019.2934048"},{"key":"ref358","doi-asserted-by":"publisher","DOI":"10.1109\/TC.2010.214"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/VLSI-DAT49148.2020.9196490"},{"key":"ref355","first-page":"1919","article-title":"Learning memory access patterns","author":"hashemi","year":"2018","journal-title":"Proc ICML"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1016\/j.micpro.2019.01.009"},{"key":"ref356","doi-asserted-by":"publisher","DOI":"10.1145\/3239567"},{"key":"ref352","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2018.00021"},{"key":"ref351","doi-asserted-by":"publisher","DOI":"10.1145\/3352460.3358325"},{"key":"ref354","doi-asserted-by":"publisher","DOI":"10.1145\/1542275.1542349"},{"key":"ref353","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.1992.697004"},{"key":"ref350","doi-asserted-by":"publisher","DOI":"10.1109\/12.381947"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1145\/1998582.1998590"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/DSN.2015.57"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1145\/1950365.1950392"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/DSN.2016.30"},{"key":"ref348","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA.2001.937427"},{"key":"ref349","doi-asserted-by":"publisher","DOI":"10.1145\/263580.263631"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2015.7056057"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1145\/3123939.3123945"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1145\/3078505.3078590"},{"key":"ref344","doi-asserted-by":"publisher","DOI":"10.23919\/DATE.2019.8714956"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1145\/3078505.3078533"},{"key":"ref345","doi-asserted-by":"publisher","DOI":"10.1145\/3422575.3422778"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2014.6835946"},{"key":"ref346","doi-asserted-by":"publisher","DOI":"10.1145\/3357526.3357550"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1145\/2896377.2901453"},{"key":"ref347","doi-asserted-by":"publisher","DOI":"10.1145\/3373376.3378498"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1109\/TPDS.2016.2625244"},{"key":"ref383","doi-asserted-by":"publisher","DOI":"10.1007\/3-540-47847-7_11"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2018.00058"},{"key":"ref382","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.1999.744311"},{"key":"ref381","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2001.903264"},{"key":"ref380","doi-asserted-by":"publisher","DOI":"10.1145\/291069.291058"},{"key":"ref387","doi-asserted-by":"publisher","DOI":"10.1109\/PACT.2002.1106008"},{"key":"ref386","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA.1999.765957"},{"key":"ref385","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2005.22"},{"key":"ref384","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.1997.645815"},{"key":"ref59","doi-asserted-by":"publisher","DOI":"10.1186\/s12864-018-4460-0"},{"key":"ref379","doi-asserted-by":"publisher","DOI":"10.1109\/PACT.1999.807407"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2016.7783764"},{"key":"ref57","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2003.1183532"},{"key":"ref377","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.1997.645819"},{"key":"ref56","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2009.4798232"},{"key":"ref378","article-title":"Speculative execution based on value prediction","author":"gabbay","year":"1996"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.1109\/ICCD.2016.7753257"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2017.54"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.1145\/2830772.2830820"},{"key":"ref52","first-page":"2","article-title":"It&#x2019;s the memory, stupid!","volume":"10","author":"sites","year":"1996","journal-title":"MPR"},{"key":"ref370","doi-asserted-by":"publisher","DOI":"10.1007\/s10766-005-7304-x"},{"key":"ref372","doi-asserted-by":"publisher","DOI":"10.1147\/rd.374.0547"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1145\/3352460.3358280"},{"key":"ref371","doi-asserted-by":"publisher","DOI":"10.1145\/2836168"},{"key":"ref374","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.1996.566464"},{"key":"ref373","doi-asserted-by":"publisher","DOI":"10.1145\/3090634"},{"key":"ref376","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA.1999.765940"},{"key":"ref375","doi-asserted-by":"publisher","DOI":"10.1145\/237090.237173"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/IEDM.2010.5703348"},{"key":"ref3","article-title":"Co-architecting controllers and DRAM to enhance DRAM process scaling","volume":"14","author":"kang","year":"2014","journal-title":"Proc Memory Forum"},{"key":"ref6","first-page":"19","article-title":"Research problems and opportunities in memory systems","volume":"1","author":"mutlu","year":"2014","journal-title":"Supercomputing Frontiers and Innovations"},{"key":"ref5","doi-asserted-by":"crossref","first-page":"158","DOI":"10.1145\/2749469.2750392","article-title":"Profiling a warehouse-scale computer","author":"kanev","year":"2015","journal-title":"Proc 42nd Annu Int Symp Comput Archit"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA45697.2020.00059"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO50266.2020.00036"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4939-2163-8_6"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1145\/2678373.2665726"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1145\/2749469.2750386"},{"key":"ref366","doi-asserted-by":"publisher","DOI":"10.1145\/1669112.1669155"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1145\/3309697.3331482"},{"key":"ref367","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2008.4771791"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1145\/3007787.3001159"},{"key":"ref368","article-title":"Generalized correlation-based hardware prefetching","author":"charney","year":"1995"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1145\/2749469.2750385"},{"key":"ref369","article-title":"Correlation-based hardware prefetching","author":"charney","year":"1995"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1145\/373574.373576"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1145\/977091.977115"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1145\/216585.216588"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA.2012.6237032"},{"key":"ref320","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2019.00018"},{"key":"ref321","doi-asserted-by":"publisher","DOI":"10.1145\/2818950.2818952"},{"key":"ref73","year":"2021","journal-title":"Hybrid memory cube specification rev 2 0"},{"key":"ref72","first-page":"432","article-title":"A 1.2 V 8 Gb 8-channel 128 GB\/s high-bandwidth memory (HBM) stacked DRAM with effective microbump I\/O test methods using 29 nm process and TSV","author":"lee","year":"2014","journal-title":"IEEE Int Solid-State Circuits Conf (ISSCC) Dig Tech Papers"},{"key":"ref71","doi-asserted-by":"publisher","DOI":"10.1145\/2902961.2903512"},{"key":"ref70","doi-asserted-by":"publisher","DOI":"10.1145\/3007787.3001178"},{"key":"ref76","doi-asserted-by":"publisher","DOI":"10.1109\/ISPASS.2014.6844483"},{"key":"ref318","first-page":"556","article-title":"Fulcrum: A simplified control and access mechanism toward flexible and practical in-situ accelerators","author":"lenjani","year":"2020","journal-title":"Proc IEEE Int Symp High Perform Comput Archit (HPCA)"},{"key":"ref77","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2015.7056040"},{"key":"ref317","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2017.37"},{"key":"ref74","doi-asserted-by":"publisher","DOI":"10.1145\/3307650.3322266"},{"key":"ref316","doi-asserted-by":"publisher","DOI":"10.1145\/3352460.3358297"},{"key":"ref75","doi-asserted-by":"publisher","DOI":"10.1109\/HPEC.2013.6670336"},{"key":"ref315","doi-asserted-by":"publisher","DOI":"10.1145\/2678373.2665689"},{"key":"ref314","doi-asserted-by":"publisher","DOI":"10.1109\/TCAD.2018.2821565"},{"key":"ref313","doi-asserted-by":"publisher","DOI":"10.1145\/3287624.3287642"},{"key":"ref78","first-page":"1","article-title":"A processing in memory taxonomy and a case for studying fixed-function PIM","author":"loh","year":"2013","journal-title":"Proc WoNDP"},{"key":"ref312","doi-asserted-by":"publisher","DOI":"10.1109\/ISPASS.2018.00018"},{"key":"ref79","doi-asserted-by":"publisher","DOI":"10.1145\/2967938.2967940"},{"key":"ref311","doi-asserted-by":"publisher","DOI":"10.7873\/DATE.2015.0054"},{"key":"ref319","doi-asserted-by":"publisher","DOI":"10.1145\/3132402.3132426"},{"key":"ref450","doi-asserted-by":"publisher","DOI":"10.1109\/IISWC.2016.7581261"},{"key":"ref451","doi-asserted-by":"publisher","DOI":"10.1145\/3037697.3037730"},{"key":"ref452","doi-asserted-by":"publisher","DOI":"10.1109\/FCCM.2017.39"},{"key":"ref310","author":"friedman","year":"2008","journal-title":"The Elements of Statistical Learning"},{"key":"ref60","first-page":"951","article-title":"GenASM: A high-performance, low-power approximate string matching acceleration framework for genome sequence analysis","author":"cali","year":"2020","journal-title":"Proc 53rd Annu IEEE\/ACM Int Symp Microarchitecture (MICRO)"},{"key":"ref62","doi-asserted-by":"publisher","DOI":"10.1145\/2832911"},{"key":"ref61","article-title":"Practical mechanisms for reducing processor-memory data movement in modern workloads","author":"boroumand","year":"2020"},{"key":"ref63","doi-asserted-by":"publisher","DOI":"10.1109\/LCA.2016.2577557"},{"key":"ref305","first-page":"1249","article-title":"Large Vector Extensions Inside the HMC","author":"marco a z alves","year":"2016","journal-title":"Design Automation Test in Europe Conference Exhibition (DATE)"},{"key":"ref64","doi-asserted-by":"publisher","DOI":"10.1145\/2600212.2600213"},{"key":"ref304","doi-asserted-by":"publisher","DOI":"10.1109\/MM.2016.8"},{"key":"ref65","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2016.7446059"},{"key":"ref307","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2016.7783753"},{"key":"ref66","doi-asserted-by":"publisher","DOI":"10.1145\/3079856.3080233"},{"key":"ref306","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-30695-7_2"},{"key":"ref67","doi-asserted-by":"publisher","DOI":"10.23919\/DATE.2017.7927081"},{"key":"ref301","doi-asserted-by":"publisher","DOI":"10.1145\/3296957.3173197"},{"key":"ref68","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-56258-2_3"},{"key":"ref300","author":"zhang","year":"2021","journal-title":"ZSim++"},{"key":"ref69","doi-asserted-by":"publisher","DOI":"10.1145\/3037697.3037702"},{"key":"ref303","doi-asserted-by":"publisher","DOI":"10.1145\/3155287"},{"key":"ref302","author":"mutlu","year":"2020","journal-title":"Lecture Notes for Digital Design and Computer Architecture&#x2013;Lecture 15a Out-of-Order Execution"},{"key":"ref444","year":"2021","journal-title":"Accelerate Fast Math With Intel OneAPI Math Kernel Library"},{"key":"ref443","article-title":"MetaGraph: Indexing and analysing nucleotide archives at petabase-scale","author":"karasikov","year":"2020","journal-title":"BioRxiv"},{"key":"ref446","doi-asserted-by":"publisher","DOI":"10.1145\/1228268.1228270"},{"key":"ref445","year":"2021","journal-title":"SPEC CPU2000 Benchmark"},{"key":"ref309","doi-asserted-by":"publisher","DOI":"10.1145\/3087556.3087582"},{"key":"ref448","article-title":"Deep learning recommendation model for personalization and recommendation systems","author":"naumov","year":"2019","journal-title":"arXiv 1906 00091"},{"key":"ref308","doi-asserted-by":"publisher","DOI":"10.1109\/PACT.2017.56"},{"key":"ref447","doi-asserted-by":"publisher","DOI":"10.14778\/2735508.2735511"},{"key":"ref449","doi-asserted-by":"publisher","DOI":"10.1007\/s10766-017-0506-1"},{"key":"ref441","article-title":"Contextual LSTM (CLSTM) models for large scale NLP tasks","author":"ghosh","year":"2016","journal-title":"arXiv 1602 06291"},{"key":"ref442","doi-asserted-by":"publisher","DOI":"10.1177\/1094342011403516"},{"key":"ref440","year":"2021","journal-title":"SPEC CPU2006 Benchmarks"},{"key":"ref95","doi-asserted-by":"publisher","DOI":"10.1145\/3352460.3358260"},{"key":"ref94","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2017.55"},{"key":"ref93","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2018.00052"},{"key":"ref92","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA47549.2020.00033"},{"key":"ref91","doi-asserted-by":"publisher","DOI":"10.1145\/3195970.3196029"},{"key":"ref342","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2003.1253185"},{"key":"ref90","article-title":"The processing using memory paradigm: In-DRAM bulk copy, initialization, bitwise AND and OR","author":"seshadri","year":"2016","journal-title":"arXiv 1610 09603"},{"key":"ref343","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA.2004.1310764"},{"key":"ref340","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2019.00009"},{"key":"ref341","doi-asserted-by":"publisher","DOI":"10.23919\/DATE.2019.8715034"},{"key":"ref336","doi-asserted-by":"publisher","DOI":"10.1145\/1555754.1555814"},{"key":"ref335","doi-asserted-by":"publisher","DOI":"10.1145\/2150976.2151001"},{"key":"ref334","doi-asserted-by":"publisher","DOI":"10.1145\/1508244.1508274"},{"key":"ref333","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2012.37"},{"key":"ref98","doi-asserted-by":"publisher","DOI":"10.1145\/3307650.3322257"},{"key":"ref99","first-page":"120","article-title":"NATSA: A near-data processing accelerator for time series analysis","author":"fernandez","year":"2020","journal-title":"Proc IEEE 38th Int Conf Comput Design (ICCD)"},{"key":"ref339","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA47549.2020.00042"},{"key":"ref96","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA.2018.00040"},{"key":"ref338","doi-asserted-by":"publisher","DOI":"10.1145\/1391469.1391666"},{"key":"ref97","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2017.21"},{"key":"ref337","doi-asserted-by":"publisher","DOI":"10.1145\/2485922.2485936"},{"key":"ref439","doi-asserted-by":"publisher","DOI":"10.14778\/2732219.2732227"},{"key":"ref438","year":"2021","journal-title":"VP8\/VP9 Codec SDK"},{"key":"ref437","article-title":"Aligning sequence reads, clone sequences and assembly contigs with BWA-MEM","author":"li","year":"2013","journal-title":"arXiv 1303 3997"},{"key":"ref436","doi-asserted-by":"publisher","DOI":"10.1186\/1471-2105-13-238"},{"key":"ref435","doi-asserted-by":"publisher","DOI":"10.1109\/IISWC.2014.6983043"},{"key":"ref434","article-title":"Mesoscale modeling and direct simulation of explosively dispersed granular materials","author":"mo","year":"2019"},{"key":"ref433","doi-asserted-by":"publisher","DOI":"10.1007\/s11036-013-0489-0"},{"key":"ref432","author":"pouchet","year":"2021","journal-title":"PolyBench The Polyhedral Benchmark Suite"},{"key":"ref430","doi-asserted-by":"publisher","DOI":"10.1093\/bioinformatics\/btp324"},{"key":"ref431","author":"beranek","year":"2021","journal-title":"Hardware Effects"},{"key":"ref82","doi-asserted-by":"publisher","DOI":"10.1109\/PACT.2015.42"},{"key":"ref81","first-page":"2069","article-title":"JAFAR: Near-data processing for databases","author":"augusta","year":"2015","journal-title":"Proc ACM SIGMOD Int Conf Manage Data"},{"key":"ref84","doi-asserted-by":"publisher","DOI":"10.1145\/3007787.3001140"},{"key":"ref83","doi-asserted-by":"publisher","DOI":"10.1109\/HOTCHIPS.2019.8875680"},{"key":"ref330","doi-asserted-by":"publisher","DOI":"10.1145\/1815961.1816020"},{"key":"ref80","doi-asserted-by":"publisher","DOI":"10.1145\/2749469.2750397"},{"key":"ref331","doi-asserted-by":"publisher","DOI":"10.1145\/2485922.2485924"},{"key":"ref332","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2007.41"},{"key":"ref89","first-page":"185","article-title":"RowClone: Fast and Energy-Efficient in-DRAM Bulk Data Copy and Initialization","author":"vivek seshadri","year":"2013","journal-title":"2013 46th Annual IEEE\/ACM International Symposium on Microarchitecture (MICRO)"},{"key":"ref323","doi-asserted-by":"publisher","DOI":"10.1109\/SAMOS.2015.7363678"},{"key":"ref322","article-title":"3D-stacked memory-side acceleration: Accelerator and system design","author":"guo","year":"2014","journal-title":"Proc WoNDP"},{"key":"ref325","doi-asserted-by":"publisher","DOI":"10.1145\/3123939.3123969"},{"key":"ref324","doi-asserted-by":"publisher","DOI":"10.1109\/3DIC.2013.6702348"},{"key":"ref85","doi-asserted-by":"publisher","DOI":"10.1145\/3007787.3001139"},{"key":"ref327","first-page":"305","article-title":"Yoga: A hybrid dynamic VLIW\/OoO processor","author":"villavieja","year":"2014"},{"key":"ref86","doi-asserted-by":"publisher","DOI":"10.1145\/3123939.3124544"},{"key":"ref326","doi-asserted-by":"publisher","DOI":"10.1109\/ICCD.2014.6974710"},{"key":"ref429","doi-asserted-by":"publisher","DOI":"10.1109\/40.592312"},{"key":"ref87","article-title":"In-DRAM bulk bitwise execution engine","author":"seshadri","year":"2019","journal-title":"arXiv 1905 09822"},{"key":"ref329","doi-asserted-by":"publisher","DOI":"10.1145\/1250662.1250686"},{"key":"ref88","doi-asserted-by":"publisher","DOI":"10.1145\/3123939.3123977"},{"key":"ref328","first-page":"305","article-title":"MorphCore: An energy-efficient microarchitecture for high performance ILP and high throughput TLP","author":"suleman","year":"2012","journal-title":"Proc 45th Annu IEEE\/ACM Int Symp Microarchitecture"},{"key":"ref426","doi-asserted-by":"publisher","DOI":"10.1007\/3-540-44570-6_6"},{"key":"ref425","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2018.00061"},{"key":"ref428","doi-asserted-by":"publisher","DOI":"10.1145\/3323439.3323988"},{"key":"ref427","doi-asserted-by":"publisher","DOI":"10.1109\/ISPASS.2018.00028"},{"key":"ref422","doi-asserted-by":"publisher","DOI":"10.1109\/IISWC.2014.6983059"},{"key":"ref421","doi-asserted-by":"publisher","DOI":"10.1109\/MM.2010.73"},{"key":"ref424","doi-asserted-by":"publisher","DOI":"10.1109\/LCA.2018.2839189"},{"key":"ref423","first-page":"488","article-title":"The architectural implications of Facebook&#x2019;s DNN-based personalized recommendation","author":"gupta","year":"2020","journal-title":"Proc IEEE Int Symp High Perform Comput Archit (HPCA)"},{"key":"ref399","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2005.4"},{"key":"ref389","doi-asserted-by":"publisher","DOI":"10.1145\/1542275.1542288"},{"key":"ref388","doi-asserted-by":"crossref","first-page":"377","DOI":"10.1145\/2370816.2370870","article-title":"Base-Delta-Immediate Compression: Practical Data Compression for On-Chip Caches","author":"gennady pekhimenko","year":"2012","journal-title":"In International Conference on Parallel Architectures and Compilation Techniques (PACT)"},{"key":"ref390","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2000.898076"},{"key":"ref397","doi-asserted-by":"publisher","DOI":"10.1145\/2540708.2540724"},{"key":"ref398","doi-asserted-by":"publisher","DOI":"10.1109\/TVLSI.2009.2020989"},{"key":"ref395","article-title":"Energy-efficient data compression for GPU memory systems","author":"pekhimenko","year":"2015","journal-title":"Proc ASPLOS"},{"key":"ref396","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2015.7056021"},{"key":"ref393","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2016.7446064"},{"key":"ref394","doi-asserted-by":"publisher","DOI":"10.1109\/LCA.2015.2430853"},{"key":"ref391","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA.2004.1310776"},{"key":"ref392","doi-asserted-by":"publisher","DOI":"10.1145\/378993.379235"}],"container-title":["IEEE Access"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/6287639\/9312710\/09530719.pdf?arnumber=9530719","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2021,12,17]],"date-time":"2021-12-17T19:55:31Z","timestamp":1639770931000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9530719\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021]]},"references-count":452,"URL":"https:\/\/doi.org\/10.1109\/access.2021.3110993","relation":{},"ISSN":["2169-3536"],"issn-type":[{"value":"2169-3536","type":"electronic"}],"subject":[],"published":{"date-parts":[[2021]]}}}