{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,24]],"date-time":"2026-02-24T16:08:55Z","timestamp":1771949335562,"version":"3.50.1"},"reference-count":70,"publisher":"IEEE","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2016,4]]},"DOI":"10.1109\/ispass.2016.7482073","type":"proceedings-article","created":{"date-parts":[[2016,6,2]],"date-time":"2016-06-02T17:06:06Z","timestamp":1464887166000},"page":"46-56","source":"Crossref","is-referenced-by-count":15,"title":["Analyzing the energy-efficiency of sparse matrix multiplication on heterogeneous systems: A comparative study of GPU, Xeon Phi and FPGA"],"prefix":"10.1109","author":[{"given":"Heiner","family":"Giefers","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Peter","family":"Staar","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Costas","family":"Bekas","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Christoph","family":"Hagleitner","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref70","doi-asserted-by":"publisher","DOI":"10.1109\/HPEC.2015.7322439"},{"key":"ref39","year":"2014","journal-title":"PCIe-385N Product Brief Nallatech"},{"key":"ref38","year":"2013","journal-title":"Tesla K20 GPU Accelerator BD-06455-001_v07 NVIDIA"},{"key":"ref33","doi-asserted-by":"crossref","DOI":"10.1201\/9781420041545","author":"oklobdzija","year":"2001","journal-title":"The Computer Engineering Handbook"},{"key":"ref32","article-title":"Power and Performance Benchmark Methodology","year":"2012","journal-title":"Standard Performance Evaluation Corporation (SPEC) SPEC Power and Performance Committee"},{"key":"ref31","year":"0","journal-title":"Watts up? and Watts up? PRO Operators Manual"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1145\/1840845.1840883"},{"key":"ref37","year":"2014","journal-title":"Intel Xeon Phi Coprocessor Datasheet Intel"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/LPE.1994.573184"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/4.535411"},{"key":"ref34","article-title":"Power-Aware Microarchitecture: Design and Modeling Challenges for Next-Generation Microprocessors","volume":"20","author":"brooks","year":"2000","journal-title":"IEEE Micro"},{"key":"ref60","article-title":"Floating-Point Megafunctions","year":"2013"},{"key":"ref62","doi-asserted-by":"publisher","DOI":"10.1109\/TCAD.2015.2513673"},{"key":"ref61","article-title":"Floating-Point Operator v7.1","year":"2015"},{"key":"ref63","year":"2015","journal-title":"SDAccel Development Environment User Guide"},{"key":"ref28","article-title":"NVML API REFERENCE MANUAL","year":"2012","journal-title":"Version 4 304 55 ed NVIDIA Corp"},{"key":"ref64","doi-asserted-by":"publisher","DOI":"10.1109\/ASAP.2015.7245733"},{"key":"ref27","article-title":"Towards a Universal FPGA Matrix-Vector Multiplication Architecture","author":"john","year":"2012","journal-title":"Field-Programmable Custom Computing Machines (FCCM)"},{"key":"ref65","doi-asserted-by":"publisher","DOI":"10.1109\/MC.2007.443"},{"key":"ref66","article-title":"Energy Aware Consolidation for Cloud Computing","author":"srikantaiah","year":"2008","journal-title":"HotPower"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1145\/2588768.2576783"},{"key":"ref67","doi-asserted-by":"publisher","DOI":"10.1145\/2532637"},{"key":"ref68","year":"2014","journal-title":"HP Moonshot System Family Guide Hewlett-Packard"},{"key":"ref69","doi-asserted-by":"publisher","DOI":"10.1109\/ISSCC.2015.7062933"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/IGCC.2013.6604520"},{"key":"ref1","year":"0","journal-title":"The Green500 List-June 2015"},{"key":"ref20","article-title":"CUS-PARSE Library: A Set of Basic Linear AlgebraSubroutines for Sparse Matrices","author":"naumov","year":"2010","journal-title":"GPU Technology Conference"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1145\/2464996.2465013"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1145\/2049662.2049663"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/FPT.2009.5377620"},{"key":"ref23","article-title":"Performance Evaluation of Sparse Matrix Multiplication Kernels on Intel Xeon Phi","author":"saule","year":"2013","journal-title":"Parallel Process Appl Math"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/FCCM.2011.60"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/MM.2010.36"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1137\/130930352"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1145\/1693453.1693471"},{"key":"ref59","article-title":"External Memory Interface Handbook","year":"2015"},{"key":"ref58","article-title":"IP Compiler for PCI Express","year":"2013"},{"key":"ref57","doi-asserted-by":"publisher","DOI":"10.1109\/ISPASS.2011.5762730"},{"key":"ref56","doi-asserted-by":"publisher","DOI":"10.1109\/HOTOS.1999.798385"},{"key":"ref55","article-title":"Stochastic Matrix-Function Estimators: Scalable Big-Data Kernels with High Performance","author":"staar","year":"2016","journal-title":"Parallel and Distributed Processing Symposium (IPDPS)"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.1109\/SC.2014.68"},{"key":"ref53","article-title":"SuiteSparse: A Suite of Sparse Matrix Software","author":"davis","year":"0"},{"key":"ref52","article-title":"SparseLib++ v1.5 - Sparse Matrix Class Library","author":"pozo","year":"0","journal-title":"Tech Rep 1996 reference Guide"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1147\/JRD.2014.2380198"},{"key":"ref11","article-title":"The Landscape of Parallel Computing Research: A View from Berkeley","author":"asanovic","year":"2006","journal-title":"EECS Department University of California Berkeley Tech Rep"},{"key":"ref40","year":"2013","journal-title":"Altera SDK for OpenCL Programming Guide 13th ed Altera Corp"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1145\/1735688.1735702"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/IISWC.2009.5306797"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1145\/2188286.2188341"},{"key":"ref15","doi-asserted-by":"crossref","DOI":"10.7873\/DATE2014.205","article-title":"A unified methodology for a fast benchmarking of parallel architecture","author":"guerre","year":"2014","journal-title":"Design Automation and Test in Europe (DATE)"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/ASAP.2014.6868642"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPSW.2014.162"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1145\/2145816.2145819"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/ISPASS.2010.5452013"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/ISVLSI.2010.84"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1145\/2145694.2145704"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/TC.2009.179"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1145\/1950413.1950439"},{"key":"ref8","article-title":"SDA: Software-Defined Accelerator for Large-Scale DNN Systems","author":"ouyang","year":"0","journal-title":"HotChips26 2014"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA.2014.6853195"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1145\/2304576.2304624"},{"key":"ref9","article-title":"Intel Xeon+FPGA Platform for the Data Center","author":"gupta","year":"2014","journal-title":"Field Programmable Logic and Applications (FPL) Workshop on Reconfigurable Computing for the Masses"},{"key":"ref46","article-title":"Automatic Performance Tuning of Sparse Matrix Kernels","author":"vuduc","year":"2004","journal-title":"University of California at Berkeley CA USA"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1007\/s11227-008-0251-8"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1145\/2555243.2555255"},{"key":"ref47","article-title":"SPARSKIT: a basic tool kit for sparse matrix computations","author":"saad","year":"1994","journal-title":"Tech Rep"},{"key":"ref42","year":"2014","journal-title":"Altera Stratix V Device Handbook"},{"key":"ref41","year":"2014","journal-title":"Intel Xeon Processor E5-2600 v2 Product Brief Intel"},{"key":"ref44","article-title":"A Streaming Supercomputer. Stanford Computer Systems Laboratory White Paper","author":"dally","year":"2001"},{"key":"ref43","article-title":"NVIDIA CUDA Sparse Matrix library","year":"0"}],"event":{"name":"2016 IEEE International Symposium on Performance Analysis of Systems and Software (ISPASS)","location":"Uppsala, Sweden","start":{"date-parts":[[2016,4,17]]},"end":{"date-parts":[[2016,4,19]]}},"container-title":["2016 IEEE International Symposium on Performance Analysis of Systems and Software (ISPASS)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/7480598\/7482062\/07482073.pdf?arnumber=7482073","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,6,17]],"date-time":"2024-06-17T09:15:51Z","timestamp":1718615751000},"score":1,"resource":{"primary":{"URL":"http:\/\/ieeexplore.ieee.org\/document\/7482073\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2016,4]]},"references-count":70,"URL":"https:\/\/doi.org\/10.1109\/ispass.2016.7482073","relation":{},"subject":[],"published":{"date-parts":[[2016,4]]}}}