{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,22]],"date-time":"2025-10-22T05:21:28Z","timestamp":1761110488707,"version":"3.37.3"},"reference-count":27,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"4","license":[{"start":{"date-parts":[[2020,4,1]],"date-time":"2020-04-01T00:00:00Z","timestamp":1585699200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/legalcode"}],"funder":[{"DOI":"10.13039\/100006602","name":"Air Force Research Laboratory","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100006602","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000185","name":"Defense Advanced Research Projects Agency","doi-asserted-by":"publisher","award":["FA8650-18-2-7864"],"award-info":[{"award-number":["FA8650-18-2-7864"]}],"id":[{"id":"10.13039\/100000185","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE J. Solid-State Circuits"],"published-print":{"date-parts":[[2020,4]]},"DOI":"10.1109\/jssc.2019.2960480","type":"journal-article","created":{"date-parts":[[2020,1,1]],"date-time":"2020-01-01T21:05:44Z","timestamp":1577912744000},"page":"933-944","source":"Crossref","is-referenced-by-count":15,"title":["A 7.3 M Output Non-Zeros\/J, 11.7 M Output Non-Zeros\/GB Reconfigurable Sparse Matrix\u2013Matrix Multiplication Accelerator"],"prefix":"10.1109","volume":"55","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-3052-6890","authenticated-orcid":false,"given":"Dong-Hyeon","family":"Park","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1564-7443","authenticated-orcid":false,"given":"Subhankar","family":"Pal","sequence":"additional","affiliation":[]},{"given":"Siying","family":"Feng","sequence":"additional","affiliation":[]},{"given":"Paul","family":"Gao","sequence":"additional","affiliation":[]},{"given":"Jielun","family":"Tan","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1761-1898","authenticated-orcid":false,"given":"Austin","family":"Rovinski","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5796-8862","authenticated-orcid":false,"given":"Shaolin","family":"Xie","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1689-6000","authenticated-orcid":false,"given":"Chun","family":"Zhao","sequence":"additional","affiliation":[]},{"given":"Aporva","family":"Amarnath","sequence":"additional","affiliation":[]},{"given":"Timothy","family":"Wesley","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5092-9094","authenticated-orcid":false,"given":"Jonathan","family":"Beaumont","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4168-6446","authenticated-orcid":false,"given":"Kuan-Yu","family":"Chen","sequence":"additional","affiliation":[]},{"given":"Chaitali","family":"Chakrabarti","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4074-6347","authenticated-orcid":false,"given":"Michael Bedford","family":"Taylor","sequence":"additional","affiliation":[]},{"given":"Trevor","family":"Mudge","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6744-7075","authenticated-orcid":false,"given":"David","family":"Blaauw","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6658-5502","authenticated-orcid":false,"given":"Hun-Seok","family":"Kim","sequence":"additional","affiliation":[]},{"given":"Ronald G.","family":"Dreslinski","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref10","first-page":"192","author":"hapla","year":"2013","journal-title":"Use of Direct Solvers in TFETI Massively Parallel Implementation"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1137\/1.9780898718003"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1137\/110848244"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/ICPP.2008.45"},{"key":"ref14","first-page":"559","article-title":"Performance evaluation of sparse matrix multiplication kernels on Intel Xeon Phi","author":"saule","year":"2013","journal-title":"Proc Int Conf Parallel Process Appl Math"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/ISPA.2012.99"},{"key":"ref16","article-title":"Accelerating the ANSYS direct sparse solver with GPUs","author":"krawezik","year":"2009","journal-title":"Proc Symp Appl Acc High-Perf Comput (SAAHPC)"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1145\/2699470"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/FPL.2011.62"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1002\/cta.796"},{"key":"ref4","article-title":"The combinatorial BLAS: Design, implementation, and applications","author":"bulu\u00e7","year":"0","journal-title":"Int J High Perform Comput Appl"},{"key":"ref27","first-page":"45","article-title":"Introducing the graph 500","volume":"19","author":"murphy","year":"2010","journal-title":"Cray User Group"},{"key":"ref3","first-page":"260","article-title":"High-performance graph algorithms from parallel sparse matrices","author":"gilbert","year":"2006","journal-title":"Proc International Workshop on Parallel Computing"},{"article-title":"Graph clustering by flow simulation","year":"2000","author":"van dongen","key":"ref6"},{"article-title":"An interactive system for combinatorial scientific computing with an emphasis on programmer productivity","year":"2007","author":"shah","key":"ref5"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1145\/1137856.1137866"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPSW.2015.75"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/MCSE.2008.45"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1016\/j.tcs.2005.11.008"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2018.00067"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/ISPASS.2016.7482073"},{"key":"ref22","first-page":"39","article-title":"2.9 TOPS\/W reconfigurable dense\/sparse matrix&#x2013;multiply accelerator with unified INT8\/INTI6\/FP16 datapath in 14 NM tri&#x2013;gate CMOS","author":"anders","year":"2018","journal-title":"Proc IEEE Symp VLSI Circuits"},{"key":"ref21","first-page":"1","article-title":"A 190 GFLOPS\/W DSP for energy-efficient sparse-BLAS in embedded IoT","author":"dorrance","year":"2016","journal-title":"Proc IEEE Symp VLSI Circuits (VLSI)"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1007\/s10588-005-5381-4"},{"key":"ref23","first-page":"478","article-title":"A 4.5 Tb\/s 3.4Tb\/s\/W \n$64\\times64$\n switch fabric with self-updating least-recently-granted priority and quality-of-service arbitration in 45 nm CMOS","author":"satpathy","year":"2012","journal-title":"IEEE Int Solid-State Circuits Conf (ISSCC) Dig Tech Papers"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1145\/2611758"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1145\/2588555.2610518"}],"container-title":["IEEE Journal of Solid-State Circuits"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/4\/9048008\/08947989.pdf?arnumber=8947989","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,1,27]],"date-time":"2022-01-27T20:15:32Z","timestamp":1643314532000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/8947989\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020,4]]},"references-count":27,"journal-issue":{"issue":"4"},"URL":"https:\/\/doi.org\/10.1109\/jssc.2019.2960480","relation":{},"ISSN":["0018-9200","1558-173X"],"issn-type":[{"type":"print","value":"0018-9200"},{"type":"electronic","value":"1558-173X"}],"subject":[],"published":{"date-parts":[[2020,4]]}}}