{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,10,29]],"date-time":"2024-10-29T11:35:44Z","timestamp":1730201744268,"version":"3.28.0"},"reference-count":27,"publisher":"IEEE","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2018,9]]},"DOI":"10.1109\/cahpc.2018.8645938","type":"proceedings-article","created":{"date-parts":[[2019,2,21]],"date-time":"2019-02-21T23:19:26Z","timestamp":1550791166000},"page":"233-241","source":"Crossref","is-referenced-by-count":6,"title":["Multicore Performance Engineering of Sparse Triangular Solves Using a Modified Roofline Model"],"prefix":"10.1109","author":[{"given":"Markus","family":"Wittmann","sequence":"first","affiliation":[]},{"given":"Georg","family":"Hager","sequence":"additional","affiliation":[]},{"given":"Radim","family":"Janalik","sequence":"additional","affiliation":[]},{"given":"Martin","family":"Lanser","sequence":"additional","affiliation":[]},{"given":"Axel","family":"Klawonn","sequence":"additional","affiliation":[]},{"given":"Oliver","family":"Rheinbach","sequence":"additional","affiliation":[]},{"given":"Olaf","family":"Schenk","sequence":"additional","affiliation":[]},{"given":"Gerhard","family":"Wellein","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1137\/140997907"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-22997-3_4"},{"key":"ref12","first-page":"417","article-title":"Performance engineering of the Kernel Polynomial Method on large-scale CPU-GPU systems","author":"kreutzer","year":"2015","journal-title":"Proc IPDPS 2015"},{"key":"ref13","first-page":"302","volume":"31","author":"li","year":"2005","journal-title":"An Overview of SuperLU Algorithms Implementation and User Interface"},{"key":"ref14","first-page":"287","article-title":"Evaluation of sparse LU factorization and triangular solution on multicore platforms","year":"2008","journal-title":"VECPAR 2008"},{"key":"ref15","doi-asserted-by":"crossref","first-page":"617","DOI":"10.1007\/978-3-319-43659-3_45","article-title":"A synchronization-free algorithm for parallel sparse triangular solves","author":"liu","year":"2016","journal-title":"Euro-Par 2016 Parallel Processing"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1145\/2063384.2063387"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/HPCS.2017.102"},{"key":"ref18","first-page":"124","article-title":"Sparsifying synchronization for high-performance shared-memory sparse triangular solver","author":"park","year":"2014","journal-title":"The Proceedings of ISCSLP 2014"},{"journal-title":"Scalable Parallel Sparse LU Factorization Methods on Shared Memory Multiprocessors","year":"2000","author":"schenk","key":"ref19"},{"key":"ref4","doi-asserted-by":"crossref","first-page":"334","DOI":"10.1016\/0743-7315(88)90002-0","article-title":"Estimating interlock and improving balance for pipelined architectures","volume":"5","author":"callahan","year":"1988","journal-title":"J Parallel Distr Com"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1016\/S0045-7825(99)00242-X"},{"journal-title":"Auto-tuning Performance on Multicore Computers","year":"2008","author":"williams","key":"ref27"},{"key":"ref6","first-page":"233","article-title":"Towards realistic performance bounds for implicit CFD codes","author":"gropp","year":"1999","journal-title":"Proceedings of Parallel CFD'99"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1137\/S0895479894246905"},{"key":"ref8","first-page":"289","author":"heath","year":"1999","journal-title":"Performance of Parallel Sparse Triangular Solution"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1002\/cpe.3180"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1016\/j.parco.2005.07.004"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1016\/0167-8191(89)90100-2"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1137\/S0895479899358194"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1016\/j.future.2003.07.011"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1145\/2751205.2751240"},{"journal-title":"Scientific Supercomputing Architecture and Use of Shared and Distributed Memory Parallel Computers","year":"2000","author":"sch\u00f6nauer","key":"ref21"},{"journal-title":"Introducing a Performance Model for Bandwidth-Limited Loop Kernels","year":"2010","author":"treibig","key":"ref24"},{"key":"ref23","doi-asserted-by":"crossref","DOI":"10.1007\/b137868","author":"toselli","year":"2005","journal-title":"Domain Decomposition Methods-Algorithms and Theory"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1145\/1498765.1498785"},{"key":"ref25","article-title":"Automatic performance tuning and analysis of sparse triangular solve","author":"vuduc","year":"2002","journal-title":"ICS"}],"event":{"name":"2018 30th International Symposium on Computer Architecture and High Performance Computing (SBAC-PAD)","start":{"date-parts":[[2018,9,24]]},"location":"Lyon, France","end":{"date-parts":[[2018,9,27]]}},"container-title":["2018 30th International Symposium on Computer Architecture and High Performance Computing (SBAC-PAD)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/8638685\/8645847\/08645938.pdf?arnumber=8645938","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,1,27]],"date-time":"2022-01-27T08:08:01Z","timestamp":1643270881000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/8645938\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018,9]]},"references-count":27,"URL":"https:\/\/doi.org\/10.1109\/cahpc.2018.8645938","relation":{},"subject":[],"published":{"date-parts":[[2018,9]]}}}