{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2023,1,12]],"date-time":"2023-01-12T02:26:28Z","timestamp":1673490388277},"reference-count":48,"publisher":"Springer Science and Business Media LLC","issue":"3","license":[{"start":{"date-parts":[[2016,7,27]],"date-time":"2016-07-27T00:00:00Z","timestamp":1469577600000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int J Parallel Prog"],"published-print":{"date-parts":[[2017,6]]},"DOI":"10.1007\/s10766-016-0448-z","type":"journal-article","created":{"date-parts":[[2016,7,27]],"date-time":"2016-07-27T17:43:02Z","timestamp":1469641382000},"page":"657-679","update-policy":"http:\/\/dx.doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":4,"title":["Programming GPGPU Graph Applications with Linear Algebra Building Blocks"],"prefix":"10.1007","volume":"45","author":[{"given":"Shuai","family":"Che","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Bradford M.","family":"Beckmann","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Steven K.","family":"Reinhardt","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2016,7,27]]},"reference":[{"key":"448_CR1","doi-asserted-by":"crossref","unstructured":"Burtscher, M., Nasre, R., Pingali, K.: A quantitative study of irregular programs on GPUs. In: Proceedings of the 2012 IEEE International Symposium on Workload Characterization, pp. 141\u2013151 (2012)","DOI":"10.1109\/IISWC.2012.6402918"},{"key":"448_CR2","doi-asserted-by":"crossref","unstructured":"Che, S., Beckmann, B., Reinhardt, S., Skadron, K.: Pannotia: understanding irregular GPGPU graph algorithms. In: Proceedings of the IEEE International Symposium on Workload Characterization (2013)","DOI":"10.1109\/IISWC.2013.6704684"},{"issue":"4","key":"448_CR3","doi-asserted-by":"crossref","first-page":"496","DOI":"10.1177\/1094342011403516","volume":"25","author":"A Buluc","year":"2011","unstructured":"Buluc, A., Gilbert, J.R.: The combinatorial blas: design, implementation, and applications. Int. J. High Perform. Comput. Appl. 25(4), 496\u2013509 (2011)","journal-title":"Int. J. High Perform. Comput. Appl."},{"key":"448_CR4","doi-asserted-by":"crossref","DOI":"10.1137\/1.9780898719918","volume-title":"Graph Algorithms in the Language of Linear Algebra","author":"J Kepner","year":"2011","unstructured":"Kepner, J., Gilbert, J.: Graph Algorithms in the Language of Linear Algebra. Society for Industrial and Applied Mathematics, Philadelphia, PA (2011)"},{"key":"448_CR5","doi-asserted-by":"crossref","unstructured":"Mattson, T., Bader, D.A., Berry, J.W., Bulu, A., Dongarra, J., Faloutsos, C., Feo, J., Gilbert, J.R., Gonzalez, J., Hendrickson, B., Kepner, J., Leiserson, C.E., Lumsdaine, A., Padua, D.A., Poole, S., Reinhardt, S., Stonebraker, M., Wallach, S., Yoo, A.: Standards for graph algorithm primitives. In: Proceedings of IEEE High Performance Extreme Computing Conference (2013)","DOI":"10.1109\/HPEC.2013.6670338"},{"key":"448_CR6","unstructured":"Low, Y., Gonzalez, J., Kyrola, A., Bickson, D., Guestrin, C., Hellerstein, J.M.: GraphLab: a new parallel framework for machine learning. In: Proceedings of the Conference on Uncertainty in Artificial Intelligence (UAI) (2010)"},{"key":"448_CR7","unstructured":"Graphics Core Next (GCN). Web resource. http:\/\/www.amd.com\/us\/products\/technologies\/gcn\/Pages\/gcn-architecture.aspx"},{"key":"448_CR8","unstructured":"AMD Accelerated Parallel Processing: OpenCL Programming Guide. Web resource. http:\/\/developer.amd.com\/resources\/heterogeneous-computing\/opencl-zone\/"},{"key":"448_CR9","unstructured":"OpenCL. Web Resource. http:\/\/www.khronos.org\/opencl\/"},{"key":"448_CR10","doi-asserted-by":"crossref","unstructured":"Burtscher, M., Pingali, K.: An efficient cuda implementation of the tree-based Barnes Hut n-body algorithm. In: Wen-mei, W.H. (ed.) GPU Computing Gems Emerald Edition, pp. 75\u201392. Morgan Kaufmann, San Francisco, CA (2011)","DOI":"10.1016\/B978-0-12-384988-5.00006-1"},{"key":"448_CR11","doi-asserted-by":"crossref","unstructured":"Harish, P., Narayanan, P.: Accelerating large graph algorithms on the GPU using CUDA. In: Proceedings of 2007 International Conference on High Performance Computing (2007)","DOI":"10.1007\/978-3-540-77220-0_21"},{"key":"448_CR12","doi-asserted-by":"crossref","unstructured":"Merrill, D.G., Garland, M., Grimshaw, A.S.: Scalable GPU graph traversal. In: Proceedings of the 17th ACM SIGPLAN Symposium on Principles and Practice of Parallel Programming (2012)","DOI":"10.1145\/2145816.2145832"},{"key":"448_CR13","doi-asserted-by":"crossref","unstructured":"Vineet, V., Harish, P., Patidar, S., Narayanan,P.J.: Fast minimum spanning tree for large graphs on the GPU. In: Proceedings of the Conference on High Performance Graphics (2009)","DOI":"10.1145\/1572769.1572796"},{"key":"448_CR14","unstructured":"The 10th DIMACS Implementation Challenge Graph Partitioning and Graph Clustering. Web resource. http:\/\/www.cc.gatech.edu\/dimacs10\/"},{"key":"448_CR15","unstructured":"The 9th DIMACS Implementation Challenge Shortest Paths. Web resource. http:\/\/www.dis.uniroma1.it\/challenge9\/"},{"key":"448_CR16","unstructured":"METIS File Format. Web Resource. http:\/\/people.sc.fsu.edu\/~jburkardt\/data\/metis_graph\/metis_graph.html"},{"key":"448_CR17","unstructured":"Matrix Market Format. Web Resouce. http:\/\/math.nist.gov\/MatrixMarket\/formats.html"},{"key":"448_CR18","unstructured":"The University of Florida Sparse Matrix Collection. Web Resource. http:\/\/www.cise.ufl.edu\/research\/sparse\/matrices\/"},{"key":"448_CR19","unstructured":"GTGraph: A Suite of Synthetic Random Graph Generators. Web Resource. http:\/\/www.cse.psu.edu\/~madduri\/software\/GTgraph\/index.html"},{"key":"448_CR20","unstructured":"Bell, N., Garland, M.: Efficient sparse matrix-vector multiplication on CUDA. NVIDIA Technical Report NVR-2008-004, NVIDIA Corporation (2008)"},{"key":"448_CR21","doi-asserted-by":"crossref","unstructured":"Greathouse, J.L., Daga, M.: Efficient sparse matrix-vector multiplication on gpus using the CSR storage format. In: Proceedings of the ACM\/IEEE International Conference on High Performance Computing, Networking, Storage and Analysis (2014)","DOI":"10.1109\/SC.2014.68"},{"key":"448_CR22","doi-asserted-by":"crossref","unstructured":"Su, B., Keutzer, K.: clSpMV: a cross-platform OpenCL SpMV framework on GPUs. In: Proceedings of the International Conference on Supercomputing (2012)","DOI":"10.1145\/2304576.2304624"},{"key":"448_CR23","doi-asserted-by":"crossref","unstructured":"Yang, C., Wang, Y., Owens, J.D.: Fast sparse matrix and sparse vector multiplication algorithm on the gpu. In: Proceedings of Graph Algorithms Building Blocks (2015)","DOI":"10.1109\/IPDPSW.2015.77"},{"key":"448_CR24","unstructured":"Sengupta, S., Harris, M., Zhang, Y., Owens, J.D.: Scan primitives for GPU computing. In: Proceedings of Graphics Hardware (2007)"},{"key":"448_CR25","unstructured":"Bolt C++ Template Library. Advanced Micro Devices. https:\/\/github.com\/HSA-Libraries\/Bolt"},{"key":"448_CR26","unstructured":"The Thrust library. Web Resource. http:\/\/code.google.com\/p\/thrust\/"},{"key":"448_CR27","doi-asserted-by":"crossref","unstructured":"Malewicz, G., Austern, M.H., Bik, A.J.C, Dehnert, J.C., Horn, I., Leiser, N., Czajkowski, G.: Pregel: a system for large-scale graph processing. In: Proceedings of the 2010 ACM SIGMOD International Conference on Management of Data (2010)","DOI":"10.1145\/1807167.1807184"},{"key":"448_CR28","doi-asserted-by":"crossref","unstructured":"Fineman, J.T., Robinson, E.: Fundamental graph algorithms. In: Kepner, J., Gilbert, J. (eds.) Graph Algorithms in the Language of Linear Algebra. Society for Industrial and Applied Mathematics, Philadelphia, PA (2011)","DOI":"10.1137\/1.9780898719918.ch5"},{"key":"448_CR29","doi-asserted-by":"crossref","unstructured":"Davidson, A., Baxter, S., Garland, M., Owens, J.D.: Work-efficient parallel gpu methods for single-source shortest paths. In: Proceedings of the International Parallel and Distributed Processing Symposium (2014)","DOI":"10.1109\/IPDPS.2014.45"},{"key":"448_CR30","unstructured":"Cohen, J., Castonguay, P.: Efficient Graph Matching and Coloring on the Gpu. http:\/\/developer.download.nvidia.com\/GTC\/PDF\/GTC2012\/PresentationPDF\/S0332-GTC2012-Graph-Coloring-GPU.pdf"},{"key":"448_CR31","doi-asserted-by":"crossref","unstructured":"Luby, M.: A simple parallel algorithm for the maximal independent set problem. In: Proceedings of the 17th Symposium on Theory of Computing (1985)","DOI":"10.1145\/22145.22146"},{"key":"448_CR32","unstructured":"Buluc, A., Duriakova, E., Fox, A., Gilbert, J., Kamil, S., Lugowski, A., Oliker, L., Williams, S.: Parallel processing of filtered queries in attributed semantic graphs. In: Proceedings of the International Parallel and Distributed Processing Symposium (2013)"},{"key":"448_CR33","unstructured":"Maximal Independent Set. Presentation Slides. http:\/\/acts.nersc.gov\/events\/para06\/Shah.pdf"},{"issue":"5\u20136","key":"448_CR34","doi-asserted-by":"crossref","first-page":"241","DOI":"10.1016\/j.parco.2009.12.002","volume":"36","author":"A Buluc","year":"2010","unstructured":"Buluc, A., Gilbert, J.R., Budak, C.: Solving path problems on the gpu. Parallel Comput. 36(5\u20136), 241\u2013253 (2010)","journal-title":"Parallel Comput."},{"key":"448_CR35","unstructured":"Heterogeneous System Architecture (HSA). Web resource. http:\/\/hsafoundation.com\/"},{"key":"448_CR36","unstructured":"Jia, W., Shaw, K.A., Martonosi, M.: Starchart: hardware and software optimization using recursive partitioning regression trees. In: Proceedings of the International Conference on Parallel Architectures and Compilation (2013)"},{"key":"448_CR37","doi-asserted-by":"crossref","unstructured":"Che, S., Boyer, M., Meng, J., Tarjan, D., Sheaffer, J.W., Lee, S-H., Skadron K.: Rodinia: a benchmark suite for heterogeneous computing. In: Proceedings of the IEEE International Symposium on Workload Characterization (2009)","DOI":"10.1109\/IISWC.2009.5306797"},{"key":"448_CR38","unstructured":"Parboil Benchmark suite. Web Resource. http:\/\/impact.crhc.illinois.edu\/parboil.php"},{"key":"448_CR39","doi-asserted-by":"crossref","unstructured":"Danalis, A., Marin, G., McCurdy, C., Meredith, J.S., Roth, P.C., Spafford, K., Tipparaju, V. Vetter, J.S.: The scalable heterogeneous computing (SHOC) benchmark suite. In: Proceedings of Third Workshop on General-Purpose Computation on Graphics Processing Units (2010)","DOI":"10.1145\/1735688.1735702"},{"key":"448_CR40","unstructured":"Oliveira, V.M.A., Lotufo, R.A.: A study on connected components labeling algorithms using GPUs. In: Proceedings of the 23rd SIBGRAPI Conference on Graphics, Patterns and Images (2010)"},{"key":"448_CR41","doi-asserted-by":"crossref","unstructured":"Daga, M., Nutter, M.: Exploiting coarse-grained parallelism in B+ tree searches on an APU. In: SC Companion, pp. 240\u2013247 (2012)","DOI":"10.1109\/SC.Companion.2012.40"},{"key":"448_CR42","unstructured":"The Parallel Boost Graph Library. Web Resource. http:\/\/osl.iu.edu\/research\/pbgl\/"},{"key":"448_CR43","unstructured":"SNAP: Small-world Network Analysis and Partitioning. Web Resource. http:\/\/snap-graph.sourceforge.net\/"},{"key":"448_CR44","unstructured":"MultiThreaded Graph Library. Web Resource. https:\/\/software.sandia.gov\/trac\/mtgl"},{"key":"448_CR45","unstructured":"Kyrola, A., Blelloch, G., Guestrin, C.: GraphChi: large-scale graph computation on just a PC. In: Proceedings of the 10th USENIX Conference on Operating Systems Design and Implementation (2012)"},{"key":"448_CR46","doi-asserted-by":"crossref","unstructured":"Liu, W., Vinter, B.: An efficient gpu general sparse matrix\u2013matrix multiplication for irregular data. In: Proceedings of the 2014 IEEE 28th International Parallel and Distributed Processing Symposium (2014)","DOI":"10.1109\/IPDPS.2014.47"},{"key":"448_CR47","doi-asserted-by":"crossref","unstructured":"Azad, A., Bulu, A., Gilbert, J.R.: Parallel triangle counting and enumeration using matrix algebra. In: Proceedings of the IPDPSW, Workshop on Graph Algorithm Building Blocks (2015)","DOI":"10.1109\/IPDPSW.2015.75"},{"key":"448_CR48","unstructured":"Graph Analytics in GraphBLAS. Web resource. http:\/\/www.mit.edu\/~kepner\/Graphulo\/150301-GraphuloInGraphBLAS.pptx"}],"container-title":["International Journal of Parallel Programming"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10766-016-0448-z.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s10766-016-0448-z\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10766-016-0448-z","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10766-016-0448-z.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,9,11]],"date-time":"2019-09-11T20:21:00Z","timestamp":1568233260000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s10766-016-0448-z"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2016,7,27]]},"references-count":48,"journal-issue":{"issue":"3","published-print":{"date-parts":[[2017,6]]}},"alternative-id":["448"],"URL":"https:\/\/doi.org\/10.1007\/s10766-016-0448-z","relation":{},"ISSN":["0885-7458","1573-7640"],"issn-type":[{"value":"0885-7458","type":"print"},{"value":"1573-7640","type":"electronic"}],"subject":[],"published":{"date-parts":[[2016,7,27]]}}}