{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,5,11]],"date-time":"2025-05-11T20:01:15Z","timestamp":1746993675288,"version":"3.37.3"},"reference-count":45,"publisher":"Springer Science and Business Media LLC","issue":"2","license":[{"start":{"date-parts":[[2018,9,1]],"date-time":"2018-09-01T00:00:00Z","timestamp":1535760000000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2018,9,1]],"date-time":"2018-09-01T00:00:00Z","timestamp":1535760000000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"funder":[{"DOI":"10.13039\/501100001711","name":"Schweizerischer Nationalfonds zur F\u00f6rderung der Wissenschaftlichen Forschung","doi-asserted-by":"publisher","award":["407540_167186"],"award-info":[{"award-number":["407540_167186"]}],"id":[{"id":"10.13039\/501100001711","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100006228","name":"Oak Ridge National Laboratory","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100006228","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100007065","name":"Nvidia","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100007065","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["J Sci Comput"],"published-print":{"date-parts":[[2019,2]]},"DOI":"10.1007\/s10915-018-0809-4","type":"journal-article","created":{"date-parts":[[2018,9,1]],"date-time":"2018-09-01T11:16:54Z","timestamp":1535800614000},"page":"1174-1206","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":5,"title":["Algorithmic Patterns for $$\\mathcal {H}$$-Matrices on Many-Core Processors"],"prefix":"10.1007","volume":"78","author":[{"given":"Peter","family":"Zaspel","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2018,9,1]]},"reference":[{"key":"809_CR1","doi-asserted-by":"crossref","unstructured":"Abdelfattah, A., Haidar, A., Tomov, S., Dongarra, J.: Novel HPC techniques to batch execution of many variable size BLAS computations on GPUs. In: Proceedings of the International Conference on Supercomputing, ICS \u201917, pp. 5:1\u20135:10. ACM, New York (2017)","DOI":"10.1145\/3079079.3079103"},{"issue":"1","key":"809_CR2","doi-asserted-by":"publisher","first-page":"C66","DOI":"10.1137\/130915662","volume":"36","author":"E Agullo","year":"2014","unstructured":"Agullo, E., Bramas, B., Coulaud, O., Darve, E., Messner, M., Takahashi, T.: Task-based FMM for multicore architectures. SIAM J. Sci. Comput. 36(1), C66\u2013C93 (2014)","journal-title":"SIAM J. Sci. Comput."},{"key":"809_CR3","unstructured":"Bebendorf, M.: AHMED Another software library on hierarchical matrices for elliptic differential equations \n                    https:\/\/github.com\/xantares\/ahmed\n                    \n                  . Accessed 31 Aug 2018"},{"key":"809_CR4","volume-title":"Hierarchical Matrices\u2014A Means to Efficiently Solve Elliptic Boundary Value Problems, Lecture Notes in Computational Science and Engineering","author":"M Bebendorf","year":"2008","unstructured":"Bebendorf, M.: Hierarchical Matrices\u2014A Means to Efficiently Solve Elliptic Boundary Value Problems, Lecture Notes in Computational Science and Engineering, vol. 63. Springer, Berlin (2008)"},{"issue":"3","key":"809_CR5","doi-asserted-by":"publisher","first-page":"331","DOI":"10.1216\/JIE-2009-21-3-331","volume":"21","author":"M Bebendorf","year":"2009","unstructured":"Bebendorf, M., Kunis, S.: Recompression techniques for adaptive cross approximation. J. Integr. Equ. Appl. 21(3), 331\u2013357 (2009)","journal-title":"J. Integr. Equ. Appl."},{"issue":"1","key":"809_CR6","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1007\/s00607-002-1469-6","volume":"70","author":"M Bebendorf","year":"2003","unstructured":"Bebendorf, M., Rjasanow, S.: Adaptive low-rank approximation of collocation matrices. Computing 70(1), 1\u201324 (2003)","journal-title":"Computing"},{"key":"809_CR7","first-page":"359","volume":"2","author":"N Bell","year":"2011","unstructured":"Bell, N., Hoberock, J.: Thrust: a productivity-oriented library for CUDA. GPU Comput. Gems Jade Ed. 2, 359\u2013371 (2011)","journal-title":"GPU Comput. Gems Jade Ed."},{"issue":"06","key":"809_CR8","doi-asserted-by":"publisher","first-page":"517","DOI":"10.1142\/S0218195999000303","volume":"09","author":"M Bern","year":"1999","unstructured":"Bern, M., Eppstein, D., Teng, S.H.: Parallel construction of quadtrees and quality triangulations. Int. J. Comput. Geom. Appl. 09(06), 517\u2013532 (1999)","journal-title":"Int. J. Comput. Geom. Appl."},{"issue":"3","key":"809_CR9","doi-asserted-by":"publisher","first-page":"173","DOI":"10.1007\/s00791-004-0135-2","volume":"7","author":"S B\u00f6rm","year":"2004","unstructured":"B\u00f6rm, S.: $$\\cal{H}^2$$-matrices\u2014multilevel methods for the approximation of integral operators. Comput. Vis. Sci. 7(3), 173\u2013181 (2004)","journal-title":"Comput. Vis. Sci."},{"key":"809_CR10","unstructured":"B\u00f6rm, S.: H2Lib, A Library for Hierarchical Matrices (2017). \n                    http:\/\/www.h2lib.org\n                    \n                  . Accessed 31 Aug 2018"},{"key":"809_CR11","unstructured":"B\u00f6rm, S., Christophersen, S.: Approximation of BEM Matrices Using GPGPUs. ArXiv e-prints (2015)"},{"issue":"5","key":"809_CR12","doi-asserted-by":"publisher","first-page":"405","DOI":"10.1016\/S0955-7997(02)00152-2","volume":"27","author":"S B\u00f6rm","year":"2003","unstructured":"B\u00f6rm, S., Grasedyck, L., Hackbusch, W.: Introduction to hierarchical matrices with applications. Eng. Anal. Bound. Elem. 27(5), 405\u2013422 (2003)","journal-title":"Eng. Anal. Bound. Elem."},{"key":"809_CR13","unstructured":"Boukaram, W., Ltaief, H., Litvinenko, A., Abdelfattah, A., Keyes, D.E.: Accelerating matrix\u2013vector multiplication on hierarchical matrices using graphical processing units"},{"key":"809_CR14","doi-asserted-by":"crossref","unstructured":"Boukaram, W.H., Turkiyyah, G., Ltaief, H., Keyes, D.E.: Batched QR and SVD Algorithms on GPUs with Applications in Hierarchical Matrix Compression. ArXiv e-prints (2017)","DOI":"10.1016\/j.parco.2017.09.001"},{"key":"809_CR15","unstructured":"Charara, A., Keyes, D., Ltaief, H.: Batched triangular dense linear algebra kernels for very small matrix sizes on GPUs. ACM Trans. Math. Softw. 9(4), Art. No 39 (2017)"},{"key":"809_CR16","doi-asserted-by":"publisher","DOI":"10.1142\/6437","volume-title":"Meshfree Approximation Methods with MATLAB","author":"GF Fasshauer","year":"2007","unstructured":"Fasshauer, G.F.: Meshfree Approximation Methods with MATLAB. World Scientific Publishing Co., Inc, River Edge (2007)"},{"key":"809_CR17","doi-asserted-by":"crossref","unstructured":"Garanzha, K., Pantaleoni, J., McAllister, D.: Simpler and faster HLBVH with work queues. In: Proceedings of the ACM SIGGRAPH Symposium on High Performance Graphics, HPG \u201911, pp. 59\u201364. ACM, New York (2011)","DOI":"10.1145\/2018323.2018333"},{"issue":"5","key":"809_CR18","doi-asserted-by":"publisher","first-page":"S358","DOI":"10.1137\/15M1010117","volume":"38","author":"P Ghysels","year":"2016","unstructured":"Ghysels, P., Li, X.S., Rouet, F., Williams, S., Napov, A.: An efficient multicore implementation of a novel HSS-structured multifrontal solver using randomized sampling. SIAM J. Sci. Comput. 38(5), S358 (2016)","journal-title":"SIAM J. Sci. Comput."},{"issue":"4","key":"809_CR19","doi-asserted-by":"publisher","first-page":"273","DOI":"10.1007\/s00791-008-0098-9","volume":"11","author":"L Grasedyck","year":"2008","unstructured":"Grasedyck, L., Kriemann, R., Le Borne, S.: Parallel black box-LU preconditioning for elliptic boundary value problems. Comput. Vis. Sci. 11(4), 273\u2013291 (2008)","journal-title":"Comput. Vis. Sci."},{"key":"809_CR20","doi-asserted-by":"publisher","first-page":"229","DOI":"10.1017\/S0962492900002725","volume":"6","author":"L Greengard","year":"1997","unstructured":"Greengard, L., Rokhlin, V.: A new version of the fast multipole method for the Laplace equation in three dimensions. Acta Numer. 6, 229\u2013269 (1997)","journal-title":"Acta Numer."},{"key":"809_CR21","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-662-47324-5","volume-title":"Hierarchical Matrices : Algorithms and Analysis, Springer Series in Computational Mathematics","author":"W Hackbusch","year":"2015","unstructured":"Hackbusch, W.: Hierarchical Matrices : Algorithms and Analysis, Springer Series in Computational Mathematics, vol. 49. Springer, Berlin (2015)"},{"issue":"1","key":"809_CR22","doi-asserted-by":"publisher","first-page":"71","DOI":"10.1007\/s10013-015-0168-5","volume":"44","author":"W Hackbusch","year":"2016","unstructured":"Hackbusch, W.: Survey on the technique of hierarchical matrices. Vietnam J. Math. 44(1), 71\u2013101 (2016)","journal-title":"Vietnam J. Math."},{"issue":"1\u20132","key":"809_CR23","doi-asserted-by":"publisher","first-page":"129","DOI":"10.1016\/S0168-9274(02)00121-6","volume":"43","author":"W Hackbusch","year":"2002","unstructured":"Hackbusch, W., B\u00f6rm, S.: $$\\cal{H}^2$$-matrix approximation of integral operators by interpolation. Appl. Numer. Math. 43(1\u20132), 129\u2013143 (2002)","journal-title":"Appl. Numer. Math."},{"key":"809_CR24","doi-asserted-by":"crossref","unstructured":"Hackbusch, W., Khoromskij, B., Sauter, S.A.: On $$\\cal{H}^2$$-matrices. In: Lectures on Applied Mathematics: Proceedings of the Symposium Organized by the Sonderforschungsbereich 438 on the Occasion of Karl\u2013Heinz Hoffmanns 60th Birthday, Munich, 30 June\u20131 July 1999, p.\u00a09. Springer (2000)","DOI":"10.1007\/978-3-642-59709-1_2"},{"issue":"4","key":"809_CR25","doi-asserted-by":"publisher","first-page":"463","DOI":"10.1007\/BF01396324","volume":"54","author":"W Hackbusch","year":"1989","unstructured":"Hackbusch, W., Nowak, Z.P.: On the fast matrix multiplication in the boundary element method by panel clustering. Numer. Math. 54(4), 463\u2013491 (1989)","journal-title":"Numer. Math."},{"issue":"3","key":"809_CR26","doi-asserted-by":"publisher","first-page":"273","DOI":"10.1007\/s00607-004-0102-2","volume":"74","author":"R Kriemann","year":"2005","unstructured":"Kriemann, R.: Parallel $$\\cal{H}$$-matrix arithmetics on shared memory systems. Computing 74(3), 273\u2013297 (2005)","journal-title":"Computing"},{"issue":"3","key":"809_CR27","doi-asserted-by":"publisher","first-page":"105","DOI":"10.1007\/s00791-014-0226-7","volume":"16","author":"R Kriemann","year":"2013","unstructured":"Kriemann, R.: $$\\cal{H}$$-LU factorization on many-core systems. Comput. Vis. Sci. 16(3), 105\u2013117 (2013)","journal-title":"Comput. Vis. Sci."},{"key":"809_CR28","unstructured":"Kriemann, R.: $$\\cal{H}$$-$$\\text{Lib}^{\\text{ pro }}$$ (website) (2017). \n                    http:\/\/www.hlibpro.com\n                    \n                  . Accessed 31 Aug 2018"},{"issue":"2","key":"809_CR29","doi-asserted-by":"publisher","first-page":"375","DOI":"10.1111\/j.1467-8659.2009.01377.x","volume":"28","author":"C Lauterbach","year":"2009","unstructured":"Lauterbach, C., Garland, M., Sengupta, S., Luebke, D., Manocha, D.: Fast BVH construction on GPUs. Comput. Graph. Forum 28(2), 375\u2013384 (2009)","journal-title":"Comput. Graph. Forum"},{"key":"809_CR30","doi-asserted-by":"publisher","first-page":"S720","DOI":"10.1137\/15M1026468","volume":"38","author":"WB March","year":"2016","unstructured":"March, W.B., Xiao, B., Yu, C., Biros, G.: ASKIT: an efficient, parallel library for high-dimensional kernel summations. SIAM J. Sci. Comput. 38, S720\u2013S749 (2016)","journal-title":"SIAM J. Sci. Comput."},{"key":"809_CR31","doi-asserted-by":"crossref","unstructured":"Merrill, D., Garland, M., Grimshaw, A.: Scalable GPU graph traversal. In: Proceedings of the 17th ACM SIGPLAN Symposium on Principles and Practice of Parallel Programming, PPoPP \u201912, pp. 117\u2013128. ACM, New York (2012)","DOI":"10.1145\/2145816.2145832"},{"key":"809_CR32","unstructured":"Morton, G.: A computer oriented geodetic data base and a new technique in file sequencing. Technical Report Ottawa, Ontario, Canada (1966)"},{"key":"809_CR33","unstructured":"Poulson, J.: DMHM\u2014Distributed-Memory Hierarchical Matrices. \n                    https:\/\/bitbucket.org\/poulson\/dmhm\n                    \n                  . Accessed 31 Aug 2018"},{"key":"809_CR34","doi-asserted-by":"publisher","DOI":"10.7551\/mitpress\/3206.001.0001","volume-title":"Gaussian Processes for Machine Learning (Adaptive Computation and Machine Learning)","author":"C Rasmussen","year":"2005","unstructured":"Rasmussen, C., Williams, C.: Gaussian Processes for Machine Learning (Adaptive Computation and Machine Learning). MIT Press, Cambridge (2005)"},{"key":"809_CR35","doi-asserted-by":"crossref","unstructured":"Rouet, F.H., Li, X.S., Ghysels, P., Napov, A.: A distributed-memory package for dense hierarchically semi-separable matrix computations using randomization. ACM Trans. Math. Softw. 42(4), Art. No 27 (2016)","DOI":"10.1145\/2930660"},{"key":"809_CR36","first-page":"255","volume-title":"Algorithms to Solve Hierarchically Semi-separable Systems","author":"Z Sheng","year":"2007","unstructured":"Sheng, Z., Dewilde, P., Chandrasekaran, S.: Algorithms to Solve Hierarchically Semi-separable Systems, pp. 255\u2013294. Birkh\u00e4user Basel, Basel (2007)"},{"key":"809_CR37","doi-asserted-by":"crossref","unstructured":"Szuppe, J.: Boost.Compute: a parallel computing library for C++ based on OpenCL. In: Proceedings of the 4th International Workshop on OpenCL, IWOCL \u201916, pp. 15:1\u201315:39. ACM, New York (2016)","DOI":"10.1145\/2909437.2909454"},{"issue":"8","key":"809_CR38","doi-asserted-by":"publisher","first-page":"103","DOI":"10.1145\/79173.79181","volume":"33","author":"LG Valiant","year":"1990","unstructured":"Valiant, L.G.: A bridging model for parallel computation. Commun. ACM 33(8), 103\u2013111 (1990)","journal-title":"Commun. ACM"},{"key":"809_CR39","first-page":"105","volume-title":"Kernel Ridge Regression","author":"V Vovk","year":"2013","unstructured":"Vovk, V.: Kernel Ridge Regression, pp. 105\u2013116. Springer, Berlin (2013)"},{"key":"809_CR40","doi-asserted-by":"publisher","DOI":"10.1017\/CBO9780511617539","volume-title":"Scattered Data Approximation","author":"H Wendland","year":"2004","unstructured":"Wendland, H.: Scattered Data Approximation. Cambridge University Press, Cambridge (2004)"},{"key":"809_CR41","unstructured":"Yalamanchili, P., Arshad, U., Mohammed, Z., Garigipati, P., Entschev, P., Kloppenborg, B., Malcolm, J., Melonakos, J.: ArrayFire\u2014a high performance software library for parallel computing with an easy-to-use API (2015). \n                    https:\/\/github.com\/arrayfire\/arrayfire\n                    \n                  . Accessed 31 Aug 2018"},{"key":"809_CR42","doi-asserted-by":"publisher","first-page":"17","DOI":"10.1016\/j.compfluid.2012.08.002","volume":"80","author":"R Yokota","year":"2013","unstructured":"Yokota, R., Barba, L.: FMM-based vortex method for simulation of isotropic turbulence on GPUs, compared with a spectral method. Comput. Fluids 80, 17\u201327 (2013)","journal-title":"Comput. Fluids"},{"issue":"25","key":"809_CR43","doi-asserted-by":"publisher","first-page":"1793","DOI":"10.1016\/j.cma.2010.02.008","volume":"199","author":"R Yokota","year":"2010","unstructured":"Yokota, R., Barba, L., Knepley, M.G.: PetRBF: a parallel O(N) algorithm for radial basis function interpolation with Gaussians. Comput. Methods Appl. Mech. Eng. 199(25), 1793\u20131804 (2010)","journal-title":"Comput. Methods Appl. Mech. Eng."},{"key":"809_CR44","unstructured":"Zaspel, P.: MPLA\u2014Massively Parallel Linear Algebra (2017). \n                    https:\/\/github.com\/zaspel\/MPLA\n                    \n                  . Accessed 31 Aug 2018"},{"key":"809_CR45","unstructured":"Zaspel, P.: hmglib\u2014Hierarchical Matrices on GPU(s) Library (2017). \n                    https:\/\/github.com\/zaspel\/hmglib\n                    \n                  . Accessed 31 Aug 2018"}],"container-title":["Journal of Scientific Computing"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s10915-018-0809-4\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10915-018-0809-4.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10915-018-0809-4.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2020,5,17]],"date-time":"2020-05-17T09:36:32Z","timestamp":1589708192000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s10915-018-0809-4"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018,9,1]]},"references-count":45,"journal-issue":{"issue":"2","published-print":{"date-parts":[[2019,2]]}},"alternative-id":["809"],"URL":"https:\/\/doi.org\/10.1007\/s10915-018-0809-4","relation":{},"ISSN":["0885-7474","1573-7691"],"issn-type":[{"type":"print","value":"0885-7474"},{"type":"electronic","value":"1573-7691"}],"subject":[],"published":{"date-parts":[[2018,9,1]]},"assertion":[{"value":"31 August 2017","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"26 July 2018","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"13 August 2018","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"1 September 2018","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}