{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,29]],"date-time":"2026-05-29T09:20:56Z","timestamp":1780046456775,"version":"3.53.1"},"publisher-location":"New York, NY, USA","reference-count":27,"publisher":"ACM","license":[{"start":{"date-parts":[[2025,11,15]],"date-time":"2025-11-15T00:00:00Z","timestamp":1763164800000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/100006234","name":"Sandia National Laboratories","doi-asserted-by":"publisher","award":["DE-NA-0003525"],"award-info":[{"award-number":["DE-NA-0003525"]}],"id":[{"id":"10.13039\/100006234","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,11,16]]},"DOI":"10.1145\/3731599.3767544","type":"proceedings-article","created":{"date-parts":[[2025,11,7]],"date-time":"2025-11-07T16:18:44Z","timestamp":1762532324000},"page":"1808-1815","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":1,"title":["A High Performance GPU CountSketch Implementation and Its Application to Multisketching and Least Squares Problems"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0007-5527-9263","authenticated-orcid":false,"given":"Andrew James","family":"Higgins","sequence":"first","affiliation":[{"name":"Sandia National Laboratories, Albuquerque, New Mexico, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2216-5546","authenticated-orcid":false,"given":"Erik","family":"Boman","sequence":"additional","affiliation":[{"name":"Sandia National Laboratories, Albuquerque, New Mexico, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6196-2508","authenticated-orcid":false,"given":"Ichitaro","family":"Yamazaki","sequence":"additional","affiliation":[{"name":"Sandia National Laboratories, Albuquerque, New Mexico, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2025,11,15]]},"reference":[{"key":"e_1_3_3_1_2_2","doi-asserted-by":"crossref","unstructured":"Nir Ailon and Edo Liberty. 2009. Fast Dimension Reduction Using Rademacher Series on Dual BCH Codes. Discrete Comput. Geom. 42 4 (Dec. 2009) 615\u2013630.","DOI":"10.1007\/s00454-008-9110-x"},{"key":"e_1_3_3_1_3_2","doi-asserted-by":"publisher","unstructured":"Haim Avron Petar Maymounkov and Sivan Toledo. 2010. Blendenpik: Supercharging LAPACK\u2019s Least-Squares Solver. SIAM Journal on Scientific Computing 32 3 (2010) 1217\u20131236. 10.1137\/090767911","DOI":"10.1137\/090767911"},{"key":"e_1_3_3_1_4_2","unstructured":"Oleg Balabanov. 2022. Randomized Cholesky QR factorizations. arXiv:https:\/\/arXiv.org\/abs\/2210.09953."},{"key":"e_1_3_3_1_5_2","series-title":"Proceedings of Machine Learning Research","first-page":"1564","volume-title":"Proceedings of the 40th International Conference on Machine Learning","volume":"202","author":"Balabanov Oleg","year":"2023","unstructured":"Oleg Balabanov, Matthias Beaup\u00e8re, Laura Grigori, and Victor Lederer. 2023. Block Subsampled Randomized Hadamard Transform for Nystr\u00f6m Approximation on Distributed Architectures. In Proceedings of the 40th International Conference on Machine Learning(Proceedings of Machine Learning Research, Vol.\u00a0202). PMLR, 1564\u20131576."},{"key":"e_1_3_3_1_6_2","unstructured":"Oleg Balabanov and Laura Grigori. 2021. Randomized block Gram\u2013Schmidt process for solution of linear systems and eigenvalue problems. arXiv:https:\/\/arXiv.org\/abs\/2111.14641."},{"key":"e_1_3_3_1_7_2","doi-asserted-by":"crossref","unstructured":"Oleg Balabanov and Anthony Nouy. 2019. Randomized linear algebra for model reduction. Part I: Galerkin methods and error estimation. Advances in Computational Mathematics 45 (2019) 2969\u2013\u20133019.","DOI":"10.1007\/s10444-019-09725-6"},{"key":"e_1_3_3_1_8_2","doi-asserted-by":"publisher","DOI":"10.1007\/3-540-45465-9_59"},{"key":"e_1_3_3_1_9_2","unstructured":"Tyler Chen Pradeep Niroula Archan Ray Pragna Subrahmanya Marco Pistoia and Niraj Kumar. 2025. GPU-Parallelizable Randomized Sketch-and-Precondition for Linear Regression using Sparse Sign Sketches. arxiv:https:\/\/arXiv.org\/abs\/2506.03070"},{"key":"e_1_3_3_1_10_2","doi-asserted-by":"publisher","DOI":"10.5555\/1109557.1109682"},{"key":"e_1_3_3_1_11_2","doi-asserted-by":"publisher","unstructured":"Bernard\u00a0J. Fino and V.\u00a0Ralph Algazi. 1976. Unified Matrix Treatment of the Fast Walsh-Hadamard Transform. IEEE Trans. Comput. C-25 11 (1976) 1142\u20131146. 10.1109\/TC.1976.1674569","DOI":"10.1109\/TC.1976.1674569"},{"key":"e_1_3_3_1_12_2","doi-asserted-by":"crossref","unstructured":"Andrew\u00a0J. Higgins Daniel\u00a0B. Szyld Erik\u00a0G. Boman and Ichitaro Yamazaki. 2024. Analysis of Randomized Householder-Cholesky QR Factorization with Multisketching. arxiv:https:\/\/arXiv.org\/abs\/2309.05868","DOI":"10.2172\/2540515"},{"key":"e_1_3_3_1_13_2","doi-asserted-by":"publisher","DOI":"10.1137\/1.9780898718027"},{"key":"e_1_3_3_1_14_2","unstructured":"Ilse C.\u00a0F. Ipsen. 2025. Solution of Least Squares Problems with Randomized Preconditioned Normal Equations. arxiv:https:\/\/arXiv.org\/abs\/2507.18466"},{"key":"e_1_3_3_1_15_2","first-page":"2101","volume-title":"Proceedings of The 33rd International Conference on Machine Learning","volume":"48","author":"Kapralov Michael","year":"2016","unstructured":"Michael Kapralov, Vamsi Potluru, and David Woodruff. 2016. How to Fake Multiply by a Gaussian Matrix. In Proceedings of The 33rd International Conference on Machine Learning , Vol.\u00a048. 2101\u20132110."},{"key":"e_1_3_3_1_16_2","doi-asserted-by":"publisher","unstructured":"Per-Gunnar Martinsson and Joel\u00a0A. Tropp. 2020. Randomized numerical linear algebra: Foundations and algorithms. Acta Numerica 29 (2020) 403\u2013572. 10.1017\/S0962492920000021","DOI":"10.1017\/S0962492920000021"},{"key":"e_1_3_3_1_17_2","doi-asserted-by":"publisher","DOI":"10.1145\/2488608.2488621"},{"key":"e_1_3_3_1_18_2","doi-asserted-by":"publisher","unstructured":"Xiangrui Meng Michael\u00a0A. Saunders and Michael\u00a0W. Mahoney. 2014. LSRN: A Parallel Iterative Solver for Strongly Over- or Underdetermined Systems. SIAM Journal on Scientific Computing 36 2 (2014) C95\u2013C118. 10.1137\/120866580","DOI":"10.1137\/120866580"},{"key":"e_1_3_3_1_19_2","unstructured":"NVIDIA. 2025. cuBLAS Documentation. https:\/\/docs.nvidia.com\/cuda\/cublas\/index.html Accessed: 2025-07-29."},{"key":"e_1_3_3_1_20_2","unstructured":"NVIDIA. 2025. CUDA Samples. https:\/\/github.com\/NVIDIA\/cuda-samples\/tree\/master\/Samples\/5_Domain_Specific\/fastWalshTransform Accessed: 2025-07-29."},{"key":"e_1_3_3_1_21_2","unstructured":"NVIDIA. 2025. CUDA Toolkit Documentation. https:\/\/docs.nvidia.com\/cuda\/ Accessed: 2025-07-29."},{"key":"e_1_3_3_1_22_2","unstructured":"NVIDIA. 2025. cuRAND Documentation. https:\/\/docs.nvidia.com\/cuda\/curand\/index.html Accessed: 2025-07-29."},{"key":"e_1_3_3_1_23_2","unstructured":"NVIDIA. 2025. cuSOLVER Documentation. https:\/\/docs.nvidia.com\/cuda\/cusolver\/index.html Accessed: 2025-07-29."},{"key":"e_1_3_3_1_24_2","unstructured":"NVIDIA. 2025. cuSPARSE Documentation. https:\/\/docs.nvidia.com\/cuda\/cusparse\/index.html Accessed: 2025-07-29."},{"key":"e_1_3_3_1_25_2","doi-asserted-by":"publisher","DOI":"10.1109\/FOCS.2006.37"},{"key":"e_1_3_3_1_26_2","doi-asserted-by":"publisher","DOI":"10.1137\/1.9780898719574"},{"key":"e_1_3_3_1_27_2","doi-asserted-by":"publisher","unstructured":"Joel\u00a0A. Tropp. 2011. Improved Analysis of the Subsampled Randomized Hadamard Transform. Advances in Adaptive Data Analysis 03 01n02 (2011) 115\u2013126. 10.1142\/S1793536911000787","DOI":"10.1142\/S1793536911000787"},{"key":"e_1_3_3_1_28_2","doi-asserted-by":"publisher","unstructured":"David\u00a0P. Woodruff. 2014. Sketching as a Tool for Numerical Linear Algebra. Foundations and Trends in Theoretical Computer Science 10 (2014) 1\u2013157. 10.1561\/0400000060","DOI":"10.1561\/0400000060"}],"event":{"name":"SC Workshops '25: Workshops of the International Conference for High Performance Computing, Networking, Storage and Analysis","location":"St Louis MO USA","acronym":"SC Workshops '25","sponsor":["SIGHPC ACM Special Interest Group on High Performance Computing, Special Interest Group on High Performance Computing"]},"container-title":["Proceedings of the SC '25 Workshops of the International Conference for High Performance Computing, Networking, Storage and Analysis"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/abs\/10.1145\/3731599.3767544","content-type":"text\/html","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3731599.3767544","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3731599.3767544","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,1,9]],"date-time":"2026-01-09T19:31:38Z","timestamp":1767987098000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3731599.3767544"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,11,15]]},"references-count":27,"alternative-id":["10.1145\/3731599.3767544","10.1145\/3731599"],"URL":"https:\/\/doi.org\/10.1145\/3731599.3767544","relation":{},"subject":[],"published":{"date-parts":[[2025,11,15]]},"assertion":[{"value":"2025-11-15","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}