{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,28]],"date-time":"2026-03-28T13:51:04Z","timestamp":1774705864932,"version":"3.50.1"},"reference-count":174,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"10","license":[{"start":{"date-parts":[[2022,10,1]],"date-time":"2022-10-01T00:00:00Z","timestamp":1664582400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2022,10,1]],"date-time":"2022-10-01T00:00:00Z","timestamp":1664582400000},"content-version":"am","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2022,10,1]],"date-time":"2022-10-01T00:00:00Z","timestamp":1664582400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2022,10,1]],"date-time":"2022-10-01T00:00:00Z","timestamp":1664582400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61977046"],"award-info":[{"award-number":["61977046"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100010663","name":"H2020 European Research Council","doi-asserted-by":"publisher","award":["ERC Advanced Grant E-DUALITY (787960)"],"award-info":[{"award-number":["ERC Advanced Grant E-DUALITY (787960)"]}],"id":[{"id":"10.13039\/100010663","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000001","name":"National Science Foundation","doi-asserted-by":"publisher","award":["CCF-1657420"],"award-info":[{"award-number":["CCF-1657420"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000001","name":"National Science Foundation","doi-asserted-by":"publisher","award":["CCF-1704828"],"award-info":[{"award-number":["CCF-1704828"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Pattern Anal. Mach. Intell."],"published-print":{"date-parts":[[2022,10,1]]},"DOI":"10.1109\/tpami.2021.3097011","type":"journal-article","created":{"date-parts":[[2021,7,26]],"date-time":"2021-07-26T22:27:02Z","timestamp":1627338422000},"page":"7128-7148","source":"Crossref","is-referenced-by-count":84,"title":["Random Features for Kernel Approximation: A Survey on Algorithms, Theory, and Beyond"],"prefix":"10.1109","volume":"44","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-4133-7921","authenticated-orcid":false,"given":"Fanghui","family":"Liu","sequence":"first","affiliation":[{"name":"Department of Electrical Engineering (ESAT-STADIUS), KU Leuven, Leuven, Belgium"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4285-6520","authenticated-orcid":false,"given":"Xiaolin","family":"Huang","sequence":"additional","affiliation":[{"name":"Institute of Image Processing and Pattern Recognition, and Institute of Medical Robotics, Shanghai Jiao Tong University, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6416-5635","authenticated-orcid":false,"given":"Yudong","family":"Chen","sequence":"additional","affiliation":[{"name":"School of Operations Research and Information Engineering, Cornell University, Ithaca, NY, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8846-6352","authenticated-orcid":false,"given":"Johan A. K.","family":"Suykens","sequence":"additional","affiliation":[{"name":"Department of Electrical Engineering (ESAT-STADIUS), KU Leuven, Leuven, Belgium"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.7551\/mitpress\/4175.001.0001"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1142\/5089"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2017.2785313"},{"issue":"140","key":"ref4","first-page":"1","article-title":"Generalization properties of hyper-RKHS and its applications","volume":"22","author":"Liu","year":"2021","journal-title":"J. Mach. Learn. Res."},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1090\/s0002-9947-1950-0051437-7"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1007\/978-981-10-0530-5"},{"key":"ref7","first-page":"1177","article-title":"Random features for large-scale kernel machines","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Rahimi"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1017\/CBO9780511617539"},{"key":"ref9","first-page":"1359","article-title":"Randomized nonlinear component analysis","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Lopez-Paz"},{"key":"ref10","first-page":"3383","article-title":"But how does it work in theory? Linear SVM with random features","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Sun"},{"key":"ref11","first-page":"8571","article-title":"Neural tangent kernel: Convergence and generalization in neural networks","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Jacot"},{"key":"ref12","first-page":"8139","article-title":"On exact computation with an infinitely wide neural net","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Arora"},{"key":"ref13","article-title":"Scaling neural tangent kernels via sketching and random features","author":"Zandieh","year":"2021"},{"key":"ref14","first-page":"1","article-title":"Graph neural tangent kernel: Fusing graph neural networks with graph kernels","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Du"},{"key":"ref15","first-page":"10968","article-title":"Graph random neural features for distance-preserving graph representations","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Zambon"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.2307\/j.ctv36zrf8.5"},{"key":"ref17","first-page":"1","article-title":"Random feature attention","volume-title":"Proc. Int. Conf. Learn. Representation","author":"Peng"},{"key":"ref18","first-page":"10835","article-title":"Generalization bounds of stochastic gradient descent for wide and deep neural networks","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Cao"},{"key":"ref19","first-page":"322","article-title":"Fine-grained analysis of optimization and generalization for overparameterized two-layer neural networks","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Arora"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1016\/j.acha.2021.12.003"},{"key":"ref21","first-page":"1305","article-title":"Implicit bias of gradient descent for wide two-layer neural networks trained with the logistic loss","volume-title":"Proc. Conf. Learn. Theory","author":"Chizat"},{"key":"ref22","first-page":"1","article-title":"Polylogarithmic width suffices for gradient descent to achieve arbitrarily small test error with shallow ReLU networks","volume-title":"Proc. Int. Conf. Learn. Representation","author":"Ji"},{"key":"ref23","first-page":"1975","article-title":"Orthogonal random features","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Yu"},{"issue":"1","key":"ref24","first-page":"4096","article-title":"Quasi-Monte Carlo feature maps for shift-invariant kernels","volume":"17","author":"Avron","year":"2016","journal-title":"J. Mach. Learn. Res."},{"key":"ref25","first-page":"6107","article-title":"Gaussian quadrature for kernel features","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Dao"},{"key":"ref26","first-page":"9147","article-title":"Quadrature-based features for kernel approximation","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Munkhoeva"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2016.7472872"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP40776.2020.9053272"},{"key":"ref29","first-page":"862","article-title":"On the error of random Fourier features","volume-title":"Proc. Conf. Uncertainty Artif. Intell.","author":"Sutherland"},{"key":"ref30","first-page":"3905","article-title":"Towards a unified analysis of random Fourier features","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Li"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4757-2440-0"},{"issue":"1","key":"ref32","first-page":"1","article-title":"On the mathematical foundations of learning","volume":"39","year":"2002","journal-title":"Bull."},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1017\/CBO9780511618796"},{"key":"ref34","volume-title":"Support Vector Machines","author":"Steinwart","year":"2008"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1090\/S0002-9904-1934-05843-9"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1007\/978-0-387-76371-2"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1215\/S0012-7094-42-00908-6"},{"key":"ref38","first-page":"308","article-title":"Regularization with dot-product kernels","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Smola Zoltan"},{"key":"ref39","volume-title":"Spherical Harmonics","author":"M\u00fcller","year":"2006"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2005.12.126"},{"key":"ref41","first-page":"342","article-title":"Kernel methods for deep learning","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Cho"},{"key":"ref42","first-page":"295","article-title":"Computing with infinite networks","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Williams"},{"key":"ref43","article-title":"Gaussian error linear units","author":"Hendrycks","year":"2016"},{"key":"ref44","first-page":"2253","article-title":"Toward deeper understanding of neural networks: The power of initialization and a dual view on expressivity","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Daniely"},{"key":"ref45","first-page":"1","article-title":"Deep neural networks as Gaussian processes","volume-title":"Proc. Int. Conf. Learn. Representation","author":"Lee"},{"key":"ref46","first-page":"2933","article-title":"On lazy training in differentiable programming","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Chizat"},{"key":"ref47","first-page":"12873","article-title":"On the inductive bias of neural tangent kernels","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Bietti"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1214\/20-aos1990"},{"key":"ref49","first-page":"1","article-title":"Deep equals shallow for ReLU networks in kernel regimes","volume-title":"Proc. Int. Conf. Learn. Representation","author":"Bietti"},{"key":"ref50","first-page":"1","article-title":"When do neural networks outperform kernel methods?","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Ghorbani"},{"key":"ref51","article-title":"Classifying high-dimensional gaussian mixtures: Where kernel methods fail and neural networks succeed","author":"Refinetti","year":"2021"},{"key":"ref52","first-page":"583","article-title":"Random feature maps for dot product kernels","volume-title":"Proc. Int. Conf. Artif. Intell. Stat.","author":"Kar"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.1145\/2487575.2487591"},{"key":"ref54","first-page":"9475","article-title":"Tight dimensionality reduction for sketching low degree polynomial kernels","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Meister"},{"key":"ref55","first-page":"2258","article-title":"Subspace embeddings for the polynomial kernel","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Avron"},{"key":"ref56","first-page":"10324","article-title":"Near input sparsity time kernel embeddings via adaptive sampling","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Woodruff"},{"key":"ref57","first-page":"1846","article-title":"Spherical random features for polynomial kernels","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Pennington"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2019.2934729"},{"key":"ref59","first-page":"1","article-title":"Fast learning in reproducing kernel Kre\u012dn spaces via signed measures","volume-title":"Proc. Int. Conf. Artif. Intell. Stat.","author":"Liu"},{"key":"ref60","first-page":"244","article-title":"FastFood\u2014Approximating kernel expansions in loglinear time","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Le"},{"key":"ref61","first-page":"2502","article-title":"Recycling randomness with structure for sublinear time kernel expansions","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Choromanski"},{"key":"ref62","first-page":"3490","article-title":"Random feature mapping with signed circulant matrix projection","volume-title":"Proc. Int. Conf. Artif. Intell.","author":"Feng"},{"key":"ref63","doi-asserted-by":"publisher","DOI":"10.1145\/3097983.3098081"},{"key":"ref64","first-page":"219","article-title":"The unreasonable effectiveness of structured random orthogonal embeddings","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Choromanski"},{"key":"ref65","first-page":"485","article-title":"Quasi-Monte Carlo feature maps for shift-invariant kernels","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Yang"},{"key":"ref66","first-page":"2256","article-title":"Spherical structured feature maps for kernel approximation","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Lyu"},{"key":"ref67","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v31i1.10825"},{"key":"ref68","first-page":"5672","article-title":"On fast leverage score sampling and optimal learning","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Rudi"},{"key":"ref69","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i04.5920"},{"key":"ref70","first-page":"109","article-title":"Fourier sparse leverage scores and approximate kernel learning","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Erd\u00e9lyi"},{"key":"ref71","first-page":"1313","article-title":"Weighted sums of random kitchen sinks: Replacing minimization with randomization in learning","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Rahimi"},{"key":"ref72","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2017\/207"},{"key":"ref73","first-page":"1298","article-title":"Learning kernels with random features","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Sinha"},{"key":"ref74","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.11697"},{"key":"ref75","first-page":"1822","article-title":"Data-dependent compression of random features for large-scale kernel approximation","volume-title":"Proc. Int. Conf. Artif. Intell. Stat.","author":"Agrawal"},{"key":"ref76","first-page":"2007","article-title":"Implicit kernel learning","volume-title":"Proc. Int. Conf. Artif. Intell. Stat.","author":"Li"},{"key":"ref77","article-title":"Compact nonlinear maps and circulant extensions","author":"Yu","year":"2015"},{"key":"ref78","first-page":"1","article-title":"Not-so-random features","volume-title":"Proc. Int. Conf. Learn. Representation","author":"Bullins"},{"key":"ref79","first-page":"1067","article-title":"Gaussian process kernels for pattern discovery and extrapolation","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Wilson"},{"key":"ref80","first-page":"1098","article-title":"\u00c0 la carte\u2013learning fast kernels","volume-title":"Proc. Int. Conf. Artif. Intell. Stat.","author":"Yang"},{"key":"ref81","article-title":"Harmonizable mixture kernels with variational Fourier features","volume-title":"Proc. Int. Conf. Artif. Intell. Stat","author":"Shen"},{"key":"ref82","first-page":"1078","article-title":"Bayesian nonparametric kernel learning","volume-title":"Proc. Int. Conf. Artif. Intell. Stat.","author":"Oliva"},{"key":"ref83","first-page":"1264","article-title":"Low-precision random Fourier features for memory-constrained kernel approximation","volume-title":"Proc. Int. Conf. Artif. Intell. Stat.","author":"Zhang"},{"key":"ref84","first-page":"3041","article-title":"Scalable kernel methods via doubly stochastic gradients","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Dai"},{"key":"ref85","first-page":"1020","article-title":"Structured adaptive and random spinners for fast machine learning computations","volume-title":"Proc. Int. Conf. Artif. Intell. Stat.","author":"Bojarski"},{"key":"ref86","doi-asserted-by":"publisher","DOI":"10.1137\/1.9781611970081"},{"issue":"1","key":"ref87","first-page":"714","article-title":"On the equivalence between kernel quadrature rules and random feature expansions","volume":"18","author":"Bach","year":"2017","journal-title":"J. Mach. Learn. Res."},{"key":"ref88","first-page":"253","article-title":"Random Fourier features for kernel ridge regression: Approximation bounds and statistical guarantees","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Avron"},{"key":"ref89","first-page":"6369","article-title":"Quantization algorithms for random fourier features","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Li"},{"key":"ref90","first-page":"476","article-title":"Nystr\u00f6m method vs random Fourier features: A theoretical and empirical comparison","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Yang"},{"key":"ref91","first-page":"2341","article-title":"Scale up nonlinear component analysis with doubly stochastic gradients","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Xie"},{"key":"ref92","first-page":"1","article-title":"Triply stochastic gradients on multiple kernel learning.","volume-title":"Proc. Int. Conf. Artif. Intell. Stat.","author":"Li"},{"key":"ref93","first-page":"1203","article-title":"Unifying orthogonal Monte Carlo methods","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Choromanski"},{"key":"ref94","first-page":"1","article-title":"The geometry of random features","volume-title":"Proc. Int. Conf. Artif. Intell. Stat.","author":"Choromanski"},{"key":"ref95","doi-asserted-by":"publisher","DOI":"10.1016\/j.jco.2015.02.003"},{"key":"ref96","first-page":"6269","article-title":"Subgroup-based rank-1 lattice quasi-monte carlo","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Yuan"},{"key":"ref97","volume-title":"Practical Numerical Integration","author":"Evans","year":"1993"},{"key":"ref98","doi-asserted-by":"publisher","DOI":"10.1016\/j.jeconom.2007.12.004"},{"key":"ref99","first-page":"1","article-title":"Kernel quadrature with DPPs","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Belhadji"},{"key":"ref100","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2021.3120183"},{"key":"ref101","article-title":"A general scoring rule for randomized kernel approximation with application to canonical correlation analysis","author":"Wang","year":"2019"},{"key":"ref102","doi-asserted-by":"publisher","DOI":"10.1162\/0899766054323008"},{"key":"ref103","first-page":"185","article-title":"Sharp analysis of low-rank kernel matrix approximations","volume-title":"Proc. Conf. Learn. Theory","author":"Bach"},{"key":"ref104","first-page":"1","article-title":"Fast quantum algorithm for learning with optimized random features","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Yamasaki"},{"key":"ref105","first-page":"775","article-title":"Fast randomized kernel ridge regression with statistical guarantees","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Alaoui"},{"key":"ref106","first-page":"1421","article-title":"Distributed adaptive sampling for kernel matrix approximation","volume-title":"Proc. Int. Conf. Artif. Intell. Stat.","author":"Calandriello"},{"key":"ref107","first-page":"1144","article-title":"Optimal rates for random Fourier features","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Sriperumbudur"},{"key":"ref108","article-title":"The error probability of random Fourier features is dimensionality independent","author":"Honorio","year":"2017"},{"key":"ref109","first-page":"3215","article-title":"Generalization properties of learning with random features","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Rudi"},{"key":"ref110","doi-asserted-by":"publisher","DOI":"10.1109\/ALLERTON.2008.4797607"},{"key":"ref111","first-page":"862","article-title":"On the error of random Fourier features","volume-title":"Proc. Conf. Uncertainty Artif. Intell.","author":"Sutherland"},{"key":"ref112","first-page":"253","article-title":"Random Fourier features for kernel ridge regression: Approximation bounds and statistical guarantees","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Avron"},{"key":"ref113","first-page":"1365","article-title":"Streaming kernel principal component analysis","volume-title":"Proc. Int. Conf. Artif. Intell. Stat.","author":"Ghashami"},{"key":"ref114","article-title":"Statistical consistency of kernel PCA with random features","author":"Sriperumbudur","year":"2017"},{"key":"ref115","first-page":"7311","article-title":"Streaming kernel PCA with $\\tilde{O}(\\sqrt{n})$O\u02dc(n) random features","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Ullah"},{"key":"ref116","first-page":"226","article-title":"Optimal learning rates for kernel conjugate gradient regression","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Blanchard"},{"key":"ref117","doi-asserted-by":"publisher","DOI":"10.1017\/9781108627771"},{"key":"ref118","doi-asserted-by":"publisher","DOI":"10.1007\/s10208-006-0196-8"},{"key":"ref119","doi-asserted-by":"publisher","DOI":"10.1007\/3-540-36169-3_4"},{"key":"ref120","doi-asserted-by":"publisher","DOI":"10.1007\/s00365-006-0659-y"},{"key":"ref121","doi-asserted-by":"publisher","DOI":"10.1016\/j.acha.2017.11.005"},{"issue":"1","key":"ref122","first-page":"3202","article-title":"Distributed learning with regularized least squares","volume":"18","author":"Lin","year":"2017","journal-title":"J. Mach. Learn. Res."},{"key":"ref123","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-22147-7"},{"key":"ref124","doi-asserted-by":"publisher","DOI":"10.1214\/009053605000000282"},{"issue":"108","key":"ref125","first-page":"1","article-title":"Towards a unified analysis of random fourier features","volume":"22","author":"Li","year":"2021","journal-title":"J. Mach. Learn. Res."},{"key":"ref126","first-page":"10212","article-title":"Learning with SGD and random features","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Carratino"},{"key":"ref127","article-title":"Simple and almost assumption-free out-of-sample bound for random feature mapping","author":"Wang","year":"2019"},{"key":"ref128","first-page":"1871","article-title":"LIBLINEAR: A library for large linear classification","volume":"9","author":"Fan","year":"2008","journal-title":"J. Mach. Learn. Res."},{"key":"ref129","doi-asserted-by":"publisher","DOI":"10.7551\/mitpress\/7496.003.0015"},{"key":"ref130","doi-asserted-by":"publisher","DOI":"10.1109\/5.726791"},{"key":"ref131","article-title":"Learning multiple layers of features from tiny images","author":"Krizhevsky","year":"2009"},{"key":"ref132","first-page":"1","article-title":"Harnessing the power of infinitely wide deep nets on small-data tasks","volume-title":"Proc. Int. Conf. Learn. Representation","author":"Arora"},{"key":"ref133","doi-asserted-by":"publisher","DOI":"10.1007\/s13398-014-0173-7.2"},{"key":"ref134","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"ref135","doi-asserted-by":"publisher","DOI":"10.1214\/21-aos2133"},{"key":"ref136","doi-asserted-by":"publisher","DOI":"10.1002\/cpa.22008"},{"key":"ref137","first-page":"1","article-title":"On the multiple descent of minimum-norm interpolants and restricted lower isometry of kernels","volume-title":"Proc. Conf. Learn. Theory","author":"Liang"},{"key":"ref138","first-page":"1","article-title":"Kernel regression in high dimensions: Refined analysis beyond double descent","volume-title":"Proc. Int. Conf. Artif. Intell. Stat.","author":"Liu"},{"key":"ref139","doi-asserted-by":"publisher","DOI":"10.1145\/3446776"},{"key":"ref140","doi-asserted-by":"publisher","DOI":"10.1073\/pnas.1903070116"},{"key":"ref141","doi-asserted-by":"publisher","DOI":"10.1088\/1742-5468\/ac3a74"},{"key":"ref142","first-page":"541","article-title":"To understand deep learning we need to understand kernel learning","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Belkin"},{"key":"ref143","doi-asserted-by":"publisher","DOI":"10.1109\/5.726791"},{"key":"ref144","first-page":"6158","article-title":"Learning and generalization in overparameterized neural networks, going beyond two layers","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Allen-Zhu"},{"key":"ref145","doi-asserted-by":"publisher","DOI":"10.1090\/gsm\/132"},{"key":"ref146","first-page":"2634","article-title":"Nonlinear random matrix theory for deep learning","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Pennington"},{"key":"ref147","first-page":"3063","article-title":"On the spectrum of random features maps of high dimensional data","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Liao"},{"key":"ref148","first-page":"13939","article-title":"A random matrix analysis of random fourier features: Beyond the gaussian kernel, a precise phase transition, and the corresponding double descent","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Liao"},{"key":"ref149","first-page":"1","article-title":"Generalization of two-layer neural networks: an asymptotic viewpoint","volume-title":"Proc. Int. Conf. Learn. Representation","author":"Ba"},{"key":"ref150","article-title":"Double trouble in double descent: Bias and variance(s) in the lazy regime","author":"d\u2019Ascoli","year":"2020"},{"key":"ref151","doi-asserted-by":"publisher","DOI":"10.1088\/1742-5468\/ac3ae6"},{"key":"ref152","first-page":"11022","article-title":"Understanding double descent requires a fine-grained bias-variance decomposition","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Adlam"},{"key":"ref153","article-title":"A precise performance analysis of learning with random features","author":"Dhifallah","year":"2020"},{"key":"ref154","doi-asserted-by":"publisher","DOI":"10.1109\/TIT.2022.3217698"},{"key":"ref155","first-page":"4631","article-title":"Implicit regularization of random feature models","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Jacot"},{"key":"ref156","doi-asserted-by":"publisher","DOI":"10.1137\/20M1336072"},{"key":"ref157","doi-asserted-by":"publisher","DOI":"10.1103\/physrevresearch.4.013201"},{"key":"ref158","article-title":"What causes the test error? Going beyond bias-variance via anova","author":"Lin","year":"2020"},{"key":"ref159","doi-asserted-by":"publisher","DOI":"10.1142\/0271"},{"key":"ref160","doi-asserted-by":"publisher","DOI":"10.1103\/revmodphys.91.045002"},{"key":"ref161","first-page":"1683","article-title":"Regularized linear regression: A precise analysis of the estimation error","volume-title":"Proc. Conf. Learn. Theory","author":"Thrampoulidis"},{"key":"ref162","article-title":"The generalization error of max-margin linear classifiers: High-dimensional asymptotics in the overparametrized regime","author":"Montanari","year":"2019"},{"key":"ref163","doi-asserted-by":"publisher","DOI":"10.2139\/ssrn.3714013"},{"key":"ref164","first-page":"2034","article-title":"Precise tradeoffs in adversarial training for linear regression","volume-title":"Proc. Conf. Learn. Theory","author":"Javanmard"},{"key":"ref165","doi-asserted-by":"publisher","DOI":"10.1214\/17-AAP1328"},{"key":"ref166","first-page":"6594","article-title":"On the power and limitations of random features for understanding neural networks","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Yehudai"},{"key":"ref167","article-title":"On the approximation properties of random ReLU features","author":"Sun","year":"2018"},{"key":"ref168","first-page":"1","article-title":"The lottery ticket hypothesis: Finding sparse, trainable neural networks","volume-title":"Proc. Int. Conf. Learn. Representation","author":"Frankle"},{"key":"ref169","article-title":"Proving the lottery ticket hypothesis: Pruning is all you need","author":"Malach","year":"2020"},{"key":"ref170","article-title":"Network trimming: A data-driven neuron pruning approach towards efficient deep architectures","author":"Hu","year":"2016"},{"key":"ref171","first-page":"11022","article-title":"Kernel methods through the roof: Handling billions of points efficiently","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Meanti"},{"key":"ref172","article-title":"Multiple descent: Design your own generalization curve","author":"Chen","year":"2020"},{"key":"ref173","first-page":"1","article-title":"On the optimal weighted $\\ell _2$\u21132 regularization in overparameterized linear regression","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Wu"},{"issue":"169","key":"ref174","first-page":"1","article-title":"The optimal ridge penalty for real-world high-dimensional data can be zero or negative due to the implicit ridge regularization","volume":"21","author":"Kobak","year":"2020","journal-title":"J. Mach. Learn. Res."}],"container-title":["IEEE Transactions on Pattern Analysis and Machine Intelligence"],"original-title":[],"link":[{"URL":"https:\/\/ieeexplore.ieee.org\/ielam\/34\/9893033\/9495136-aam.pdf","content-type":"application\/pdf","content-version":"am","intended-application":"syndication"},{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/34\/9893033\/09495136.pdf?arnumber=9495136","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,1,11]],"date-time":"2024-01-11T22:47:09Z","timestamp":1705013229000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9495136\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,10,1]]},"references-count":174,"journal-issue":{"issue":"10"},"URL":"https:\/\/doi.org\/10.1109\/tpami.2021.3097011","relation":{},"ISSN":["0162-8828","2160-9292","1939-3539"],"issn-type":[{"value":"0162-8828","type":"print"},{"value":"2160-9292","type":"electronic"},{"value":"1939-3539","type":"electronic"}],"subject":[],"published":{"date-parts":[[2022,10,1]]}}}