{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,12]],"date-time":"2026-03-12T15:30:19Z","timestamp":1773329419535,"version":"3.50.1"},"reference-count":87,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"12","license":[{"start":{"date-parts":[[2025,12,1]],"date-time":"2025-12-01T00:00:00Z","timestamp":1764547200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2025,12,1]],"date-time":"2025-12-01T00:00:00Z","timestamp":1764547200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,12,1]],"date-time":"2025-12-01T00:00:00Z","timestamp":1764547200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"name":"KLA and the Tel Aviv University Center for Artificial Intelligence and Data Science"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Pattern Anal. Mach. Intell."],"published-print":{"date-parts":[[2025,12]]},"DOI":"10.1109\/tpami.2025.3598343","type":"journal-article","created":{"date-parts":[[2025,8,18]],"date-time":"2025-08-18T19:43:34Z","timestamp":1755546214000},"page":"12069-12076","source":"Crossref","is-referenced-by-count":1,"title":["Pruning at Initialization \u2013 A Sketching Perspective"],"prefix":"10.1109","volume":"47","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-2873-7958","authenticated-orcid":false,"given":"Noga","family":"Bar","sequence":"first","affiliation":[{"name":"Tel Aviv University, Tel Aviv, Israel"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2830-0297","authenticated-orcid":false,"given":"Raja","family":"Giryes","sequence":"additional","affiliation":[{"name":"Tel Aviv University, Tel Aviv, Israel"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref1","article-title":"The lottery ticket hypothesis: Finding sparse, trainable neural networks","volume-title":"Proc. 7th Int. Conf. Learn. Representations","author":"Frankle"},{"key":"ref2","article-title":"SNIP: Single-shot network pruning based on connection sensitivity","volume-title":"Proc. Int. Conf. Learn. Representations (ICLR)","author":"Lee","year":"2019"},{"key":"ref3","first-page":"6377","article-title":"Pruning neural networks without any data by iteratively conserving synaptic flow","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"33","author":"Tanaka","year":"2020"},{"key":"ref4","article-title":"Picking winning tickets before training by preserving gradient flow","volume-title":"Proc. Int. Conf. Learn. Representations (ICLR)","author":"Wang","year":"2020"},{"key":"ref5","article-title":"Progressive skeletonization: Trimming more fat from a network at initialization","volume-title":"Proc. Int. Conf. Learn. Representations (ICLR)","author":"Jorge","year":"2021"},{"key":"ref6","article-title":"Prospect Pruning: Finding trainable weights at initialization using meta-gradients","volume-title":"Proc. Int. Conf. Learn. Representations (ICLR)","author":"Alizadeh","year":"2022"},{"key":"ref7","article-title":"A signal propagation perspective for pruning neural networks at initialization","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Lee","year":"2020"},{"key":"ref8","first-page":"14529","article-title":"Rare Gems: Finding lottery tickets at initialization","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"35","author":"Sreenivasan","year":"2022"},{"key":"ref9","first-page":"2943","article-title":"Rigging the lottery: Making all tickets winners","volume-title":"Proc. 37th Int. Conf. Mach. Learn.","author":"Evci","year":"2020"},{"key":"ref10","article-title":"One ticket to win them all: Generalizing lottery ticket initializations across datasets and optimizers","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Morcos","year":"2019"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01604"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.5220\/0010196300590069"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-20083-0_40"},{"key":"ref14","first-page":"20390","article-title":"Sanity-checking pruning methods: Random tickets can win the jackpot","volume-title":"Proc. Adv. Neural Inf. Process. Syst. (NeurIPS)","volume":"33","author":"Su","year":"2020"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01191"},{"key":"ref16","first-page":"6682","article-title":"Proving the lottery ticket hypothesis: Pruning is all you need","volume-title":"Proc. 37th Int. Conf. Mach. Learn.","author":"Malach","year":"2020"},{"key":"ref17","first-page":"2599","article-title":"Optimal lottery tickets via subsetsum: Logarithmic over-parameterization is sufficient","volume-title":"Proc. Adv. Neural Inf. Process. Syst. (NeurIPS)","volume":"33","author":"Pensia","year":"2020"},{"key":"ref18","article-title":"Proving the lottery ticket hypothesis for convolutional neural networks","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Cunha","year":"2021"},{"key":"ref19","article-title":"On the existence of universal lottery tickets","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Burkholz","year":"2021"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1137\/S0097539704442684"},{"key":"ref21","first-page":"244","article-title":"On the optimization of deep networks: Implicit acceleration by overparameterization","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Arora","year":"2018"},{"key":"ref22","article-title":"SGD learns over-parameterized networks that provably generalize on linearly separable data","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Brutzkus","year":"2018"},{"key":"ref23","first-page":"2691","article-title":"Exponential convergence time of gradient descent for one-dimensional deep linear neural networks","volume-title":"Proc. Conf. Learn. Theory","volume":"99","author":"Shamir","year":"2019"},{"key":"ref24","first-page":"7413","article-title":"Implicit regularization in deep matrix factorization","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Arora","year":"2019"},{"key":"ref25","article-title":"Towards resolving the implicit bias of gradient descent for matrix factorization: Greedy low-rank learning","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Li","year":"2021"},{"key":"ref26","article-title":"A convergence analysis of gradient descent for deep linear neural networks","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Arora","year":"2019"},{"key":"ref27","first-page":"16270","article-title":"Implicit bias of the step size in linear diagonal neural networks","volume-title":"Proc. Int. Conf. Mach. Learn.","volume":"162","author":"Nacson","year":"2022"},{"key":"ref28","article-title":"A unifying view on implicit bias in training linear neural networks","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Yun","year":"2021"},{"key":"ref29","article-title":"Neural tangent kernel: Convergence and generalization in neural networks","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"31","author":"Jacot","year":"2018"},{"key":"ref30","article-title":"Learning multiple layers of features from tiny images","author":"Krizhevsky","year":"2009"},{"key":"ref31","article-title":"Tiny ImageNet challenge","author":"Wu","year":"2017"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"ref33","article-title":"Skeletonization: A technique for trimming the fat from a network via relevance assessment","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"1","author":"Mozer","year":"1988"},{"key":"ref34","article-title":"Pruning convolutional neural networks for resource efficient inference","volume-title":"Proc. Int. Conf. Learn. Representations (ICLR)","author":"Molchanov","year":"2017"},{"key":"ref35","article-title":"Optimal brain damage","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"2","author":"LeCun","year":"1989"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/ICNN.1993.298572"},{"key":"ref37","article-title":"Learning both weights and connections for efficient neural network","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"28","author":"Han","year":"2015"},{"key":"ref38","article-title":"Dynamic network surgery for efficient DNNs","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"29","author":"Guo","year":"2016"},{"key":"ref39","first-page":"20852","article-title":"The generalization-stability tradeoff in neural network pruning","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"33","author":"Bartoldson","year":"2020"},{"key":"ref40","article-title":"A back-propagation algorithm with optimal use of hidden units","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"1","author":"Chauvin","year":"1988"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00890"},{"key":"ref42","article-title":"Learning sparse neural networks through L_0 regularization","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Louizos","year":"2018"},{"key":"ref43","article-title":"Deep rewiring: Training very sparse deep networks","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Bellec","year":"2018"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1038\/s41467-018-04316-3"},{"key":"ref45","first-page":"4646","article-title":"Parameter efficient training of deep convolutional neural networks by dynamic sparse reparameterization","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Mostafa","year":"2019"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1016\/j.eswa.2025.126957"},{"key":"ref47","first-page":"7588","article-title":"Neural architecture search without training","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Mellor","year":"2021"},{"key":"ref48","first-page":"28454","article-title":"How powerful are performance predictors in neural architecture search?","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"34","author":"White","year":"2021"},{"key":"ref49","article-title":"Zero-cost proxies for lightweight NAS","volume-title":"Proc. Int. Conf. Learn. Representations (ICLR)","author":"Abdelfattah","year":"2021"},{"key":"ref50","first-page":"254","article-title":"Stronger generalization bounds for deep nets via a compression approach","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Arora","year":"2018"},{"key":"ref51","article-title":"Tensorizing neural networks","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"28","author":"Novikov","year":"2015"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.5244\/C.28.88"},{"key":"ref53","article-title":"OTOV2: Automatic, generic, user-friendly","volume-title":"Proc. 11th Int. Conf. Learn. Representations","author":"Chen","year":"2023"},{"key":"ref54","first-page":"19637","article-title":"Only train once: A one-shot neural network training and pruning framework","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"34","author":"Chen","year":"2021"},{"issue":"5","key":"ref55","doi-asserted-by":"crossref","first-page":"3203","DOI":"10.1109\/TCSVT.2021.3095970","article-title":"An overview of neural network compression","volume":"32","author":"Neill","year":"2022","journal-title":"IEEE Trans. Circuits Syst. Video Technol."},{"key":"ref56","first-page":"18293","article-title":"Winning the lottery ahead of time: Efficient early network pruning","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Rachwan","year":"2022"},{"key":"ref57","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00029"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19803-8_17"},{"key":"ref59","article-title":"Single shot structured pruning before training","author":"Amersfoort","year":"2020"},{"key":"ref60","article-title":"Lottery tickets on a data diet: Finding initializations with sparse trainable networks","volume-title":"Proc. NeurIPS","author":"Paul","year":"2022"},{"key":"ref61","first-page":"30196","article-title":"Validating the lottery ticket hypothesis with inertial manifold theory","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"34","author":"Zhang","year":"2021"},{"key":"ref62","first-page":"6336","article-title":"Finding trainable sparse networks through neural tangent transfer","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Liu","year":"2020"},{"key":"ref63","article-title":"Drawing early-bird tickets: Towards more efficient training of deep networks","volume-title":"Proc. Int. Conf. Learn. Representations (ICLR)","author":"You","year":"2020"},{"key":"ref64","doi-asserted-by":"publisher","DOI":"10.1109\/ICIP46576.2022.9897980"},{"key":"ref65","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01193"},{"key":"ref66","first-page":"12380","article-title":"Efficient lottery ticket finding: Less data is more","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Zhang","year":"2021"},{"key":"ref67","article-title":"Provably efficient lottery ticket discovery","author":"Wolfe","year":"2022","journal-title":"Trans. Mach. Learn. Res."},{"key":"ref68","first-page":"3259","article-title":"Linear mode connectivity and the lottery ticket hypothesis","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Frankle","year":"2020"},{"key":"ref69","first-page":"2925","article-title":"Logarithmic pruning is all you need","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"33","author":"Orseau","year":"2020"},{"key":"ref70","article-title":"Towards strong pruning for lottery tickets with non-zero biases","author":"Fischer","year":"2021"},{"key":"ref71","article-title":"Plant\u2019n\u2019seek: Can you find the winning ticket?","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Fischer","year":"2022"},{"key":"ref72","article-title":"Pruning randomly initialized neural networks with iterative randomization","volume-title":"Adv. Neural Inf. Process. Syst. (NeurIPS)","author":"Chijiwa","year":"2021"},{"key":"ref73","first-page":"8432","article-title":"PHEW: Constructing sparse networks that learn fast and generalize well without training data","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Patil","year":"2021"},{"key":"ref74","article-title":"A unified paths perspective for pruning at initialization","author":"Gebhart","year":"2021"},{"key":"ref75","article-title":"Towards data-agnostic pruning at initialization: What makes a good sparse mask?","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"36","author":"Pham","year":"2024"},{"key":"ref76","article-title":"Ntk-sap: Improving neural network pruning by aligning training dynamics","volume-title":"Proc. 11th Int. Conf. Learn. Representations","author":"Wang","year":"2022"},{"key":"ref77","doi-asserted-by":"publisher","DOI":"10.4467\/20838476SI.16.004.6185"},{"key":"ref78","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v31i1.10894"},{"key":"ref79","article-title":"Wide neural networks of any depth evolve as linear models under gradient descent","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"32","author":"Lee","year":"2019"},{"key":"ref80","first-page":"322","article-title":"Fine-grained analysis of optimization and generalization for overparameterized two-layer neural networks","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Arora","year":"2019"},{"key":"ref81","article-title":"Very deep convolutional networks for large-scale image recognition","author":"Simonyan","year":"2014"},{"key":"ref82","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"ref83","doi-asserted-by":"publisher","DOI":"10.5244\/C.30.87"},{"key":"ref84","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.123"},{"key":"ref85","article-title":"Openlth: A framework for lottery tickets and beyond","author":"Frankle","year":"2020"},{"key":"ref86","first-page":"249","article-title":"Understanding the difficulty of training deep feedforward neural networks","volume-title":"Proc. 13th Int. Conf. Artif. Intell. statistics. JMLR Workshop Conf. Proc.","author":"Glorot","year":"2010"},{"issue":"19","key":"ref87","first-page":"28","article-title":"Identifiability in two-layer sparse matrix factorization","volume":"20","author":"Zheng","year":"2021","journal-title":"Complexity"}],"container-title":["IEEE Transactions on Pattern Analysis and Machine Intelligence"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/34\/11230086\/11129182.pdf?arnumber=11129182","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,11,6]],"date-time":"2025-11-06T18:53:09Z","timestamp":1762455189000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11129182\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,12]]},"references-count":87,"journal-issue":{"issue":"12"},"URL":"https:\/\/doi.org\/10.1109\/tpami.2025.3598343","relation":{},"ISSN":["0162-8828","2160-9292","1939-3539"],"issn-type":[{"value":"0162-8828","type":"print"},{"value":"2160-9292","type":"electronic"},{"value":"1939-3539","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,12]]}}}