{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,22]],"date-time":"2026-04-22T16:52:41Z","timestamp":1776876761725,"version":"3.51.2"},"reference-count":61,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"6","license":[{"start":{"date-parts":[[2024,9,1]],"date-time":"2024-09-01T00:00:00Z","timestamp":1725148800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2024,9,1]],"date-time":"2024-09-01T00:00:00Z","timestamp":1725148800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,9,1]],"date-time":"2024-09-01T00:00:00Z","timestamp":1725148800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100010628","name":"Universidad de La Sabana","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100010628","id-type":"DOI","asserted-by":"publisher"}]},{"name":"TAU Team"},{"name":"Agence Nationale de la Recherche, National Agency for Research","award":["ANR-19-CHIA-0022"],"award-info":[{"award-number":["ANR-19-CHIA-0022"]}]},{"name":"IBISC, Univ Evry, Universit&#x00E9; Paris-Saclay"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE J. Sel. Top. Signal Process."],"published-print":{"date-parts":[[2024,9]]},"DOI":"10.1109\/jstsp.2024.3501685","type":"journal-article","created":{"date-parts":[[2024,11,18]],"date-time":"2024-11-18T18:55:28Z","timestamp":1731956128000},"page":"997-1009","source":"Crossref","is-referenced-by-count":2,"title":["DARIO: Differentiable Vision Transformer Pruning With Low-Cost Proxies"],"prefix":"10.1109","volume":"18","author":[{"ORCID":"https:\/\/orcid.org\/0009-0000-7202-2223","authenticated-orcid":false,"given":"Haozhe","family":"Sun","sequence":"first","affiliation":[{"name":"LISN, INRIA, CNRS, Universit&#x00E9; Paris-Saclay, Gif-sur-Yvette, France"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Alexandre","family":"Heuillet","sequence":"additional","affiliation":[{"name":"IBISC, Univ Evry, Universit&#x00E9; Paris-Saclay, Gif-sur-Yvette, France"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9293-2424","authenticated-orcid":false,"given":"Felix","family":"Mohr","sequence":"additional","affiliation":[{"name":"Universidad de la Sabana, Ch&#x00ED;a, Colombia"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Hedi","family":"Tabia","sequence":"additional","affiliation":[{"name":"IBISC, Univ Evry, Universit&#x00E9; Paris-Saclay, Gif-sur-Yvette, France"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref1","article-title":"An image is worth 16x16 words: Transformers for image recognition at scale","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Dosovitskiy","year":"2021"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01553"},{"key":"ref3","article-title":"MobileViT: Light-weight, general-purpose, and mobile-friendly vision transformer","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Mehta","year":"2022"},{"key":"ref4","article-title":"Deep compression: Compressing deep neural networks with pruning, trained quantization and Huffman coding","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Han","year":"2016"},{"key":"ref5","article-title":"Distilling the knowledge in a neural network","volume-title":"Proc. Neural Inf. Process. Syst. Deep Learn. Workshop","author":"Hinton","year":"2015"},{"key":"ref6","first-page":"598","article-title":"Optimal brain damage","volume-title":"Proc. 2nd Int. Conf. Neural Inf. Process. Syst.","author":"LeCun","year":"1989"},{"key":"ref7","first-page":"1135","article-title":"Learning both weights and connections for efficient neural network","volume-title":"Proc. 28th Int. Conf. Neural Inf. Process. Syst.","author":"Han","year":"2015"},{"key":"ref8","article-title":"The lottery ticket hypothesis: Finding sparse, trainable neural networks","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Frankle","year":"2019"},{"key":"ref9","article-title":"Vision transformer pruning","volume-title":"Proc. Workshop Model Mining","author":"Zhu","year":"2021"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1162\/tacl_a_00436"},{"key":"ref11","first-page":"24101","article-title":"A fast post-training pruning framework for transformers","volume-title":"Proc. 36th Int. Conf. Neural Inf. Process. Syst.","author":"Kwon","year":"2022"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v36i3.20222"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1007\/s11432-022-3646-6"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01779"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2021.07.045"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.aacl-main.88"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.acl-long.107"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.5555\/3157096.3157329"},{"key":"ref19","article-title":"Pruning filters for efficient ConvNets","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Li","year":"2017"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.298"},{"key":"ref21","first-page":"14037","article-title":"Are sixteen heads really better than one?","volume-title":"Proc. 33rd Int. Conf. Neural Inf. Process. Syst.","author":"Michel","year":"2019"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.emnlp-main.829"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1145\/3446640"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/IJCNN54540.2023.10191770"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01544"},{"key":"ref26","article-title":"Layer-adaptive sparsity for the magnitude-based pruning","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Lee","year":"2021"},{"key":"ref27","article-title":"Neural pruning via growing regularization","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Wang","year":"2021"},{"key":"ref28","article-title":"The state of sparsity in deep neural networks","author":"Gale","year":"2019"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.01152"},{"key":"ref30","first-page":"21702","article-title":"LLM-Pruner: On the structural pruning of large language models","volume-title":"Proc. 37th Conf. Neural Inf. Process. Syst.","author":"Ma","year":"2023"},{"key":"ref31","article-title":"Zero-cost proxies for lightweight NAS","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Abdelfattah","year":"2021"},{"key":"ref32","article-title":"Neural architecture search with reinforcement learning","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Zoph","year":"2017"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1145\/3665138"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1145\/3447582"},{"key":"ref35","article-title":"DARTS: Differentiable architecture search","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Liu","year":"2019"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1016\/j.patrec.2023.10.019"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00907"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00492"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1145\/3520304.3528936"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1145\/3581783.3611762"},{"key":"ref41","article-title":"Multi-exit vision transformer for dynamic inference","volume-title":"Proc. 32nd Brit. Mach. Vis. Conf.","author":"Bakhtiarnia","year":"2021"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1109\/ICPR.2016.7900006"},{"key":"ref43","first-page":"2516","article-title":"Zero time waste: Recycling predictions in early exit neural networks","volume-title":"Proc. 35th Int. Conf. Neural Inf. Process. Syst.","author":"Woczyk","year":"2021"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"ref45","first-page":"3232","article-title":"Meta-album: Multi-domain meta-dataset for few-shot image classification","volume-title":"Proc. 36th Conf. Neural Inf. Process. Syst. Datasets Benchmarks Track","author":"Ullah","year":"2022"},{"key":"ref46","first-page":"6377","article-title":"Pruning neural networks without any data by iteratively conserving synaptic flow","volume":"33","author":"Tanaka","year":"2020","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"ref47","volume-title":"Model Selection and Multi-Model Inference","volume":"63","author":"Anderson","year":"2002"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1088\/1742-6596\/1168\/2\/022022"},{"issue":"32","key":"ref49","first-page":"15849","article-title":"Reconciling modern machine-learning practice and the classical biasvariance trade-off","volume-title":"Proc. Nat. Acad. Sci.","volume":"116","author":"Belkin","year":"2019"},{"key":"ref50","first-page":"10767","article-title":"Rethinking bias-variance trade-off for generalization of neural networks","volume-title":"Proc. 37th Int. Conf. Mach. Learn.","author":"Yang"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1103\/PhysRevResearch.4.013201"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1109\/MSP.2020.3003837"},{"key":"ref53","first-page":"36738","article-title":"An optimal structured zeroth-order algorithm for non-smooth optimization","volume-title":"Proc. 37th Int. Conf. Neural Inf. Process. Syst.","author":"Rando","year":"2024"},{"key":"ref54","first-page":"1356","article-title":"Stochastic zeroth-order optimization in high dimensions","volume-title":"Proc. Int. Conf. Artif. Intell. Statist.","author":"Wang","year":"2018"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.1109\/ICDAR.2003.1227749"},{"key":"ref56","article-title":"Decoupled weight decay regularization","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Loshchilov","year":"2019"},{"key":"ref57","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-015-0816-y"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.1145\/1296907.1296909"},{"key":"ref59","doi-asserted-by":"publisher","DOI":"10.4208\/cicp.110113.010813a"},{"key":"ref60","first-page":"26809","article-title":"PLATON: Pruning large transformer models with upper confidence bound of weight importance","volume-title":"Proc. 39th Int. Conf. Mach. Learn.","author":"Zhang","year":"2022"},{"issue":"1","key":"ref61","first-page":"72","article-title":"The proof and measurement of association between two things","volume-title":"Amer. J. Psychol.","volume":"15","author":"Spearman","year":"1904"}],"container-title":["IEEE Journal of Selected Topics in Signal Processing"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/4200690\/10852353\/10756630.pdf?arnumber=10756630","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,2,6]],"date-time":"2025-02-06T18:40:50Z","timestamp":1738867250000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10756630\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,9]]},"references-count":61,"journal-issue":{"issue":"6"},"URL":"https:\/\/doi.org\/10.1109\/jstsp.2024.3501685","relation":{},"ISSN":["1932-4553","1941-0484"],"issn-type":[{"value":"1932-4553","type":"print"},{"value":"1941-0484","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,9]]}}}