{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,28]],"date-time":"2026-02-28T17:57:44Z","timestamp":1772301464464,"version":"3.50.1"},"reference-count":33,"publisher":"IEEE","license":[{"start":{"date-parts":[[2023,7,9]],"date-time":"2023-07-09T00:00:00Z","timestamp":1688860800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2023,7,9]],"date-time":"2023-07-09T00:00:00Z","timestamp":1688860800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/100000028","name":"Semiconductor Research Corporation","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100000028","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023,7,9]]},"DOI":"10.1109\/dac56929.2023.10247716","type":"proceedings-article","created":{"date-parts":[[2023,9,15]],"date-time":"2023-09-15T17:31:31Z","timestamp":1694799091000},"page":"1-6","source":"Crossref","is-referenced-by-count":5,"title":["Dynamic Sparse Training via Balancing the Exploration-Exploitation Trade-off"],"prefix":"10.1109","author":[{"given":"Shaoyi","family":"Huang","sequence":"first","affiliation":[{"name":"University of Connecticut"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Bowen","family":"Lei","sequence":"additional","affiliation":[{"name":"Texas A&amp;M University"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Dongkuan","family":"Xu","sequence":"additional","affiliation":[{"name":"North Carolina State University"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Hongwu","family":"Peng","sequence":"additional","affiliation":[{"name":"University of Connecticut"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yue","family":"Sun","sequence":"additional","affiliation":[{"name":"Lehigh University"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Mimi","family":"Xie","sequence":"additional","affiliation":[{"name":"University of Texas at San Antonio"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Caiwen","family":"Ding","sequence":"additional","affiliation":[{"name":"University of Connecticut"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref13","article-title":"Parameter efficient training of deep convolutional neural networks by dynamic sparse reparameterization","author":"mostafa","year":"2019","journal-title":"ICML"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1038\/s41467-018-04316-3"},{"key":"ref15","article-title":"Sparse double descent: Where network pruning aggravates overfitting","author":"he","year":"2022","journal-title":"ICML"},{"key":"ref14","first-page":"2943","article-title":"Rigging the lottery: Making all tickets winners","author":"evci","year":"2020","journal-title":"ICML"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v29i1.9277"},{"key":"ref30","article-title":"Deep rewiring: Training very sparse deep networks","author":"bellec","year":"2018","journal-title":"ICLRE"},{"key":"ref11","article-title":"Picking winning tickets before training by preserving gradient flow","author":"wang","year":"2020","journal-title":"ICLRE"},{"key":"ref33","first-page":"9908","article-title":"Sparse training via boosting pruning plasticity with neuroregeneration","volume":"34","author":"liu","year":"2021"},{"key":"ref10","article-title":"Snip: Single-shot network pruning based on connection sensitivity","author":"lee","year":"2019","journal-title":"ICLRE"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1177\/2056305119883428"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.acl-long.16"},{"key":"ref1","first-page":"6989","article-title":"Do we actually need dense over-parameterization? in-time over-parameterization in sparse training","author":"liu","year":"2021","journal-title":"ICML"},{"key":"ref17","article-title":"Prune once for all: Sparse pre-trained language models","author":"zafrir","year":"2021"},{"key":"ref16","article-title":"Comparing rewinding and fine-tuning in neural network pruning","author":"renda","year":"2020"},{"key":"ref19","article-title":"A systematic dnn weight pruning framework using alternating direction method of multipliers","author":"zhang","year":"2018","journal-title":"ECCV"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1016\/j.eswa.2015.07.043"},{"key":"ref24","article-title":"Diverse neural network learns true target functions","author":"xie","year":"2017","journal-title":"Artificial Intelligence and Statistics"},{"key":"ref23","article-title":"Dsd: Dense-sparse-dense training for deep neural networks","author":"han","year":"2016"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1016\/j.cobeha.2020.10.001"},{"key":"ref25","doi-asserted-by":"crossref","DOI":"10.1145\/2480741.2480752","article-title":"Exploration and exploitation in evolutionary algorithms: A survey","author":"?repin\u0161ek","year":"2013","journal-title":"CSUR"},{"key":"ref20","article-title":"Sparse networks from scratch: Faster training without losing performance","author":"dettmers","year":"2019"},{"key":"ref22","article-title":"Effective model sparsification by scheduled grow-and-prune methods","author":"ma","year":"2021","journal-title":"ICLRE"},{"key":"ref21","article-title":"Mest: Accurate and fast memory-economic sparse training framework on the edge","volume":"34","author":"yuan","year":"2021","journal-title":"NeurIPS"},{"key":"ref28","article-title":"Soft threshold weight reparameterization for learnable sparsity","author":"kusupati","year":"2020","journal-title":"ICML"},{"key":"ref27","article-title":"Pruning neural networks without any data by iteratively conserving synaptic flow","author":"tanaka","year":"2020","journal-title":"NeurIPS"},{"key":"ref29","article-title":"Sparsifying networks via subdifferential inclusion","author":"verma","year":"2021","journal-title":"ICML"},{"key":"ref8","author":"brown","year":"2020","journal-title":"Language models are few-shot learners"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/ICCAD51958.2021.9643586"},{"key":"ref9","article-title":"Carbon emissions and large neural network training","author":"patterson","year":"2021"},{"key":"ref4","first-page":"1","article-title":"Et: re-thinking self-attention for transformer models on gpus","author":"chen","year":"2021","journal-title":"SC"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/ISQED51717.2021.9424344"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/ICCD56317.2022.00048"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1145\/3453688.3461739"}],"event":{"name":"2023 60th ACM\/IEEE Design Automation Conference (DAC)","location":"San Francisco, CA, USA","start":{"date-parts":[[2023,7,9]]},"end":{"date-parts":[[2023,7,13]]}},"container-title":["2023 60th ACM\/IEEE Design Automation Conference (DAC)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/10247654\/10247655\/10247716.pdf?arnumber=10247716","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,10,2]],"date-time":"2023-10-02T17:40:54Z","timestamp":1696268454000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10247716\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,7,9]]},"references-count":33,"URL":"https:\/\/doi.org\/10.1109\/dac56929.2023.10247716","relation":{},"subject":[],"published":{"date-parts":[[2023,7,9]]}}}