{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,9,16]],"date-time":"2025-09-16T17:32:56Z","timestamp":1758043976399,"version":"3.44.0"},"reference-count":52,"publisher":"Springer Science and Business Media LLC","issue":"5","license":[{"start":{"date-parts":[[2025,7,11]],"date-time":"2025-07-11T00:00:00Z","timestamp":1752192000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,7,11]],"date-time":"2025-07-11T00:00:00Z","timestamp":1752192000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Data Min Knowl Disc"],"published-print":{"date-parts":[[2025,9]]},"DOI":"10.1007\/s10618-025-01118-2","type":"journal-article","created":{"date-parts":[[2025,7,11]],"date-time":"2025-07-11T09:47:30Z","timestamp":1752227250000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Learning a consensus sub-network with polarization regularization and one pass training"],"prefix":"10.1007","volume":"39","author":[{"given":"Xiaoying","family":"Zhi","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Varun","family":"Babbar","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Rundong","family":"Liu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Pheobe","family":"Sun","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Fran","family":"Silavong","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ruibo","family":"Shi","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Sean","family":"Moran","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2025,7,11]]},"reference":[{"issue":"12","key":"1118_CR1","doi-asserted-by":"publisher","first-page":"54","DOI":"10.1145\/3381831","volume":"63","author":"R Schwartz","year":"2020","unstructured":"Schwartz R, Dodge J, Smith NA, Etzioni O (2020) Green ai. Commun ACM 63(12):54\u201363. https:\/\/doi.org\/10.1145\/3381831","journal-title":"Commun ACM"},{"key":"1118_CR2","doi-asserted-by":"publisher","unstructured":"Dosovitskiy A, Beyer L, Kolesnikov A, Weissenborn D, Zhai X, Unterthiner T, Dehghani M, Minderer M, Heigold G, Gelly S, Uszkoreit J, Houlsby N. (2020) An Image is worth 16x16 words: transformers for image recognition at scale. arXiv. https:\/\/doi.org\/10.48550\/ARXIV.2010.11929","DOI":"10.48550\/ARXIV.2010.11929"},{"issue":"7","key":"1118_CR3","doi-asserted-by":"publisher","first-page":"67","DOI":"10.1145\/3360307","volume":"63","author":"NP Jouppi","year":"2020","unstructured":"Jouppi NP, Yoon DH, Kurian G, Li S, Patil N, Laudon J, Young C, Patterson D (2020) A domain-specific supercomputer for training deep neural networks. Commun ACM 63(7):67\u201378. https:\/\/doi.org\/10.1145\/3360307","journal-title":"Commun ACM"},{"key":"1118_CR4","unstructured":"EIA US: 2020 Average monthly bill- residential. https:\/\/www.eia.gov\/electricity\/sales_revenue_price\/pdf\/table5_a.pdf. Accessed: 2022-07-19 (2021)"},{"key":"1118_CR5","doi-asserted-by":"publisher","unstructured":"Denil M, Shakibi B, Dinh L, Ranzato M, Freitas N (2013) Predicting parameters in deep learning. arXiv. https:\/\/doi.org\/10.48550\/ARXIV.1306.0543. arxiv:1306.0543","DOI":"10.48550\/ARXIV.1306.0543"},{"key":"1118_CR6","doi-asserted-by":"publisher","unstructured":"Shafiee M.S, Shafiee M.J, Wong A (2018) Dynamic representations toward efficient inference on deep neural networks by decision gates. arXiv. https:\/\/doi.org\/10.48550\/ARXIV.1811.01476. arxiv:1811.01476","DOI":"10.48550\/ARXIV.1811.01476"},{"issue":"10","key":"1118_CR7","doi-asserted-by":"publisher","first-page":"2525","DOI":"10.1109\/TPAMI.2018.2858232","volume":"41","author":"J Luo","year":"2019","unstructured":"Luo J, Zhang H, Zhou H, Xie C, Wu J, Lin W (2019) Thinet: pruning cnn filters for a thinner net. IEEE Trans Pattern Anal \u2018I &\u2019 Mach Intell 41(10):2525\u20132538. https:\/\/doi.org\/10.1109\/TPAMI.2018.2858232","journal-title":"IEEE Trans Pattern Anal \u2018I &\u2019 Mach Intell"},{"key":"1118_CR8","unstructured":"Cheong R (2019) transformers. zip: Compressing transformers with pruning and quantization"},{"key":"1118_CR9","doi-asserted-by":"publisher","unstructured":"Zhang M.S, Stadie B (2019) One-shot pruning of recurrent neural networks by jacobian spectrum evaluation. arXiv. https:\/\/doi.org\/10.48550\/ARXIV.1912.00120. arxiv:1912.00120","DOI":"10.48550\/ARXIV.1912.00120"},{"key":"1118_CR10","doi-asserted-by":"publisher","unstructured":"Lin Z, Liu J, Yang Z, Hua N, Roth D (2020) Pruning redundant mappings in transformer models via spectral-normalized identity prior. In: Findings of the association for computational linguistics: EMNLP 2020 pp. 719\u2013730. Association for Computational Linguistics Online. https:\/\/doi.org\/10.18653\/v1\/2020.findings-emnlp.64. https:\/\/aclanthology.org\/2020.findings-emnlp.64","DOI":"10.18653\/v1\/2020.findings-emnlp.64"},{"key":"1118_CR11","doi-asserted-by":"publisher","unstructured":"Han S, Pool J, Tran J, Dally W.J (2015) Learning both weights and connections for efficient neural networks. arXiv. https:\/\/doi.org\/10.48550\/ARXIV.1506.02626. arxiv:1506.02626","DOI":"10.48550\/ARXIV.1506.02626"},{"key":"1118_CR12","doi-asserted-by":"publisher","unstructured":"Zhu M, Tang Y, Han K (2021) Vision transformer pruning. arXiv. https:\/\/doi.org\/10.48550\/ARXIV.2104.08500. arxiv.:2104.08500","DOI":"10.48550\/ARXIV.2104.08500"},{"key":"1118_CR13","doi-asserted-by":"publisher","unstructured":"Hou Z, Qin M, Sun F, Ma X, Yuan K, Xu Y, Chen Y.-K, Jin R, Xie Y, Kung S.-Y (2022) CHEX: CHannel EXploration for CNN model compression. arXiv. https:\/\/doi.org\/10.48550\/ARXIV.2203.15794. arxiv.:2203.15794","DOI":"10.48550\/ARXIV.2203.15794"},{"key":"1118_CR14","doi-asserted-by":"publisher","unstructured":"Veit A, Belongie S (2017) Convolutional networks with adaptive inference graphs. arXiv. https:\/\/doi.org\/10.48550\/ARXIV.1711.11503. arxiv:1711.11503","DOI":"10.48550\/ARXIV.1711.11503"},{"key":"1118_CR15","doi-asserted-by":"publisher","unstructured":"Gao X, Zhao Y, Dudziak L, Mullins R, Xu C.-z. (2018) Dynamic channel pruning: feature boosting and suppression. arXiv. https:\/\/doi.org\/10.48550\/ARXIV.1810.05331. arxiv:1810.05331","DOI":"10.48550\/ARXIV.1810.05331"},{"key":"1118_CR16","doi-asserted-by":"publisher","unstructured":"Bejnordi B.E, Blankevoort T, Welling M. (2019) Batch-shaping for learning conditional channel gated networks. arXiv. https:\/\/doi.org\/10.48550\/ARXIV.1907.06627. arxiv:1907.06627","DOI":"10.48550\/ARXIV.1907.06627"},{"key":"1118_CR17","doi-asserted-by":"publisher","unstructured":"Yin H, Vahdat A, Alvarez J, Mallya A, Kautz J, Molchanov P (2021) A-ViT: Adaptive tokens for efficient vision transformer. arXiv. https:\/\/doi.org\/10.48550\/ARXIV.2112.07658. arxiv:2112.07658","DOI":"10.48550\/ARXIV.2112.07658"},{"key":"1118_CR18","doi-asserted-by":"publisher","unstructured":"Lee N, Ajanthan T, Torr P.H.S (2018) SNIP: Single-shot network pruning based on connection sensitivity. arXiv. https:\/\/doi.org\/10.48550\/ARXIV.1810.02340. arxiv:1810.02340","DOI":"10.48550\/ARXIV.1810.02340"},{"key":"1118_CR19","doi-asserted-by":"publisher","unstructured":"He K, Zhang X, Ren S, Sun J (2015) Deep residual learning for image recognition. arXiv. https:\/\/doi.org\/10.48550\/ARXIV.1512.03385. arxiv.:1512.03385","DOI":"10.48550\/ARXIV.1512.03385"},{"key":"1118_CR20","unstructured":"Krizhevsky A (2009) Learning multiple layers of features from tiny images. https:\/\/www.cs.toronto.edu\/texttildelowkriz\/learning-features-2009-TR.pdf"},{"key":"1118_CR21","unstructured":"Le Y, Yang X.S (2015) Tiny imagenet visual recognition challenge. https:\/\/api.semanticscholar.org\/CorpusID:16664790"},{"key":"1118_CR22","doi-asserted-by":"publisher","unstructured":"Patterson D, Gonzalez J, Le Q, Liang C, Munguia L.-M, Rothchild D, So D, Texier M, Dean J (2021) Carbon emissions and large neural network training. arXiv. https:\/\/doi.org\/10.48550\/ARXIV.2104.10350. arxiv.:2104.10350","DOI":"10.48550\/ARXIV.2104.10350"},{"key":"1118_CR23","doi-asserted-by":"publisher","unstructured":"Dodge J, Prewitt T, Combes R.T.D, Odmark E, Schwartz R, Strubell E, Luccioni A.S, Smith N.A, DeCario N, Buchanan W (2022) Measuring the carbon intensity of ai in cloud instances. arXiv. https:\/\/doi.org\/10.48550\/ARXIV.2206.05229. arxiv:2206.05229","DOI":"10.48550\/ARXIV.2206.05229"},{"key":"1118_CR24","doi-asserted-by":"publisher","unstructured":"Gholami A, Kim S, Dong Z, Yao Z, Mahoney M.W, Keutzer K (2021) A survey of quantization methods for efficient neural network inference. arXiv. https:\/\/doi.org\/10.48550\/ARXIV.2103.13630. arxiv:2103.13630","DOI":"10.48550\/ARXIV.2103.13630"},{"key":"1118_CR25","doi-asserted-by":"publisher","unstructured":"Hinton G, Vinyals O, Dean J (2015) Distilling the knowledge in a neural network. arXiv. https:\/\/doi.org\/10.48550\/ARXIV.1503.02531. arxiv:1503.02531","DOI":"10.48550\/ARXIV.1503.02531"},{"key":"1118_CR26","doi-asserted-by":"publisher","unstructured":"Wang W, Chen M, Zhao S, Chen L, Hu J, Liu H, Cai D, He X, Liu W (2020)Accelerate CNNs from three dimensions: a comprehensive pruning framework. arXiv. https:\/\/doi.org\/10.48550\/ARXIV.2010.04879. arxiv:2010.04879","DOI":"10.48550\/ARXIV.2010.04879"},{"key":"1118_CR27","doi-asserted-by":"publisher","unstructured":"Li H, Kadav A, Durdanovic I, Samet H, Graf H.P (2016) Pruning filters for efficient convnets. arXiv. https:\/\/doi.org\/10.48550\/ARXIV.1608.08710. arxiv:1608.08710","DOI":"10.48550\/ARXIV.1608.08710"},{"key":"1118_CR28","doi-asserted-by":"publisher","unstructured":"He Y, Zhang X, Sun J (2017) Channel pruning for accelerating very deep neural networks. arXiv. https:\/\/doi.org\/10.48550\/ARXIV.1707.06168. arxiv:1707.06168","DOI":"10.48550\/ARXIV.1707.06168"},{"key":"1118_CR29","doi-asserted-by":"publisher","unstructured":"Cai H, Gan C, Wang T, Zhang Z, Han S. (2019) Once-for-all: train one network and specialize it for efficient deployment. arXiv. https:\/\/doi.org\/10.48550\/ARXIV.1908.09791. arxiv:1908.09791","DOI":"10.48550\/ARXIV.1908.09791"},{"key":"1118_CR30","doi-asserted-by":"crossref","unstructured":"He Y, Lin J, Liu Z, Wang H, Li L.-J, Han S. (2018) AMC: AutoML for model compression and acceleration on mobile devices. In: Computer Vision\u2013 ECCV 2018 pp. 815\u2013832. Springer, Berlin Heidelberg doi 10.1007\/978-3-030-01234-2_48","DOI":"10.1007\/978-3-030-01234-2_48"},{"key":"1118_CR31","doi-asserted-by":"crossref","unstructured":"He Y, Kang G, Dong X, Fu Y, Yang Y. (2018) Soft filter pruning for accelerating deep convolutional neural networks. In: international joint conference on artificial intelligence (IJCAI) pp. 2234\u20132240","DOI":"10.24963\/ijcai.2018\/309"},{"key":"1118_CR32","unstructured":"Li H, Kadav A, Durdanovic I, Samet H, Graf H.P. (2016) Pruning filters for efficient convnets. ArXiv abs\/1608.08710"},{"key":"1118_CR33","volume-title":"Guyon I","author":"J Lin","year":"2017","unstructured":"Lin J, Rao Y, Lu J, Zhou J (2017) Runtime neural pruning. In: Luxburg UV, Bengio S, Wallach H, Fergus R, Vishwanathan S, Garnett R (eds) Guyon I. Advances in Neural Information Processing Systems. Curran Associates, Inc., Red Hook"},{"key":"1118_CR34","doi-asserted-by":"publisher","unstructured":"He Y, Zhang X, Sun J (2017) Channel pruning for accelerating very deep neural networks. In: 2017 IEEE international conference on computer vision (ICCV) pp. 1398\u20131406.https:\/\/doi.org\/10.1109\/ICCV.2017.155","DOI":"10.1109\/ICCV.2017.155"},{"key":"1118_CR35","doi-asserted-by":"publisher","unstructured":"Lee Y (2019) Differentiable sparsification for deep neural networks. arXiv. https:\/\/doi.org\/10.48550\/ARXIV.1910.03201. arxiv:1910.03201","DOI":"10.48550\/ARXIV.1910.03201"},{"key":"1118_CR36","doi-asserted-by":"publisher","unstructured":"Wortsman M, Farhadi A, Rastegari M (2019) Discovering neural wirings. arXiv. https:\/\/doi.org\/10.48550\/ARXIV.1906.00586. arxiv:1906.00586","DOI":"10.48550\/ARXIV.1906.00586"},{"key":"1118_CR37","doi-asserted-by":"publisher","unstructured":"Ramanujan V, Wortsman M, Kembhavi A, Farhadi A, Rastegari M (2019) What\u2019s hidden in a randomly weighted neural network? arXiv. https:\/\/doi.org\/10.48550\/ARXIV.1911.13299. arxiv:1911.13299","DOI":"10.48550\/ARXIV.1911.13299"},{"key":"1118_CR38","unstructured":"Frankle J, Carbin M (2019) The lottery ticket hypothesis: Finding sparse, trainable neural networks. In: 7th international conference on learning representations, ICLR 2019, New Orleans, LA, USA, May 6-9, 2019. OpenReview.net???. https:\/\/openreview.net\/forum?id=rJl-b3RcF7"},{"key":"1118_CR39","doi-asserted-by":"publisher","unstructured":"Jang E, Gu S, Poole B (2016) Categorical reparameterization with gumbel-softmax. arXiv. https:\/\/doi.org\/10.48550\/ARXIV.1611.01144. arxiv:1611.01144","DOI":"10.48550\/ARXIV.1611.01144"},{"key":"1118_CR40","doi-asserted-by":"publisher","unstructured":"Kusner M.J, Hern\u00e1ndez-Lobato J.M (2016) GANS for Sequences of Discrete Elements with the Gumbel-softmax Distribution. arXiv. https:\/\/doi.org\/10.48550\/ARXIV.1611.04051. arxiv:1611.04051","DOI":"10.48550\/ARXIV.1611.04051"},{"key":"1118_CR41","first-page":"21031","volume-title":"Advances in Neural Information Processing Systems","author":"J Shen","year":"2021","unstructured":"Shen J, Zhen X, Worring M, Shao L (2021) Variational multi-task learning with gumbel-softmax priors. In: Ranzato M, Beygelzimer A, Dauphin Y, Liang PS, Vaughan JW (eds) Advances in Neural Information Processing Systems, vol 34. Curran Associates, Inc., Red Hook, pp 21031\u201321042"},{"key":"1118_CR42","doi-asserted-by":"publisher","unstructured":"Chang J, Zhang X, Guo Y, Meng G, Xiang S, Pan C (2019) Differentiable architecture search with ensemble gumbel-softmax. arXiv. https:\/\/doi.org\/10.48550\/ARXIV.1905.01786. arxiv:1905.01786","DOI":"10.48550\/ARXIV.1905.01786"},{"key":"1118_CR43","doi-asserted-by":"publisher","unstructured":"Bengio Y, L\u00e9onard N, Courville A (2013) Estimating or propagating gradients through stochastic neurons for conditional computation. arXiv. https:\/\/doi.org\/10.48550\/ARXIV.1308.3432. arxiv:1308.3432","DOI":"10.48550\/ARXIV.1308.3432"},{"key":"1118_CR44","doi-asserted-by":"publisher","unstructured":"Lin S, Ji R, Li Y, Deng C, Li X (2019) Towards compact convNets via structure-sparsity regularized filter pruning. arXiv. https:\/\/doi.org\/10.48550\/ARXIV.1901.07827. arxiv:1901.07827","DOI":"10.48550\/ARXIV.1901.07827"},{"key":"1118_CR45","doi-asserted-by":"publisher","unstructured":"Li Y, Gu S, Mayer C, Van\u00a0Gool L, Timofte R (2020) Group sparsity: the hinge between filter pruning and decomposition for network compression. arXiv. https:\/\/doi.org\/10.48550\/ARXIV.2003.08935. arxiv:2003.08935","DOI":"10.48550\/ARXIV.2003.08935"},{"key":"1118_CR46","doi-asserted-by":"publisher","unstructured":"Srinivas S, Babu R.V (2015) Learning neural network architectures using backpropagation. arXiv. https:\/\/doi.org\/10.48550\/ARXIV.1511.05497. arxiv:1511.05497","DOI":"10.48550\/ARXIV.1511.05497"},{"key":"1118_CR47","first-page":"9865","volume-title":"Advances in Neural Information Processing Systems","author":"T Zhuang","year":"2020","unstructured":"Zhuang T, Zhang Z, Huang Y, Zeng X, Shuang K, Li X (2020) Neuron-level structured pruning using polarization regularizer. In: Larochelle H, Ranzato M, Hadsell R, Balcan MF, Lin H (eds) Advances in Neural Information Processing Systems, vol 33. Curran Associates, Inc., Red Hook, pp 9865\u20139877"},{"key":"1118_CR48","doi-asserted-by":"publisher","unstructured":"Yin P, Lyu J, Zhang S, Osher S, Qi Y, Xin J (2019) Understanding straight-through estimator in training activation quantized neural nets. arXiv. https:\/\/doi.org\/10.48550\/ARXIV.1903.05662. arxiv:1903.05662","DOI":"10.48550\/ARXIV.1903.05662"},{"key":"1118_CR49","unstructured":"Kim J.-H, Choo W, Song H.O. (2020) Puzzle mix: Exploiting saliency and local statistics for optimal mixup. In: international conference on machine learning (ICML)"},{"key":"1118_CR50","unstructured":"Dekhovich A, Tax D.M.J, Sluiter M.H.F, Bessa M.A (2021) Neural network relief: a pruning algorithm based on neural activity. CoRR abs\/2109.10795 arXiv:2109.10795"},{"key":"1118_CR51","doi-asserted-by":"publisher","unstructured":"Zhao C, Ni B, Zhang J, Zhao Q, Zhang W, Tian Q (2019) Variational convolutional neural network pruning. In: 2019 IEEE\/CVF conference on computer vision and pattern recognition (CVPR) pp. 2775\u20132784. https:\/\/doi.org\/10.1109\/CVPR.2019.00289","DOI":"10.1109\/CVPR.2019.00289"},{"issue":"2","key":"1118_CR52","doi-asserted-by":"publisher","first-page":"276","DOI":"10.1109\/TEVC.2017.2712906","volume":"22","author":"A Sinha","year":"2018","unstructured":"Sinha A, Malo P, Deb K (2018) A review on bilevel optimization: from classical to evolutionary approaches and applications. IEEE Trans Evolut Comput 22(2):276\u2013295. https:\/\/doi.org\/10.1109\/TEVC.2017.2712906","journal-title":"IEEE Trans Evolut Comput"}],"container-title":["Data Mining and Knowledge Discovery"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10618-025-01118-2.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10618-025-01118-2\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10618-025-01118-2.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,12]],"date-time":"2025-09-12T10:29:58Z","timestamp":1757672998000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10618-025-01118-2"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,7,11]]},"references-count":52,"journal-issue":{"issue":"5","published-print":{"date-parts":[[2025,9]]}},"alternative-id":["1118"],"URL":"https:\/\/doi.org\/10.1007\/s10618-025-01118-2","relation":{},"ISSN":["1384-5810","1573-756X"],"issn-type":[{"type":"print","value":"1384-5810"},{"type":"electronic","value":"1573-756X"}],"subject":[],"published":{"date-parts":[[2025,7,11]]},"assertion":[{"value":"29 October 2023","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"8 June 2025","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"11 July 2025","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}],"article-number":"50"}}