{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,12]],"date-time":"2026-05-12T02:09:24Z","timestamp":1778551764732,"version":"3.51.4"},"reference-count":39,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2020,7,4]],"date-time":"2020-07-04T00:00:00Z","timestamp":1593820800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2020,7,4]],"date-time":"2020-07-04T00:00:00Z","timestamp":1593820800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"funder":[{"name":"Verne M. William Professorship Fund from Penn State University"},{"DOI":"10.13039\/501100002920","name":"Research Grants Council, University Grants Committee","doi-asserted-by":"publisher","award":["HKUST16301218"],"award-info":[{"award-number":["HKUST16301218"]}],"id":[{"id":"10.13039\/501100002920","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100004543","name":"China Scholarship Council","doi-asserted-by":"crossref","id":[{"id":"10.13039\/501100004543","id-type":"DOI","asserted-by":"crossref"}]},{"name":"Penn State and Peking University Joint Center for Computational Mathematics and Applications"},{"name":"Penn State and Peking University Joint Center for Computational Mathematics and Applications"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Comput Optim Appl"],"published-print":{"date-parts":[[2020,9]]},"DOI":"10.1007\/s10589-020-00202-1","type":"journal-article","created":{"date-parts":[[2020,7,4]],"date-time":"2020-07-04T05:04:01Z","timestamp":1593839041000},"page":"163-182","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":4,"title":["Make $$\\ell _1$$ regularization effective in training sparse CNN"],"prefix":"10.1007","volume":"77","author":[{"given":"Juncai","family":"He","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xiaodong","family":"Jia","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jinchao","family":"Xu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5659-0167","authenticated-orcid":false,"given":"Lian","family":"Zhang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Liang","family":"Zhao","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2020,7,4]]},"reference":[{"key":"202_CR1","unstructured":"Alvarez, J.M., Salzmann, M.: Learning the number of neurons in deep networks. In: Advances in Neural Information Processing Systems, pp. 2270\u20132278 (2016)"},{"key":"202_CR2","doi-asserted-by":"publisher","first-page":"163","DOI":"10.1007\/s10107-011-0472-0","volume":"129","author":"DP Bertsekas","year":"2011","unstructured":"Bertsekas, D.P.: Incremental proximal methods for large scale convex optimization. Math. Program. 129, 163 (2011)","journal-title":"Math. Program."},{"issue":"2","key":"202_CR3","doi-asserted-by":"publisher","first-page":"489","DOI":"10.1109\/TIT.2005.862083","volume":"52","author":"EJ Cand\u00e8s","year":"2006","unstructured":"Cand\u00e8s, E.J., Romberg, J., Tao, T.: Robust uncertainty principles: exact signal reconstruction from highly incomplete frequency information. IEEE Trans. Inf. Theory 52(2), 489\u2013509 (2006)","journal-title":"IEEE Trans. Inf. Theory"},{"key":"202_CR4","unstructured":"Cheng, Y., Wang, D., Zhou, P., Zhang, T.: A survey of model compression and acceleration for deep neural networks (2017). arXiv preprint arXiv:1710.09282"},{"issue":"4","key":"202_CR5","doi-asserted-by":"publisher","first-page":"1289","DOI":"10.1109\/TIT.2006.871582","volume":"52","author":"DL Donoho","year":"2006","unstructured":"Donoho, D.L.: Compressed sensing. IEEE Trans. Inf. Theory 52(4), 1289\u20131306 (2006)","journal-title":"IEEE Trans. Inf. Theory"},{"issue":"Dec","key":"202_CR6","first-page":"2899","volume":"10","author":"J Duchi","year":"2009","unstructured":"Duchi, J., Singer, Y.: Efficient online and batch learning using forward backward splitting. J. Mach. Learn. Res. 10(Dec), 2899\u20132934 (2009)","journal-title":"J. Mach. Learn. Res."},{"key":"202_CR7","doi-asserted-by":"publisher","DOI":"10.1017\/CBO9780511794308","volume-title":"Compressed Sensing: Theory and Applications","author":"YC Eldar","year":"2012","unstructured":"Eldar, Y.C., Kutyniok, G.: Compressed Sensing: Theory and Applications. Cambridge University Press, Cambridge (2012)"},{"key":"202_CR8","unstructured":"Glorot, X., Bengio, Y.: Understanding the difficulty of training deep feedforward neural networks. In: Proceedings of the Thirteenth International Conference on Artificial Intelligence and Statistics, pp. 249\u2013256 (2010)"},{"key":"202_CR9","unstructured":"Han, S., Mao, H., Dally, W.J.: Deep compression: compressing deep neural networks with pruning, trained quantization and Huffman coding (2015). arXiv preprint arXiv:1510.00149"},{"key":"202_CR10","unstructured":"Han, S., Pool, J., Tran, J., Dally, W.: Learning both weights and connections for efficient neural network. In: Advances in Neural Information Processing Systems, pp. 1135\u20131143 (2015)"},{"key":"202_CR11","unstructured":"Hassibi, B., Stork, D.G.: Second order derivatives for network pruning: optimal brain surgeon. In: Advances in Neural Information Processing Systems, pp. 164\u2013171 (1993)"},{"key":"202_CR12","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Delving deep into rectifiers: surpassing human-level performance on imagenet classification. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 1026\u20131034 (2015)","DOI":"10.1109\/ICCV.2015.123"},{"key":"202_CR13","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. In: IEEE Conference on Computer Vision and Pattern Recognition, pp. 770\u2013778 (2016)","DOI":"10.1109\/CVPR.2016.90"},{"key":"202_CR14","doi-asserted-by":"crossref","unstructured":"He, Y., Zhang, X., Sun, J.: Channel pruning for accelerating very deep neural networks. In: International Conference on Computer Vision (ICCV), vol. 2 (2017)","DOI":"10.1109\/ICCV.2017.155"},{"key":"202_CR15","unstructured":"Hu, H., Peng, R., Tai, Y.-W., Tang, C.-K.: Network trimming: a data-driven neuron pruning approach towards efficient deep architectures (2016). arXiv preprint arXiv:1607.03250"},{"key":"202_CR16","unstructured":"Huang, Z., Wang, N.: Data-driven sparse structure selection for deep neural networks (2017). arXiv preprint arXiv:1707.01213"},{"issue":"2","key":"202_CR17","first-page":"777","volume":"10","author":"J Langford","year":"2009","unstructured":"Langford, J., Li, L., Zhang, T.: Sparse online learning via truncated gradient. J. Mach. Learn. Res. 10(2), 777\u2013801 (2009)","journal-title":"J. Mach. Learn. Res."},{"issue":"7553","key":"202_CR18","doi-asserted-by":"publisher","first-page":"436","DOI":"10.1038\/nature14539","volume":"521","author":"Y Lecun","year":"2015","unstructured":"Lecun, Y., Bengio, Y., Hinton, G.: Deep learning. Nature 521(7553), 436 (2015)","journal-title":"Nature"},{"key":"202_CR19","unstructured":"LeCun, Y., Denker, J.S., Solla, S.A.: Optimal brain damage. In: Advances in Neural Information Processing Systems, pp. 598\u2013605 (1990)"},{"key":"202_CR20","doi-asserted-by":"publisher","first-page":"9","DOI":"10.1007\/978-3-642-35289-8_3","volume-title":"Neural Networks: Tricks of the Trade","author":"YA LeCun","year":"2012","unstructured":"LeCun, Y.A., Bottou, L., Orr, G.B., M\u00fcller, K.-R.: Efficient backprop. In: Montavon, G., Orr, G., M\u00fcller, K.R. (eds.) Neural Networks: Tricks of the Trade, pp. 9\u201348. Springer, Berlin (2012)"},{"key":"202_CR21","unstructured":"Li, H., Kadav, A., Durdanovic, I., Samet, H., Graf, H.P.: Pruning filters for efficient convnets (2016). arXiv preprint arXiv:1608.08710"},{"issue":"6","key":"202_CR22","doi-asserted-by":"publisher","first-page":"964","DOI":"10.1137\/0716071","volume":"16","author":"PL Lions","year":"1979","unstructured":"Lions, P.L., Mercier, B.: Splitting algorithms for the sum of two nonlinear operators. SIAM J. Numer. Anal. 16(6), 964\u2013979 (1979)","journal-title":"SIAM J. Numer. Anal."},{"key":"202_CR23","doi-asserted-by":"crossref","unstructured":"Liu, Z., Li, J., Shen, Z., Huang, G., Yan, S., Zhang, C.: Learning efficient convolutional networks through network slimming. In: 2017 IEEE International Conference on Computer Vision (ICCV), pp. 2755\u20132763. IEEE (2017)","DOI":"10.1109\/ICCV.2017.298"},{"key":"202_CR24","unstructured":"Liu, Z., Sun, M., Zhou, T., Huang, G., Darrell, T.: Rethinking the value of network pruning (2018). arXiv preprint arXiv:1810.05270"},{"key":"202_CR25","doi-asserted-by":"crossref","unstructured":"Luo, J.-H., Wu, J., Lin, W.: Thinet: a filter level pruning method for deep neural network compression (2017). arXiv preprint arXiv:1707.06342","DOI":"10.1109\/ICCV.2017.541"},{"issue":"6","key":"202_CR26","doi-asserted-by":"publisher","first-page":"1182","DOI":"10.1002\/mrm.21391","volume":"58","author":"M Lustig","year":"2007","unstructured":"Lustig, M., Donoho, D., Pauly, J.M.: Sparse MRI: the application of compressed sensing for rapid MR imaging. Magn. Reson. Med. Off. J. Int. Soc. Magn. Reson. Med. 58(6), 1182\u20131195 (2007)","journal-title":"Magn. Reson. Med. Off. J. Int. Soc. Magn. Reson. Med."},{"key":"202_CR27","unstructured":"McMahan, B.: Follow-the-regularized-leader and mirror descent: equivalence theorems and l1 regularization. In: Proceedings of the Fourteenth International Conference on Artificial Intelligence and Statistics, pp. 525\u2013533 (2011)"},{"issue":"1","key":"202_CR28","first-page":"3117","volume":"18","author":"HB McMahan","year":"2017","unstructured":"McMahan, H.B.: A survey of algorithms and analysis for adaptive online learning. J. Mach. Learn. Res. 18(1), 3117\u20133166 (2017)","journal-title":"J. Mach. Learn. Res."},{"issue":"1","key":"202_CR29","doi-asserted-by":"publisher","first-page":"9","DOI":"10.1007\/BF00935173","volume":"33","author":"H Mine","year":"1981","unstructured":"Mine, H., Fukushima, M.: A minimization method for the sum of a convex function and a continuously differentiable function. J. Optim. Theory Appl. 33(1), 9\u201323 (1981)","journal-title":"J. Optim. Theory Appl."},{"key":"202_CR30","doi-asserted-by":"crossref","unstructured":"Mittal, D., Bhardwaj, S., Khapra, M.M., Ravindran, B.: Recovering from random pruning: on the plasticity of deep convolutional neural networks (2018)","DOI":"10.1109\/WACV.2018.00098"},{"key":"202_CR31","volume-title":"Problem Complexity and Method Efficiency in Optimization","author":"AS Nemirovsky","year":"1983","unstructured":"Nemirovsky, A.S., Yudin, D.B.: Problem Complexity and Method Efficiency in Optimization. Wiley, New York (1983)"},{"issue":"1","key":"202_CR32","doi-asserted-by":"publisher","first-page":"221","DOI":"10.1007\/s10107-007-0149-x","volume":"120","author":"Y Nesterov","year":"2009","unstructured":"Nesterov, Y.: Primal-dual subgradient methods for convex problems. Math. Program. 120(1), 221\u2013259 (2009)","journal-title":"Math. Program."},{"key":"202_CR33","unstructured":"Pascanu, R., Mikolov, T., Bengio, Y.: Understanding the exploding gradient problem (2012). CoRR arXiv:abs\/1211.5063"},{"key":"202_CR34","unstructured":"Pratt, L.Y.: Comparing biases for minimal network construction with back-propagation. In: International Conference on Neural Information Processing Systems, pp. 177\u2013185 (1988)"},{"key":"202_CR35","unstructured":"Simonyan, K., Zisserman, A.: Very deep convolutional networks for large-scale image recognition. In: International Conference on Learning Representations (2015)"},{"key":"202_CR36","unstructured":"Wen, W., Wu, C., Wang, Y., Chen, Y., Li, H.: Learning structured sparsity in deep neural networks. In: Advances in Neural Information Processing Systems, pp. 2074\u20132082 (2016)"},{"key":"202_CR37","unstructured":"Xiao, L.: Dual averaging method for regularized stochastic learning and online optimization. In: Advances in Neural Information Processing Systems, pp. 2116\u20132124 (2009)"},{"issue":"Oct","key":"202_CR38","first-page":"2543","volume":"11","author":"L Xiao","year":"2010","unstructured":"Xiao, L.: Dual averaging methods for regularized stochastic learning and online optimization. J. Mach. Learn. Res. 11(Oct), 2543\u20132596 (2010)","journal-title":"J. Mach. Learn. Res."},{"key":"202_CR39","unstructured":"Zhu, M., Gupta, S.: To prune, or not to prune: exploring the efficacy of pruning for model compression (2017). arXiv preprint arXiv:1710.01878"}],"container-title":["Computational Optimization and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10589-020-00202-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10589-020-00202-1\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10589-020-00202-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2021,7,3]],"date-time":"2021-07-03T23:13:38Z","timestamp":1625354018000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10589-020-00202-1"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020,7,4]]},"references-count":39,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2020,9]]}},"alternative-id":["202"],"URL":"https:\/\/doi.org\/10.1007\/s10589-020-00202-1","relation":{},"ISSN":["0926-6003","1573-2894"],"issn-type":[{"value":"0926-6003","type":"print"},{"value":"1573-2894","type":"electronic"}],"subject":[],"published":{"date-parts":[[2020,7,4]]},"assertion":[{"value":"24 August 2019","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"4 July 2020","order":2,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}