{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,20]],"date-time":"2026-01-20T08:59:37Z","timestamp":1768899577091,"version":"3.49.0"},"reference-count":38,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2013,5,8]],"date-time":"2013-05-08T00:00:00Z","timestamp":1367971200000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["Mach Learn"],"published-print":{"date-parts":[[2013,10]]},"DOI":"10.1007\/s10994-013-5367-2","type":"journal-article","created":{"date-parts":[[2013,5,7]],"date-time":"2013-05-07T19:18:23Z","timestamp":1367954303000},"page":"31-52","source":"Crossref","is-referenced-by-count":50,"title":["Block coordinate descent algorithms for large-scale sparse multiclass classification"],"prefix":"10.1007","volume":"93","author":[{"given":"Mathieu","family":"Blondel","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Kazuhiro","family":"Seki","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Kuniaki","family":"Uehara","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2013,5,8]]},"reference":[{"issue":"1","key":"5367_CR1","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1561\/2200000015","volume":"4","author":"F. R. Bach","year":"2012","unstructured":"Bach, F. R., Jenatton, R., Mairal, J., & Obozinski, G. (2012). Optimization with sparsity-inducing penalties. Foundations and Trends in Machine Learning, 4(1), 1\u2013106.","journal-title":"Foundations and Trends in Machine Learning"},{"key":"5367_CR2","unstructured":"Bakin, S. (1999). Adaptative regression and model selection in data mining problems. Ph.D. thesis, Australian National University."},{"key":"5367_CR3","doi-asserted-by":"crossref","first-page":"183","DOI":"10.1137\/080716542","volume":"2","author":"A. Beck","year":"2009","unstructured":"Beck, A., & Teboulle, M. (2009). A fast iterative shrinkage-thresholding algorithm for linear inverse problems. SIAM Journal on Imaging Sciences, 2, 183\u2013202.","journal-title":"SIAM Journal on Imaging Sciences"},{"key":"5367_CR4","volume-title":"Nonlinear programming","author":"D. P. Bertsekas","year":"1999","unstructured":"Bertsekas, D. P. (1999). Nonlinear programming. Belmont: Athena Scientific."},{"key":"5367_CR5","first-page":"144","volume-title":"Proceedings of conference on learning theory (COLT)","author":"B. E. Boser","year":"1992","unstructured":"Boser, B. E., Guyon, I. M., & Vapnik, V. N. (1992). A training algorithm for optimal margin classifiers. In Proceedings of conference on learning theory (COLT) (pp.\u00a0144\u2013152)."},{"key":"5367_CR6","first-page":"1369","volume":"9","author":"K. W. Chang","year":"2008","unstructured":"Chang, K. W., Hsieh, C. J., & Lin, C. J. (2008). Coordinate descent method for large-scale l2-loss linear support vector machines. Journal of Machine Learning Research, 9, 1369\u20131398.","journal-title":"Journal of Machine Learning Research"},{"key":"5367_CR7","doi-asserted-by":"crossref","first-page":"1168","DOI":"10.1137\/050626090","volume":"4","author":"P. Combettes","year":"2005","unstructured":"Combettes, P., & Wajs, V. (2005). Signal recovery by proximal forward-backward splitting. Multiscale Modeling & Simulation, 4, 1168\u20131200.","journal-title":"Multiscale Modeling & Simulation"},{"key":"5367_CR8","first-page":"265","volume":"2","author":"K. Crammer","year":"2002","unstructured":"Crammer, K., & Singer, Y. (2002). On the algorithmic implementation of multiclass kernel-based vector machines. Journal of Machine Learning Research, 2, 265\u2013292.","journal-title":"Journal of Machine Learning Research"},{"key":"5367_CR9","doi-asserted-by":"crossref","first-page":"264","DOI":"10.1145\/1390156.1390190","volume-title":"Proceedings of international conference on machine learning (ICML)","author":"M. Dredze","year":"2008","unstructured":"Dredze, M., Crammer, K., & Pereira, F. (2008). Confidence-weighted linear classification. In Proceedings of international conference on machine learning (ICML) (pp.\u00a0264\u2013271)."},{"key":"5367_CR10","first-page":"297","volume-title":"Proceedings of international conference on machine learning (ICML)","author":"J. Duchi","year":"2009","unstructured":"Duchi, J., & Singer, Y. (2009a). Boosting with structural sparsity. In Proceedings of international conference on machine learning (ICML) (pp.\u00a0297\u2013304)."},{"key":"5367_CR11","first-page":"2899","volume":"10","author":"J. Duchi","year":"2009","unstructured":"Duchi, J., & Singer, Y. (2009b). Efficient online and batch learning using forward backward splitting. Journal of Machine Learning Research, 10, 2899\u20132934.","journal-title":"Journal of Machine Learning Research"},{"key":"5367_CR12","first-page":"681","volume-title":"Proceedings of neural information processing systems (NIPS)","author":"A. Elisseeff","year":"2001","unstructured":"Elisseeff, A., & Weston, J. (2001). A kernel method for multi-labelled classification. In Proceedings of neural information processing systems (NIPS) (pp.\u00a0681\u2013687)."},{"key":"5367_CR13","unstructured":"Fan, R. E., & Lin, C. J. (2007). A study on threshold selection for multi-label classification. Tech. rep., National Taiwan University."},{"key":"5367_CR14","doi-asserted-by":"crossref","first-page":"302","DOI":"10.1214\/07-AOAS131","volume":"1","author":"J. Friedman","year":"2007","unstructured":"Friedman, J., Hastie, T., H\u00f6fling, H., & Tibshirani, R. (2007). Pathwise coordinate optimization. The Annals of Applied Statistics, 1, 302\u2013332.","journal-title":"The Annals of Applied Statistics"},{"key":"5367_CR15","unstructured":"Friedman, J., Hastie, T., & Tibshirani, R. (2010a). A note on the group lasso and a sparse group lasso. Tech. Rep. arXiv:1001.0736 ."},{"key":"5367_CR16","doi-asserted-by":"crossref","first-page":"1","DOI":"10.18637\/jss.v033.i01","volume":"33","author":"J. H. Friedman","year":"2010","unstructured":"Friedman, J. H., Hastie, T., & Tibshirani, R. (2010b). Regularization paths for generalized linear models via coordinate descent. Journal of Statistical Software, 33, 1\u201322.","journal-title":"Journal of Statistical Software"},{"key":"5367_CR17","doi-asserted-by":"crossref","first-page":"397","DOI":"10.1080\/10618600.1998.10474784","volume":"7","author":"W. J. Fu","year":"1998","unstructured":"Fu, W. J. (1998). Penalized regressions: the bridge versus the lasso. Journal of Computational and Graphical Statistics, 7, 397\u2013416.","journal-title":"Journal of Computational and Graphical Statistics"},{"key":"5367_CR18","doi-asserted-by":"crossref","first-page":"67","DOI":"10.1198\/016214504000000098","volume":"99","author":"Y. Lee","year":"2004","unstructured":"Lee, Y., Lin, Y., & Wahba, G. (2004). Multicategory support vector machines, theory, and application to the classification of microarray data and satellite radiance data. Journal of the American Statistical Association, 99, 67\u201381.","journal-title":"Journal of the American Statistical Association"},{"key":"5367_CR19","doi-asserted-by":"crossref","first-page":"913","DOI":"10.1080\/1055678021000028375","volume":"17","author":"O. Mangasarian","year":"2002","unstructured":"Mangasarian, O. (2002). A finite Newton method for classification. Optimization Methods and Software, 17, 913\u2013929.","journal-title":"Optimization Methods and Software"},{"issue":"1","key":"5367_CR20","doi-asserted-by":"crossref","first-page":"53","DOI":"10.1111\/j.1467-9868.2007.00627.x","volume":"70","author":"L. Meier","year":"2008","unstructured":"Meier, L., Van de Geer, S., & B\u00fchlmann, P. (2008). The group lasso for logistic regression. Journal of the Royal Statistical Society. Series B. Statistical Methodology, 70(1), 53\u201371.","journal-title":"Journal of the Royal Statistical Society. Series B. Statistical Methodology"},{"issue":"2","key":"5367_CR21","doi-asserted-by":"crossref","first-page":"231","DOI":"10.1007\/s11222-008-9111-x","volume":"20","author":"G. Obozinski","year":"2010","unstructured":"Obozinski, G., Taskar, B., & Jordan, M. I. (2010). Joint covariate selection and joint subspace selection for multiple classification problems. Statistics and Computing, 20(2), 231\u2013252.","journal-title":"Statistics and Computing"},{"key":"5367_CR22","unstructured":"Qin, Z., Scheinberg, K., & Goldfarb, D. (2010). Efficient block-coordinate descent algorithms for the group lasso. Tech. rep., Columbia University."},{"key":"5367_CR23","doi-asserted-by":"crossref","unstructured":"Richt\u00e1rik, P., & Tak\u00e1\u010d, M. (2012a). Iteration complexity of randomized block-coordinate descent methods for minimizing a composite function. Mathematical Programming, 1\u201338.","DOI":"10.1007\/s10107-012-0614-z"},{"key":"5367_CR24","unstructured":"Richt\u00e1rik, P., & Tak\u00e1\u010d, M. (2012b). Parallel coordinate descent methods for big data optimization. Tech. Rep. arXiv:1212.0873 ."},{"key":"5367_CR25","first-page":"101","volume":"5","author":"R. Rifkin","year":"2004","unstructured":"Rifkin, R., & Klautau, A. (2004). In defense of one-vs-all classification. Journal of Machine Learning Research, 5, 101\u2013141.","journal-title":"Journal of Machine Learning Research"},{"key":"5367_CR26","unstructured":"Shalev-Shwartz, S., Singer, Y., Srebro, N., & Cotter, A. (2010). Pegasos: primal estimated sub-gradient solver for svm. Mathematical Programming, 1\u201328."},{"issue":"17","key":"5367_CR27","doi-asserted-by":"crossref","first-page":"2246","DOI":"10.1093\/bioinformatics\/btg308","volume":"19","author":"S. K. Shevade","year":"2003","unstructured":"Shevade, S. K., & Keerthi, S. S. (2003). A simple and efficient algorithm for gene selection using sparse logistic regression. Bioinformatics, 19(17), 2246\u20132253.","journal-title":"Bioinformatics"},{"key":"5367_CR28","doi-asserted-by":"crossref","first-page":"387","DOI":"10.1007\/s10107-007-0170-0","volume":"117","author":"P. Tseng","year":"2009","unstructured":"Tseng, P., & Yun, S. (2009). A coordinate gradient descent method for nonsmooth separable minimization. Mathematical Programming, 117, 387\u2013423.","journal-title":"Mathematical Programming"},{"key":"5367_CR29","first-page":"1113","volume-title":"Proceedings of international conference on machine learning (ICML)","author":"K. Weinberger","year":"2009","unstructured":"Weinberger, K., Dasgupta, A., Langford, J., Smola, A., & Attenberg, J. (2009). Feature hashing for large scale multitask learning. In Proceedings of international conference on machine learning (ICML) (pp.\u00a01113\u20131120)."},{"key":"5367_CR30","first-page":"219","volume-title":"Proceedings of European symposium on artificial neural networks, computational intelligence and machine learning","author":"J. Weston","year":"1999","unstructured":"Weston, J., & Watkins, C. (1999). Support vector machines for multi-class pattern recognition. In Proceedings of European symposium on artificial neural networks, computational intelligence and machine learning (pp.\u00a0219\u2013224)."},{"key":"5367_CR31","doi-asserted-by":"crossref","first-page":"159","DOI":"10.1137\/100808563","volume":"22","author":"S. J. Wright","year":"2012","unstructured":"Wright, S. J. (2012). Accelerated block-coordinate relaxation for regularized optimization. SIAM Journal on Optimization, 22, 159\u2013186.","journal-title":"SIAM Journal on Optimization"},{"issue":"7","key":"5367_CR32","doi-asserted-by":"crossref","first-page":"2479","DOI":"10.1109\/TSP.2009.2016892","volume":"57","author":"S. J. Wright","year":"2009","unstructured":"Wright, S. J., Nowak, R. D., & Figueiredo, M. A. T. (2009). Sparse reconstruction by separable approximation. Transactions on Signal Processing, 57(7), 2479\u20132493.","journal-title":"Transactions on Signal Processing"},{"key":"5367_CR33","doi-asserted-by":"crossref","first-page":"49","DOI":"10.1111\/j.1467-9868.2005.00532.x","volume":"68","author":"M. Yuan","year":"2006","unstructured":"Yuan, M., & Lin, Y. (2006). Model selection and estimation in regression with grouped variables. Journal of the Royal Statistical Society, Series B, 68, 49\u201367.","journal-title":"Journal of the Royal Statistical Society, Series B"},{"key":"5367_CR34","first-page":"3183","volume":"11","author":"G. X. Yuan","year":"2010","unstructured":"Yuan, G. X., Chang, K. W., Hsieh, C. J., & Lin, C. J. (2010). A comparison of optimization methods and software for large-scale l1-regularized linear classification. Journal of Machine Learning Research, 11, 3183\u20133234.","journal-title":"Journal of Machine Learning Research"},{"key":"5367_CR35","first-page":"33","volume-title":"Proceedings of the international conference on knowledge discovery and data mining","author":"G. X. Yuan","year":"2011","unstructured":"Yuan, G. X., Ho, C. H., & Lin, C. J. (2011). An improved glmnet for l1-regularized logistic regression. In Proceedings of the international conference on knowledge discovery and data mining (pp.\u00a033\u201341)."},{"key":"5367_CR36","doi-asserted-by":"crossref","first-page":"149","DOI":"10.1214\/08-EJS122","volume":"2","author":"H. H. Zhang","year":"2006","unstructured":"Zhang, H. H., Liu, Y., Wu, Y., & Zhu, J. (2006). Variable selection for multicategory svm via sup-norm regularization. Electronic Journal of Statistics, 2, 149\u2013167.","journal-title":"Electronic Journal of Statistics"},{"key":"5367_CR37","first-page":"2541","volume":"7","author":"P. Zhao","year":"2006","unstructured":"Zhao, P., & Yu, B. (2006). On model selection consistency of lasso. Journal of Machine Learning Research, 7, 2541\u20132563.","journal-title":"Journal of Machine Learning Research"},{"key":"5367_CR38","doi-asserted-by":"crossref","first-page":"301","DOI":"10.1111\/j.1467-9868.2005.00503.x","volume":"67","author":"H. Zou","year":"2005","unstructured":"Zou, H., & Hastie, T. (2005). Regularization and variable selection via the elastic net. Journal of the Royal Statistical Society, Series B, 67, 301\u2013320.","journal-title":"Journal of the Royal Statistical Society, Series B"}],"container-title":["Machine Learning"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10994-013-5367-2.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s10994-013-5367-2\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10994-013-5367-2","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,5,10]],"date-time":"2024-05-10T07:52:09Z","timestamp":1715327529000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s10994-013-5367-2"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2013,5,8]]},"references-count":38,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2013,10]]}},"alternative-id":["5367"],"URL":"https:\/\/doi.org\/10.1007\/s10994-013-5367-2","relation":{},"ISSN":["0885-6125","1573-0565"],"issn-type":[{"value":"0885-6125","type":"print"},{"value":"1573-0565","type":"electronic"}],"subject":[],"published":{"date-parts":[[2013,5,8]]}}}