{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,14]],"date-time":"2026-04-14T02:03:15Z","timestamp":1776132195835,"version":"3.50.1"},"reference-count":143,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"9","license":[{"start":{"date-parts":[[2012,9,1]],"date-time":"2012-09-01T00:00:00Z","timestamp":1346457600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["Proc. IEEE"],"published-print":{"date-parts":[[2012,9]]},"DOI":"10.1109\/jproc.2012.2188013","type":"journal-article","created":{"date-parts":[[2012,4,11]],"date-time":"2012-04-11T19:39:36Z","timestamp":1334173176000},"page":"2584-2603","source":"Crossref","is-referenced-by-count":211,"title":["Recent Advances of Large-Scale Linear Classification"],"prefix":"10.1109","volume":"100","author":[{"given":"Guo-Xun","family":"Yuan","sequence":"first","affiliation":[]},{"given":"Chia-Hua","family":"Ho","sequence":"additional","affiliation":[]},{"given":"Chih-Jen","family":"Lin","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1007\/s10589-009-9251-8"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1198\/004017007000000245"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1007\/s10994-010-5221-8"},{"key":"ref32","author":"komarek","year":"2005","journal-title":"Making logistic regression a core data mining tool A practical investigation of accuracy speed and simplicity"},{"key":"ref31","first-page":"282","article-title":"A faster iterative scaling algorithm for conditional exponential model","author":"jin","year":"2003","journal-title":"Proc 20th Int Conf Mach Learn"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.3115\/1073083.1073086"},{"key":"ref37","first-page":"1519","article-title":"An interior-point method for large-scale <ref_formula><tex Notation=\"TeX\">$L_1$<\/tex><\/ref_formula> -regularized logistic regression","volume":"8","author":"koh","year":"2007","journal-title":"J Mach Learn Res"},{"key":"ref36","first-page":"1517","article-title":"Exact 1-norm support vector machines via unconstrained convex differentiable minimization","volume":"7","author":"mangasarian","year":"2006","journal-title":"J Mach Learn Res"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1023\/B:COAP.0000026884.66338.df"},{"key":"ref34","author":"zhu","year":"2004","journal-title":"Advances in Neural Information Processing Systems 16"},{"key":"ref28","first-page":"627","article-title":"Trust region Newton method for large-scale logistic regression","volume":"9","author":"lin","year":"2008","journal-title":"J Mach Learn Res"},{"key":"ref27","first-page":"341","article-title":"A modified finite Newton method for fast solution of large scale linear SVMs","volume":"6","author":"keerthi","year":"2005","journal-title":"J Mach Learn Res"},{"key":"ref29","author":"minka","year":"2003","journal-title":"A comparison of numerical optimizers for logistic regression"},{"key":"ref20","first-page":"713","article-title":"A fast hybrid algorithm for large scale <ref_formula><tex Notation=\"TeX\">$\\ell_1$<\/tex> <\/ref_formula>-regularized logistic regression","volume":"11","author":"shi","year":"2010","journal-title":"J Mach Learn Res"},{"key":"ref22","author":"yuan","year":"2011","journal-title":"An improved GLMNET for -regularized logistic regression and support vector machines"},{"key":"ref21","first-page":"1369","article-title":"Coordinate descent method for large-scale L2-loss linear SVM","volume":"9","author":"chang","year":"2008","journal-title":"J Mach Learn Res"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1145\/1390156.1390197"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1080\/10556780008805771"},{"key":"ref101","author":"nesterov","year":"2010","journal-title":"Efficiency of Coordinate Descent Methods on Huge-Scale Optimization Problems"},{"key":"ref26","author":"bottou","year":"2007","journal-title":"Stochastic Gradient Descent Examples"},{"key":"ref100","first-page":"1579","article-title":"Fast kernel classifiers with online and active learning","volume":"6","author":"bordes","year":"2005","journal-title":"J Mach Learn Res"},{"key":"ref25","first-page":"311","article-title":"Bundle methods for regularized risk minimization","volume":"11","author":"teo","year":"2010","journal-title":"J Mach Learn Res"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1109\/72.991427"},{"key":"ref51","first-page":"101","article-title":"In defense of one-vs-all classification","volume":"5","author":"rifkin","year":"2004","journal-title":"J Mach Learn Res"},{"key":"ref59","first-page":"219","article-title":"Multi-class support vector machines","author":"weston","year":"1999","journal-title":"Proc Eur Symp Artif Neural Netw"},{"key":"ref58","first-page":"547","volume":"12","author":"platt","year":"2000","journal-title":"Advances in neural information processing systems"},{"key":"ref57","author":"huang","year":"2010","journal-title":"Comparison of L2-regularized multi-class linear classifiers"},{"key":"ref56","author":"friedman","year":"1996","journal-title":"Another approach to polychotomous classification"},{"key":"ref55","author":"knerr","year":"1990","journal-title":"Neurocomputing Algorithms Architectures and Applications"},{"key":"ref54","doi-asserted-by":"crossref","first-page":"77","DOI":"10.1109\/ICPR.1994.576879","article-title":"Comparison of classifier methods: A case study in handwriting digit recognition","author":"bottou","year":"1994","journal-title":"Proc Int Conf Pattern Recognit"},{"key":"ref53","first-page":"85","article-title":"Generalized Bradley-Terry models and multi-class probability estimates","volume":"7","author":"huang","year":"2006","journal-title":"J Mach Learn Res"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1162\/15324430152733133"},{"key":"ref40","article-title":"Scalable training of <ref_formula><tex Notation=\"TeX\">$L_1$<\/tex><\/ref_formula>-regularized log-linear models","author":"andrew","year":"2007","journal-title":"Proc 24th Int Conf Mach Learn"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1145\/1150402.1150429"},{"key":"ref3","author":"cramer","year":"2002","journal-title":"The origins of logistic regression"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1145\/1390156.1390208"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1145\/1273496.1273598"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1145\/1961189.1961199"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1109\/TSP.2009.2016892"},{"key":"ref7","first-page":"1871","article-title":"LIBLINEAR: A library for large linear classification","volume":"9","author":"fan","year":"2008","journal-title":"J Mach Learn Res"},{"key":"ref9","first-page":"1471","article-title":"Training and testing low-degree polynomial data mappings via linear SVM","volume":"11","author":"chang","year":"2010","journal-title":"J Mach Learn Res"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1007\/BF00939948"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1137\/S1052623498345075"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1214\/07-AOAS131"},{"key":"ref47","first-page":"169","author":"joachims","year":"1998","journal-title":"Advances in Kernel MethodsSupport Vector Learning"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1145\/1557019.1557082"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.18637\/jss.v033.i01"},{"key":"ref44","author":"schmidt","year":"2009","journal-title":"Optimization methods for -regularization"},{"key":"ref43","first-page":"1537","article-title":"Super-linear convergence of dual augmented Lagrangian algorithm for sparse learning","volume":"12","author":"tomioka","year":"2011","journal-title":"J Mach Learn Res"},{"key":"ref127","first-page":"456","article-title":"Distributed training strategies for the structured perceptron","author":"mcdonald","year":"2010","journal-title":"Proc 48th Annu Meeting Assoc Comput Linguist"},{"key":"ref126","first-page":"2595","author":"zinkevich","year":"2010","journal-title":"Advances in neural information processing systems"},{"key":"ref125","doi-asserted-by":"publisher","DOI":"10.1145\/1367497.1367554"},{"key":"ref124","doi-asserted-by":"publisher","DOI":"10.1007\/BF00058655"},{"key":"ref73","first-page":"156","article-title":"Adaptive constraint reduction for training support vector machines","volume":"31","author":"jung","year":"2008","journal-title":"Electron Trans Numer Anal"},{"key":"ref72","author":"gertz","year":"2005","journal-title":"Support vector machine classifiers for large data sets"},{"key":"ref129","first-page":"1453","article-title":"Large margin methods for structured and interdependent output variables","volume":"6","author":"tsochantaridis","year":"2005","journal-title":"J Mach Learn Res"},{"key":"ref71","first-page":"1775","article-title":"Exponentiated gradient algorithms for conditional random fields and max-margin Markov networks","volume":"9","author":"collins","year":"2008","journal-title":"J Mach Learn Res"},{"key":"ref128","doi-asserted-by":"publisher","DOI":"10.1145\/1327452.1327492"},{"key":"ref70","author":"memisevic","year":"2006","journal-title":"Dual optimization of conditional probability models"},{"key":"ref76","doi-asserted-by":"publisher","DOI":"10.1145\/1401890.1401936"},{"key":"ref130","author":"taskar","year":"2004","journal-title":"Advances in Neural Information Processing Systems 16"},{"key":"ref77","doi-asserted-by":"publisher","DOI":"10.1145\/2020408.2020519"},{"key":"ref74","first-page":"167","article-title":"Kernel expansion for online preference tracking","author":"moh","year":"2008","journal-title":"Proc Int Soc Music Inf Retrieval"},{"key":"ref75","first-page":"999","article-title":"COFFIN: A computational framework for linear SVMs","author":"sonnenburg","year":"2010","journal-title":"Proc 27th Int Conf Mach Learn"},{"key":"ref133","first-page":"433","article-title":"A stochastic quasi-Newton method for online convex optimization","author":"schraudolph","year":"2007","journal-title":"Proc 11th Int Conf Artif Intell Stat"},{"key":"ref134","author":"chen","year":"2009","journal-title":"Newton methods for conditional random fields"},{"key":"ref131","doi-asserted-by":"publisher","DOI":"10.3115\/1073445.1073473"},{"key":"ref78","article-title":"ASSET: Approximate stochastic subgradient estimation training for support vector machines","author":"lee","year":"2012","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"ref132","doi-asserted-by":"publisher","DOI":"10.1145\/1143844.1143966"},{"key":"ref79","first-page":"682","author":"williams","year":"2001","journal-title":"Advances in Neural Information Processing Systems 13"},{"key":"ref136","doi-asserted-by":"publisher","DOI":"10.1137\/0108053"},{"key":"ref135","doi-asserted-by":"publisher","DOI":"10.1007\/s10994-009-5108-8"},{"key":"ref138","author":"vapnik","year":"1998","journal-title":"Statistical Learning Theory"},{"key":"ref137","first-page":"380","article-title":"(Online) subgradient methods for structured prediction","author":"ratliff","year":"2007","journal-title":"Proc 11th Int Conf Artif Intell Stat"},{"key":"ref60","first-page":"265","article-title":"On the algorithmic implementation of multiclass kernel-based vector machines","volume":"2","author":"crammer","year":"2001","journal-title":"J Mach Learn Res"},{"key":"ref139","doi-asserted-by":"publisher","DOI":"10.1002\/cpa.20042"},{"key":"ref62","doi-asserted-by":"publisher","DOI":"10.1109\/TNN.2002.1031937"},{"key":"ref61","doi-asserted-by":"publisher","DOI":"10.1198\/016214504000000098"},{"key":"ref63","doi-asserted-by":"publisher","DOI":"10.1145\/1401890.1401942"},{"key":"ref64","first-page":"39","article-title":"A maximum entropy approach to natural language processing","volume":"22","author":"berger","year":"1996","journal-title":"Comput Linguist"},{"key":"ref140","doi-asserted-by":"publisher","DOI":"10.1109\/JSTSP.2007.910281"},{"key":"ref65","first-page":"282","article-title":"Conditional random fields: Probabilistic models for segmenting and labeling sequence data","author":"lafferty","year":"2001","journal-title":"Proc 18th Int Conf Mach Learn"},{"key":"ref141","doi-asserted-by":"publisher","DOI":"10.1109\/JSTSP.2007.910971"},{"key":"ref66","doi-asserted-by":"publisher","DOI":"10.3115\/1118853.1118871"},{"key":"ref142","article-title":"Efficient projections onto the <ref_formula><tex Notation=\"TeX\">$L_1$<\/tex><\/ref_formula>-ball for learning in high dimensions","author":"duchi","year":"2008","journal-title":"Proc 25th Int Conf Mach Learn"},{"key":"ref67","doi-asserted-by":"publisher","DOI":"10.1214\/aoms\/1177692379"},{"key":"ref143","doi-asserted-by":"publisher","DOI":"10.1137\/080716542"},{"key":"ref68","doi-asserted-by":"publisher","DOI":"10.1007\/BF01589116"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1007\/BF00994018"},{"key":"ref69","doi-asserted-by":"publisher","DOI":"10.1109\/34.588021"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1145\/130385.130401"},{"key":"ref109","first-page":"2121","article-title":"Adaptive subgradient methods for online learning and stochastic optimization","volume":"12","author":"duchi","year":"2011","journal-title":"J Mach Learn Res"},{"key":"ref95","author":"white","year":"2010","journal-title":"Hadoop The Definitive Guide"},{"key":"ref108","first-page":"2899","article-title":"Efficient online and batch learning using forward backward splitting","volume":"10","author":"duchi","year":"2009","journal-title":"J Mach Learn Res"},{"key":"ref94","doi-asserted-by":"publisher","DOI":"10.1109\/ICDM.2009.29"},{"key":"ref107","doi-asserted-by":"publisher","DOI":"10.1007\/s10107-007-0149-x"},{"key":"ref93","first-page":"257","author":"chang","year":"2008","journal-title":"Advances in Neural Information Processing Systems 20"},{"key":"ref106","first-page":"1865","article-title":"Stochastic methods for <ref_formula><tex Notation=\"TeX\">$L_1$<\/tex><\/ref_formula>-regularized loss minimization","volume":"12","author":"shalev-shwartz","year":"2011","journal-title":"J Mach Learn Res"},{"key":"ref92","doi-asserted-by":"publisher","DOI":"10.1145\/1835804.1835910"},{"key":"ref105","first-page":"771","article-title":"Sparse online learning via truncated gradient","volume":"10","author":"langford","year":"2009","journal-title":"J Mach Learn Res"},{"key":"ref91","author":"li","year":"2011","journal-title":"Hashing algorithms for large-scale learning"},{"key":"ref104","first-page":"2229","article-title":"Erratum: SGD-QN is less careful than expected","volume":"11","author":"bordes","year":"2010","journal-title":"J Mach Learn Res"},{"key":"ref90","doi-asserted-by":"publisher","DOI":"10.1145\/1978542.1978566"},{"key":"ref103","first-page":"1737","article-title":"SGD-QN: Careful quasi-Newton stochastic gradient descent","volume":"10","author":"bordes","year":"2009","journal-title":"J Mach Learn Res"},{"key":"ref102","author":"richtrik","year":"2011","journal-title":"Iteration complexity of randomized block-coordinate descent methods for minimizing a composite function"},{"key":"ref111","first-page":"321","article-title":"Parallel coordinate descent for <ref_formula><tex Notation=\"TeX\">$L_1$<\/tex><\/ref_formula> -regularized loss minimization","author":"bradley","year":"2011","journal-title":"Proc 28th Int Conf Mach Learn"},{"key":"ref112","author":"langford","year":"2007","journal-title":"Vowpal Wabbit"},{"key":"ref110","first-page":"2543","article-title":"Dual averaging methods for regularized stochastic learning and online optimization","volume":"11","author":"xiao","year":"2010","journal-title":"J Mach Learn Res"},{"key":"ref98","doi-asserted-by":"publisher","DOI":"10.1145\/1015330.1015332"},{"key":"ref99","first-page":"217","author":"bottou","year":"2004","journal-title":"Advances in Neural Information Processing Systems 16"},{"key":"ref96","doi-asserted-by":"publisher","DOI":"10.1214\/aoms\/1177729586"},{"key":"ref97","doi-asserted-by":"crossref","first-page":"462","DOI":"10.1214\/aoms\/1177729392","article-title":"Stochastic estimation of the maximum of a regression function","volume":"23","author":"kiefer","year":"1952","journal-title":"Ann Math Stat"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1162\/089976603321891855"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1007\/978-94-009-8467-7_1"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.3115\/1667583.1667671"},{"key":"ref13","first-page":"3183","article-title":"A comparison of optimization methods and software for large-scale <ref_formula><tex Notation=\"TeX\"> $L_1$<\/tex><\/ref_formula>-regularized linear classification","volume":"11","author":"yuan","year":"2010","journal-title":"J Mach Learn Res"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1080\/1055678021000028375"},{"key":"ref15","article-title":"Feature selection, L<ref_formula><tex Notation=\"TeX\">$_1$<\/tex><\/ref_formula> vs. L<ref_formula> <tex Notation=\"TeX\">$_2$<\/tex><\/ref_formula> regularization, and rotational invariance","author":"ng","year":"2004","journal-title":"Proc 21st Int Conf Mach Learn"},{"key":"ref118","doi-asserted-by":"publisher","DOI":"10.1016\/0898-1221(76)90003-1"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1111\/j.1467-9868.2011.00771.x"},{"key":"ref82","doi-asserted-by":"publisher","DOI":"10.1145\/1102351.1102356"},{"key":"ref117","doi-asserted-by":"publisher","DOI":"10.1561\/2200000016"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/TIT.2008.929958"},{"key":"ref81","doi-asserted-by":"publisher","DOI":"10.1162\/15324430260185619"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1111\/j.1467-9868.2005.00503.x"},{"key":"ref84","doi-asserted-by":"publisher","DOI":"10.1016\/S0022-0000(03)00025-4"},{"key":"ref119","doi-asserted-by":"publisher","DOI":"10.1145\/1791212.1791218"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1137\/1.9781611972719.13"},{"key":"ref83","first-page":"1177","author":"rahimi","year":"2008","journal-title":"Advances in neural information processing systems"},{"key":"ref114","doi-asserted-by":"publisher","DOI":"10.1137\/S1052623400374379"},{"key":"ref113","author":"tong","year":"2010","journal-title":"Lessons Learned Developing a Practical Large Scale Machine Learning System"},{"key":"ref116","doi-asserted-by":"crossref","DOI":"10.1093\/oso\/9780195100624.001.0001","author":"censor","year":"1998","journal-title":"Parallel Optimization Theory Algorithms and Applications"},{"key":"ref80","first-page":"2153","article-title":"On the Nystr&#x00F6;m method for approximating a gram matrix for improved kernel-based learning","volume":"6","author":"drineas","year":"2005","journal-title":"J Mach Learn Res"},{"key":"ref115","doi-asserted-by":"publisher","DOI":"10.1145\/2020408.2020517"},{"key":"ref120","doi-asserted-by":"publisher","DOI":"10.1016\/S1570-579X(01)80023-9"},{"key":"ref89","doi-asserted-by":"publisher","DOI":"10.1145\/1772690.1772759"},{"key":"ref121","first-page":"2331","author":"langford","year":"2009","journal-title":"Advances in Neural Information Processing Systems 22"},{"key":"ref122","author":"agarwal","year":"2011","journal-title":"Advances in Neural Information Processing Systems 24"},{"key":"ref123","doi-asserted-by":"publisher","DOI":"10.1145\/956755.956786"},{"key":"ref85","doi-asserted-by":"publisher","DOI":"10.1145\/1150402.1150436"},{"key":"ref86","doi-asserted-by":"publisher","DOI":"10.1137\/1.9781611972818.19"},{"key":"ref87","first-page":"496","article-title":"Hash kernels","volume":"5","author":"shi","year":"2009","journal-title":"Proc 12th Int Conf Artif Intell Stat"},{"key":"ref88","doi-asserted-by":"publisher","DOI":"10.1145\/1553374.1553516"}],"container-title":["Proceedings of the IEEE"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx5\/5\/6269941\/06177645.pdf?arnumber=6177645","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,4,23]],"date-time":"2024-04-23T03:04:36Z","timestamp":1713841476000},"score":1,"resource":{"primary":{"URL":"http:\/\/ieeexplore.ieee.org\/document\/6177645\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2012,9]]},"references-count":143,"journal-issue":{"issue":"9"},"URL":"https:\/\/doi.org\/10.1109\/jproc.2012.2188013","relation":{},"ISSN":["0018-9219","1558-2256"],"issn-type":[{"value":"0018-9219","type":"print"},{"value":"1558-2256","type":"electronic"}],"subject":[],"published":{"date-parts":[[2012,9]]}}}