{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,9]],"date-time":"2026-04-09T03:45:24Z","timestamp":1775706324112,"version":"3.50.1"},"reference-count":33,"publisher":"Springer Science and Business Media LLC","issue":"1-2","license":[{"start":{"date-parts":[[2010,11,11]],"date-time":"2010-11-11T00:00:00Z","timestamp":1289433600000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["Mach Learn"],"published-print":{"date-parts":[[2011,10]]},"DOI":"10.1007\/s10994-010-5221-8","type":"journal-article","created":{"date-parts":[[2010,11,10]],"date-time":"2010-11-10T17:17:30Z","timestamp":1289409450000},"page":"41-75","source":"Crossref","is-referenced-by-count":286,"title":["Dual coordinate descent methods for logistic regression and maximum entropy models"],"prefix":"10.1007","volume":"85","author":[{"given":"Hsiang-Fu","family":"Yu","sequence":"first","affiliation":[]},{"given":"Fang-Lan","family":"Huang","sequence":"additional","affiliation":[]},{"given":"Chih-Jen","family":"Lin","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2010,11,11]]},"reference":[{"key":"5221_CR1","unstructured":"Baldridge, J., Morton, T., & Bierner, G. OpenNLP package, 2001. URL http:\/\/opennlp.sourceforge.net\/ ."},{"key":"5221_CR2","volume-title":"Nonlinear programming","author":"D. P. Bertsekas","year":"1999","unstructured":"Bertsekas, D. P. (1999). Nonlinear programming (2nd edn.). Belmont: Athena Scientific.","edition":"2"},{"key":"5221_CR3","first-page":"1369","volume":"9","author":"K.-W. Chang","year":"2008","unstructured":"Chang, K.-W., Hsieh, C.-J., & Lin, C.-J. (2008). Coordinate descent method for large-scale L2-loss linear SVM. Journal of Machine Learning Research, 9, 1369\u20131398.","journal-title":"Journal of Machine Learning Research"},{"key":"5221_CR4","first-page":"1775","volume":"9","author":"M. Collins","year":"2008","unstructured":"Collins, M., Globerson, A., Koo, T., Carreras, X., & Bartlett, P. (2008). Exponentiated gradient algorithms for conditional random fields and max-margin Markov networks. Journal of Machine Learning Research, 9, 1775\u20131822.","journal-title":"Journal of Machine Learning Research"},{"key":"5221_CR5","unstructured":"Crammer, K., & Singer, Y. (2000). On the learnability and design of output codes for multiclass problems. In Computational learning theory (pp.\u00a035\u201346)."},{"issue":"5","key":"5221_CR6","doi-asserted-by":"crossref","first-page":"1470","DOI":"10.1214\/aoms\/1177692379","volume":"43","author":"J. N. Darroch","year":"1972","unstructured":"Darroch, J. N., & Ratcliff, D. (1972). Generalized iterative scaling for log-linear models. The Annals of Mathematical Statistics, 43(5), 1470\u20131480.","journal-title":"The Annals of Mathematical Statistics"},{"issue":"4","key":"5221_CR7","doi-asserted-by":"crossref","first-page":"380","DOI":"10.1109\/34.588021","volume":"19","author":"S. Pietra Della","year":"1997","unstructured":"Della Pietra, S., Della Pietra, V., & Lafferty, J. (1997). Inducing features of random fields. IEEE Transactions on Pattern Analysis and Machine Intelligence, 19(4), 380\u2013393.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"5221_CR8","first-page":"1889","volume":"6","author":"R.-E. Fa","year":"2005","unstructured":"Fa, R.-E., Chan, P.-H., & Lin, C.-J. (2005). Working set selection using second order information for training SVM. Journal of Machine Learning Research, 6, 1889\u20131918.","journal-title":"Journal of Machine Learning Research"},{"key":"5221_CR9","unstructured":"Gao, J., Andrew, G., Johnson, M., & Toutanova, K. (2007). A comparative study of parameter estimation methods statistical natural language processing. In Proceedings of the 45th annual meeting of the association of computational linguistics (ACL) (pp.\u00a0824\u2013831)."},{"issue":"1","key":"5221_CR10","doi-asserted-by":"crossref","first-page":"5","DOI":"10.1145\/103162.103163","volume":"23","author":"D. Goldberg","year":"1991","unstructured":"Goldberg, D. (1991). What every computer scientist should know about floating-point arithmetic. ACM Computing Surveys, 23(1), 5\u201348.","journal-title":"ACM Computing Surveys"},{"key":"5221_CR11","unstructured":"Goodman, J. (2002). Sequential conditional generalized iterative scaling. In Proceedings of the 40th annual meeting of the association of computational linguistics (ACL) (pp.\u00a09\u201316)."},{"key":"5221_CR12","volume-title":"Proceedings of the twenty fifth international conference on machine learning (ICML)","author":"C.-J. Hsieh","year":"2008","unstructured":"Hsieh, C.-J., Chang, K.-W., Lin, C.-J., Keerthi, S. S., & Sundararajan, S. (2008). A dual coordinate descent method for large-scale linear SVM. In Proceedings of the twenty fifth international conference on machine learning (ICML)."},{"issue":"2","key":"5221_CR13","doi-asserted-by":"crossref","first-page":"415","DOI":"10.1109\/72.991427","volume":"13","author":"C.-W. Hsu","year":"2002","unstructured":"Hsu, C.-W., & Lin, C.-J. (2002). A comparison of methods for multi-class support vector machines. IEEE Transactions on Neural Networks, 13(2), 415\u2013425.","journal-title":"IEEE Transactions on Neural Networks"},{"key":"5221_CR14","first-page":"815","volume":"11","author":"F.-L. Huang","year":"2010","unstructured":"Huang, F.-L., Hsien, C.-J., Chang, K.-W., & Lin, C.-J. (2010). Iterative scaling and coordinate descent methods for maximum entropy. Journal of Machine Learning Research, 11, 815\u2013848.","journal-title":"Journal of Machine Learning Research"},{"key":"5221_CR15","volume-title":"Proceedings of the conference on AI and statistics","author":"T. S. Jaakkola","year":"1999","unstructured":"Jaakkola, T. S., & Haussler, D. (1999). Probabilistic kernel regression models. In Proceedings of the conference on AI and statistics. Society for Artificial Intelligence in Statistics, New Jersey."},{"key":"5221_CR16","unstructured":"Jin, R., Yan, R., Zhang, J., & Hauptmann, A. G. (2003). A faster iterative scaling algorithm for conditional exponential model. In Proceedings of the twentieth international conference on machine learning (ICML)."},{"key":"5221_CR17","volume-title":"Advances in kernel methods\u2014support vector learning","author":"T. Joachims","year":"1998","unstructured":"Joachims, T. (1998). Making large-scale SVM learning practical. In B. Sch\u00f6lkopf, C. J. C. Burges, & A. J. Smola (Eds.), Advances in kernel methods\u2014support vector learning. Cambridge: MIT Press."},{"key":"5221_CR18","volume-title":"Speech and language processing: an introduction to natural language processing, computational linguistics and speech recognition","author":"D. Jurafsky","year":"2008","unstructured":"Jurafsky, D., & Martin, J. H. (2008). Speech and language processing: an introduction to natural language processing, computational linguistics and speech recognition (2nd edn.). New York: Prentice Hall.","edition":"2"},{"key":"5221_CR19","doi-asserted-by":"crossref","first-page":"637","DOI":"10.1162\/089976601300014493","volume":"13","author":"S. S. Keerthi","year":"2001","unstructured":"Keerthi, S. S., Shevade, S. K., Bhattacharyya, C., & Murthy, K. R. K. (2001). Improvements to Platt\u2019s SMO algorithm for SVM classifier design. Neural Computation, 13, 637\u2013649.","journal-title":"Neural Computation"},{"key":"5221_CR20","doi-asserted-by":"crossref","first-page":"151","DOI":"10.1007\/s10994-005-0768-5","volume":"61","author":"S. S. Keerthi","year":"2005","unstructured":"Keerthi, S. S., Duan, K., Shevade, S., & Poo, A. N. (2005). A fast dual algorithm for kernel logistic regression. Machine Learning, 61, 151\u2013165.","journal-title":"Machine Learning"},{"key":"5221_CR21","doi-asserted-by":"crossref","unstructured":"Keerthi, S. S., Sundararajan, S., Chang, K.-W., Hsieh, C.-J., & Lin, C.-J. (2008). A sequential dual method for large scale multi-class linear SVMs. In Proceedings of the 14th ACM SIGKDD international conference on knowledge discovery and data mining.","DOI":"10.1145\/1401890.1401942"},{"key":"5221_CR22","unstructured":"Komarek, P., & Moore, A. W. (2005). Making logistic regression a core data mining tool: a practical investigation of accuracy, speed, and simplicity (Technical report TR-05-27). Robotics Institute, Carnegie Mellon University"},{"key":"5221_CR23","volume-title":"Advances in neural information processing systems","author":"G. Lebanon","year":"2002","unstructured":"Lebanon, G., & Lafferty, J. (2002). Boosting and maximum likelihood for exponential models. In Advances in neural information processing systems, vol.\u00a014. Cambridge: MIT Press."},{"key":"5221_CR24","first-page":"627","volume":"9","author":"C.-J. Lin","year":"2008","unstructured":"Lin, C.-J., Weng, R. C., & Keerthi, S. S. (2008). Trust region Newton method for large-scale logistic regression. Journal of Machine Learning Research, 9, 627\u2013650.","journal-title":"Journal of Machine Learning Research"},{"issue":"1","key":"5221_CR25","doi-asserted-by":"crossref","first-page":"503","DOI":"10.1007\/BF01589116","volume":"45","author":"D. C. Liu","year":"1989","unstructured":"Liu, D. C., & Nocedal, J. (1989). On the limited memory BFGS method for large scale optimization. Mathematical Programming, 45(1), 503\u2013528.","journal-title":"Mathematical Programming"},{"issue":"1","key":"5221_CR26","doi-asserted-by":"crossref","first-page":"7","DOI":"10.1007\/BF00939948","volume":"72","author":"Z.-Q. Luo","year":"1992","unstructured":"Luo, Z.-Q., & Tseng, P. (1992). On the convergence of coordinate descent method for convex differentiable minimization. Journal of Optimization Theory and Applications, 72(1), 7\u201335.","journal-title":"Journal of Optimization Theory and Applications"},{"key":"5221_CR27","first-page":"1","volume-title":"Proceedings of the 6th conference on Natural language learning","author":"R. Malouf","year":"2002","unstructured":"Malouf, R. (2002). A comparison of algorithms for maximum entropy parameter estimation. In Proceedings of the 6th conference on Natural language learning (pp. 1\u20137). Stroudsburg: Association for Computational Linguistics."},{"key":"5221_CR28","unstructured":"Memisevic, R. (2006). Dual optimization of conditional probability models (Technical report). Department of Computer Science, University of Toronto."},{"key":"5221_CR29","unstructured":"Minka, T. P. (2003). A comparison of numerical optimizers for logistic regression. URL http:\/\/research.microsoft.com\/~minka\/papers\/logreg\/ ."},{"key":"5221_CR30","unstructured":"P\u00e9rez-Cruz, F., Figueiras-Vidal, A. R., & Art\u00e9s-Rodr\u00edguez, A. (2004). Double chunking for solving SVMs for very large datasets. In Proceedings of learning 2004, Spain 2004"},{"key":"5221_CR31","unstructured":"Ratnaparkhi, A. (1998). Maximum entropy models for natural language ambiguity resolution (PhD thesis). University of Pennsylvania."},{"key":"5221_CR32","unstructured":"R\u00fcping, S. (2000). mySVM\u2014another one of those support vector machines. Software available at http:\/\/www-ai.cs.uni-dortmund.de\/SOFTWARE\/MYSVM\/ ."},{"issue":"1\u20133","key":"5221_CR33","doi-asserted-by":"crossref","first-page":"91","DOI":"10.1023\/A:1012498226479","volume":"46","author":"T. Zhang","year":"2002","unstructured":"Zhang, T. (2002). On the dual formulation of regularized linear systems with convex risks. Machine Learning, 46(1\u20133), 91\u2013129.","journal-title":"Machine Learning"}],"container-title":["Machine Learning"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10994-010-5221-8.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s10994-010-5221-8\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10994-010-5221-8","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,6,6]],"date-time":"2019-06-06T01:32:02Z","timestamp":1559784722000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s10994-010-5221-8"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2010,11,11]]},"references-count":33,"journal-issue":{"issue":"1-2","published-print":{"date-parts":[[2011,10]]}},"alternative-id":["5221"],"URL":"https:\/\/doi.org\/10.1007\/s10994-010-5221-8","relation":{},"ISSN":["0885-6125","1573-0565"],"issn-type":[{"value":"0885-6125","type":"print"},{"value":"1573-0565","type":"electronic"}],"subject":[],"published":{"date-parts":[[2010,11,11]]}}}