{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,31]],"date-time":"2026-01-31T15:54:51Z","timestamp":1769874891492,"version":"3.49.0"},"reference-count":61,"publisher":"Springer Science and Business Media LLC","issue":"2","license":[{"start":{"date-parts":[[2013,3,22]],"date-time":"2013-03-22T00:00:00Z","timestamp":1363910400000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["Mach Learn"],"published-print":{"date-parts":[[2013,5]]},"DOI":"10.1007\/s10994-013-5327-x","type":"journal-article","created":{"date-parts":[[2013,3,21]],"date-time":"2013-03-21T19:11:08Z","timestamp":1363893068000},"page":"155-187","source":"Crossref","is-referenced-by-count":82,"title":["Adaptive regularization of weight vectors"],"prefix":"10.1007","volume":"91","author":[{"given":"Koby","family":"Crammer","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Alex","family":"Kulesza","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Mark","family":"Dredze","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2013,3,22]]},"reference":[{"key":"5327_CR1","unstructured":"Bekkerman, R., McCallum, A., & Huang, G. (2004). Automatic categorization of email into folders: benchmark experiments on Enron and SRI corpora (Technical Report IR 418:1). Center for Intelligent Information Retrieval."},{"issue":"3","key":"5327_CR2","doi-asserted-by":"crossref","DOI":"10.1371\/journal.pcbi.0030054","volume":"3","author":"A. Bernal","year":"2007","unstructured":"Bernal, A., Crammer, K., Hatzigeorgiou, A., & Pereira, F. (2007). Global discriminative learning for higher-accuracy computational gene prediction. PLoS Computational Biology, 3(3), e54.","journal-title":"PLoS Computational Biology"},{"key":"5327_CR3","volume-title":"ACL","author":"J. Blitzer","year":"2007","unstructured":"Blitzer, J., Dredze, M., & Pereira, F. (2007). Biographies, bollywood, boom-boxes and blenders: domain adaptation for sentiment classification. In ACL."},{"key":"5327_CR4","series-title":"LNAI","volume-title":"European conference on machine learning (ECML)","author":"A. Bordes","year":"2005","unstructured":"Bordes, A., & Bottou, L. (2005). The huller: a simple and efficient online svm. In LNAI: Vol.\u00a03720. European conference on machine learning (ECML)."},{"key":"5327_CR5","volume-title":"KDD-2006","author":"V. R. Carvalho","year":"2006","unstructured":"Carvalho, V. R., & Cohen, W. W. (2006). Single-pass online learning: performance, voting schemes and online feature selection. In KDD-2006."},{"key":"5327_CR6","volume-title":"Parallel optimization: theory, algorithms, and applications","author":"Y. Censor","year":"1997","unstructured":"Censor, Y., & Zenios, S. (1997). Parallel optimization: theory, algorithms, and applications. New York: Oxford University Press."},{"issue":"3","key":"5327_CR7","doi-asserted-by":"crossref","first-page":"427","DOI":"10.1145\/258128.258179","volume":"44","author":"N. Cesa-Bianchi","year":"1997","unstructured":"Cesa-Bianchi, N., Freund, Y., Haussler, D., Helmbold, D. P., Schapire, R. E., & Warmuth, M. K. (1997). How to use expert advice. Journal of the Association for Computing Machinery, 44(3), 427\u2013485.","journal-title":"Journal of the Association for Computing Machinery"},{"key":"5327_CR8","doi-asserted-by":"crossref","unstructured":"Cesa-Bianchi, N., Conconi, A., & Gentile, C. (2005). A\u00a0second-order perceptron algorithm. SIAM Journal on Computing, 34.","DOI":"10.1137\/S0097539703432542"},{"key":"5327_CR9","first-page":"1109","volume":"11","author":"G. Chechik","year":"2010","unstructured":"Chechik, G., Sharma, V., Shalit, U., & Bengio, S. (2010). Large scale online learning of image similarity through ranking. Journal of Machine Learning Research, 11, 1109\u20131135.","journal-title":"Journal of Machine Learning Research"},{"key":"5327_CR10","doi-asserted-by":"crossref","first-page":"224","DOI":"10.3115\/1613715.1613747","volume-title":"Proceedings of the conference on empirical methods in natural language processing","author":"D. Chiang","year":"2008","unstructured":"Chiang, D., Marton, Y., & Resnik, P. (2008). Online large-margin training of syntactic and structural translation features. In Proceedings of the conference on empirical methods in natural language processing (pp. 224\u2013233). Stroudsburg: Association for Computational Linguistics."},{"key":"5327_CR11","doi-asserted-by":"crossref","first-page":"1","DOI":"10.3115\/1118693.1118694","volume-title":"Proceedings of the ACL-02 conference on empirical methods in natural language processing","author":"M. Collins","year":"2002","unstructured":"Collins, M. (2002). Discriminative training methods for hidden Markov models: theory and experiments with perceptron algorithms. In Proceedings of the ACL-02 conference on empirical methods in natural language processing (Vol.\u00a010, pp. 1\u20138). Stroudsburg: Association for Computational Linguistics."},{"key":"5327_CR12","volume-title":"IEEE int. conf. on acoustics, speech, and signal processing (ICASSP)","author":"K. Crammer","year":"2010","unstructured":"Crammer, K. (2010). Efficient online learning with individual learning-rates for phoneme sequence recognition. In IEEE int. conf. on acoustics, speech, and signal processing (ICASSP)."},{"key":"5327_CR13","volume-title":"Advances in neural information processing systems 24","author":"K. Crammer","year":"2010","unstructured":"Crammer, K., & Lee, D. D. (2010). Learning via Gaussian herding. In Advances in neural information processing systems 24."},{"key":"5327_CR14","first-page":"951","volume":"3","author":"K. Crammer","year":"2003","unstructured":"Crammer, K., & Singer, Y. (2003). Ultraconservative online algorithms for multiclass problems. Journal of Machine Learning Research, 3, 951\u2013991.","journal-title":"Journal of Machine Learning Research"},{"key":"5327_CR15","volume-title":"Advances in neural information processing systems 16","author":"K. Crammer","year":"2003","unstructured":"Crammer, K., Dekel, O., Shalev-Shwartz, S., & Singer, Y. (2003). Online passive aggressive algorithms. In Advances in neural information processing systems 16."},{"key":"5327_CR16","first-page":"551","volume":"7","author":"K. Crammer","year":"2006","unstructured":"Crammer, K., Dekel, O., Keshet, J., Shalev-Shwartz, S., & Singer, Y. (2006). Online passive-aggressive algorithms. Journal of Machine Learning Research, 7, 551\u2013585.","journal-title":"Journal of Machine Learning Research"},{"key":"5327_CR17","volume-title":"Neural information processing systems (NIPS)","author":"K. Crammer","year":"2008","unstructured":"Crammer, K., Dredze, M., & Pereira, F. (2008). Exact convex confidence-weighted learning. In Neural information processing systems (NIPS)."},{"key":"5327_CR18","volume-title":"Empirical methods in natural language processing (EMNLP)","author":"K. Crammer","year":"2009","unstructured":"Crammer, K., Dredze, M., & Kulesza, A. (2009a). Multi-class confidence weighted algorithms. In Empirical methods in natural language processing (EMNLP)."},{"key":"5327_CR19","first-page":"414","volume-title":"Advances in neural information processing systems 23","author":"K. Crammer","year":"2009","unstructured":"Crammer, K., Kulesza, A., & Dredze, M. (2009b). Adaptive regularization of weight vectors. In Advances in neural information processing systems 23 (pp. 414\u2013422)."},{"key":"5327_CR20","volume-title":"Proceedings of the twelfth intentional conference on artificial intelligence and statistics (AISTATS)","author":"K. Crammer","year":"2009","unstructured":"Crammer, K., Mohri, M., & Pereira, F. (2009c). Gaussian margin machines. In Proceedings of the twelfth intentional conference on artificial intelligence and statistics (AISTATS)."},{"key":"5327_CR21","unstructured":"Crammer, K., Dredze, M., & Pereira, F. (2012). Confidence-weighted linear classification for text categorization. Journal of Machine Learning Research."},{"key":"5327_CR22","volume-title":"International conference on machine learning","author":"M. Dredze","year":"2008","unstructured":"Dredze, M., Crammer, K., & Pereira, F. (2008). Confidence-weighted linear classification. In International conference on machine learning."},{"key":"5327_CR23","first-page":"2121","volume":"12","author":"J. C. Duchi","year":"2011","unstructured":"Duchi, J. C., Hazan, E., & Singer, Y. (2011). Adaptive subgradient methods for online learning and stochastic optimization. Journal of Machine Learning Research, 12, 2121\u20132159.","journal-title":"Journal of Machine Learning Research"},{"issue":"3","key":"5327_CR24","doi-asserted-by":"crossref","first-page":"277","DOI":"10.1023\/A:1007662407062","volume":"37","author":"Y. Freund","year":"1999","unstructured":"Freund, Y., & Schapire, R. E. (1999). Large margin classification using the perceptron algorithm. Machine Learning, 37(3), 277\u2013296.","journal-title":"Machine Learning"},{"key":"5327_CR25","volume-title":"IEEE 11th international conference on computer vision","author":"A. Frome","year":"2007","unstructured":"Frome, A., Singer, Y., Sha, F., & Malik, J. (2007). Learning globally-consistent local distance functions for shape-based image retrieval and classification. In IEEE 11th international conference on computer vision."},{"issue":"3","key":"5327_CR26","doi-asserted-by":"crossref","first-page":"265","DOI":"10.1023\/A:1026319107706","volume":"53","author":"C. Gentile","year":"2003","unstructured":"Gentile, C. (2003). The robustness of the p-norm algorithms. Machine Learning, 53(3), 265\u2013299.","journal-title":"Machine Learning"},{"key":"5327_CR27","volume-title":"Association for computational linguistics (ACL)","author":"V. Ha-Thuc","year":"2011","unstructured":"Ha-Thuc, V., & Cancedda, N. (2011). Confidence-weighted learning of factored discriminative language models. In Association for computational linguistics (ACL)."},{"key":"5327_CR28","volume-title":"7th pacific-Asia conference on knowledge discovery and data mining (PAKDD)","author":"E. Harrington","year":"2003","unstructured":"Harrington, E., Herbrich, R., Kivinen, J., Platt, J., & Williamson, R. (2003). Online Bayes point machines. In 7th pacific-Asia conference on knowledge discovery and data mining (PAKDD)."},{"key":"5327_CR29","doi-asserted-by":"crossref","DOI":"10.1007\/978-0-387-21606-5","volume-title":"The elements of statistical learning: data mining, inference, and prediction","author":"T. Hastie","year":"2001","unstructured":"Hastie, T., Tibshirani, R., & Friedman, J. (2001). The elements of statistical learning: data mining, inference, and prediction. Berlin: Springer."},{"key":"5327_CR30","volume-title":"Adaptive filter theory","author":"S. Haykin","year":"1996","unstructured":"Haykin, S. (1996). Adaptive filter theory. New York: Prentice Hall."},{"key":"5327_CR31","unstructured":"Hazan, E. (2006). Efficient algorithms for online convex optimization and their applications. PhD thesis, Princeton University."},{"key":"5327_CR32","first-page":"245","volume":"1","author":"R. Herbrich","year":"2001","unstructured":"Herbrich, R., Graepel, T., & Campbell, C. (2001). Bayes point machines. Journal of Machine Learning Research, 1, 245\u2013279.","journal-title":"Journal of Machine Learning Research"},{"key":"5327_CR33","unstructured":"Jaakkola, T., Meila, M., & Jebara, T. (1999). Maximum entropy discrimination."},{"key":"5327_CR34","series-title":"Lecture notes in computer science","doi-asserted-by":"crossref","first-page":"269","DOI":"10.1007\/978-3-642-12307-8_25","volume-title":"Computer vision \u2013 ACCV 2009","author":"L. Jie","year":"2010","unstructured":"Jie, L., Orabona, F., & Caputo, B. (2010). An online framework for learning novel concepts over multiple cues. In H. Zha, R. Taniguchi, & S. Maybank (Eds.), Lecture notes in computer science: Vol.\u00a05994. Computer vision \u2013 ACCV 2009 (pp. 269\u2013280). Berlin: Springer. doi: 10.1007\/978-3-642-12307-8_25 ."},{"key":"5327_CR35","first-page":"227","volume":"8","author":"R. Khardon","year":"2007","unstructured":"Khardon, R., & Wachman, G. (2007). Noise tolerant variants of the perceptron algorithm. Journal of Machine Learning Research, 8, 227\u2013248.","journal-title":"Journal of Machine Learning Research"},{"issue":"1","key":"5327_CR36","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1006\/inco.1996.2612","volume":"132","author":"J. Kivinen","year":"1997","unstructured":"Kivinen, J., & Warmuth, M. K. (1997). Exponentiated gradient versus gradient descent for linear predictors. Information and Computation, 132(1), 1\u201364.","journal-title":"Information and Computation"},{"key":"5327_CR37","doi-asserted-by":"crossref","first-page":"217","DOI":"10.1007\/978-3-540-30115-8_22","volume-title":"Machine learning: ECML 2004","author":"B. Klimt","year":"2004","unstructured":"Klimt, B., & Yang, Y. (2004). The enron corpus: a new dataset for email classification research. In Machine learning: ECML 2004 (pp. 217\u2013226)."},{"key":"5327_CR38","doi-asserted-by":"crossref","first-page":"1119","DOI":"10.1088\/0305-4470\/25\/5\/019","volume":"25","author":"A. Krogh","year":"1992","unstructured":"Krogh, A. (1992). Learning with noise in a linear perceptron. Journal of Physics. A, Mathematical and General, 25, 1119.","journal-title":"Journal of Physics. A, Mathematical and General"},{"key":"5327_CR39","doi-asserted-by":"crossref","first-page":"1135","DOI":"10.1088\/0305-4470\/25\/5\/020","volume":"25","author":"A. Krogh","year":"1992","unstructured":"Krogh, A., & Hertz, J. (1992). Generalization in a linear perceptron in the presence of noise. Journal of Physics. A, Mathematical and General, 25, 1135.","journal-title":"Journal of Physics. A, Mathematical and General"},{"key":"5327_CR40","unstructured":"Le, A., Markopoulou, A., & Faloutsos, M. (2010). Phishdef: Url names say it all. Arxiv preprint arXiv:1009.2275 ."},{"key":"5327_CR41","first-page":"361","volume":"5","author":"D. D. Lewis","year":"2004","unstructured":"Lewis, D. D., Yang, Y., Rose, T. G., & Li, F. (2004). Rcv1: a new benchmark collection for text categorization research. Journal of Machine Learning Research, 5, 361\u2013397.","journal-title":"Journal of Machine Learning Research"},{"key":"5327_CR42","first-page":"285","volume":"2","author":"N. Littlestone","year":"1988","unstructured":"Littlestone, N. (1988). Learning when irrelevant attributes abound: a new linear-threshold algorithm. Machine Learning, 2, 285\u2013318.","journal-title":"Machine Learning"},{"key":"5327_CR43","doi-asserted-by":"crossref","first-page":"212","DOI":"10.1006\/inco.1994.1009","volume":"108","author":"N. Littlestone","year":"1994","unstructured":"Littlestone, N., & Warmuth, M. K. (1994). The weighted majority algorithm. Information and Computation, 108, 212\u2013261.","journal-title":"Information and Computation"},{"key":"5327_CR44","volume-title":"AIStats","author":"J. Ma","year":"2010","unstructured":"Ma, J., Kulesza, A., Crammer, K., Dredze, M., Saul, L., & Pereira, F. (2010). Exploiting feature covariance in high-dimensional online learning. In: AIStats."},{"key":"5327_CR45","first-page":"91","volume-title":"Proceedings of the 43rd annual meeting on association for computational linguistics","author":"R. McDonald","year":"2005","unstructured":"McDonald, R., Crammer, K., & Pereira, F. (2005). Online large-margin training of dependency parsers. In Proceedings of the 43rd annual meeting on association for computational linguistics (pp. 91\u201398). Stroudsburg: Association for Computational Linguistics."},{"key":"5327_CR46","volume-title":"Proceedings of the twenty third annual conference on learning theory","author":"H. B. McMahan","year":"2010","unstructured":"McMahan, H. B., & Streeter, M. (2010). Adaptive bound optimization for online convex optimization. In Proceedings of the twenty third annual conference on learning theory."},{"key":"5327_CR47","first-page":"971","volume-title":"Proceedings of the 2010 conference on empirical methods in natural language processing","author":"A. Mejer","year":"2010","unstructured":"Mejer, A., & Crammer, K. (2010). Confidence in structured-prediction using confidence-weighted models. In Proceedings of the 2010 conference on empirical methods in natural language processing (pp. 971\u2013981). EMNLP\u201910. Stroudsburg: Association for Computational Linguistics. http:\/\/portal.acm.org\/citation.cfm?id=1870658.1870753 ."},{"key":"5327_CR48","doi-asserted-by":"crossref","first-page":"415","DOI":"10.1098\/rsta.1909.0016","volume":"209","author":"J. Mercer","year":"1909","unstructured":"Mercer, J. (1909). Functions of positive and negative type and their connection with the theory of integral equations. Philosophical Transactions of the Royal Society of London.\u00a0A, 209, 415\u2013446.","journal-title":"Philosophical Transactions of the Royal Society of London.\u00a0A"},{"key":"5327_CR49","volume-title":"Proceedings of the twenty fifth conference on uncertainty in artificial intelligence","author":"T. P. Minka","year":"2009","unstructured":"Minka, T. P., Xiang, R., & Qi, Y. A. (2009). Virtual vector machine for Bayesian online classification. In Proceedings of the twenty fifth conference on uncertainty in artificial intelligence."},{"key":"5327_CR50","volume-title":"Advances in neural information processing systems 24","author":"F. Orabona","year":"2010","unstructured":"Orabona, F., & Crammer, K. (2010). New adaptive algorithms for online classification. In Advances in neural information processing systems 24."},{"key":"5327_CR51","volume-title":"Advances in large margin classifiers","author":"J. C. Platt","year":"1998","unstructured":"Platt, J. C. (1998). Probabilistic outputs for support vector machines and comparisons to regularized likelihood methods. In P. Bartlett, B. Sch\u00f6lkopf, D. Schuurmans, & A.\u00a0J. Smola (Eds.), Advances in large margin classifiers. Cambridge: MIT Press."},{"key":"5327_CR52","volume-title":"Gaussian processes for machine learning","author":"C. E. Rasmussen","year":"2006","unstructured":"Rasmussen, C. E., & Williams, C. K. I. (2006). Gaussian processes for machine learning. Cambridge: MIT Press."},{"key":"5327_CR53","doi-asserted-by":"crossref","first-page":"386","DOI":"10.1037\/h0042519","volume":"65","author":"F. Rosenblatt","year":"1958","unstructured":"Rosenblatt, F. (1958). The perceptron: a probabilistic model for information storage and organization in the brain. Psychological Review, 65, 386\u2013407 (Reprinted in Neurocomputing, MIT Press, 1988).","journal-title":"Psychological Review"},{"key":"5327_CR54","volume-title":"AISTATS","author":"A. Saha","year":"2011","unstructured":"Saha, A., Daume, H. III, & Venkatasubramanian, S. (2011). Online learning of multiple tasks and their relationships. In AISTATS."},{"key":"5327_CR55","volume-title":"The New York Times annotated corpus","author":"E. Sandhaus","year":"2008","unstructured":"Sandhaus, E. (2008). The New York Times annotated corpus. Philadelphia: Linguistic Data Consortium."},{"key":"5327_CR56","volume-title":"Artificial intelligence and statistics (AISTATS)","author":"P. Shivaswamy","year":"2007","unstructured":"Shivaswamy, P., & Jebara, T. (2007). Ellipsoidal kernel machines. In Artificial intelligence and statistics (AISTATS)."},{"key":"5327_CR57","series-title":"JMLR","first-page":"733","volume-title":"Proceedings of the thirteenth international conference on artificial intelligence and statistics (AISTATS) 2010","author":"P. Shivaswamy","year":"2010","unstructured":"Shivaswamy, P., & Jebara, T. (2010a). Empirical Bernstein boosting. In Y. Teh & M. Titterington (Eds.), JMLR: Vol.\u00a09. Proceedings of the thirteenth international conference on artificial intelligence and statistics (AISTATS) 2010 (pp. 733\u2013740). W&CP."},{"key":"5327_CR58","first-page":"747","volume":"11","author":"P. K. Shivaswamy","year":"2010","unstructured":"Shivaswamy, P. K., & Jebara, T. (2010b). Maximum relative margin and data-dependent regularization. Journal of Machine Learning Research, 11, 747\u2013788.","journal-title":"Journal of Machine Learning Research"},{"key":"5327_CR59","first-page":"171","volume-title":"Proceedings of the tenth national conference on artificial intelligence","author":"R. S. Sutton","year":"1992","unstructured":"Sutton, R. S. (1992). Adapting bias by gradient descent: an incremental version of delta-bar-delta. In Proceedings of the tenth national conference on artificial intelligence (pp. 171\u2013176). Cambridge: MIT Press."},{"key":"5327_CR60","first-page":"211","volume":"1","author":"M. E. Tipping","year":"2001","unstructured":"Tipping, M. E. (2001). Sparse Bayesian learning and the relevance vector machine. Journal of Machine Learning Research, 1, 211\u2013244.","journal-title":"Journal of Machine Learning Research"},{"key":"5327_CR61","volume-title":"The 22nd international conference on algorithmic learning theory","author":"N. Vaits","year":"2011","unstructured":"Vaits, N., & Crammer, K. (2011). Re-adapting the regularization of weights for non-stationary regression. In The 22nd international conference on algorithmic learning theory. ALT\u201911."}],"container-title":["Machine Learning"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10994-013-5327-x.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s10994-013-5327-x\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10994-013-5327-x","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,7,11]],"date-time":"2019-07-11T06:15:14Z","timestamp":1562825714000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s10994-013-5327-x"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2013,3,22]]},"references-count":61,"journal-issue":{"issue":"2","published-print":{"date-parts":[[2013,5]]}},"alternative-id":["5327"],"URL":"https:\/\/doi.org\/10.1007\/s10994-013-5327-x","relation":{},"ISSN":["0885-6125","1573-0565"],"issn-type":[{"value":"0885-6125","type":"print"},{"value":"1573-0565","type":"electronic"}],"subject":[],"published":{"date-parts":[[2013,3,22]]}}}