{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,9,26]],"date-time":"2025-09-26T13:04:30Z","timestamp":1758891870404,"version":"3.44.0"},"publisher-location":"Cham","reference-count":29,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783030865221"},{"type":"electronic","value":"9783030865238"}],"license":[{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021]]},"DOI":"10.1007\/978-3-030-86523-8_19","type":"book-chapter","created":{"date-parts":[[2021,9,10]],"date-time":"2021-09-10T06:05:16Z","timestamp":1631253916000},"page":"314-329","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["Adaptive Optimizers with Sparse Group Lasso for Neural Networks in CTR Prediction"],"prefix":"10.1007","author":[{"given":"Yun","family":"Yue","sequence":"first","affiliation":[]},{"given":"Yongchao","family":"Liu","sequence":"additional","affiliation":[]},{"given":"Suo","family":"Tong","sequence":"additional","affiliation":[]},{"given":"Minghao","family":"Li","sequence":"additional","affiliation":[]},{"given":"Zhen","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"Chunyang","family":"Wen","sequence":"additional","affiliation":[]},{"given":"Huanjun","family":"Bao","sequence":"additional","affiliation":[]},{"given":"Lihong","family":"Gu","sequence":"additional","affiliation":[]},{"given":"Jinjie","family":"Gu","sequence":"additional","affiliation":[]},{"given":"Yixiang","family":"Mu","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2021,9,11]]},"reference":[{"key":"19_CR1","unstructured":"Abadi, M., et al.: TensorFlow: a system for large-scale machine learning. In: Keeton, K., Roscoe, T. (eds.) 12th USENIX Symposium on Operating Systems Design and Implementation, OSDI 2016, Savannah, GA, USA, 2\u20134 November 2016, pp. 265\u2013283. USENIX Association (2016). https:\/\/www.usenix.org\/conference\/osdi16\/technical-sessions\/presentation\/abadi"},{"key":"19_CR2","unstructured":"Avazu: Avazu click-through rate prediction (2015). https:\/\/www.kaggle.com\/c\/avazu-ctr-prediction\/data"},{"key":"19_CR3","unstructured":"Criteo: Criteo display ad challenge (2014). http:\/\/labs.criteo.com\/2014\/02\/kaggle-display-advertising-challenge-dataset"},{"key":"19_CR4","doi-asserted-by":"publisher","first-page":"2121","DOI":"10.5555\/1953048.2021068","volume":"12","author":"J Duchi","year":"2011","unstructured":"Duchi, J., Hazan, E., Singer, Y.: Adaptive subgradient methods for online learning and stochastic optimization. J. Mach. Learn. Res. 12, 2121\u20132159 (2011). https:\/\/doi.org\/10.5555\/1953048.2021068","journal-title":"J. Mach. Learn. Res."},{"key":"19_CR5","unstructured":"Graepel, T., Candela, J.Q., Borchert, T., Herbrich, R.: Web-scale Bayesian click-through rate prediction for sponsored search advertising in Microsoft\u2019s Bing search engine. In: F\u00fcrnkranz, J., Joachims, T. (eds.) Proceedings of the 27th International Conference on Machine Learning, ICML 2010, Haifa, Israel, 21\u201324 June 2010, pp. 13\u201320. Omnipress (2010). https:\/\/icml.cc\/Conferences\/2010\/papers\/901.pdf"},{"key":"19_CR6","unstructured":"Gupta, V., Koren, T., Singer, Y.: Shampoo: preconditioned stochastic tensor optimization. In: Dy, J.G., Krause, A. (eds.) Proceedings of the 35th International Conference on Machine Learning, ICML 2018, Stockholmsm\u00e4ssan, Stockholm, Sweden, 10\u201315 July 2018, vol. 80, pp. 1837\u20131845. PMLR (2018). http:\/\/proceedings.mlr.press\/v80\/gupta18a.html"},{"key":"19_CR7","doi-asserted-by":"crossref","unstructured":"Liao, H., Peng, L., Liu, Z., Shen, X.: IPinYou global RTB bidding algorithm competition (2013). https:\/\/www.kaggle.com\/lastsummer\/ipinyou","DOI":"10.1145\/2648584.2648590"},{"key":"19_CR8","unstructured":"Kingma, D.P., Ba, J.L.: Adam: a method for stochastic optimization. In: Proceedings of the 3rd International Conference on Learning Representations, ICLR 2015, San Diego, CA, USA (2015)"},{"key":"19_CR9","doi-asserted-by":"crossref","unstructured":"Littlestone, N.: From on-line to batch learning. In: Rivest, R.L., Haussler, D., Warmuth, M.K. (eds.) Proceedings of the 2nd Annual Workshop on Computational Learning Theory, COLT 1989, Santa Cruz, CA, USA, 31 July\u20132 August 1989, pp. 269\u2013284. Morgan Kaufmann (1989). http:\/\/dl.acm.org\/citation.cfm?id=93365","DOI":"10.1016\/B978-0-08-094829-4.50022-2"},{"key":"19_CR10","unstructured":"McMahan, H.B.: Follow-the-regularized-leader and mirror descent: equivalence theorems and L1 regularization. In: Proceedings of the 14th International Conference on Artificial Intelligence and Statistics, AISTATS 2011, Fort Lauderdale, FL, USA, vol. 15, pp. 525\u2013533. PMLR (2011)"},{"key":"19_CR11","doi-asserted-by":"crossref","unstructured":"McMahan, H.B., et al.: Ad click prediction: a view from the trenches. In: Proceedings of the 19th ACM SIGKDD International Conference on Knowledge Discovery and Data Mining, KDD 2013, Chicago, Illinois, USA, pp. 1222\u20131230. ACM (2013)","DOI":"10.1145\/2487575.2488200"},{"key":"19_CR12","unstructured":"McMahan, H.B., Streeter, M.J.: Adaptive bound optimization for online convex optimization. In: The 23rd Conference on Learning Theory, COLT 2010, Haifa, Israel, 27\u201329 June 2010, pp. 244\u2013256. Omnipress (2010). http:\/\/colt2010.haifa.il.ibm.com\/papers\/COLT2010proceedings.pdf#page=252"},{"key":"19_CR13","unstructured":"Naumov, M., et al.: Deep learning recommendation model for personalization and recommendation systems. CoRR abs\/1906.00091 (2019). http:\/\/arxiv.org\/abs\/1906.00091"},{"key":"19_CR14","doi-asserted-by":"publisher","first-page":"127","DOI":"10.1007\/s10107-004-0552-5","volume":"103","author":"YE Nesterov","year":"2005","unstructured":"Nesterov, Y.E.: Smooth minimization of non-smooth functions. Math. Program. 103, 127\u2013152 (2005)","journal-title":"Math. Program."},{"issue":"1","key":"19_CR15","doi-asserted-by":"publisher","first-page":"221","DOI":"10.1007\/s10107-007-0149-x","volume":"120","author":"YE Nesterov","year":"2009","unstructured":"Nesterov, Y.E.: Primal-dual subgradient methods for convex problems. Math. Program. 120(1), 221\u2013259 (2009). https:\/\/doi.org\/10.1007\/s10107-007-0149-x","journal-title":"Math. Program."},{"key":"19_CR16","doi-asserted-by":"crossref","unstructured":"Ni, X., et al.: Feature selection for Facebook feed ranking system via a group-sparsity-regularized training algorithm. In: Proceedings of the 28th ACM International Conference on Information and Knowledge Management, CIKM 2019, Beijing, China, pp. 2085\u20132088. ACM (2019)","DOI":"10.1145\/3357384.3358114"},{"issue":"5","key":"19_CR17","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1016\/0041-5553(64)90137-5","volume":"4","author":"BT Polyak","year":"1964","unstructured":"Polyak, B.T.: Some methods of speeding up the convergence of iteration methods. USSR Comput. Math. Math. Phys. 4(5), 1\u201317 (1964). https:\/\/doi.org\/10.1016\/0041-5553(64)90137-5","journal-title":"USSR Comput. Math. Math. Phys."},{"key":"19_CR18","doi-asserted-by":"crossref","unstructured":"Qu, Y., et al.: Product-based neural networks for user response prediction. In: Bonchi, F., Domingo-Ferrer, J., Baeza-Yates, R., Zhou, Z., Wu, X. (eds.) IEEE 16th International Conference on Data Mining, ICDM 2016, Barcelona, Spain, 12\u201315 December 2016, pp. 1149\u20131154. IEEE Computer Society (2016). https:\/\/doi.org\/10.1109\/ICDM.2016.0151","DOI":"10.1109\/ICDM.2016.0151"},{"key":"19_CR19","unstructured":"Reddi, S.J., Kale, S., Kumar, S.: On the convergence of Adam and beyond. In: Proceedings of the 6th International Conference on Learning Representations, ICLR 2018, Vancouver, BC, Canada. OpenReview.net (2018)"},{"issue":"3","key":"19_CR20","doi-asserted-by":"publisher","first-page":"400","DOI":"10.1214\/aoms\/1177729586","volume":"22","author":"H Robbins","year":"1951","unstructured":"Robbins, H., Monro, S.: A stochastic approximation method. Ann. Math. Statist. 22(3), 400\u2013407 (1951)","journal-title":"Ann. Math. Statist."},{"key":"19_CR21","unstructured":"Rockafellar, R.T.: Convex Analysis (Princeton Landmarks in Mathematics and Physics). Princeton University Press (1970)"},{"key":"19_CR22","doi-asserted-by":"publisher","first-page":"43","DOI":"10.1016\/j.neucom.2017.02.029","volume":"241","author":"S Scardapane","year":"2016","unstructured":"Scardapane, S., Comminiello, D., Hussain, A., Uncini, A.: Group sparse regularization for deep neural networks. Neurocomputing 241, 43\u201352 (2016). https:\/\/doi.org\/10.1016\/j.neucom.2017.02.029","journal-title":"Neurocomputing"},{"key":"19_CR23","doi-asserted-by":"crossref","unstructured":"Wang, R., Fu, B., Fu, G., Wang, M.: Deep & cross network for ad click predictions. In: Proceedings of the ADKDD 2017, Halifax, NS, Canada, 13\u201317 August 2017, pp. 12:1\u201312:7. ACM (2017). https:\/\/doi.org\/10.1145\/3124749.3124754","DOI":"10.1145\/3124749.3124754"},{"key":"19_CR24","doi-asserted-by":"publisher","first-page":"2543","DOI":"10.5555\/1756006.1953017","volume":"11","author":"L Xiao","year":"2010","unstructured":"Xiao, L.: Dual averaging method for regularized stochastic learning and online optimization. J. Mach. Learn. Res. 11, 2543\u20132596 (2010). https:\/\/doi.org\/10.5555\/1756006.1953017","journal-title":"J. Mach. Learn. Res."},{"key":"19_CR25","unstructured":"Yang, H., Xu, Z., King, I., Lyu, M.R.: Online learning for group lasso. In: Proceedings of the 27th International Conference on Machine Learning, ICML 2010, Haifa, Israel, pp. 1191\u20131198. Omnipress (2010)"},{"key":"19_CR26","unstructured":"Yao, Z., Gholami, A., Shen, S., Keutzer, K., Mahoney, M.W.: ADAHESSIAN: an adaptive second order optimizer for machine learning. CoRR abs\/2006.00719 (2020). https:\/\/arxiv.org\/abs\/2006.00719"},{"key":"19_CR27","unstructured":"Zeiler, M.D.: ADADELTA: an adaptive learning rate method. CoRR abs\/1212.5701 (2012). https:\/\/arxiv.org\/abs\/1212.5701"},{"key":"19_CR28","unstructured":"Zhu, M., Gupta, S.: To prune, or not to prune: exploring the efficacy of pruning for model compression. In: 6th International Conference on Learning Representations, ICLR 2018, Workshop Track Proceedings Vancouver, BC, Canada, 30 April\u20133 May 2018. OpenReview.net (2018). https:\/\/openreview.net\/forum?id=Sy1iIDkPM"},{"key":"19_CR29","unstructured":"Appendix. https:\/\/github.com\/yadandan\/adaptive_optimizers_with_sparse_group_lasso\/blob\/master\/appendix.pdf"}],"container-title":["Lecture Notes in Computer Science","Machine Learning and Knowledge Discovery in Databases. Research Track"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-86523-8_19","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,9]],"date-time":"2025-09-09T22:03:56Z","timestamp":1757455436000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-030-86523-8_19"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021]]},"ISBN":["9783030865221","9783030865238"],"references-count":29,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-86523-8_19","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2021]]},"assertion":[{"value":"11 September 2021","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECML PKDD","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Joint European Conference on Machine Learning and Knowledge Discovery in Databases","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Bilbao","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Spain","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2021","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"13 September 2021","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"17 September 2021","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"21","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ecml2021","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/2021.ecmlpkdd.org\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"EasyChair","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"869","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"210","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"24% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3-4","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3-9","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"The conference was held online due to the COVID-19 pandemic.","order":10,"name":"additional_info_on_review_process","label":"Additional Info on Review Process","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}