{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,30]],"date-time":"2026-01-30T09:09:29Z","timestamp":1769764169756,"version":"3.49.0"},"reference-count":49,"publisher":"Springer Science and Business Media LLC","issue":"1-2","license":[{"start":{"date-parts":[[2025,4,7]],"date-time":"2025-04-07T00:00:00Z","timestamp":1743984000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,4,7]],"date-time":"2025-04-07T00:00:00Z","timestamp":1743984000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100003130","name":"Research Foundation Flanders","doi-asserted-by":"crossref","award":["G033822N"],"award-info":[{"award-number":["G033822N"]}],"id":[{"id":"10.13039\/501100003130","id-type":"DOI","asserted-by":"crossref"}]},{"DOI":"10.13039\/501100003130","name":"Research Foundation Flanders","doi-asserted-by":"crossref","award":["G081222N"],"award-info":[{"award-number":["G081222N"]}],"id":[{"id":"10.13039\/501100003130","id-type":"DOI","asserted-by":"crossref"}]},{"DOI":"10.13039\/501100003130","name":"Research Foundation Flanders","doi-asserted-by":"crossref","award":["G0A0920N"],"award-info":[{"award-number":["G0A0920N"]}],"id":[{"id":"10.13039\/501100003130","id-type":"DOI","asserted-by":"crossref"}]},{"name":"Research Council KU Leuven C1","award":["C14\/18\/068"],"award-info":[{"award-number":["C14\/18\/068"]}]},{"name":"KU Leuven Special Research Fund","award":["PDMT1\/22\/023"],"award-info":[{"award-number":["PDMT1\/22\/023"]}]},{"name":"Fonds de la Recherche Scientifique - FNRS and the Fonds Wetenschappelijk Onderzoek - Vlaanderen","award":["30468160"],"award-info":[{"award-number":["30468160"]}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Math. Program."],"published-print":{"date-parts":[[2025,11]]},"DOI":"10.1007\/s10107-025-02204-5","type":"journal-article","created":{"date-parts":[[2025,4,8]],"date-time":"2025-04-08T04:15:06Z","timestamp":1744085706000},"page":"801-845","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Anisotropic proximal gradient"],"prefix":"10.1007","volume":"214","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-9106-2690","authenticated-orcid":false,"given":"Emanuel","family":"Laude","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Panagiotis","family":"Patrinos","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2025,4,7]]},"reference":[{"key":"2204_CR1","doi-asserted-by":"publisher","first-page":"384","DOI":"10.1214\/09-EJS521","volume":"4","author":"F Bach","year":"2010","unstructured":"Bach, F.: Self-concordant analysis for logistic regression. Electron. J. Stat. 4, 384\u2013414 (2010)","journal-title":"Electron. J. Stat."},{"issue":"2","key":"2204_CR2","doi-asserted-by":"publisher","first-page":"329","DOI":"10.1137\/0315022","volume":"15","author":"EJ Balder","year":"1977","unstructured":"Balder, E.J.: An extension of duality\u2013stability relations to nonconvex optimization problems. SIAM J. Control. Optim. 15(2), 329\u2013343 (1977)","journal-title":"SIAM J. Control. Optim."},{"issue":"3","key":"2204_CR3","doi-asserted-by":"publisher","first-page":"1253","DOI":"10.1137\/21M1433241","volume":"15","author":"H Bauermeister","year":"2022","unstructured":"Bauermeister, H., Laude, E., M\u00f6llenhoff, T., Moeller, M., Cremers, D.: Lifting the convex conjugate in Lagrangian relaxations: a tractable approach for continuous Markov random fields. SIAM J. Imag. Sci. 15(3), 1253\u20131281 (2022)","journal-title":"SIAM J. Imag. Sci."},{"issue":"4","key":"2204_CR4","doi-asserted-by":"publisher","first-page":"1159","DOI":"10.1137\/S1052623402410557","volume":"13","author":"HH Bauschke","year":"2003","unstructured":"Bauschke, H.H., Combettes, P.L.: Iterating Bregman retractions. SIAM J. Optim. 13(4), 1159\u20131173 (2003)","journal-title":"SIAM J. Optim."},{"key":"2204_CR5","doi-asserted-by":"crossref","unstructured":"Bauschke, H. H., Borwein, J. M.: Joint and separate convexity of the Bregman distance. In: Studies in Computational Mathematics, vol. 8. Elsevier, pp. 23\u201336 (2001)","DOI":"10.1016\/S1570-579X(01)80004-5"},{"issue":"3","key":"2204_CR6","doi-asserted-by":"publisher","first-page":"1068","DOI":"10.1007\/s10957-019-01516-9","volume":"182","author":"HH Bauschke","year":"2019","unstructured":"Bauschke, H.H., Bolte, J., Chen, J., Teboulle, M., Wang, X.: On linear convergence of non-Euclidean gradient methods without strong convexity and Lipschitz gradient continuity. J. Optim. Theory Appl. 182(3), 1068\u20131087 (2019)","journal-title":"J. Optim. Theory Appl."},{"issue":"2","key":"2204_CR7","doi-asserted-by":"publisher","first-page":"330","DOI":"10.1287\/moor.2016.0817","volume":"42","author":"HH Bauschke","year":"2017","unstructured":"Bauschke, H.H., Bolte, J., Teboulle, M.: A descent lemma beyond Lipschitz gradient continuity: first-order methods revisited and applications. Math. Oper. Res. 42(2), 330\u2013348 (2017)","journal-title":"Math. Oper. Res."},{"issue":"1","key":"2204_CR8","first-page":"27","volume":"4","author":"HH Bauschke","year":"1997","unstructured":"Bauschke, H.H., Borwein, J.M.: Legendre functions and the method of random Bregman projections. J. Convex Anal. 4(1), 27\u201367 (1997)","journal-title":"J. Convex Anal."},{"key":"2204_CR9","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4419-9467-7","volume-title":"Convex Analysis and Monotone Operator Theory in Hilbert Spaces","author":"HH Bauschke","year":"2011","unstructured":"Bauschke, H.H., Combettes, P.L.: Convex Analysis and Monotone Operator Theory in Hilbert Spaces. Springer (2011)"},{"key":"2204_CR10","doi-asserted-by":"crossref","unstructured":"Birnbaum, B., Devanur, N. R., Xiao, L.: Distributed algorithms via gradient descent for Fisher markets. In: Proceedings of the 12th ACM Conference on Electronic Commerce, pp. 127\u2013136 (2011)","DOI":"10.1145\/1993574.1993594"},{"key":"2204_CR11","doi-asserted-by":"crossref","unstructured":"Bonet, C., Uscidda, T., David, A., Aubin-Frankowski, P.-C., Korba, A.: Mirror and Preconditioned Gradient Descent in Wasserstein Space. arXiv preprint arXiv:2406.08938 (2024)","DOI":"10.52202\/079017-0798"},{"issue":"3","key":"2204_CR12","doi-asserted-by":"publisher","first-page":"707","DOI":"10.1007\/s10589-019-00060-6","volume":"72","author":"LM Brice\u00f1o-Arias","year":"2019","unstructured":"Brice\u00f1o-Arias, L.M., Chierchia, G., Chouzenoux, E., Pesquet, J.-C.: A random block-coordinate Douglas\u2013Rachford splitting method with low computational complexity for binary logistic regression. Comput. Optim. Appl. 72(3), 707\u2013726 (2019)","journal-title":"Comput. Optim. Appl."},{"issue":"3","key":"2204_CR13","doi-asserted-by":"publisher","first-page":"1457","DOI":"10.1137\/120889812","volume":"23","author":"JV Burke","year":"2013","unstructured":"Burke, J.V., Hoheisel, T.: Epi-convergent smoothing with applications to convex composite functions. SIAM J. Optim. 23(3), 1457\u20131479 (2013)","journal-title":"SIAM J. Optim."},{"issue":"2","key":"2204_CR14","doi-asserted-by":"publisher","first-page":"383","DOI":"10.1112\/S0025579316000309","volume":"63","author":"A Cabot","year":"2017","unstructured":"Cabot, A., Jourani, A., Thibault, L.: Envelopes for sets and functions: regularization and generalized conjugacy. Mathematika 63(2), 383\u2013432 (2017)","journal-title":"Mathematika"},{"issue":"1","key":"2204_CR15","doi-asserted-by":"publisher","first-page":"253","DOI":"10.1023\/A:1013912006537","volume":"48","author":"M Collins","year":"2002","unstructured":"Collins, M., Schapire, R.E., Singer, Y.: Logistic regression, AdaBoost and Bregman distances. Mach. Learn. 48(1), 253\u2013285 (2002)","journal-title":"Mach. Learn."},{"issue":"1\u20132","key":"2204_CR16","doi-asserted-by":"publisher","first-page":"103","DOI":"10.1007\/s10107-013-0663-y","volume":"139","author":"PL Combettes","year":"2013","unstructured":"Combettes, P.L., Reyes, N.N.: Moreau\u2019s decomposition in Banach spaces. Math. Program. 139(1\u20132), 103\u2013114 (2013)","journal-title":"Math. Program."},{"key":"2204_CR17","unstructured":"Cuturi, M.: Sinkhorn distances: lightspeed computation of optimal transport. In: Advances in neural information processing systems, vol 26 (2013)"},{"key":"2204_CR18","unstructured":"Doikov, N.: Minimizing quasi-self-concordant functions by gradient regularization of Newton method. arXiv preprint arXiv:2308.14742 (2023)"},{"issue":"2","key":"2204_CR19","doi-asserted-by":"publisher","first-page":"277","DOI":"10.1137\/0316018","volume":"16","author":"S Dolecki","year":"1978","unstructured":"Dolecki, S., Kurcyusz, S.: On $$\\Phi $$-convexity in extremal problems. SIAM J. Control. Optim. 16(2), 277\u2013300 (1978)","journal-title":"SIAM J. Control. Optim."},{"key":"2204_CR20","doi-asserted-by":"publisher","first-page":"61","DOI":"10.1007\/s10107-003-0374-x","volume":"96","author":"J Eckstein","year":"2003","unstructured":"Eckstein, J.: A practical general approximation criterion for methods of multipliers based on Bregman distances. Math. Program. 96, 61\u201386 (2003)","journal-title":"Math. Program."},{"issue":"1","key":"2204_CR21","doi-asserted-by":"publisher","first-page":"119","DOI":"10.1006\/jcss.1997.1504","volume":"55","author":"Y Freund","year":"1997","unstructured":"Freund, Y., Schapire, R.E.: A decision-theoretic generalization of on-line learning and an application to boosting. J. Comput. Syst. Sci. 55(1), 119\u2013139 (1997)","journal-title":"J. Comput. Syst. Sci."},{"key":"2204_CR22","doi-asserted-by":"publisher","first-page":"99","DOI":"10.1007\/BF01585696","volume":"53","author":"M Fukushima","year":"1992","unstructured":"Fukushima, M.: Equivalent differentiable optimization problems and descent methods for asymmetric variational inequality problems. Math. Program. 53, 99\u2013110 (1992)","journal-title":"Math. Program."},{"key":"2204_CR23","doi-asserted-by":"crossref","unstructured":"Karimi, H., Nutini, J., Schmidt, M.: Linear convergence of gradient and proximal-gradient methods under the Polyak\u2013\u0141ojasiewicz condition. In: Joint European Conference on Machine Learning and Knowledge Discovery in Databases. Springer, pp. 795\u2013811 (2016)","DOI":"10.1007\/978-3-319-46128-1_50"},{"key":"2204_CR24","unstructured":"Kim, J., Park, C., Ozdaglar, A., Diakonikolas, J., Ryu, E.K.: Mirror duality in convex optimization. arXiv preprint arXiv:2311.17296 (2023)"},{"key":"2204_CR25","unstructured":"Latafat, P., Themelis, A., Patrinos, P.: On the convergence of adaptive first order methods: proximal gradient and alternating minimization algorithms. arXiv preprint arXiv:2311.18431 (2023)"},{"key":"2204_CR26","unstructured":"Laude, E.: Lower envelopes and lifting for structured nonconvex optimization. PhD thesis. Technical University of Munich (2021)"},{"key":"2204_CR27","unstructured":"Laude, E., Patrinos, P.: Anisotropic proximal gradient. arXiv preprint arXiv:2210.15531 (2022)"},{"issue":"4","key":"2204_CR28","doi-asserted-by":"publisher","first-page":"2721","DOI":"10.1137\/21M1465913","volume":"33","author":"E Laude","year":"2023","unstructured":"Laude, E., Themelis, A., Patrinos, P.: Dualities for non-Euclidean smoothness and strong convexity under the light of generalized conjugacy. SIAM J. Optim. 33(4), 2721\u20132749 (2023)","journal-title":"SIAM J. Optim."},{"key":"2204_CR29","unstructured":"L\u00e9ger, F., Aubin-Frankowski, P.-C.: Gradient descent with a general cost. (2023). arXiv:2305.04917 [math.OC]"},{"issue":"1","key":"2204_CR30","doi-asserted-by":"publisher","first-page":"333","DOI":"10.1137\/16M1099546","volume":"28","author":"H Lu","year":"2018","unstructured":"Lu, H., Freund, R.M., Nesterov, Y.: Relatively smooth convex optimization by first-order methods, and applications. SIAM J. Optim. 28(1), 333\u2013354 (2018)","journal-title":"SIAM J. Optim."},{"issue":"1","key":"2204_CR31","doi-asserted-by":"publisher","first-page":"991","DOI":"10.1137\/19M130858X","volume":"31","author":"CJ Maddison","year":"2021","unstructured":"Maddison, C.J., Paulin, D., Teh, Y.W., Doucet, A.: Dual space preconditioning for gradient descent. SIAM J. Optim. 31(1), 991\u20131016 (2021)","journal-title":"SIAM J. Optim."},{"issue":"2","key":"2204_CR32","doi-asserted-by":"publisher","first-page":"45","DOI":"10.1145\/2992274.2992275","volume":"50","author":"A Maignan","year":"2016","unstructured":"Maignan, A., Scott, T.C.: Fleshing out the generalized Lambert W function. ACM Commun. Comput. Algebra 50(2), 45\u201360 (2016)","journal-title":"ACM Commun. Comput. Algebra"},{"key":"2204_CR33","unstructured":"Malitsky, Y., Mishchenko, K.: Adaptive gradient descent without descent. In: Proceedings of the 37th International Conference on Machine Learning, pp. 6702\u20136712 (2020)"},{"issue":"11","key":"2204_CR34","doi-asserted-by":"publisher","first-page":"7917","DOI":"10.1090\/tran\/6911","volume":"369","author":"I Mez\u0151","year":"2017","unstructured":"Mez\u0151, I., Baricz, \u00c1.: On the generalization of the Lambert W function. Trans. Am. Math. Soc. 369(11), 7917\u20137934 (2017)","journal-title":"Trans. Am. Math. Soc."},{"key":"2204_CR35","unstructured":"Moreau, J.-J.: Fonctionnelles convexes. S \u00e9 minaire Jean Leray, pp. 1\u2013108 (1966)"},{"key":"2204_CR36","unstructured":"Moreau, J.-J.: Inf-convolution, sous-additivit\u00e9, convexit\u00e9 des fonctions num\u00e9riques. J. Math \u00e9 matiques Pures et Appliqu \u00e9 es 33\u201341 (1970)"},{"key":"2204_CR37","doi-asserted-by":"crossref","unstructured":"Patrinos, P., Bemporad, A.: Proximal Newton methods for convex composite optimization. In: 52nd IEEE Conference on Decision and Control. IEEE, pp. 2358\u20132363 (2013)","DOI":"10.1109\/CDC.2013.6760233"},{"key":"2204_CR38","doi-asserted-by":"crossref","unstructured":"Penot, J.-P., Volle, M.: On strongly convex and paraconvex dualities. In: Generalized Convexity and Fractional Programming with Economic Applications. Springer, pp. 198\u2013218 (1990)","DOI":"10.1007\/978-3-642-46709-7_14"},{"issue":"2","key":"2204_CR39","first-page":"268","volume":"12","author":"RT Rockafellar","year":"1974","unstructured":"Rockafellar, R.T.: Augmented Lagrange multiplier functions and duality in nonconvex programming. SIAM J. Optim. 12(2), 268\u2013285 (1974)","journal-title":"SIAM J. Optim."},{"key":"2204_CR40","doi-asserted-by":"publisher","DOI":"10.1515\/9781400873173","volume-title":"Convex Analysis","author":"RT Rockafellar","year":"1970","unstructured":"Rockafellar, R.T.: Convex Analysis. Princeton University Press, New Jersey (1970)"},{"key":"2204_CR41","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-02431-3","volume-title":"Variational Analysis","author":"RT Rockafellar","year":"1998","unstructured":"Rockafellar, R.T., Wets, R.J.: Variational Analysis. Springer, New York (1998)"},{"key":"2204_CR42","doi-asserted-by":"crossref","unstructured":"Schapire, R. E., Singer, Y.: Improved boosting algorithms using confidence-rated predictions. In: Proceedings of the Eleventh Annual Conference on Computational Learning Theory, pp. 80\u201391 (1998)","DOI":"10.1145\/279943.279960"},{"issue":"2","key":"2204_CR43","doi-asserted-by":"publisher","first-page":"214","DOI":"10.1287\/moor.25.2.214.12222","volume":"25","author":"MV Solodov","year":"2000","unstructured":"Solodov, M.V., Svaiter, B.F.: An inexact hybrid generalized proximal point algorithm and some new results on the theory of Bregman functions. Math. Oper. Res. 25(2), 214\u2013230 (2000)","journal-title":"Math. Oper. Res."},{"issue":"3","key":"2204_CR44","doi-asserted-by":"publisher","first-page":"443","DOI":"10.1007\/s10589-017-9912-y","volume":"67","author":"L Stella","year":"2017","unstructured":"Stella, L., Themelis, A., Patrinos, P.: Forward-backward quasi-Newton methods for nonsmooth optimization problems. Comput. Optim. Appl. 67(3), 443\u2013487 (2017)","journal-title":"Comput. Optim. Appl."},{"issue":"3","key":"2204_CR45","doi-asserted-by":"publisher","first-page":"670","DOI":"10.1287\/moor.17.3.670","volume":"17","author":"M Teboulle","year":"1992","unstructured":"Teboulle, M.: Entropic proximal mappings with applications to nonlinear programming. Math. Oper. Res. 17(3), 670\u2013690 (1992)","journal-title":"Math. Oper. Res."},{"issue":"1\u20133","key":"2204_CR46","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1007\/BF01580598","volume":"60","author":"P Tseng","year":"1993","unstructured":"Tseng, P., Bertsekas, D.P.: On the convergence of the exponential multiplier method for convex programming. Math. Program. 60(1\u20133), 1\u201319 (1993)","journal-title":"Math. Program."},{"key":"2204_CR47","volume-title":"Optimal Transport: Old and New","author":"C Villani","year":"2008","unstructured":"Villani, C.: Optimal Transport: Old and New. Springer (2008)"},{"issue":"2","key":"2204_CR48","doi-asserted-by":"publisher","first-page":"1379","DOI":"10.1137\/21M1442474","volume":"32","author":"X Wang","year":"2022","unstructured":"Wang, X., Bauschke, H.H.: The Bregman proximal average. SIAM J. Optim. 32(2), 1379\u20131401 (2022)","journal-title":"SIAM J. Optim."},{"key":"2204_CR49","unstructured":"Zhang, J., He, T., Sra, S., Jadbabaie, A.: Why gradient clipping accelerates training: a theoretical justification for adaptivity. In: International Conference on Learning Representations (2020)"}],"container-title":["Mathematical Programming"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10107-025-02204-5.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10107-025-02204-5\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10107-025-02204-5.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,1]],"date-time":"2025-12-01T07:44:42Z","timestamp":1764575082000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10107-025-02204-5"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,4,7]]},"references-count":49,"journal-issue":{"issue":"1-2","published-print":{"date-parts":[[2025,11]]}},"alternative-id":["2204"],"URL":"https:\/\/doi.org\/10.1007\/s10107-025-02204-5","relation":{},"ISSN":["0025-5610","1436-4646"],"issn-type":[{"value":"0025-5610","type":"print"},{"value":"1436-4646","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,4,7]]},"assertion":[{"value":"31 October 2022","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"12 January 2025","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"7 April 2025","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}