{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,2,21]],"date-time":"2025-02-21T07:46:52Z","timestamp":1740124012468,"version":"3.37.3"},"reference-count":63,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2023,3,10]],"date-time":"2023-03-10T00:00:00Z","timestamp":1678406400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,3,10]],"date-time":"2023-03-10T00:00:00Z","timestamp":1678406400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["J Optim Theory Appl"],"published-print":{"date-parts":[[2023,7]]},"DOI":"10.1007\/s10957-023-02180-w","type":"journal-article","created":{"date-parts":[[2023,3,10]],"date-time":"2023-03-10T17:02:51Z","timestamp":1678467771000},"page":"239-289","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Stochastic Composition Optimization of Functions Without Lipschitz Continuous Gradient"],"prefix":"10.1007","volume":"198","author":[{"given":"Yin","family":"Liu","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4776-0440","authenticated-orcid":false,"given":"Sam","family":"Davanloo Tajbakhsh","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2023,3,10]]},"reference":[{"issue":"1","key":"2180_CR1","doi-asserted-by":"publisher","first-page":"653","DOI":"10.1137\/19M1264783","volume":"31","author":"M Ahookhosh","year":"2021","unstructured":"Ahookhosh, M., Themelis, A., Patrinos, P.: A Bregman forward-backward linesearch algorithm for nonconvex composite optimization: superlinear convergence to nonisolated local minima. SIAM J. Optim. 31(1), 653\u2013685 (2021). https:\/\/doi.org\/10.1137\/19M1264783","journal-title":"SIAM J. Optim."},{"issue":"1","key":"2180_CR2","doi-asserted-by":"publisher","first-page":"1166","DOI":"10.1137\/16M106306X","volume":"5","author":"A Alexanderian","year":"2017","unstructured":"Alexanderian, A., Petra, N., Stadler, G., Ghattas, O.: Mean-variance risk-averse optimal control of systems governed by PDEs with random parameter fields using quadratic approximations. SIAM\/ASA J. Uncertain. Quant. 5(1), 1166\u20131192 (2017). https:\/\/doi.org\/10.1137\/16M106306X","journal-title":"SIAM\/ASA J. Uncertain. Quant."},{"key":"2180_CR3","unstructured":"Asi, H., Duchi, J.C.: Modeling simple structures and geometry for better stochastic optimization algorithms. In: Chaudhuri, K., Sugiyama, M. (eds) The 22nd International Conference on Artificial Intelligence and Statistics, AISTATS 2019, 16-18 April 2019, Naha, Okinawa, Japan, Proceedings of Machine Learning Research, vol.\u00a089, pp. 2425\u20132434. PMLR (2019)"},{"issue":"1","key":"2180_CR4","doi-asserted-by":"publisher","first-page":"27","DOI":"10.1007\/s10107-007-0147-z","volume":"120","author":"A Auslender","year":"2009","unstructured":"Auslender, A., Teboulle, M.: Projected subgradient methods with non-Euclidean distances for non-differentiable convex minimization and variational inequalities. Math. Program. 120(1), 27\u201348 (2009). https:\/\/doi.org\/10.1007\/s10107-007-0147-z","journal-title":"Math. Program."},{"issue":"2","key":"2180_CR5","doi-asserted-by":"publisher","first-page":"519","DOI":"10.1137\/21M1406222","volume":"32","author":"K Balasubramanian","year":"2022","unstructured":"Balasubramanian, K., Ghadimi, S., Nguyen, A.: Stochastic multilevel composition optimization algorithms with level-independent convergence rates. SIAM J. Optim. 32(2), 519\u2013544 (2022). https:\/\/doi.org\/10.1137\/21M1406222","journal-title":"SIAM J. Optim."},{"issue":"3","key":"2180_CR6","doi-asserted-by":"publisher","first-page":"1068","DOI":"10.1007\/s10957-019-01516-9","volume":"182","author":"HH Bauschke","year":"2019","unstructured":"Bauschke, H.H., Bolte, J., Chen, J., Teboulle, M., Wang, X.: On linear convergence of non-Euclidean gradient methods without strong convexity and Lipschitz gradient continuity. J. Optim. Theory Appl. 182(3), 1068\u20131087 (2019). https:\/\/doi.org\/10.1007\/s10957-019-01516-9","journal-title":"J. Optim. Theory Appl."},{"issue":"2","key":"2180_CR7","doi-asserted-by":"publisher","first-page":"330","DOI":"10.1287\/moor.2016.0817","volume":"42","author":"HH Bauschke","year":"2017","unstructured":"Bauschke, H.H., Bolte, J., Teboulle, M.: A descent lemma beyond Lipschitz gradient continuity: first-order methods revisited and applications. Math. Oper. Res. 42(2), 330\u2013348 (2017). https:\/\/doi.org\/10.1287\/moor.2016.0817","journal-title":"Math. Oper. Res."},{"issue":"3","key":"2180_CR8","doi-asserted-by":"publisher","first-page":"167","DOI":"10.1016\/S0167-6377(02)00231-6","volume":"31","author":"A Beck","year":"2003","unstructured":"Beck, A., Teboulle, M.: Mirror descent and nonlinear projected subgradient methods for convex optimization. Oper. Res. Lett. 31(3), 167\u2013175 (2003). https:\/\/doi.org\/10.1016\/S0167-6377(02)00231-6","journal-title":"Oper. Res. Lett."},{"issue":"1","key":"2180_CR9","doi-asserted-by":"publisher","first-page":"79","DOI":"10.1137\/S1052623499354564","volume":"12","author":"A Ben-Tal","year":"2001","unstructured":"Ben-Tal, A., Margalit, T., Nemirovski, A.: The ordered subsets mirror descent optimization method with applications to tomography. SIAM J. Optim. 12(1), 79\u2013108 (2001). https:\/\/doi.org\/10.1137\/S1052623499354564","journal-title":"SIAM J. Optim."},{"issue":"1","key":"2180_CR10","doi-asserted-by":"publisher","first-page":"155","DOI":"10.1016\/j.csda.2006.11.006","volume":"52","author":"MW Berry","year":"2007","unstructured":"Berry, M.W., Browne, M., Langville, A.N., Pauca, V.P., Plemmons, R.J.: Algorithms and applications for approximate nonnegative matrix factorization. Comput. Stat. Data Anal 52(1), 155\u2013173 (2007). https:\/\/doi.org\/10.1016\/j.csda.2006.11.006","journal-title":"Comput. Stat. Data Anal"},{"key":"2180_CR11","unstructured":"Blanchet, J., Goldfarb, D., Iyengar, G., Li, F., Zhou, C.: Unbiased simulation for optimizing stochastic function compositions. arXiv:1711.07564 (2017)"},{"issue":"3","key":"2180_CR12","doi-asserted-by":"publisher","first-page":"2131","DOI":"10.1137\/17M1138558","volume":"28","author":"J Bolte","year":"2018","unstructured":"Bolte, J., Sabach, S., Teboulle, M., Vaisbourd, Y.: First order methods beyond convexity and Lipschitz gradient continuity with applications to quadratic inverse problems. SIAM J. Optim. 28(3), 2131\u20132151 (2018). https:\/\/doi.org\/10.1137\/17M1138558","journal-title":"SIAM J. Optim."},{"issue":"3","key":"2180_CR13","doi-asserted-by":"publisher","first-page":"2172","DOI":"10.1137\/070711311","volume":"31","author":"A Borz\u00ec","year":"2009","unstructured":"Borz\u00ec, A., von Winckel, G.: Multigrid methods and sparse-grid collocation techniques for parabolic optimal control problems with random coefficients. SIAM J. Sci. Comput. 31(3), 2172\u20132192 (2009). https:\/\/doi.org\/10.1137\/070711311","journal-title":"SIAM J. Sci. Comput."},{"issue":"3\u20134","key":"2180_CR14","doi-asserted-by":"publisher","first-page":"231","DOI":"10.1561\/2200000050","volume":"8","author":"S Bubeck","year":"2015","unstructured":"Bubeck, S.: Convex optimization: algorithms and complexity. Found\u00ae Trends Mach Learn 8(3\u20134), 231\u2013357 (2015). https:\/\/doi.org\/10.1561\/2200000050","journal-title":"Found\u00ae Trends Mach Learn"},{"issue":"3","key":"2180_CR15","doi-asserted-by":"publisher","first-page":"538","DOI":"10.1137\/0803026","volume":"3","author":"G Chen","year":"1993","unstructured":"Chen, G., Teboulle, M.: Convergence analysis of a proximal-like minimization algorithm using Bregman functions. SIAM J. Optim. 3(3), 538\u2013543 (1993). https:\/\/doi.org\/10.1137\/0803026","journal-title":"SIAM J. Optim."},{"key":"2180_CR16","doi-asserted-by":"publisher","first-page":"4937","DOI":"10.1109\/TSP.2021.3092377","volume":"69","author":"T Chen","year":"2021","unstructured":"Chen, T., Sun, Y., Yin, W.: Solving stochastic compositional optimization is nearly as easy as solving stochastic optimization. IEEE Trans. Signal Process. 69, 4937\u20134948 (2021). https:\/\/doi.org\/10.1109\/TSP.2021.3092377","journal-title":"IEEE Trans. Signal Process."},{"key":"2180_CR17","doi-asserted-by":"publisher","first-page":"32","DOI":"10.1007\/11679363_5","volume-title":"Independent Component Analysis and Blind Signal Separation","author":"A Cichocki","year":"2006","unstructured":"Cichocki, A., Zdunek, R., Amari, S.I.: Csisz\u00e1r\u2019s divergences for non-negative matrix factorization: family of new algorithms. In: Rosca, J., Erdogmus, D., Pr\u00edncipe, J.C., Haykin, S. (eds.) Independent Component Analysis and Blind Signal Separation, pp. 32\u201339. Springer, Berlin (2006)"},{"issue":"4","key":"2180_CR18","doi-asserted-by":"publisher","first-page":"2032","DOI":"10.1214\/aos\/1176348385","volume":"19","author":"I Csiszar","year":"1991","unstructured":"Csiszar, I.: Why least squares and maximum entropy? An axiomatic approach to inference for linear inverse problems. Ann. Stat. 19(4), 2032\u20132066 (1991). https:\/\/doi.org\/10.1214\/aos\/1176348385","journal-title":"Ann. Stat."},{"key":"2180_CR19","unstructured":"Dai, B., He, N., Pan, Y., Boots, B., Song, L.: Learning from conditional distributions via dual embeddings. In: Singh, A., Zhu, X.J. (eds.) Proceedings of the 20th International Conference on Artificial Intelligence and Statistics, AISTATS 2017, 20-22 April 2017, Fort Lauderdale, FL, USA, Proceedings of Machine Learning Research, vol.\u00a054, pp. 1458\u20131467. PMLR (2017)"},{"issue":"24","key":"2180_CR20","first-page":"809","volume":"15","author":"C Dann","year":"2014","unstructured":"Dann, C., Neumann, G., Peters, J.: Policy evaluation with temporal differences: A survey and comparison. J. Mach. Learn. Res. 15(24), 809\u2013883 (2014)","journal-title":"J. Mach. Learn. Res."},{"key":"2180_CR21","unstructured":"Davis, D., Drusvyatskiy, D., MacPhee, K.J.: Stochastic model-based minimization under high-order growth. arXiv:1807.00255 (2018)"},{"issue":"4","key":"2180_CR22","doi-asserted-by":"publisher","first-page":"737","DOI":"10.1007\/s10463-016-0559-8","volume":"69","author":"D Dentcheva","year":"2017","unstructured":"Dentcheva, D., Penev, S., Ruszczy\u0144ski, A.: Statistical estimation of composite risk functionals and risk optimization problems. Ann. Inst. Stat. Math. 69(4), 737\u2013760 (2017). https:\/\/doi.org\/10.1007\/s10463-016-0559-8","journal-title":"Ann. Inst. Stat. Math."},{"key":"2180_CR23","unstructured":"Devraj, A.M., Chen, J.: Stochastic variance reduced primal dual algorithms for empirical composition optimization. In: Wallach, H.M., Larochelle, H., Beygelzimer, A., d\u2019Alch\u00e9-Buc, F., Fox, E.B., Garnett, R. (eds.) Advances in Neural Information Processing Systems 32: Annual Conference on Neural Information Processing Systems 2019, NeurIPS 2019, December 8\u201314, 2019, Vancouver, BC, Canada, pp. 9878\u20139888 (2019)"},{"key":"2180_CR24","unstructured":"Dragomir, R.A., Even, M., Hendrikx, H.: Fast stochastic Bregman gradient methods: Sharp analysis and variance reduction. In: Meila, M., Zhang, T. (eds.) Proceedings of the 38th International Conference on Machine Learning, Proceedings of Machine Learning Research, vol. 139, pp. 2815\u20132825. PMLR (2021)"},{"issue":"1","key":"2180_CR25","doi-asserted-by":"publisher","first-page":"41","DOI":"10.1007\/s10107-021-01618-1","volume":"194","author":"RA Dragomir","year":"2022","unstructured":"Dragomir, R.A., Taylor, A.B., d\u2019Aspremont, A., Bolte, J.: Optimal complexity and certification of Bregman first-order methods. Math. Program. 194(1), 41\u201383 (2022). https:\/\/doi.org\/10.1007\/s10107-021-01618-1","journal-title":"Math. Program."},{"key":"2180_CR26","unstructured":"Duchi, J.C., Shalev-Shwartz, S., Singer, Y., Tewari, A.: Composite objective mirror descent. In: Kalai, A.T., Mohri, M. (eds.) COLT 2010\u2014The 23rd Conference on Learning Theory, Haifa, Israel, June 27\u201329, 2010, pp. 14\u201326. Omnipress (2010)"},{"key":"2180_CR27","unstructured":"Ermoliev, Y.: Stochastic Programming Methods. Nauka (1976)"},{"issue":"4","key":"2180_CR28","doi-asserted-by":"publisher","first-page":"2231","DOI":"10.1137\/120863277","volume":"23","author":"YM Ermoliev","year":"2013","unstructured":"Ermoliev, Y.M., Norkin, V.I.: Sample average approximation method for compound stochastic optimization problems. SIAM J. Optim. 23(4), 2231\u20132263 (2013). https:\/\/doi.org\/10.1137\/120863277","journal-title":"SIAM J. Optim."},{"key":"2180_CR29","unstructured":"Fang, C., Li, C.J., Lin, Z., Zhang, T.: Spider: Near-optimal non-convex optimization via stochastic path-integrated differential estimator. In: Bengio, S., Wallach, H., Larochelle, H., Grauman, K., Cesa-Bianchi, N., Garnett, R. (eds.) Advances in Neural Information Processing Systems, vol.\u00a031. Curran Associates, Inc. (2018)"},{"key":"2180_CR30","unstructured":"Finn, C., Abbeel, P., Levine, S.: Model-agnostic meta-learning for fast adaptation of deep networks. In: Precup, D., Teh, Y.W. (eds.) Proceedings of the 34th International Conference on Machine Learning, ICML 2017, Sydney, NSW, Australia, 6\u201311 August 2017, Proceedings of Machine Learning Research, vol.\u00a070, pp. 1126\u20131135. PMLR (2017)"},{"key":"2180_CR31","unstructured":"Ge, R., Huang, F., Jin, C., Yuan, Y.: Escaping from saddle points - online stochastic gradient for tensor decomposition. In: Gr\u00fcnwald, P., Hazan, E., Kale, S. (eds.) Proceedings of The 28th Conference on Learning Theory, COLT 2015, Paris, France, July 3-6, 2015, JMLR Workshop and Conference Proceedings, vol.\u00a040, pp. 797\u2013842. JMLR.org (2015)"},{"issue":"1","key":"2180_CR32","doi-asserted-by":"publisher","first-page":"267","DOI":"10.1007\/s10107-014-0846-1","volume":"155","author":"S Ghadimi","year":"2016","unstructured":"Ghadimi, S., Lan, G., Zhang, H.: Mini-batch stochastic approximation methods for nonconvex stochastic composite optimization. Math. Program. 155(1), 267\u2013305 (2016). https:\/\/doi.org\/10.1007\/s10107-014-0846-1","journal-title":"Math. Program."},{"issue":"1","key":"2180_CR33","doi-asserted-by":"publisher","first-page":"960","DOI":"10.1137\/18M1230542","volume":"30","author":"S Ghadimi","year":"2020","unstructured":"Ghadimi, S., Ruszczy\u0144ski, A., Wang, M.: A single timescale stochastic approximation method for nested stochastic optimization. SIAM J. Optim. 30(1), 960\u2013979 (2020). https:\/\/doi.org\/10.1137\/18M1230542","journal-title":"SIAM J. Optim."},{"issue":"3","key":"2180_CR34","doi-asserted-by":"publisher","first-page":"717","DOI":"10.1007\/s10589-021-00284-5","volume":"79","author":"F Hanzely","year":"2021","unstructured":"Hanzely, F., Richt\u00e1rik, P.: Fastest rates for stochastic mirror descent methods. Comput. Optim. Appl. 79(3), 717\u2013766 (2021). https:\/\/doi.org\/10.1007\/s10589-021-00284-5","journal-title":"Comput. Optim. Appl."},{"issue":"2","key":"2180_CR35","doi-asserted-by":"publisher","first-page":"405","DOI":"10.1007\/s10589-021-00273-8","volume":"79","author":"F Hanzely","year":"2021","unstructured":"Hanzely, F., Richt\u00e1rik, P., Xiao, L.: Accelerated Bregman proximal gradient methods for relatively smooth convex optimization. Comput. Optim. Appl. 79(2), 405\u2013440 (2021). https:\/\/doi.org\/10.1007\/s10589-021-00273-8","journal-title":"Comput. Optim. Appl."},{"issue":"1","key":"2180_CR36","doi-asserted-by":"publisher","first-page":"383","DOI":"10.1007\/s10107-020-01486-1","volume":"187","author":"B Hu","year":"2021","unstructured":"Hu, B., Seiler, P., Lessard, L.: Analysis of biased stochastic gradient descent using sequential semidefinite programs. Math. Program. 187(1), 383\u2013408 (2021). https:\/\/doi.org\/10.1007\/s10107-020-01486-1","journal-title":"Math. Program."},{"key":"2180_CR37","unstructured":"Hu, Y., Zhang, S., Chen, X., He, N.: Biased stochastic first-order methods for conditional stochastic optimization and applications in meta learning. In: Larochelle, H., \u00a0Ranzato, M., \u00a0Hadsell, R., \u00a0Balcan, M., Lin, H. (eds.) Advances in Neural Information Processing Systems, vol.\u00a033, pp. 2759\u20132770. Curran Associates, Inc. (2020)"},{"issue":"9","key":"2180_CR38","first-page":"121","volume":"30","author":"A Juditsky","year":"2011","unstructured":"Juditsky, A., Nemirovski, A., et al.: First order methods for nonsmooth convex large-scale optimization, I: general purpose methods. Optim. Mach. Learn. 30(9), 121\u2013148 (2011)","journal-title":"Optim. Mach. Learn."},{"issue":"6755","key":"2180_CR39","doi-asserted-by":"publisher","first-page":"788","DOI":"10.1038\/44565","volume":"401","author":"DD Lee","year":"1999","unstructured":"Lee, D.D., Seung, H.S.: Learning the parts of objects by non-negative matrix factorization. Nature 401(6755), 788\u2013791 (1999). https:\/\/doi.org\/10.1038\/44565","journal-title":"Nature"},{"key":"2180_CR40","unstructured":"Li, Q., Zhu, Z., Tang, G., Wakin, M.B.: Provable Bregman-divergence based methods for nonconvex and non-Lipschitz problems. arXiv:1904.09712 (2019)"},{"key":"2180_CR41","unstructured":"Lian, X., Wang, M., Liu, J.: Finite-sum composition optimization via variance reduced gradient descent. In: Singh, A., Zhu, X.J. (eds.) Proceedings of the 20th International Conference on Artificial Intelligence and Statistics, AISTATS 2017, 20-22 April 2017, Fort Lauderdale, FL, USA, Proceedings of Machine Learning Research, vol.\u00a054, pp. 1159\u20131167. PMLR (2017)"},{"key":"2180_CR42","doi-asserted-by":"publisher","unstructured":"Lin, T., Fan, C., Wang, M., Jordan, M.I.: Improved sample complexity for stochastic compositional variance reduced gradient. In: 2020 American Control Conference (ACC), pp. 126\u2013131 (2020). https:\/\/doi.org\/10.23919\/ACC45564.2020.9147515","DOI":"10.23919\/ACC45564.2020.9147515"},{"issue":"4","key":"2180_CR43","doi-asserted-by":"publisher","first-page":"1205","DOI":"10.1109\/TNNLS.2018.2866699","volume":"30","author":"L Liu","year":"2019","unstructured":"Liu, L., Liu, J., Tao, D.: Dualityfree methods for stochastic composition optimization. IEEE Trans. Neural Netw. Learn. Syst. 30(4), 1205\u20131217 (2019). https:\/\/doi.org\/10.1109\/TNNLS.2018.2866699","journal-title":"IEEE Trans. Neural Netw. Learn. Syst."},{"issue":"1","key":"2180_CR44","doi-asserted-by":"publisher","first-page":"333","DOI":"10.1137\/16M1099546","volume":"28","author":"H Lu","year":"2018","unstructured":"Lu, H., Freund, R.M., Nesterov, Y.: Relatively smooth convex optimization by first-order methods, and applications. SIAM J. Optim. 28(1), 333\u2013354 (2018). https:\/\/doi.org\/10.1137\/16M1099546","journal-title":"SIAM J. Optim."},{"key":"2180_CR45","doi-asserted-by":"publisher","unstructured":"Luo, X., Liu, Z., Xiao, S., Xie, X., Li, D.: Mindsim: user simulator for news recommenders. In: Proceedings of the ACM Web Conference 2022, WWW \u201922, pp. 2067\u20132077. Association for Computing Machinery, New York, NY, USA (2022). https:\/\/doi.org\/10.1145\/3485447.3512080","DOI":"10.1145\/3485447.3512080"},{"issue":"3","key":"2180_CR46","doi-asserted-by":"publisher","first-page":"658","DOI":"10.1137\/19M1298007","volume":"2","author":"MC Mukkamala","year":"2020","unstructured":"Mukkamala, M.C., Ochs, P., Pock, T., Sabach, S.: Convex-concave backtracking for inertial Bregman proximal gradient algorithms in nonconvex optimization. SIAM J. Math. Data Sci. 2(3), 658\u2013682 (2020). https:\/\/doi.org\/10.1137\/19M1298007","journal-title":"SIAM J. Math. Data Sci."},{"issue":"4","key":"2180_CR47","doi-asserted-by":"publisher","first-page":"1574","DOI":"10.1137\/070704277","volume":"19","author":"A Nemirovski","year":"2009","unstructured":"Nemirovski, A., Juditsky, A., Lan, G., Shapiro, A.: Robust stochastic approximation approach to stochastic programming. SIAM J. Optim. 19(4), 1574\u20131609 (2009). https:\/\/doi.org\/10.1137\/070704277","journal-title":"SIAM J. Optim."},{"key":"2180_CR48","volume-title":"Problem Complexity and Method Efficiency in Optimization","author":"AS Nemirovskij","year":"1983","unstructured":"Nemirovskij, A.S., Yudin, D.B.: Problem Complexity and Method Efficiency in Optimization. Wiley, New York (1983)"},{"key":"2180_CR49","unstructured":"Nguyen, L.M., Liu, J., Scheinberg, K., Tak\u00e1c, M.: SARAH: A novel method for machine learning problems using stochastic recursive gradient. In: Precup, D., Teh, Y.W. (eds.) Proceedings of the 34th International Conference on Machine Learning, ICML 2017, Sydney, NSW, Australia, 6\u201311 August 2017, Proceedings of Machine Learning Research, vol.\u00a070, pp. 2613\u20132621. PMLR (2017)"},{"issue":"2","key":"2180_CR50","doi-asserted-by":"publisher","first-page":"111","DOI":"10.1002\/env.3170050203","volume":"5","author":"P Paatero","year":"1994","unstructured":"Paatero, P., Tapper, U.: Positive matrix factorization: a non-negative factor model with optimal utilization of error estimates of data values. Environmetrics 5(2), 111\u2013126 (1994). https:\/\/doi.org\/10.1002\/env.3170050203","journal-title":"Environmetrics"},{"issue":"3","key":"2180_CR51","doi-asserted-by":"publisher","first-page":"2301","DOI":"10.1137\/20M1312952","volume":"59","author":"A Ruszczy\u0144ski","year":"2021","unstructured":"Ruszczy\u0144ski, A.: A stochastic subgradient method for nonsmooth nonconvex multilevel composition optimization. SIAM J. Control. Optim. 59(3), 2301\u20132320 (2021). https:\/\/doi.org\/10.1137\/20M1312952","journal-title":"SIAM J. Control. Optim."},{"key":"2180_CR52","doi-asserted-by":"publisher","unstructured":"Ruszczy\u0144ski, A., Shapiro, A.: Chapter 6: risk averse optimization. In: SIAM, pp. 223\u2013305 (2021). https:\/\/doi.org\/10.1137\/1.9781611976595.ch6","DOI":"10.1137\/1.9781611976595.ch6"},{"key":"2180_CR53","volume-title":"Reinforcement Learning: An Introduction","author":"RS Sutton","year":"2018","unstructured":"Sutton, R.S., Barto, A.G.: Reinforcement Learning: An Introduction. MIT Press, Cambridge (2018)"},{"issue":"1","key":"2180_CR54","doi-asserted-by":"publisher","first-page":"67","DOI":"10.1007\/s10107-018-1284-2","volume":"170","author":"M Teboulle","year":"2018","unstructured":"Teboulle, M.: A simplified view of first order methods for optimization. Math. Program. 170(1), 67\u201396 (2018). https:\/\/doi.org\/10.1007\/s10107-018-1284-2","journal-title":"Math. Program."},{"issue":"1","key":"2180_CR55","doi-asserted-by":"publisher","first-page":"419","DOI":"10.1007\/s10107-016-1017-3","volume":"161","author":"M Wang","year":"2017","unstructured":"Wang, M., Fang, E.X., Liu, H.: Stochastic compositional gradient descent: algorithms for minimizing compositions of expected-value functions. Math. Program. 161(1), 419\u2013449 (2017). https:\/\/doi.org\/10.1007\/s10107-016-1017-3","journal-title":"Math. Program."},{"key":"2180_CR56","unstructured":"Wang, M., Liu, J., Fang, E.X.: Accelerating stochastic composition optimization. In: Lee, D.D., Sugiyama, M., von Luxburg, U., Guyon, I., Garnett, R. (eds.) Advances in Neural Information Processing Systems 29: Annual Conference on Neural Information Processing Systems 2016, December 5\u201310, 2016, Barcelona, Spain, pp. 1714\u20131722 (2016)"},{"key":"2180_CR57","unstructured":"Wang, Z., Ji, K., Zhou, Y., Liang, Y., Tarokh, V.: Spiderboost and momentum: Faster variance reduction algorithms. In: Wallach, H.M., Larochelle, H., Beygelzimer, A., d\u2019Alch\u00e9-Buc, F., Fox, E.B., Garnett, R. (eds.) Advances in Neural Information Processing Systems 32: Annual Conference on Neural Information Processing Systems 2019, NeurIPS 2019, December 8\u201314, 2019, Vancouver, BC, Canada, pp. 2403\u20132413 (2019)"},{"issue":"4","key":"2180_CR58","doi-asserted-by":"publisher","first-page":"418","DOI":"10.1287\/ijoo.2021.0055","volume":"3","author":"Y Xu","year":"2021","unstructured":"Xu, Y., Xu, Y.: Katyusha acceleration for convex finite-sum compositional optimization. INFORMS J. Optim. 3(4), 418\u2013443 (2021). https:\/\/doi.org\/10.1287\/ijoo.2021.0055","journal-title":"INFORMS J. Optim."},{"issue":"1","key":"2180_CR59","doi-asserted-by":"publisher","first-page":"616","DOI":"10.1137\/18M1164846","volume":"29","author":"S Yang","year":"2019","unstructured":"Yang, S., Wang, M., Fang, E.X.: Multilevel stochastic gradient methods for nested composition optimization. SIAM J. Optim. 29(1), 616\u2013659 (2019). https:\/\/doi.org\/10.1137\/18M1164846","journal-title":"SIAM J. Optim."},{"key":"2180_CR60","doi-asserted-by":"publisher","unstructured":"Yu, Y., Huang, L.: Fast stochastic variance reduced ADMM for stochastic composition optimization. In: Sierra, C. (ed.) Proceedings of the Twenty-Sixth International Joint Conference on Artificial Intelligence, IJCAI 2017, Melbourne, Australia, August 19-25, 2017, pp. 3364\u20133370. ijcai.org (2017). https:\/\/doi.org\/10.24963\/ijcai.2017\/470","DOI":"10.24963\/ijcai.2017\/470"},{"issue":"2","key":"2180_CR61","doi-asserted-by":"publisher","first-page":"1131","DOI":"10.1137\/19M1285457","volume":"31","author":"J Zhang","year":"2021","unstructured":"Zhang, J., Xiao, L.: Multilevel composite stochastic optimization via nested variance reduction. SIAM J. Optim. 31(2), 1131\u20131157 (2021). https:\/\/doi.org\/10.1137\/19M1285457","journal-title":"SIAM J. Optim."},{"issue":"1","key":"2180_CR62","doi-asserted-by":"publisher","first-page":"649","DOI":"10.1007\/s10107-021-01709-z","volume":"195","author":"J Zhang","year":"2022","unstructured":"Zhang, J., Xiao, L.: Stochastic variance-reduced prox-linear algorithms for nonconvex composite optimization. Math. Program. 195(1), 649\u2013691 (2022). https:\/\/doi.org\/10.1007\/s10107-021-01709-z","journal-title":"Math. Program."},{"key":"2180_CR63","unstructured":"Zhang, S., He, N.: On the convergence rate of stochastic mirror descent for nonsmooth nonconvex optimization. arXiv:1806.04781 (2018)"}],"container-title":["Journal of Optimization Theory and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10957-023-02180-w.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10957-023-02180-w\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10957-023-02180-w.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,7,7]],"date-time":"2023-07-07T17:09:54Z","timestamp":1688749794000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10957-023-02180-w"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,3,10]]},"references-count":63,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2023,7]]}},"alternative-id":["2180"],"URL":"https:\/\/doi.org\/10.1007\/s10957-023-02180-w","relation":{},"ISSN":["0022-3239","1573-2878"],"issn-type":[{"type":"print","value":"0022-3239"},{"type":"electronic","value":"1573-2878"}],"subject":[],"published":{"date-parts":[[2023,3,10]]},"assertion":[{"value":"14 June 2022","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"2 February 2023","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"10 March 2023","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}