{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,28]],"date-time":"2026-02-28T14:07:09Z","timestamp":1772287629705,"version":"3.50.1"},"reference-count":35,"publisher":"Springer Science and Business Media LLC","issue":"1-2","license":[{"start":{"date-parts":[[2016,5,7]],"date-time":"2016-05-07T00:00:00Z","timestamp":1462579200000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Math. Program."],"published-print":{"date-parts":[[2017,1]]},"DOI":"10.1007\/s10107-016-1017-3","type":"journal-article","created":{"date-parts":[[2016,5,7]],"date-time":"2016-05-07T15:23:36Z","timestamp":1462634616000},"page":"419-449","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":107,"title":["Stochastic compositional gradient descent: algorithms for minimizing compositions of expected-value functions"],"prefix":"10.1007","volume":"161","author":[{"given":"Mengdi","family":"Wang","sequence":"first","affiliation":[]},{"given":"Ethan X.","family":"Fang","sequence":"additional","affiliation":[]},{"given":"Han","family":"Liu","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2016,5,7]]},"reference":[{"issue":"5","key":"1017_CR1","doi-asserted-by":"crossref","first-page":"3235","DOI":"10.1109\/TIT.2011.2182178","volume":"58","author":"A Agarwal","year":"2012","unstructured":"Agarwal, A., Bartlett, P., Ravikumar, P., Wainwright, M.: Information-theoretic lower bounds on the oracle complexity of stochastic convex optimization. IEEE Trans. Inf. Theory 58(5), 3235\u20133249 (2012)","journal-title":"IEEE Trans. Inf. Theory"},{"issue":"5B","key":"1017_CR2","doi-asserted-by":"crossref","first-page":"2877","DOI":"10.1214\/08-AOS664","volume":"37","author":"AA Amini","year":"2009","unstructured":"Amini, A.A., Wainwright, M.J.: High-dimensional analysis of semidefinite relaxations for sparse principal components. Ann. Stat. 37(5B), 2877\u20132921 (2009)","journal-title":"Ann. Stat."},{"key":"1017_CR3","volume-title":"Adaptive Algorithms and Stochastic Approximations","author":"A Benveniste","year":"2012","unstructured":"Benveniste, A., M\u00e9tivier, M., Priouret, P.: Adaptive Algorithms and Stochastic Approximations. Springer, Berlin (2012)"},{"key":"1017_CR4","volume-title":"Dynamic Programming and Optimal Control, Volume I\u2013II","author":"DP Bertsekas","year":"2007","unstructured":"Bertsekas, D.P.: Dynamic Programming and Optimal Control, Volume I\u2013II, 4th edn. Athena Scientific, Belmont (2007)","edition":"4"},{"key":"1017_CR5","doi-asserted-by":"crossref","first-page":"163","DOI":"10.1007\/s10107-011-0472-0","volume":"129","author":"DP Bertsekas","year":"2011","unstructured":"Bertsekas, D.P.: Incremental proximal methods for large scale convex optimization. Math. Program. Ser. B 129, 163\u2013195 (2011)","journal-title":"Math. Program. Ser. B"},{"key":"1017_CR6","volume-title":"Parallel and Distributed Computation: Numerical Methods","author":"DP Bertsekas","year":"1989","unstructured":"Bertsekas, D.P., Tsitsiklis, J.N.: Parallel and Distributed Computation: Numerical Methods. Athena Scientific, Belmont (1989)"},{"key":"1017_CR7","doi-asserted-by":"crossref","first-page":"519","DOI":"10.1017\/S0269964800005362","volume":"12","author":"S Bhatnagar","year":"1998","unstructured":"Bhatnagar, S., Borkar, V.S.: A two timescale stochastic approximation scheme for simulation-based parametric optimization. Prob. Eng. Inf. Sci. 12, 519\u2013531 (1998)","journal-title":"Prob. Eng. Inf. Sci."},{"key":"1017_CR8","doi-asserted-by":"crossref","first-page":"291","DOI":"10.1016\/S0167-6911(97)90015-3","volume":"29","author":"VS Borkar","year":"1997","unstructured":"Borkar, V.S.: Stochastic approximation with two time scales. Syst. Control Lett. 29, 291\u2013294 (1997)","journal-title":"Syst. Control Lett."},{"key":"1017_CR9","doi-asserted-by":"crossref","DOI":"10.1007\/978-93-86279-38-5","volume-title":"Stochastic Approximation: A Dynamical Systems Viewpoint","author":"VS Borkar","year":"2008","unstructured":"Borkar, V.S.: Stochastic Approximation: A Dynamical Systems Viewpoint. Cambridge University Press, Cambridge (2008)"},{"key":"1017_CR10","doi-asserted-by":"crossref","unstructured":"Dentcheva, D., Penev, S., Ruszczynski, A.: Statistical estimation of composite risk functionals and risk optimization problems. arXiv preprint. arXiv:1504.02658 (2015)","DOI":"10.1007\/s10463-016-0559-8"},{"key":"1017_CR11","volume-title":"Numerical Techniques for Stochastic Optimization","author":"Y Ermoliev","year":"2011","unstructured":"Ermoliev, Y., Wets, R.: Numerical Techniques for Stochastic Optimization. Springer, New York (2011)"},{"key":"1017_CR12","volume-title":"Methods of Stochastic Programming. Monographs in Optimization and OR","author":"YM Ermoliev","year":"1976","unstructured":"Ermoliev, Y.M.: Methods of Stochastic Programming. Monographs in Optimization and OR. Nauka, Moscow (1976)"},{"issue":"4","key":"1017_CR13","doi-asserted-by":"crossref","first-page":"1469","DOI":"10.1137\/110848864","volume":"22","author":"S Ghadimi","year":"2012","unstructured":"Ghadimi, S., Lan, G.: Optimal stochastic approximation algorithms for strongly convex stochastic composite optimization I: a generic algorithmic framework. SIAM J. Optim. 22(4), 1469\u20131492 (2012)","journal-title":"SIAM J. Optim."},{"issue":"4","key":"1017_CR14","doi-asserted-by":"crossref","first-page":"2061","DOI":"10.1137\/110848876","volume":"23","author":"S Ghadimi","year":"2013","unstructured":"Ghadimi, S., Lan, G.: Optimal stochastic approximation algorithms for strongly convex stochastic composite optimization, II: shrinking procedures and optimal algorithms. SIAM J. Optim. 23(4), 2061\u20132089 (2013)","journal-title":"SIAM J. Optim."},{"issue":"4","key":"1017_CR15","doi-asserted-by":"crossref","first-page":"2282","DOI":"10.1214\/09-AOS781","volume":"38","author":"J Huang","year":"2010","unstructured":"Huang, J., Horowitz, J.L., Wei, F.: Variable selection in nonparametric additive models. Ann. Stat. 38(4), 2282 (2010)","journal-title":"Ann. Stat."},{"issue":"3","key":"1017_CR16","doi-asserted-by":"crossref","first-page":"462","DOI":"10.1214\/aoms\/1177729392","volume":"23","author":"J Kiefer","year":"1952","unstructured":"Kiefer, J., Wolfowitz, J.: Stochastic estimation of the maximum of a regression function. Ann. Math. Stat. 23(3), 462\u2013466 (1952)","journal-title":"Ann. Math. Stat."},{"key":"1017_CR17","doi-asserted-by":"crossref","first-page":"796","DOI":"10.1214\/105051604000000116","volume":"14","author":"R Konda","year":"2004","unstructured":"Konda, R., Tsitsikilis, J.N.: Convergence rate of linear two-time-scale stochastic approximation. Ann. Appl. Prob. 14, 796\u2013819 (2004)","journal-title":"Ann. Appl. Prob."},{"key":"1017_CR18","volume-title":"Stochastic Recurrent Procedures","author":"AP Korostelev","year":"1984","unstructured":"Korostelev, A.P.: Stochastic Recurrent Procedures. Nauka, Moscow (1984)"},{"key":"1017_CR19","volume-title":"Stochastic Approximation and Recursive Algorithms and Applications","author":"HJ Kushner","year":"2003","unstructured":"Kushner, H.J., Yin, G.: Stochastic Approximation and Recursive Algorithms and Applications. Springer, New York (2003)"},{"key":"1017_CR20","doi-asserted-by":"crossref","first-page":"225","DOI":"10.1007\/s10107-011-0468-9","volume":"129","author":"A Nedi\u0107","year":"2011","unstructured":"Nedi\u0107, A.: Random algorithms for convex minimization problems. Math. Prog. Ser. B 129, 225\u2013253 (2011)","journal-title":"Math. Prog. Ser. B"},{"key":"1017_CR21","doi-asserted-by":"crossref","first-page":"109","DOI":"10.1137\/S1052623499362111","volume":"12","author":"A Nedi\u0107","year":"2001","unstructured":"Nedi\u0107, A., Bertsekas, D.P.: Incremental subgradient methods for nondifferentiable optimization. SIAM J. Optim. 12, 109\u2013138 (2001)","journal-title":"SIAM J. Optim."},{"key":"1017_CR22","doi-asserted-by":"crossref","first-page":"381","DOI":"10.1016\/S1570-579X(01)80023-9","volume":"8","author":"A Nedi\u0107","year":"2001","unstructured":"Nedi\u0107, A., Bertsekas, D.P., Borkar, V.S.: Distributed asynchronous incremental subgradient methods. Stud. Comput. Math. 8, 381\u2013407 (2001)","journal-title":"Stud. Comput. Math."},{"key":"1017_CR23","doi-asserted-by":"crossref","unstructured":"Nemirovski, A., Rubinstein, R.Y.: An efficient stochastic approximation algorithm for stochastic saddle point problems. In: Dror, M., L\u2019Ecuyer, P., Szidarovszky, F. (eds.) Modeling Uncertainty, pp. 156\u2013184. Springer (2005)","DOI":"10.1007\/0-306-48102-2_8"},{"key":"1017_CR24","volume-title":"Problem Complexity and Method Efficiency in Optimization","author":"A Nemirovsky","year":"1983","unstructured":"Nemirovsky, A., Yudin, D.: Problem Complexity and Method Efficiency in Optimization. Wiley, New York (1983)"},{"issue":"2","key":"1017_CR25","first-page":"372","volume":"27","author":"Y Nesterov","year":"1983","unstructured":"Nesterov, Y.: A method of solving a convex programming problem with convergence rate $${\\cal O}(1\/k^2)$$ O ( 1 \/ k 2 ) . Sov. Math. Dokl. 27(2), 372\u2013376 (1983)","journal-title":"Sov. Math. Dokl."},{"issue":"4","key":"1017_CR26","doi-asserted-by":"crossref","first-page":"838","DOI":"10.1137\/0330046","volume":"30","author":"BT Polyak","year":"1992","unstructured":"Polyak, B.T., Juditsky, A.: Acceleration of stochastic approximation by averaging. SIAM J. Control Optim. 30(4), 838\u2013855 (1992)","journal-title":"SIAM J. Control Optim."},{"key":"1017_CR27","unstructured":"Rakhlin, A., Shamir, O., Sridharan, K.: Making gradient descent optimal for strongly convex stochastic optimization. In: Proceedings of the 29th International Conference on Machine Learning, pp. 449\u2013456 (2012)"},{"issue":"5","key":"1017_CR28","doi-asserted-by":"crossref","first-page":"1009","DOI":"10.1111\/j.1467-9868.2009.00718.x","volume":"71","author":"P Ravikumar","year":"2009","unstructured":"Ravikumar, P., Lafferty, J., Liu, H., Wasserman, L.: Sparse additive models. J. R. Stat. Soc. Ser. B 71(5), 1009\u20131030 (2009)","journal-title":"J. R. Stat. Soc. Ser. B"},{"key":"1017_CR29","doi-asserted-by":"crossref","unstructured":"Robbins, H., Siegmund, D.: A convergence theorem for nonnegative almost supermartingales and some applications. J.S. Rustagi, Optimizing Methods in Statistics, pp. 233\u2013257. Academic Press, NY (1985)","DOI":"10.1007\/978-1-4612-5110-1_10"},{"key":"1017_CR30","unstructured":"Shamir, O., Zhang, T.: Stochastic gradient descent for non-smooth optimization: convergence results and optimal averaging schemes. In: Proceedings of the 30th International Conference on Machine Learning, pp. 71\u201379 (2013)"},{"key":"1017_CR31","doi-asserted-by":"crossref","unstructured":"Shapiro, A., Dentcheva, D., Ruszczynski, A.: Lectures on stochastic programming: modeling and theory. MOS-SIAM series on optimization. SIAM, Philadelphia (2009)","DOI":"10.1137\/1.9780898718751"},{"key":"1017_CR32","doi-asserted-by":"crossref","first-page":"267","DOI":"10.1111\/j.2517-6161.1996.tb02080.x","volume":"58","author":"R Tibshirani","year":"1996","unstructured":"Tibshirani, R.: Regression shrinkage and selection via the Lasso. J. R. Stat. Soc. Ser. B 58, 267\u2013288 (1996)","journal-title":"J. R. Stat. Soc. Ser. B"},{"issue":"2","key":"1017_CR33","doi-asserted-by":"crossref","first-page":"321","DOI":"10.1007\/s10107-014-0769-x","volume":"150","author":"M Wang","year":"2015","unstructured":"Wang, M., Bertsekas, D.P.: Incremental constraint projection methods for variational inequalities. Math. Program. Ser. A 150(2), 321\u2013363 (2015)","journal-title":"Math. Program. Ser. A"},{"key":"1017_CR34","unstructured":"Wang, M., Bertsekas, D.P.: Incremental constraint projection-proximal methods for nonsmooth convex optimization. SIAM J. Optim. (2014) (in press)"},{"issue":"1","key":"1017_CR35","doi-asserted-by":"crossref","first-page":"19","DOI":"10.1093\/biomet\/asm018","volume":"94","author":"M Yuan","year":"2007","unstructured":"Yuan, M., Lin, Y.: Model selection and estimation in the Gaussian graphical model. Biometrika 94(1), 19\u201335 (2007)","journal-title":"Biometrika"}],"container-title":["Mathematical Programming"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10107-016-1017-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s10107-016-1017-3\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10107-016-1017-3","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10107-016-1017-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,6,16]],"date-time":"2024-06-16T08:08:35Z","timestamp":1718525315000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s10107-016-1017-3"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2016,5,7]]},"references-count":35,"journal-issue":{"issue":"1-2","published-print":{"date-parts":[[2017,1]]}},"alternative-id":["1017"],"URL":"https:\/\/doi.org\/10.1007\/s10107-016-1017-3","relation":{},"ISSN":["0025-5610","1436-4646"],"issn-type":[{"value":"0025-5610","type":"print"},{"value":"1436-4646","type":"electronic"}],"subject":[],"published":{"date-parts":[[2016,5,7]]}}}