{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,24]],"date-time":"2026-04-24T07:17:42Z","timestamp":1777015062959,"version":"3.51.4"},"reference-count":49,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"7","license":[{"start":{"date-parts":[[2025,7,1]],"date-time":"2025-07-01T00:00:00Z","timestamp":1751328000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2025,7,1]],"date-time":"2025-07-01T00:00:00Z","timestamp":1751328000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,7,1]],"date-time":"2025-07-01T00:00:00Z","timestamp":1751328000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["U23A20382"],"award-info":[{"award-number":["U23A20382"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Collaborative Innovation Center of Novel Software Technology and Industrialization"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Pattern Anal. Mach. 
Intell."],"published-print":{"date-parts":[[2025,7]]},"DOI":"10.1109\/tpami.2025.3552197","type":"journal-article","created":{"date-parts":[[2025,3,18]],"date-time":"2025-03-18T17:36:21Z","timestamp":1742319381000},"page":"5613-5624","source":"Crossref","is-referenced-by-count":2,"title":["Revisiting Stochastic Multi-Level Compositional Optimization"],"prefix":"10.1109","volume":"47","author":[{"ORCID":"https:\/\/orcid.org\/0009-0007-4595-1060","authenticated-orcid":false,"given":"Wei","family":"Jiang","sequence":"first","affiliation":[{"name":"National Key Laboratory for Novel Software Technology, Nanjing University, Nanjing, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0000-1201-8476","authenticated-orcid":false,"given":"Sifan","family":"Yang","sequence":"additional","affiliation":[{"name":"National Key Laboratory for Novel Software Technology, School of Artificial Intelligence, Nanjing University, Nanjing, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-7410-6854","authenticated-orcid":false,"given":"Yibo","family":"Wang","sequence":"additional","affiliation":[{"name":"National Key Laboratory for Novel Software Technology, School of Artificial Intelligence, Nanjing University, Nanjing, China"}]},{"given":"Tianbao","family":"Yang","sequence":"additional","affiliation":[{"name":"Department of Computer Science and Engineering, Texas A&M University, College Station, TX, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5138-3182","authenticated-orcid":false,"given":"Lijun","family":"Zhang","sequence":"additional","affiliation":[{"name":"National Key Laboratory for Novel Software Technology, School of Artificial Intelligence, Nanjing University, Nanjing, China"}]}],"member":"263","reference":[{"key":"ref1","first-page":"809","article-title":"Policy evaluation with temporal differences: A survey and comparison","volume":"15","author":"Dann","year":"2014","journal-title":"J. Mach. Learn. 
Res."},{"key":"ref2","article-title":"On tilted losses in machine learning: Theory and applications","author":"Li","year":"2021"},{"key":"ref3","article-title":"Multi-step model-agnostic meta-learning: Convergence and improved algorithms","author":"Ji","year":"2020"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1016\/j.ejor.2015.10.013"},{"key":"ref5","doi-asserted-by":"crossref","DOI":"10.1137\/1.9781611976595","volume-title":"Lectures on Stochastic Programming: Modeling and Theory","author":"Shapiro","year":"2021"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1093\/rfs\/hhw080"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1007\/s10463-016-0559-8"},{"key":"ref8","first-page":"15210","article-title":"Momentum-based variance reduction in non-convex SGD","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Cutkosky"},{"key":"ref9","article-title":"An online method for a class of distributionally robust optimization with non-convex objectives","author":"Qi","year":"2021"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1137\/18M1164846"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1137\/21m1406222"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/TSP.2021.3092377"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1137\/19M1285457"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1007\/s10107-024-02090-3"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/TIT.2011.2182178"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1007\/s10107-022-01822-7"},{"key":"ref17","first-page":"2672","article-title":"A stochastic gradient method with an exponential convergence rate for finite training sets","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Roux"},{"key":"ref18","first-page":"10195","article-title":"Optimal algorithms for stochastic multi-level compositional optimization","volume-title":"Proc. 39th Int. Conf. Mach. 
Learn.","author":"Jiang"},{"key":"ref19","first-page":"6286","article-title":"Page: A simple and optimal probabilistic gradient estimator for nonconvex optimization","volume-title":"Proc. 38th Int. Conf. Mach. Learn.","author":"Li"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1007\/s10107-016-1017-3"},{"key":"ref21","first-page":"105:1","article-title":"Accelerating stochastic composition optimization","volume":"18","author":"Wang","year":"2017","journal-title":"J. Mach. Learn. Res."},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1137\/18M1230542"},{"key":"ref23","article-title":"Sarah: A novel method for machine learning problems using stochastic recursive gradient","author":"Nguyen","year":"2017"},{"key":"ref24","article-title":"Spider: Near-optimal non-convex optimization via stochastic path integrated differential estimator","author":"Fang","year":"2018"},{"key":"ref25","article-title":"Spiderboost: A class of faster variance-reduced algorithms for nonconvex optimization","author":"Wang","year":"2018"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.11795"},{"key":"ref27","first-page":"14905","article-title":"Efficient smooth non-convex stochastic compositional optimization via stochastic recursive gradient descent","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Yuan"},{"key":"ref28","first-page":"9075","article-title":"A stochastic composite gradient method with incremental variance reduction","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Zhang"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2021.3071594"},{"key":"ref30","article-title":"Stochastic recursive momentum method for non-convex compositional optimization","author":"Yuan","year":"2020"},{"key":"ref31","first-page":"4402","article-title":"Decentralized multi-level compositional optimization algorithms with level-independent convergence rate","volume-title":"Proc. 27th Int. Conf. Artif. Intell. 
Statist.","author":"Gao"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/TSP.2024.3392351"},{"key":"ref33","first-page":"1159","article-title":"Finite-sum composition optimization via variance reduced gradient descent","volume-title":"Proc. 20th Int. Conf. Artif. Intell. Statist.","author":"Lian"},{"key":"ref34","first-page":"315","article-title":"Accelerating stochastic gradient descent using predictive variance reduction","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Johnson"},{"key":"ref35","first-page":"32499","article-title":"Multi-block-single-probe variance reduced estimator for coupled compositional optimization","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Jiang"},{"key":"ref36","first-page":"15105","article-title":"Learning unnormalized statistical models via compositional optimization","volume-title":"Proc. 40th Int. Conf. Mach. Learn.","author":"Jiang"},{"key":"ref37","first-page":"745","article-title":"Stability and generalization of learning algorithms that converge to global optima","volume-title":"Proc. 35th Int. Conf. Mach. Learn.","author":"Charles"},{"key":"ref38","first-page":"14905","article-title":"Solving a class of non-convex min-max games using iterative first order methods","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Nouiehed"},{"key":"ref39","first-page":"1475","article-title":"Linear convergence of adaptive stochastic gradient descent","volume-title":"Proc. 23rd Int. Conf. Artif. Intell. Statist.","author":"Xie"},{"key":"ref40","first-page":"1276","article-title":"Gradient descent algorithms for Bures-Wasserstein barycenters","volume-title":"Proc. 33rd Conf. Learn. Theory","author":"Chewi"},{"key":"ref41","first-page":"980","article-title":"Linear convergence with condition number independent access of full gradients","volume-title":"Proc. Adv. Neural Inf. Process. 
Syst.","author":"Zhang"},{"key":"ref42","first-page":"257","article-title":"Adaptive subgradient methods for online learning and stochastic optimization","volume-title":"Proc. 23rd Annu. Conf. Learn. Theory","author":"Duchi"},{"key":"ref43","article-title":"Adam: A method for stochastic optimization","author":"Kingma"},{"key":"ref44","article-title":"On the convergence of Adam and beyond","author":"Reddi"},{"key":"ref45","article-title":"Adaptive gradient methods with dynamic bound of learning rate","author":"Luo"},{"key":"ref46","article-title":"On stochastic moving-average estimators for non-convex optimization","author":"Guo","year":"2021"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1007\/springerreference_178963"},{"key":"ref48","first-page":"1126","article-title":"Model-agnostic meta-learning for fast adaptation of deep networks","volume-title":"Proc. 34th Int. Conf. Mach. Learn.","author":"Finn"},{"key":"ref49","first-page":"2568","article-title":"One shot learning of simple visual concepts","volume-title":"Proc. Annu. Meeting Cogn. Sci. 
Soc.","author":"Lake"}],"container-title":["IEEE Transactions on Pattern Analysis and Machine Intelligence"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/34\/11026037\/10930713.pdf?arnumber=10930713","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,6]],"date-time":"2025-06-06T17:42:17Z","timestamp":1749231737000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10930713\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,7]]},"references-count":49,"journal-issue":{"issue":"7"},"URL":"https:\/\/doi.org\/10.1109\/tpami.2025.3552197","relation":{},"ISSN":["0162-8828","2160-9292","1939-3539"],"issn-type":[{"value":"0162-8828","type":"print"},{"value":"2160-9292","type":"electronic"},{"value":"1939-3539","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,7]]}}}