{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,16]],"date-time":"2025-10-16T07:01:35Z","timestamp":1760598095964},"reference-count":38,"publisher":"Springer Science and Business Media LLC","issue":"12","license":[{"start":{"date-parts":[[2021,11,19]],"date-time":"2021-11-19T00:00:00Z","timestamp":1637280000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2021,11,19]],"date-time":"2021-11-19T00:00:00Z","timestamp":1637280000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Sci. China Inf. Sci."],"published-print":{"date-parts":[[2021,12]]},"DOI":"10.1007\/s11432-019-2889-x","type":"journal-article","created":{"date-parts":[[2021,11,28]],"date-time":"2021-11-28T04:20:52Z","timestamp":1638073252000},"update-policy":"http:\/\/dx.doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["Accelerated value iteration via Anderson mixing"],"prefix":"10.1007","volume":"64","author":[{"given":"Yujun","family":"Li","sequence":"first","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2021,11,19]]},"reference":[{"key":"2889_CR1","volume-title":"Reinforcement Learning: An Introduction","author":"R S Sutton","year":"1998","unstructured":"Sutton R S, Barto A G. Reinforcement Learning: An Introduction. Cambridge: MIT Press, 1998"},{"key":"2889_CR2","doi-asserted-by":"publisher","first-page":"279","DOI":"10.1007\/BF00992698","volume":"8","author":"C J Watkins","year":"1992","unstructured":"Watkins C J, Dayan P. Q-learning. Mach Learn, 1992, 8: 279\u2013292","journal-title":"Mach Learn"},{"key":"2889_CR3","first-page":"20","volume-title":"On-line Q-learning using connectionist systems","author":"G A Rummery","year":"1994","unstructured":"Rummery G A, Niranjan M. On-line Q-learning using connectionist systems. Cambridge: University of Cambridge, Department of Engineering, 1994, 37: 20"},{"key":"2889_CR4","doi-asserted-by":"publisher","first-page":"219","DOI":"10.1177\/105971239700600202","volume":"6","author":"M Wiering","year":"1997","unstructured":"Wiering M, Schmidhuber J. HQ-learning. Adaptive Behav, 1997, 6: 219\u2013246","journal-title":"Adaptive Behav"},{"key":"2889_CR5","doi-asserted-by":"publisher","first-page":"2279","DOI":"10.1007\/s11432-011-4332-6","volume":"54","author":"C L Chen","year":"2011","unstructured":"Chen C L, Dong D Y, Li H-X, et al. Hybrid MDP based integrated hierarchical Q-learning. Sci China Inf Sci, 2011, 54: 2279\u20132294","journal-title":"Sci China Inf Sci"},{"key":"2889_CR6","unstructured":"Sutton R S, McAllester D, Singh S, et al. Policy gradient methods for reinforcement learning with function approximation. In: Proceedings of Conference on Neural Information Processing Systems, 2000"},{"key":"2889_CR7","doi-asserted-by":"publisher","first-page":"237","DOI":"10.1613\/jair.301","volume":"4","author":"L P Kaelbling","year":"1996","unstructured":"Kaelbling L P, Littman M L, Moore A W. Reinforcement learning: a survey. J Artif Intell Res, 1996, 4: 237\u2013285","journal-title":"J Artif Intell Res"},{"key":"2889_CR8","unstructured":"Bellemare M G, Dabney W, Munos R. A distributional perspective on reinforcement learning. In: Proceedings of International Conference on Machine Learning, 2017"},{"key":"2889_CR9","unstructured":"Schaul T, Quan J, Antonoglou I, et al. Prioritized experience replay. In: Proceedings of International Conference on Learning Representations, 2016"},{"key":"2889_CR10","unstructured":"Van H H, Guez A, Silver D. Deep reinforcement learning with double q-learning. In: Proceedings of AAAI Conference on Artificial Intelligence, 2016"},{"key":"2889_CR11","unstructured":"Wang Z Y, Schaul T, Hessel M, et al. Dueling network architectures for deep reinforcement learning. In: Proceedings of International Conference on Machine Learning, 2015"},{"key":"2889_CR12","unstructured":"Mnih V, Kavukcuoglu K, Silver D, et al. Playing Atari with deep reinforcement learning. 2013. ArXiv:1312.5602"},{"key":"2889_CR13","doi-asserted-by":"publisher","first-page":"052204","DOI":"10.1007\/s11432-018-9602-1","volume":"62","author":"X X Li","year":"2019","unstructured":"Li X X, Peng Z H, Liang L, et al. Policy iteration based Q-learning for linear nonzero-sum quadratic differential games. Sci China Inf Sci, 2019, 62: 052204","journal-title":"Sci China Inf Sci"},{"key":"2889_CR14","doi-asserted-by":"publisher","first-page":"119204","DOI":"10.1007\/s11432-018-9463-x","volume":"61","author":"X H Yan","year":"2018","unstructured":"Yan X H, Zhu J H, Kuang M C, et al. Missile aerodynamic design using reinforcement learning and transfer learning. Sci China Inf Sci, 2018, 61: 119204","journal-title":"Sci China Inf Sci"},{"key":"2889_CR15","unstructured":"Dieleman S, Aaron V D O, Karen S. The challenge of realistic music generation: modelling raw audio at scale. In: Proceedings of Conference on Neural Information Processing Systems, 2018"},{"key":"2889_CR16","first-page":"679","volume":"6","author":"R Bellman","year":"1957","unstructured":"Bellman R. A Markovian decision process. J Math Mech, 1957, 6: 679\u2013684","journal-title":"J Math Mech"},{"key":"2889_CR17","volume-title":"Dynamic Programming and Markov Processes","author":"R A Howard","year":"1964","unstructured":"Howard R A. Dynamic Programming and Markov Processes. Hoboken: John Wiley & Sons, 1964"},{"key":"2889_CR18","doi-asserted-by":"publisher","first-page":"181","DOI":"10.1137\/130932284","volume":"37","author":"A Alla","year":"2015","unstructured":"Alla A, Falcone M, Kalise D. An efficient policy iteration algorithm for dynamic programming equations. SIAM J Sci Comput, 2015, 37: 181\u2013200","journal-title":"SIAM J Sci Comput"},{"key":"2889_CR19","volume-title":"Markov Decision Processes: Discrete Stochastic Dynamic Programming","author":"M L Puterman","year":"2014","unstructured":"Puterman M L. Markov Decision Processes: Discrete Stochastic Dynamic Programming. Hoboken: John Wiley & Sons, 2014"},{"key":"2889_CR20","doi-asserted-by":"crossref","unstructured":"Laurini M, Micelli P, Consolini L, et al. A Jacobi-like acceleration for dynamic programming. In: Proceedings of Conference on Decision and Control, 2016. 7371\u20137376","DOI":"10.1109\/CDC.2016.7799408"},{"key":"2889_CR21","doi-asserted-by":"publisher","first-page":"8435","DOI":"10.1016\/j.ifacol.2017.08.735","volume":"50","author":"M Laurini","year":"2017","unstructured":"Laurini M, Consolini L, Locatelli M. A consensus approach to dynamic programming. IFAC-PapersOnLine, 2017, 50: 8435\u20138440","journal-title":"IFAC-PapersOnLine"},{"key":"2889_CR22","doi-asserted-by":"publisher","first-page":"231","DOI":"10.1561\/2200000050","volume":"8","author":"S Bubeck","year":"2015","unstructured":"Bubeck S. Convex optimization: algorithms and complexity. FNT Mach Learn, 2015, 8: 231\u2013357","journal-title":"FNT Mach Learn"},{"key":"2889_CR23","unstructured":"Scieur D, D\u2019Aspremont A, Bach F. Regularized nonlinear acceleration. In: Proceedings of Conference on Neural Information Processing Systems, 2016"},{"key":"2889_CR24","unstructured":"Scieur D, Bach F, D\u2019Aspremont A. Nonlinear acceleration of stochastic algorithms. In: Proceedings of Conference on Neural Information Processing Systems, 2017"},{"key":"2889_CR25","unstructured":"Xie G Z, Wang Y T, Zhou S C, et al. Interpolatron: interpolation or extrapolation schemes to accelerate optimization for deep neural networks. 2018. ArXiv: 1805.06753"},{"key":"2889_CR26","unstructured":"Anschel O, Baram N, Shimkin N. Averaged-DQN: variance reduction and stabilization for deep reinforcement learning. In: Proceedings of International Conference on Machine Learning, 2017"},{"key":"2889_CR27","unstructured":"Johnson R, Zhang T. Accelerating stochastic gradient descent using predictive variance reduction. In: Proceedings of Conference on Neural Information Processing Systems, 2013"},{"key":"2889_CR28","doi-asserted-by":"publisher","first-page":"012101","DOI":"10.1007\/s11432-018-9656-y","volume":"62","author":"C Y Chen","year":"2019","unstructured":"Chen C Y, Wang W L, Zhang Y Z, et al. A convergence analysis for a class of practical variance-reduction stochastic gradient MCMC. Sci China Inf Sci, 2019, 62: 012101","journal-title":"Sci China Inf Sci"},{"key":"2889_CR29","unstructured":"Zhang J, O\u2019Donoghue B, Boyd S. Globally convergent type-I anderson acceleration for non-smooth fixed-point iterations. 2018. ArXiv: 1808.03971"},{"key":"2889_CR30","doi-asserted-by":"publisher","first-page":"547","DOI":"10.1145\/321296.321305","volume":"12","author":"D G Anderson","year":"1965","unstructured":"Anderson D G. Iterative procedures for nonlinear integral equations. J ACM, 1965, 12: 547\u2013560","journal-title":"J ACM"},{"key":"2889_CR31","doi-asserted-by":"publisher","first-page":"1715","DOI":"10.1137\/10078356X","volume":"49","author":"H F Walker","year":"2011","unstructured":"Walker H F, Ni P. Anderson acceleration for fixed-point iterations. SIAM J Numer Anal, 2011, 49: 1715\u20131735","journal-title":"SIAM J Numer Anal"},{"key":"2889_CR32","doi-asserted-by":"publisher","first-page":"805","DOI":"10.1137\/130919398","volume":"53","author":"A Toth","year":"2015","unstructured":"Toth A, Kelley C T. Convergence analysis for anderson acceleration. SIAM J Numer Anal, 2015, 53: 805\u2013819","journal-title":"SIAM J Numer Anal"},{"key":"2889_CR33","unstructured":"Ortega J M, Rheinboldt W C. Iterative solution of nonlinear equations in several variables. 1970"},{"key":"2889_CR34","doi-asserted-by":"crossref","unstructured":"Puterman M L, Brumelle S L. The analytic theory of policy iteration. In: Proceedings of Conference on Dynamic Programming and Its Applications, 1978. 91\u2013113","DOI":"10.1016\/B978-0-12-568150-6.50010-6"},{"key":"2889_CR35","unstructured":"Nesterov Y. Introductory lectures on convex programming volume I: Basic course. 1998"},{"key":"2889_CR36","unstructured":"Brockman G, Cheung V, Pettersson L, et al. OpenAI Gym. 2016. ArXiv: 1606.01540"},{"key":"2889_CR37","unstructured":"Kingma D P, Ba L J. Adam: a method for stochastic optimization. 2014. ArXiv: 1412.6980"},{"key":"2889_CR38","unstructured":"Abadi M, Barham P, Chen J M, et al. Tensorflow: a system for large-scale machine learning. In: Proceedings of Conference on Operating Systems Design and Implementation, 2016. 265\u2013283"}],"container-title":["Science China Information Sciences"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11432-019-2889-x.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11432-019-2889-x\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11432-019-2889-x.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,1,27]],"date-time":"2023-01-27T12:15:04Z","timestamp":1674821704000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11432-019-2889-x"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,11,19]]},"references-count":38,"journal-issue":{"issue":"12","published-print":{"date-parts":[[2021,12]]}},"alternative-id":["2889"],"URL":"https:\/\/doi.org\/10.1007\/s11432-019-2889-x","relation":{},"ISSN":["1674-733X","1869-1919"],"issn-type":[{"value":"1674-733X","type":"print"},{"value":"1869-1919","type":"electronic"}],"subject":[],"published":{"date-parts":[[2021,11,19]]},"assertion":[{"value":"18 August 2019","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"9 November 2019","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"21 December 2019","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"19 November 2021","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}],"article-number":"222105"}}