{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,19]],"date-time":"2025-12-19T09:51:27Z","timestamp":1766137887078,"version":"3.37.3"},"reference-count":41,"publisher":"Springer Science and Business Media LLC","issue":"9","license":[{"start":{"date-parts":[[2022,1,10]],"date-time":"2022-01-10T00:00:00Z","timestamp":1641772800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2022,1,10]],"date-time":"2022-01-10T00:00:00Z","timestamp":1641772800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Appl Intell"],"published-print":{"date-parts":[[2022,7]]},"DOI":"10.1007\/s10489-021-02953-8","type":"journal-article","created":{"date-parts":[[2022,1,10]],"date-time":"2022-01-10T00:03:16Z","timestamp":1641772996000},"page":"9885-9898","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["Offline reinforcement learning with anderson acceleration for robotic tasks"],"prefix":"10.1007","volume":"52","author":[{"given":"Guoyu","family":"Zuo","sequence":"first","affiliation":[]},{"given":"Shuai","family":"Huang","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5715-7824","authenticated-orcid":false,"given":"Jiangeng","family":"Li","sequence":"additional","affiliation":[]},{"given":"Daoxiong","family":"Gong","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2022,1,10]]},"reference":[{"key":"2953_CR1","doi-asserted-by":"crossref","unstructured":"Zhu Z, Zhao H (2021) A Survey of Deep RL and IL for Autonomous Driving Policy Learning. arXiv:2101.01993","DOI":"10.1109\/TITS.2021.3134702"},{"key":"2953_CR2","doi-asserted-by":"crossref","unstructured":"Kuderer M, Gulati S, Burgard W (2015) Learning driving styles for autonomous vehicles from demonstration. In: IEEE International Conference on Robotics and Automation (ICRA). IEEE, pp 2641\u20132646","DOI":"10.1109\/ICRA.2015.7139555"},{"key":"2953_CR3","unstructured":"Yu P, Lee J S, Kulyatin I et al (2019) Model-based deep reinforcement learning for dynamic portfolio optimization. arXiv:1901.08740"},{"issue":"9","key":"2953_CR4","doi-asserted-by":"publisher","first-page":"1384","DOI":"10.3390\/electronics9091384","volume":"9","author":"Y Yuan","year":"2020","unstructured":"Yuan Y, Wen W, Yang J (2020) Using data augmentation based reinforcement learning for daily stock trading. Electronics 9(9):1384","journal-title":"Electronics"},{"key":"2953_CR5","unstructured":"Paulus R, Xiong C, Socher R (2018) A Deep Reinforced Model for Abstractive Summarization. In: International Conference on Learning Representations (ICLR)"},{"key":"2953_CR6","doi-asserted-by":"crossref","unstructured":"Grissom II A, He H, Boyd-Graber J et al (2014) Don\u2019t until the final verb wait: Reinforcement learning for simultaneous machine translation. In: Proceedings of the 2014 Conference on empirical methods in natural language processing (EMNLP), pp 1342\u2013 1352","DOI":"10.3115\/v1\/D14-1140"},{"key":"2953_CR7","unstructured":"Kalashnikov D, Irpan A, Pastor P et al (2018) Scalable deep reinforcement learning for vision-based robotic manipulation. In: Conference on Robot Learning (PMLR), pp 651\u2013673"},{"key":"2953_CR8","doi-asserted-by":"crossref","unstructured":"Radosavovic I, Wang X, Pinto L et al (2020) State-only imitation learning for dexterous manipulation. arXiv:2004.04650","DOI":"10.1109\/IROS51168.2021.9636557"},{"key":"2953_CR9","unstructured":"Wu R, Li M, Yao Z et al (2021) Reinforcement Learning Enabled Automatic Impedance Control of a Robotic Knee Prosthesis to Mimic the Intact Knee Motion in a Co-Adapting Environment. CoRR arXiv:2101.03487"},{"key":"2953_CR10","unstructured":"Sutton R S, Barto A G (2018) Reinforcement learning: An introduction. MIT press"},{"key":"2953_CR11","unstructured":"Heess N, TB D, Sriram S et al (2017) Emergence of locomotion behaviours in rich environments. arXiv:1707.02286"},{"key":"2953_CR12","unstructured":"Yuan Y, Kitani K (2020) Residual Force Control for Agile Human Behavior Imitation and Extended Motion Synthesis. In: 33th Advances in Neural Information Processing Systems (NIPS)"},{"key":"2953_CR13","doi-asserted-by":"crossref","unstructured":"Dulac-Arnold G, Levine N, Mankowitz DJ et al (2021) Challenges of real-world reinforcement learning: definitions, benchmarks and analysis. Machine Learning","DOI":"10.1007\/s10994-021-05961-4"},{"key":"2953_CR14","unstructured":"Fujimoto S, Meger D, Precup D (2019) Off-policy deep reinforcement learning without exploration. In: International Conference on Machine Learning (PMLR), pp 2052\u20132062"},{"key":"2953_CR15","unstructured":"Kumar A, Zhou A, Tucker G et al (2020) Conservative q-learning for offline reinforcement learning. In: 33th Advances in Neural Information Processing Systems (NIPS)"},{"key":"2953_CR16","unstructured":"Kumar A, Fu J, Soh M, Tucker G, Levine S (2019) Stabilizing off-policy q-learning via bootstrapping error reduction. In: Advances in Neural Information Processing Systems (NIPS), pp 11761\u201311771"},{"key":"2953_CR17","unstructured":"Wang Z, Novikov A, Zolna K et al (2020) Critic Regularized Regression. In: 33th Advances in Neural Information Processing Systems (NIPS)"},{"key":"2953_CR18","unstructured":"Shi W, Song S, Wu H, et al (2019) Regularized anderson acceleration for off-policy deep reinforcement learning. In: Advances in Neural Information Processing Systems (NIPS), pp 10231\u201310241"},{"issue":"2021","key":"2953_CR19","first-page":"97","volume":"15","author":"S Yang","year":"2021","unstructured":"Yang S et al (2021) Efficient spike-driven learning with dendritic event-based processing. Front Neurosci 15(2021):97","journal-title":"Front Neurosci"},{"issue":"1","key":"2953_CR20","doi-asserted-by":"publisher","first-page":"148","DOI":"10.1109\/TNNLS.2019.2899936","volume":"31","author":"S Yang","year":"2019","unstructured":"Yang S, Deng B, Wang J et al (2019) Scalable digital neuromorphic architecture for large-scale biophysically meaningful neural network with multi-compartment neurons[J]. IEEE Trans Neural Netw Learn Syst 31(1):148\u2013162","journal-title":"IEEE Trans Neural Netw Learn Syst"},{"key":"2953_CR21","unstructured":"Geist M, Scherrer B (2018) Anderson acceleration for reinforcement learning. In: 2018-4th European workshop on Reinforcement Learning (EWRL)"},{"key":"2953_CR22","doi-asserted-by":"crossref","unstructured":"Gordon G J (1995) Stable function approximation in dynamic programming. In: Machine Learning Proceedings, Morgan Kaufmann, pp 261\u2013268","DOI":"10.1016\/B978-1-55860-377-6.50040-2"},{"issue":"2","key":"2953_CR23","doi-asserted-by":"publisher","first-page":"161","DOI":"10.1023\/A:1017928328829","volume":"49","author":"D Ormoneit","year":"2002","unstructured":"Ormoneit D, Sen \u015a (2002) Kernel-based reinforcement learning. Mach Learn 49(2):161\u2013178","journal-title":"Mach Learn"},{"key":"2953_CR24","first-page":"503","volume":"6","author":"D Ernst","year":"2005","unstructured":"Ernst D, Geurts P, Wehenkel L (2005) Tree-based batch mode reinforcement learning. J Mach Learn Res 6:503\u2013556","journal-title":"J Mach Learn Res"},{"key":"2953_CR25","unstructured":"Levine S, Kumar A, Tucker G et al (2020) Offline reinforcement learning: Tutorial, review, and perspectives on open problems. arXiv:2005.01643"},{"key":"2953_CR26","unstructured":"Nair A, Dalal M, Gupta A et al (2020) Accelerating online reinforcement learning with offline datasets. arXiv:2006.09359"},{"key":"2953_CR27","unstructured":"Laroche R, Trichelair P, Des Combes R T (2019) Safe policy improvement with baseline bootstrapping. In: International Conference on Machine Learning (PMLR), pp: 3652\u2013 3661"},{"key":"2953_CR28","doi-asserted-by":"crossref","unstructured":"Nadjahi K, Laroche R, des Combes R T (2019) Safe policy improvement with soft baseline bootstrapping. In: Joint European Conference on Machine Learning and Knowledge Discovery in Databases (ECML-PKDD). Springer, pp 53\u201368","DOI":"10.1007\/978-3-030-46133-1_4"},{"key":"2953_CR29","unstructured":"Agarwal R, Schuurmans D, Norouzi M (2020) An optimistic perspective on offline reinforcement learning. In: International Conference on Machine Learning (PMLR), pp 104\u2013114"},{"key":"2953_CR30","unstructured":"Wu Y, Tucker G, Nachum O (2019) Behavior regularized offline reinforcement learning. arXiv:1911.11361"},{"key":"2953_CR31","unstructured":"Jaques N, Ghandeharioun A, Shen J H et al (2019) Way off-policy batch deep reinforcement learning of implicit human preferences in dialog. arXiv:1907.00456"},{"key":"2953_CR32","unstructured":"Fujimoto S, Hoof H, Meger D (2018) Addressing function approximation error in actor-critic methods. In: International Conference on Machine Learning (PMLR), pp 1587\u2013 1596"},{"key":"2953_CR33","unstructured":"Lillicrap T P, Hunt J J, Pritzel A et al (2016) Continuous control with deep reinforcement learning (ICLR) (Poster)"},{"key":"2953_CR34","doi-asserted-by":"crossref","unstructured":"Zuo G, Zhao Q, Chen K et al (2020) Off-policy adversarial imitation learning for robotic tasks with low-quality demonstrations. Appl Soft Comput 97:106795","DOI":"10.1016\/j.asoc.2020.106795"},{"key":"2953_CR35","first-page":"1","volume":"1050","author":"DP Kingma","year":"2013","unstructured":"Kingma D P, Welling M (2013) Auto-encoding variational bayes. Stat 1050:1","journal-title":"Stat"},{"key":"2953_CR36","unstructured":"Brockman G, Cheung V, Pettersson L et al (2016) OpenAI Gym. arXiv:1606.01540"},{"key":"2953_CR37","doi-asserted-by":"crossref","unstructured":"Hester T, Vecerik M, Pietquin O et al (2018) Deep Q-learning From Demonstrations (AAAI)","DOI":"10.1609\/aaai.v32i1.11757"},{"key":"2953_CR38","doi-asserted-by":"crossref","unstructured":"Nair A, McGrew B, Andrychowicz M et al (2018) Overcoming exploration in reinforcement learning with demonstrations. In: 2018 IEEE International Conference on Robotics and Automation (ICRA). IEEE, pp 6292\u20136299","DOI":"10.1109\/ICRA.2018.8463162"},{"key":"2953_CR39","doi-asserted-by":"crossref","unstructured":"Farag W, Saleh Z (2018) Behavior cloning for autonomous driving using convolutional neural networks. In: 2018 International Conference on Innovation and Intelligence for Informatics, Computing, and Technologies (3ICT), pp 17","DOI":"10.1109\/3ICT.2018.8855753"},{"key":"2953_CR40","unstructured":"Anschel O, Baram N, Shimkin N (2017) Averaged-dqn: Variance reduction and stabilization for deep reinforcement learning. In: International Conference on Machine Learning (PMLR), pp 176\u2013185"},{"key":"2953_CR41","doi-asserted-by":"crossref","unstructured":"Yang S, Wang J, Deng B et al (2021) Neuromorphic context-dependent learning framework with fault-tolerant spike routing[J]. In: IEEE Transactions on Neural Networks and Learning Systems","DOI":"10.1109\/TNNLS.2021.3084250"}],"container-title":["Applied Intelligence"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10489-021-02953-8.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10489-021-02953-8\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10489-021-02953-8.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,1,22]],"date-time":"2023-01-22T10:21:42Z","timestamp":1674382902000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10489-021-02953-8"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,1,10]]},"references-count":41,"journal-issue":{"issue":"9","published-print":{"date-parts":[[2022,7]]}},"alternative-id":["2953"],"URL":"https:\/\/doi.org\/10.1007\/s10489-021-02953-8","relation":{},"ISSN":["0924-669X","1573-7497"],"issn-type":[{"type":"print","value":"0924-669X"},{"type":"electronic","value":"1573-7497"}],"subject":[],"published":{"date-parts":[[2022,1,10]]},"assertion":[{"value":"22 October 2021","order":1,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"10 January 2022","order":2,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"All authors of this paper declare no conflict of interest in this paper and agree to submit this manuscript to the journal of Applied Intelligence.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"<!--Emphasis Type='Bold' removed-->Competing Interests"}}]}}