{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,14]],"date-time":"2026-02-14T06:05:10Z","timestamp":1771049110832,"version":"3.50.1"},"reference-count":32,"publisher":"Springer Science and Business Media LLC","issue":"2","license":[{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100000038","name":"Natural Sciences and Engineering Research Council of Canada","doi-asserted-by":"publisher","award":["Discovery Grant RGPIN-2023- 05408"],"award-info":[{"award-number":["Discovery Grant RGPIN-2023- 05408"]}],"id":[{"id":"10.13039\/501100000038","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Neural Comput &amp; Applic"],"published-print":{"date-parts":[[2026,1]]},"DOI":"10.1007\/s00521-025-11733-1","type":"journal-article","created":{"date-parts":[[2026,1,28]],"date-time":"2026-01-28T06:32:16Z","timestamp":1769581936000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Sample-efficient reinforcement learning with symmetry-guided demonstrations for robotic manipulation"],"prefix":"10.1007","volume":"38","author":[{"given":"Amir Mehdi","family":"Soufi Enayati","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zengjie","family":"Zhang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Kashish","family":"Gupta","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3550-225X","authenticated-orcid":false,"given":"Homayoun","family":"Najjaran","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2026,1,28]]},"reference":[{"issue":"9","key":"11733_CR1","doi-asserted-by":"publisher","first-page":"2419","DOI":"10.1007\/s10994-021-05961-4","volume":"110","author":"G Dulac-Arnold","year":"2021","unstructured":"Dulac-Arnold G, Levine N, Mankowitz DJ, Li J, Paduraru C, Gowal S, Hester T (2021) Challenges of real-world reinforcement learning: definitions, benchmarks and analysis. Mach Learn 110(9):2419\u20132468","journal-title":"Mach Learn"},{"key":"11733_CR2","unstructured":"Buckman J, Hafner D, Tucker G, Brevdo E, Lee H (2018) Sample-efficient reinforcement learning with stochastic ensemble value expansion. Advances in neural information processing systems 31"},{"key":"11733_CR3","doi-asserted-by":"publisher","first-page":"381","DOI":"10.1016\/j.neucom.2022.09.114","volume":"512","author":"AM Soufi Enayati","year":"2022","unstructured":"Soufi Enayati AM, Zhang Z, Najjaran H (2022) A methodical interpretation of adaptive robotics: study and reformulation. Neurocomputing 512:381\u2013397. https:\/\/doi.org\/10.1016\/j.neucom.2022.09.114","journal-title":"Neurocomputing"},{"issue":"21","key":"11733_CR4","doi-asserted-by":"publisher","first-page":"10337","DOI":"10.3390\/app112110337","volume":"11","author":"J Ren","year":"2021","unstructured":"Ren J, Zeng Y, Zhou S, Zhang Y (2021) An experimental study on state representation extraction for vision-based deep reinforcement learning. Appl Sci 11(21):10337","journal-title":"Appl Sci"},{"issue":"2","key":"11733_CR5","doi-asserted-by":"publisher","first-page":"2230","DOI":"10.1109\/JSEN.2020.3016299","volume":"21","author":"MM Ejaz","year":"2020","unstructured":"Ejaz MM, Tang TB, Lu C-K (2020) Vision-based autonomous navigation approach for a tracked robot using deep reinforcement learning. IEEE Sens J 21(2):2230\u20132240","journal-title":"IEEE Sens J"},{"key":"11733_CR6","unstructured":"Li C, Xia F, Martin-Martin R, Savarese S (2020) Hrl4in: hierarchical reinforcement learning for interactive navigation with mobile manipulators. In: Conference on robot learning PMLR, pp 603\u2013616"},{"issue":"11","key":"11733_CR7","doi-asserted-by":"publisher","first-page":"11565","DOI":"10.1109\/TIE.2020.3038072","volume":"68","author":"Z Hou","year":"2020","unstructured":"Hou Z, Fei J, Deng Y, Xu J (2020) Data-efficient hierarchical reinforcement learning for robotic assembly control applications. IEEE Trans Ind Electron 68(11):11565\u201311575","journal-title":"IEEE Trans Ind Electron"},{"key":"11733_CR8","doi-asserted-by":"crossref","unstructured":"Yang X, Ji Z, Wu J, Lai Y-K, Wei C, Liu G, Setchi R (2021) Hierarchical reinforcement learning with universal policies for multistep robotic manipulation. IEEE Trans Neural Netw Learn Syst","DOI":"10.1109\/TNNLS.2021.3059912"},{"issue":"1","key":"11733_CR9","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1007\/s42979-021-00934-9","volume":"3","author":"K Gupta","year":"2022","unstructured":"Gupta K, Mukherjee D, Najjaran H (2022) Extending the capabilities of reinforcement learning through curriculum: a review of methods and applications. SN Comput Sci 3(1):1\u201318","journal-title":"SN Comput Sci"},{"issue":"6","key":"11733_CR10","doi-asserted-by":"publisher","first-page":"1360","DOI":"10.1109\/TRO.2012.2210294","volume":"28","author":"F Stulp","year":"2012","unstructured":"Stulp F, Theodorou EA, Schaal S (2012) Reinforcement learning with sequences of motion primitives for robust manipulation. IEEE Trans Rob 28(6):1360\u20131370","journal-title":"IEEE Trans Rob"},{"key":"11733_CR11","unstructured":"Mahajan A, Tulabandhula T (2017) Symmetry learning for function approximation in reinforcement learning. arXiv preprint arXiv:1706.02999"},{"key":"11733_CR12","unstructured":"Gupta K (2021) Reinforcement learning in complex environments with locally trained NA\u00efve agents. PhD thesis, University of British Columbia"},{"key":"11733_CR13","doi-asserted-by":"crossref","unstructured":"Gupta K, Najjaran H (2022) Exploiting abstract symmetries in reinforcement learning for complex environments. In: 2022 IEEE international conference on robotics and automation (ICRA), IEEE","DOI":"10.1109\/ICRA46639.2022.9811652"},{"issue":"6","key":"11733_CR14","doi-asserted-by":"publisher","first-page":"1463","DOI":"10.1109\/TRO.2008.2006703","volume":"24","author":"M Hersch","year":"2008","unstructured":"Hersch M, Guenter F, Calinon S, Billard A (2008) Dynamical system modulation for robot learning via kinesthetic demonstrations. IEEE Trans Rob 24(6):1463\u20131467","journal-title":"IEEE Trans Rob"},{"issue":"5","key":"11733_CR15","doi-asserted-by":"publisher","first-page":"469","DOI":"10.1016\/j.robot.2008.10.024","volume":"57","author":"BD Argall","year":"2009","unstructured":"Argall BD, Chernova S, Veloso M, Browning B (2009) A survey of robot learning from demonstration. Robot Auton Syst 57(5):469\u2013483","journal-title":"Robot Auton Syst"},{"key":"11733_CR16","doi-asserted-by":"crossref","unstructured":"Zhang Z, Hong J, Enayati AMS, Najjaran H (2024) Using implicit behavior cloning and dynamic movement primitive to facilitate reinforcement learning for robot motion planning. IEEE Trans Robot","DOI":"10.1109\/TRO.2024.3468770"},{"key":"11733_CR17","doi-asserted-by":"crossref","unstructured":"Calderon-Cordova C, Sarango R, Castillo D, Lakshminarayanan V (2024) A deep reinforcement learning framework for control of robotic manipulators in simulated environments. IEEE Access","DOI":"10.3390\/engproc2023047012"},{"key":"11733_CR18","unstructured":"Zhang T, Hu Y, Cui H, Zhao H, Gao Y (2023) A universal semantic-geometric representation for robotic manipulation . arxiv:2306.10474"},{"key":"11733_CR19","unstructured":"Zhang T, Hu Y, You J, Gao Y (2024) Leveraging locality to boost sample efficiency in robotic manipulation. arxiv:2406.10615"},{"key":"11733_CR20","unstructured":"Wang D, Walters R, Zhu X, Platt R (2022) Equivariant $$q$$ learning in spatial action spaces. In: Faust A, Hsu D, Neumann G (eds) Proceedings of the 5th conference on robot learning. Proceedings of machine learning research, vol 164, pp 1713\u20131723. PMLR. https:\/\/proceedings.mlr.press\/v164\/wang22j.html"},{"key":"11733_CR21","unstructured":"Nguyen HH, Baisero A, Klee D, Wang D, Platt R, Amato C (2023) Equivariant reinforcement learning under partial observability. In: Tan J, Toussaint M, Darvish K (eds) Proceedings of The 7th conference on robot learning. Proceedings of machine learning research, vol 229, pp 3309\u20133320. PMLR. https:\/\/proceedings.mlr.press\/v229\/nguyen23a.html"},{"key":"11733_CR22","doi-asserted-by":"crossref","unstructured":"Nair A, McGrew B, Andrychowicz M, Zaremba W, Abbeel P (2018). Overcoming exploration in reinforcement learning with demonstrations. IEEE, pp 6292\u20136299","DOI":"10.1109\/ICRA.2018.8463162"},{"key":"11733_CR23","unstructured":"Lillicrap TP, Hunt JJ, Pritzel A, Heess N, Erez T, Tassa Y, Silver D, Wierstra D (2015) Continuous control with deep reinforcement learning. arXiv preprint arXiv:1509.02971"},{"key":"11733_CR24","unstructured":"Schulman J, Wolski F, Dhariwal P, Radford A, Klimov O (2017) Proximal policy optimization algorithms. arXiv preprint arXiv:1707.06347"},{"issue":"6","key":"11733_CR25","doi-asserted-by":"publisher","first-page":"13205","DOI":"10.1111\/exsy.13205","volume":"40","author":"Q Wang","year":"2023","unstructured":"Wang Q, Sanchez FR, McCarthy R, Bulens DC, McGuinness K, O\u2019Connor N, W\u00fcthrich M, Widmaier F, Bauer S, Redmond SJ (2023) Dexterous robotic manipulation using deep reinforcement learning and knowledge transfer for complex sparse reward-based tasks. Expert Syst 40(6):13205","journal-title":"Expert Syst"},{"issue":"5","key":"11733_CR26","doi-asserted-by":"publisher","first-page":"2804","DOI":"10.1109\/TPAMI.2023.3339515","volume":"46","author":"Y Chen","year":"2024","unstructured":"Chen Y, Geng Y, Zhong F, Ji J, Jiang J, Lu Z, Dong H, Yang Y (2024) Bi-dexhands: Towards human-level bimanual dexterous manipulation. IEEE Trans Pattern Anal Mach Intell 46(5):2804\u20132818. https:\/\/doi.org\/10.1109\/TPAMI.2023.3339515","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"11733_CR27","unstructured":"Fedus W, Ramachandran P, Agarwal R, Bengio Y, Larochelle H, Rowland M, Dabney W (2020) Revisiting fundamentals of experience replay. In: III HD, Singh A (eds) Proceedings of the 37th international conference on machine learning. Proceedings of machine learning research, vol 119, pp 3061\u20133071. PMLR. https:\/\/proceedings.mlr.press\/v119\/fedus20a.html"},{"key":"11733_CR28","doi-asserted-by":"crossref","unstructured":"Taylor ME, Suay HB, Chernova S (2011) Integrating reinforcement learning with human demonstrations of varying ability. In: The 10th international conference on autonomous agents and multiagent systems, vol 2, pp 617\u2013624","DOI":"10.65109\/QFFJ7991"},{"key":"11733_CR29","unstructured":"Gao Y, Xu H, Lin J, Yu F, Levine S, Darrell T (2018) Reinforcement learning from imperfect demonstrations. arXiv preprint arXiv:1802.05313"},{"issue":"4","key":"11733_CR30","doi-asserted-by":"publisher","first-page":"10.3390\/s210412","DOI":"10.3390\/s21041278","volume":"21","author":"J Hua","year":"2021","unstructured":"Hua J, Zeng L, Li G, Ju Z (2021) Learning for a robot: deep reinforcement learning, imitation learning, transfer learning. Sensors 21(4):10.3390\/s21041278","journal-title":"Sensors"},{"key":"11733_CR31","unstructured":"Zinkevich M, Balch TR (2001) Symmetry in Markov decision processes and its implications for single agent and multiagent learning. In: Proceedings of the eighteenth international conference on machine learning, p 632"},{"key":"11733_CR32","unstructured":"Achiam J (2018) Spinning up in deep reinforcement learning. https:\/\/spinningup.openai.com\/en\/latest\/"}],"container-title":["Neural Computing and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00521-025-11733-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00521-025-11733-1","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00521-025-11733-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,2,14]],"date-time":"2026-02-14T05:19:42Z","timestamp":1771046382000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00521-025-11733-1"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,1]]},"references-count":32,"journal-issue":{"issue":"2","published-print":{"date-parts":[[2026,1]]}},"alternative-id":["11733"],"URL":"https:\/\/doi.org\/10.1007\/s00521-025-11733-1","relation":{},"ISSN":["0941-0643","1433-3058"],"issn-type":[{"value":"0941-0643","type":"print"},{"value":"1433-3058","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026,1]]},"assertion":[{"value":"1 February 2025","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"12 November 2025","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"28 January 2026","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare no Conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}},{"value":"Not applicable.","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethics approval and consent to participate"}},{"value":"The authors declare their consent for publishing this work.","order":4,"name":"Ethics","group":{"name":"EthicsHeading","label":"Consent for publication"}}],"article-number":"24"}}