{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,30]],"date-time":"2026-01-30T04:30:48Z","timestamp":1769747448704,"version":"3.49.0"},"reference-count":66,"publisher":"Springer Science and Business Media LLC","issue":"8","license":[{"start":{"date-parts":[[2022,11,8]],"date-time":"2022-11-08T00:00:00Z","timestamp":1667865600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2022,11,8]],"date-time":"2022-11-08T00:00:00Z","timestamp":1667865600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/100010661","name":"Horizon 2020 Framework Programme","doi-asserted-by":"publisher","award":["761758 HumanE-AI-Net"],"award-info":[{"award-number":["761758 HumanE-AI-Net"]}],"id":[{"id":"10.13039\/100010661","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100004794","name":"Centre National de la Recherche Scientifique","doi-asserted-by":"publisher","award":["INS2I Appel Unique Programme, APIER Project"],"award-info":[{"award-number":["INS2I Appel Unique Programme, APIER Project"]}],"id":[{"id":"10.13039\/501100004794","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100005902","name":"Delegation Generale pour l\u2019Armement","doi-asserted-by":"publisher","award":["Allocations de these"],"award-info":[{"award-number":["Allocations de these"]}],"id":[{"id":"10.13039\/501100005902","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001665","name":"Agence Nationale de la Recherche","doi-asserted-by":"crossref","award":["ANR-12-CORD-0030 Roboergosum Project"],"award-info":[{"award-number":["ANR-12-CORD-0030 Roboergosum Project"]}],"id":[{"id":"10.13039\/501100001665","id-type":"DOI","asserted-by":"crossref"}]},{"name":"ANR\/FWF","award":["ANR-21-CE33-0019-01"],"award-info":[{"award-number":["ANR-21-CE33-0019-01"]}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int J of Soc Robotics"],"published-print":{"date-parts":[[2023,8]]},"DOI":"10.1007\/s12369-022-00942-6","type":"journal-article","created":{"date-parts":[[2022,11,8]],"date-time":"2022-11-08T15:57:11Z","timestamp":1667923031000},"page":"1297-1323","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":13,"title":["Reducing Computational Cost During Robot Navigation and Human\u2013Robot Interaction with a Human-Inspired Reinforcement Learning Architecture"],"prefix":"10.1007","volume":"15","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-7322-2523","authenticated-orcid":false,"given":"R\u00e9mi","family":"Dromnelle","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3282-8972","authenticated-orcid":false,"given":"Erwan","family":"Renaudo","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2920-4539","authenticated-orcid":false,"given":"Mohamed","family":"Chetouani","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0534-2707","authenticated-orcid":false,"given":"Petros","family":"Maragos","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7822-0634","authenticated-orcid":false,"given":"Raja","family":"Chatila","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8117-7064","authenticated-orcid":false,"given":"Beno\u00eet","family":"Girard","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2515-1046","authenticated-orcid":false,"given":"Mehdi","family":"Khamassi","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2022,11,8]]},"reference":[{"key":"942_CR1","first-page":"315","volume":"17","author":"R Alami","year":"1998","unstructured":"Alami R, Chatila R, Fleury S, Ghallab M, Ingrand F (1998) An architecture for autonomy. IJRR J 17:315\u2013337","journal-title":"IJRR J"},{"key":"942_CR2","unstructured":"Alami R, Warnier M, Guitton J, Lemaignan S, Sisbot EA (2011) When the robot considers the human. In: Proceedings of the 15th international symposium on robotics research"},{"key":"942_CR3","doi-asserted-by":"crossref","unstructured":"Banquet J-P, Hanoune S, Gaussier P, Quoy M (2016) From cognitive to habit behavior during navigation, through cortical-basal ganglia loops. In: International conference on artificial neural networks. Springer, pp 238\u2013247","DOI":"10.1007\/978-3-319-44778-0_28"},{"key":"942_CR4","doi-asserted-by":"crossref","unstructured":"Caluwaerts K, Favre-F\u00e9lix A, Staffa M, N\u2019Guyen S, Grand C, Girard B, Khamassi M (2012) Neuro-inspired navigation strategies shifting for robots: integration of a multiple landmark taxon strategy. In: Prescott TJ et\u00a0al (eds) Living machines 2012. LNAI, vol 7375\/2012, pp 62\u201373","DOI":"10.1007\/978-3-642-31525-1_6"},{"key":"942_CR5","doi-asserted-by":"publisher","DOI":"10.1088\/1748-3182\/7\/2\/025009","volume":"7","author":"K Caluwaerts","year":"2012","unstructured":"Caluwaerts K, Staffa M, N\u2019Guyen S, Grand C, Doll\u00e9 L, Favre-F\u00e9lix A, Girard B, Khamassi M (2012) A biologically inspired meta-control navigation system for the psikharpax rat robot. Bioinspiration Biomim 7:025009","journal-title":"Bioinspiration Biomim"},{"issue":"6","key":"942_CR6","doi-asserted-by":"publisher","first-page":"2877","DOI":"10.1152\/jn.00145.2018","volume":"120","author":"R Caz\u00e9","year":"2018","unstructured":"Caz\u00e9 R, Khamassi M, Aubin L, Girard B (2018) Hippocampal replays under the scrutiny of reinforcement learning models. J Neurophysiol 120(6):2877\u20132896","journal-title":"J Neurophysiol"},{"issue":"1","key":"942_CR7","doi-asserted-by":"publisher","first-page":"88","DOI":"10.3389\/frobt.2018.00088","volume":"5","author":"R Chatila","year":"2018","unstructured":"Chatila R, Renaudo E, Andries M, Chavez-Garcia RO, Luce-Vayrac P, Gottstein R, Alami R, Clodic A, Devin S, Girard B, Khamassi M (2018) Toward self-aware robots. Front Robot AI 5(1):88\u2013108","journal-title":"Front Robot AI"},{"key":"942_CR8","unstructured":"Chebotar Y, Hausman K, Zhang M, Sukhatme G, Schaal S, Levine S (2017) Combining model-based and model-free updates for trajectory-centric reinforcement learning. In: International conference on machine learning. PMLR, pp 703\u2013711"},{"issue":"6","key":"942_CR9","doi-asserted-by":"publisher","first-page":"1204","DOI":"10.1016\/j.neuron.2011.02.027","volume":"69","author":"ND Daw","year":"2011","unstructured":"Daw ND, Gershman SJ, Seymour B, Dayan P, Dolan RJ (2011) Model-based influences on humans\u2019 choices and striatal prediction errors. Neuron 69(6):1204\u20131215","journal-title":"Neuron"},{"issue":"12","key":"942_CR10","doi-asserted-by":"publisher","first-page":"1704","DOI":"10.1038\/nn1560","volume":"8","author":"ND Daw","year":"2005","unstructured":"Daw ND, Niv Y, Dayan P (2005) Uncertainty-based competition between prefrontal and dorsolateral striatal systems for behavioral control. Nat Neurosci 8(12):1704\u20131711","journal-title":"Nat Neurosci"},{"issue":"4","key":"942_CR11","doi-asserted-by":"publisher","first-page":"299","DOI":"10.1007\/s00422-010-0400-z","volume":"103","author":"L Doll\u00e9","year":"2010","unstructured":"Doll\u00e9 L, Sheynikhovich D, Girard B, Chavarriaga R, Guillot A (2010) Path planning versus cue responding: a bioinspired model of switching between navigation strategies. Biol Cybern 103(4):299\u2013317","journal-title":"Biol Cybern"},{"key":"942_CR12","doi-asserted-by":"crossref","unstructured":"Doll\u00e9 L, Khamassi M, Girard B, Guillot A, Chavarriaga R (2008) Analyzing interactions between navigation strategies using a computational model of action selection. In: International conference on spatial cognition, pp 71\u201386","DOI":"10.1007\/978-3-540-87601-4_8"},{"key":"942_CR13","doi-asserted-by":"publisher","first-page":"59","DOI":"10.3389\/fnbot.2018.00059","volume":"12","author":"S Doncieux","year":"2018","unstructured":"Doncieux S, Filliat D, D\u00edaz-Rodr\u00edguez N, Hospedales T, Duro R, Coninx A, Roijers DM, Girard B, Perrin N, Sigaud O (2018) Open-ended learning: a conceptual framework based on representational redescription. Front Neurorobot 12:59","journal-title":"Front Neurorobot"},{"key":"942_CR14","unstructured":"Doncieux S, Bredeche N, Le Goff L, Girard B, Coninx A, Sigaud O, Khamassi M, D\u00edaz-Rodr\u00edguez N, Filliat D, Hospedales T et\u00a0al (2020) Dream architecture: a developmental approach to open-ended learning in robotics. arXiv preprint arXiv:2005.06223"},{"key":"942_CR15","doi-asserted-by":"crossref","unstructured":"Dromnelle R, Girard B, Renaudo E, Chatila R, Khamassi M (2020) Coping with the variability in humans reward during simulated human\u2013robot interactions through the coordination of multiple learning strategies. In: 2020 29th IEEE international conference on robot and human interactive communication (RO-MAN). IEEE, pp 612\u2013617","DOI":"10.1109\/RO-MAN47096.2020.9223451"},{"key":"942_CR16","doi-asserted-by":"crossref","unstructured":"Dromnelle R, Renaudo E, Pourcel G, Chatila R, Girard B, Khamassi M (2020) How to reduce computation time while sparing performance during robot navigation? a neuro-inspired architecture for autonomous shifting between model-based and model-free learning. In: Conference on biomimetic and biohybrid systems. Springer, pp 68\u201379","DOI":"10.1007\/978-3-030-64313-3_8"},{"key":"942_CR17","doi-asserted-by":"crossref","unstructured":"Dunn OJ (1964) Multiple comparisons using rank sums technometrics 6:241\u2013252. Find this article online","DOI":"10.1080\/00401706.1964.10490181"},{"issue":"1\u20132","key":"942_CR18","doi-asserted-by":"publisher","first-page":"7","DOI":"10.1023\/A:1007694015589","volume":"43","author":"S D\u017eeroski","year":"2001","unstructured":"D\u017eeroski S, De Raedt L, Driessens K (2001) Relational reinforcement learning. Mach Learn 43(1\u20132):7\u201352","journal-title":"Mach Learn"},{"key":"942_CR19","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3405450","volume":"10","author":"D Feil-Seifer","year":"2020","unstructured":"Feil-Seifer D, Haring KS, Rossi S, Wagner AR, Williams T (2020) Where to next? The impact of Covid-19 on human\u2013robot interaction research. ACM Trans Hum Robot Interact 10:1\u20137","journal-title":"ACM Trans Hum Robot Interact"},{"key":"942_CR20","unstructured":"Gat E (1998) On three-layer architectures. In: Artificial intelligence and mobile robots. MIT Press"},{"key":"942_CR21","doi-asserted-by":"publisher","first-page":"2","DOI":"10.1177\/105971230501300204","volume":"13","author":"B Girard","year":"2005","unstructured":"Girard B, Filliat D, Meyer J-A, Berthoz A, Guillot A (2005) Integration of navigation and action selection functionalities in a computational model of cortico\u2013basal ganglia\u2013thalamo-cortical loops. Adapt Behav 13:2","journal-title":"Adapt Behav"},{"key":"942_CR22","unstructured":"Griffith S, Subramanian K, Scholz J, Isbell CL, Thomaz AL (2013) Policy shaping: integrating human feedback with reinforcement learning. In: Advances in neural information processing systems, vol 26"},{"issue":"1","key":"942_CR23","doi-asserted-by":"publisher","first-page":"34","DOI":"10.1109\/TRO.2006.889486","volume":"23","author":"G Grisetti","year":"2007","unstructured":"Grisetti G, Stachniss C, Burgard W (2007) Improved techniques for grid mapping with rao-blackwellized particle filters. Trans Robot 23(1):34\u201346. https:\/\/doi.org\/10.1109\/TRO.2006.889486","journal-title":"Trans Robot"},{"key":"942_CR24","doi-asserted-by":"crossref","unstructured":"Hafez MB, Weber C, Kerzel M, Wermter S (2019) Curious meta-controller: adaptive alternation between model-based and model-free control in deep reinforcement learning. In: 2019 international joint conference on neural networks (IJCNN). IEEE, pp 1\u20138","DOI":"10.1109\/IJCNN.2019.8852254"},{"key":"942_CR25","doi-asserted-by":"publisher","first-page":"42","DOI":"10.3389\/frobt.2020.00042","volume":"7","author":"S Hangl","year":"2020","unstructured":"Hangl S, Dunjko V, Briegel HJ, Piater J (2020) Skill learning by autonomous robotic playing using active learning and exploratory behavior composition. Front Robot AI 7:42","journal-title":"Front Robot AI"},{"issue":"8","key":"942_CR26","doi-asserted-by":"publisher","first-page":"1242","DOI":"10.1016\/j.neunet.2006.06.007","volume":"19","author":"M Haruno","year":"2006","unstructured":"Haruno M, Kawato M (2006) Heterarchical reinforcement-learning model for integration of multiple cortico-striatal loops: fmRI examination in stimulus-action-reward association learning. Neural Netw 19(8):1242\u20131254","journal-title":"Neural Netw"},{"issue":"10","key":"942_CR27","doi-asserted-by":"publisher","first-page":"464","DOI":"10.1016\/S0166-2236(99)01439-3","volume":"22","author":"O Hikosaka","year":"1999","unstructured":"Hikosaka O, Nakahara H, Rand MK, Sakai K, Lu X, Nakamura K, Miyachi S, Doya K (1999) Parallel neural networks for learning sequential procedures. Trends Neurosci 22(10):464\u2013471","journal-title":"Trends Neurosci"},{"issue":"4\u20135","key":"942_CR28","doi-asserted-by":"publisher","first-page":"698","DOI":"10.1177\/0278364920987859","volume":"40","author":"J Ibarz","year":"2021","unstructured":"Ibarz J, Tan J, Finn C, Kalakrishnan M, Pastor P, Levine S (2021) How to train your robot with deep reinforcement learning: lessons we have learned. Intl J Robot Res 40(4\u20135):698\u2013721","journal-title":"Intl J Robot Res"},{"key":"942_CR29","doi-asserted-by":"publisher","first-page":"16","DOI":"10.3389\/fnbot.2013.00016","volume":"7","author":"A Jauffret","year":"2013","unstructured":"Jauffret A, Cuperlier N, Gaussier P, Tarroux P (2013) From self-assessment to frustration, a small step toward autonomy in robotic navigation. Front Neurorobot 7:16","journal-title":"Front Neurorobot"},{"key":"942_CR30","doi-asserted-by":"crossref","unstructured":"Judah K, Roy S, Fern A, Dietterich T (2010) Reinforcement learning via practice and critique advice. In: Proceedings of the AAAI conference on artificial intelligence, vol 24, pp 481\u2013486","DOI":"10.1609\/aaai.v24i1.7690"},{"key":"942_CR31","doi-asserted-by":"crossref","unstructured":"Justus D, Brennan J, Bonner S, McGough AS (2018) Predicting the computational cost of deep learning models. In: 2018 IEEE international conference on big data (Big Data). IEEE, pp 3873\u20133882","DOI":"10.1109\/BigData.2018.8622396"},{"issue":"5","key":"942_CR32","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1371\/journal.pcbi.1002055","volume":"7","author":"M Keramati","year":"2011","unstructured":"Keramati M, Dezfouli A, Piray P (2011) Speed\/accuracy trade-off between the habitual and goal-directed processes. PLoS Comput Biol 7(5):1\u201325","journal-title":"PLoS Comput Biol"},{"key":"942_CR33","doi-asserted-by":"publisher","first-page":"79","DOI":"10.3389\/fnbeh.2012.00079","volume":"6","author":"M Khamassi","year":"2012","unstructured":"Khamassi M, Humphries MD (2012) Integrating cortico-limbic-basal ganglia architectures for learning model-based and model-free navigation strategies. Front Behav Neurosci 6:79","journal-title":"Front Behav Neurosci"},{"key":"942_CR34","doi-asserted-by":"crossref","unstructured":"Khamassi M, Wilson C, Roth\u00e9 R, Quilodran R, Dominey PF, Procyk E (2011) Meta-learning, cognitive control, and physiological interactions between medial and lateral prefrontal cortex. In: Neural basis of motivational and cognitive control, pp 351\u2013370","DOI":"10.7551\/mitpress\/8791.003.0025"},{"issue":"4","key":"942_CR35","doi-asserted-by":"publisher","first-page":"881","DOI":"10.1109\/TCDS.2018.2843122","volume":"10","author":"M Khamassi","year":"2018","unstructured":"Khamassi M, Velentzas G, Tsitsimis T, Tzafestas C (2018) Robot fast adaptation to changes in human engagement during simulated dynamic social interaction with active exploration in parameterized reinforcement learning. IEEE Trans Cognit Dev Syst 10(4):881\u2013893","journal-title":"IEEE Trans Cognit Dev Syst"},{"key":"942_CR36","doi-asserted-by":"crossref","unstructured":"Knox WB, Stone P (2009) Interactively shaping agents via human reinforcement: the tamer framework. In: Proceedings of the fifth international conference on knowledge capture, pp 9\u201316","DOI":"10.1145\/1597735.1597738"},{"key":"942_CR37","unstructured":"Knox WB, Stone P (2012) Reinforcement learning from simultaneous human and mdp reward. In: AAMAS, pp 475\u2013482"},{"key":"942_CR38","unstructured":"Knox WB, Taylor ME, Stone P (2011) Understanding human teaching modalities in reinforcement learning environments: a preliminary report. In: IJCAI 2011 workshop on agents learning interactively from human teachers (ALIHT)"},{"issue":"11","key":"942_CR39","doi-asserted-by":"publisher","first-page":"1238","DOI":"10.1177\/0278364913495721","volume":"32","author":"J Kober","year":"2013","unstructured":"Kober J, Bagnell AJ, Peters J (2013) Reinforcement learning in robotics: a survey. IJRR J 32(11):1238\u20131274. https:\/\/doi.org\/10.1177\/0278364913495721","journal-title":"IJRR J"},{"issue":"1","key":"942_CR40","doi-asserted-by":"publisher","first-page":"122","DOI":"10.1109\/TEVC.2012.2185849","volume":"17","author":"S Koos","year":"2012","unstructured":"Koos S, Mouret J-B, Doncieux S (2012) The transferability approach: crossing the reality gap in evolutionary robotics. IEEE Trans Evol Comput 17(1):122\u2013145","journal-title":"IEEE Trans Evol Comput"},{"issue":"3","key":"942_CR41","doi-asserted-by":"publisher","first-page":"687","DOI":"10.1016\/j.neuron.2013.11.028","volume":"81","author":"SW Lee","year":"2014","unstructured":"Lee SW, Shimojo S, O\u2019Doherty JP (2014) Neural computations underlying arbitration between model-based and model-free learning. Neuron 81(3):687\u2013699","journal-title":"Neuron"},{"key":"942_CR42","doi-asserted-by":"publisher","first-page":"62","DOI":"10.1016\/j.neunet.2015.09.006","volume":"72","author":"M Llofriu","year":"2015","unstructured":"Llofriu M, Tejera G, Contreras M, Pelc T, Fellous J-M, Weitzenfeld A (2015) Goal-oriented robot navigation learning using a multi-scale space representation. Neural Netw 72:62\u201374","journal-title":"Neural Netw"},{"key":"942_CR43","unstructured":"Lowrey K, Rajeswaran A, Kakade S, Todorov E, Mordatch I (2019) Plan online, learn offline: efficient learning and exploration via model-based control. In: International conference on learning representations"},{"key":"942_CR44","doi-asserted-by":"publisher","first-page":"88","DOI":"10.1016\/j.neunet.2015.10.004","volume":"72","author":"G Maffei","year":"2015","unstructured":"Maffei G, Santos-Pata D, Marcos E, S\u00e1nchez-Fibla M, Verschure PFMJ (2015) An embodied biologically constrained model of foraging: from classical and operant conditioning to adaptive real-world behavior in dac-x. Neural Netw 72:88\u2013108","journal-title":"Neural Netw"},{"key":"942_CR45","doi-asserted-by":"publisher","first-page":"1395","DOI":"10.1007\/978-3-540-30301-5_61","volume-title":"Handbook of robotics","author":"J-A Meyer","year":"2008","unstructured":"Meyer J-A, Guillot A (2008) Biologically-inspired robots. In: Siciliano B, Khatib O (eds) Handbook of robotics. Springer, Berlin, pp 1395\u20131422"},{"issue":"7540","key":"942_CR46","doi-asserted-by":"publisher","first-page":"529","DOI":"10.1038\/nature14236","volume":"518","author":"V Mnih","year":"2015","unstructured":"Mnih V, Kavukcuoglu K, Silver D, Rusu AA, Veness J, Bellemare MG, Graves A, Riedmiller M, Fidjeland AK, Ostrovski G, Petersen S, Beattie C, Sadik A, Antonoglou I, King H, Kumaran D, Wierstra D, Legg S, Hassabis D (2015) Human-level control through deep reinforcement learning. Nature 518(7540):529\u2013533","journal-title":"Nature"},{"key":"942_CR47","doi-asserted-by":"publisher","first-page":"73","DOI":"10.1146\/annurev-psych-010416-044216","volume":"68","author":"JP O\u2019Doherty","year":"2017","unstructured":"O\u2019Doherty JP, Cockburn J, Pauli WM (2017) Learning, reward, and decision making. Ann Rev Psychol 68:73\u2013100","journal-title":"Ann Rev Psychol"},{"key":"942_CR48","doi-asserted-by":"publisher","DOI":"10.3389\/fpsyg.2013.00092\/abstract","author":"G Pezzulo","year":"2013","unstructured":"Pezzulo G, Rigoli F, Chersi F (2013) The mixed instrumental controller: using value of information to combine habitual choice and mental simulation. Front Psychol. https:\/\/doi.org\/10.3389\/fpsyg.2013.00092\/abstract","journal-title":"Front Psychol"},{"key":"942_CR49","doi-asserted-by":"publisher","unstructured":"Powell T, Sammut-Bonnici T (2015) Pareto analysis. ISBN 9781118785317. https:\/\/doi.org\/10.1002\/9781118785317.weom120202","DOI":"10.1002\/9781118785317.weom120202"},{"key":"942_CR50","unstructured":"Quigley M, Conley K, Gerkey BP, Faust J, Foote T, Leibs J, Wheeler R, Ng AY (2009) Ros: an open-source robot operating system. In: ICRA workshop on open source software"},{"key":"942_CR51","doi-asserted-by":"publisher","unstructured":"Renaudo E, Girard B, Chatila R, Khamassi M (2014) Design of a control architecture for habit learning in robots. In: Biomimetic and biohybrid systems, LNAI proceedings, pp 249\u2013260. https:\/\/doi.org\/10.1007\/978-3-319-09435-9_22","DOI":"10.1007\/978-3-319-09435-9_22"},{"key":"942_CR52","unstructured":"Renaudo E, Devin S, Girard B, Chatila R, Alami R, Khamassi M, Clodic A (2015) Learning to interact with humans using goal-directed and habitual behaviors"},{"key":"942_CR53","doi-asserted-by":"crossref","unstructured":"Renaudo E, Girard B, Chatila R, Khamassi M (2015) Which criteria for autonomously shifting between goal-directed and habitual behaviors in robots? In: 5th international conference on development and learning and on epigenetic robotics (ICDL-EPIROB), Providence, RI, USA, pp 254\u2013260","DOI":"10.1109\/DEVLRN.2015.7346152"},{"key":"942_CR54","doi-asserted-by":"crossref","unstructured":"Renaudo E, Girard B, Chatila R, Khamassi M (2015) Respective advantages and disadvantages of model-based and model-free reinforcement learning in a robotics neuro-inspired cognitive architecture. In: Biologically inspired cognitive architectures BICA 2015, Lyon, France, pp 178\u2013184","DOI":"10.1016\/j.procs.2015.12.194"},{"issue":"3","key":"942_CR55","doi-asserted-by":"publisher","first-page":"659","DOI":"10.1007\/s12369-019-00602-2","volume":"12","author":"DM Rojas-Castro","year":"2020","unstructured":"Rojas-Castro DM, Revel A, Menard M (2020) Rhizome architecture: an adaptive neurobehavioral control architecture for cognitive mobile robots-application in a vision-based indoor robot navigation context. Int J Soc Robot 12(3):659\u2013688","journal-title":"Int J Soc Robot"},{"key":"942_CR56","doi-asserted-by":"crossref","unstructured":"Rutard F, Sigaud O, Chetouani M (2020) Tirl: enriching actor-critic rl with non-expert human teachers and a trust model. In: 2020 29th IEEE international conference on robot and human interactive communication (RO-MAN). IEEE, pp 604\u2013611","DOI":"10.1109\/RO-MAN47096.2020.9223530"},{"key":"942_CR57","doi-asserted-by":"publisher","first-page":"52","DOI":"10.3389\/fnbot.2019.00052","volume":"13","author":"F Sheikhnezhad Fard","year":"2019","unstructured":"Sheikhnezhad Fard F, Trappenberg TP (2019) A novel model for arbitration between planning and habitual control systems. Front Neurorobot 13:52. https:\/\/doi.org\/10.3389\/fnbot.2019.00052","journal-title":"Front Neurorobot"},{"issue":"2","key":"942_CR58","doi-asserted-by":"publisher","first-page":"217","DOI":"10.1016\/j.neuron.2013.07.007","volume":"79","author":"A Shenhav","year":"2013","unstructured":"Shenhav A, Botvinick MM, Cohen JD (2013) The expected value of control: an integrative theory of anterior cingulate cortex function. Neuron 79(2):217\u2013240","journal-title":"Neuron"},{"key":"942_CR59","doi-asserted-by":"crossref","unstructured":"Strubell E, Ganesh A, McCallum A (2019) Energy and policy considerations for deep learning in nlp. arXiv preprint arXiv:1906.02243","DOI":"10.18653\/v1\/P19-1355"},{"key":"942_CR60","volume-title":"Introduction to reinforcement learning","author":"RS Sutton","year":"1998","unstructured":"Sutton RS, Barto AG (1998) Introduction to reinforcement learning, 1st edn. MIT Press, Cambridge","edition":"1"},{"issue":"4","key":"942_CR61","doi-asserted-by":"publisher","first-page":"342","DOI":"10.1177\/1073858411435128","volume":"18","author":"M Van Der Meer","year":"2012","unstructured":"Van Der Meer M, Kurth-Nelson Z, Redish AD (2012) Information processing in decision-making systems. The Neuroscientist 18(4):342\u2013359","journal-title":"The Neuroscientist"},{"key":"942_CR62","doi-asserted-by":"publisher","DOI":"10.3389\/fnbeh.2015.00225","author":"G Viejo","year":"2015","unstructured":"Viejo G, Khamassi M, Brovelli A, Girard B (2015) Modelling choice and reaction time during arbitrary visuomotor learning through the coordination of adaptive working memory and reinforcement learning. Front Behav Neurosci. https:\/\/doi.org\/10.3389\/fnbeh.2015.00225","journal-title":"Front Behav Neurosci"},{"issue":"6","key":"942_CR63","doi-asserted-by":"publisher","first-page":"860","DOI":"10.1038\/s41593-018-0147-8","volume":"21","author":"JX Wang","year":"2018","unstructured":"Wang JX, Kurth-Nelson Z, Kumaran D, Tirumala D, Soyer H, Leibo JZ, Hassabis D, Botvinick M (2018) Prefrontal cortex as a meta-reinforcement learning system. Nat Neurosci 21(6):860\u2013868","journal-title":"Nat Neurosci"},{"key":"942_CR64","unstructured":"Wang T, Bao X, Clavera I, Hoang J, Wen Y, Langlois E, Zhang S, Zhang G, Abbeel P, Ba J (2019) Benchmarking model-based reinforcement learning. arXiv preprint arXiv:1907.02057"},{"issue":"2","key":"942_CR65","doi-asserted-by":"publisher","first-page":"113","DOI":"10.1109\/TCDS.2016.2624705","volume":"9","author":"M Zambelli","year":"2016","unstructured":"Zambelli M, Demiris Y (2016) Online multimodal ensemble learning using self-learned sensorimotor representations. IEEE Trans Cognit Dev Syst 9(2):113\u2013126","journal-title":"IEEE Trans Cognit Dev Syst"},{"key":"942_CR66","doi-asserted-by":"publisher","first-page":"5","DOI":"10.1016\/j.neuropsychologia.2018.09.013","volume":"123","author":"A Zenon","year":"2019","unstructured":"Zenon A, Solopchuk O, Pezzulo G (2019) An information-theoretic perspective on the costs of cognition. Neuropsychologia 123:5\u201318","journal-title":"Neuropsychologia"}],"container-title":["International Journal of Social Robotics"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s12369-022-00942-6.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s12369-022-00942-6\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s12369-022-00942-6.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,8,29]],"date-time":"2023-08-29T10:25:20Z","timestamp":1693304720000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s12369-022-00942-6"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,11,8]]},"references-count":66,"journal-issue":{"issue":"8","published-print":{"date-parts":[[2023,8]]}},"alternative-id":["942"],"URL":"https:\/\/doi.org\/10.1007\/s12369-022-00942-6","relation":{},"ISSN":["1875-4791","1875-4805"],"issn-type":[{"value":"1875-4791","type":"print"},{"value":"1875-4805","type":"electronic"}],"subject":[],"published":{"date-parts":[[2022,11,8]]},"assertion":[{"value":"25 October 2022","order":1,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"8 November 2022","order":2,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors have no competing interests to declare that are relevant to the content of this article.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Competing interest"}}]}}