{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,9,8]],"date-time":"2025-09-08T06:28:17Z","timestamp":1757312897080,"version":"3.41.0"},"reference-count":29,"publisher":"Springer Science and Business Media LLC","issue":"19","license":[{"start":{"date-parts":[[2022,6,7]],"date-time":"2022-06-07T00:00:00Z","timestamp":1654560000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2022,6,7]],"date-time":"2022-06-07T00:00:00Z","timestamp":1654560000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"name":"Onderzoeksprogramma Artifici\u00eble Intelligentie (AI) Vlaanderen"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Neural Comput & Applic"],"published-print":{"date-parts":[[2025,7]]},"DOI":"10.1007\/s00521-022-07396-x","type":"journal-article","created":{"date-parts":[[2022,6,7]],"date-time":"2022-06-07T12:13:19Z","timestamp":1654603999000},"page":"13101-13117","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["A framework for flexibly guiding learning 
agents"],"prefix":"10.1007","volume":"37","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-9094-4221","authenticated-orcid":false,"given":"Mahmoud","family":"Elbarbari","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Florent","family":"Delgrange","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ivo","family":"Vervlimmeren","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Kyriakos","family":"Efthymiadis","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Bram","family":"Vanderborght","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ann","family":"Now\u00e9","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2022,6,7]]},"reference":[{"key":"7396_CR1","doi-asserted-by":"publisher","DOI":"10.1512\/iumj.1957.6.56038","author":"R Bellman","year":"1957","unstructured":"Bellman R (1957) A Markovian decision process. J Math Mech. https:\/\/doi.org\/10.1512\/iumj.1957.6.56038","journal-title":"J Math Mech"},{"key":"7396_CR2","doi-asserted-by":"publisher","unstructured":"Camacho A, Icarte RT, Klassen TQ, et al (2019) LTL and beyond: formal languages for reward function specification in reinforcement learning. In: IJCAI, pp 6065\u20136073. https:\/\/doi.org\/10.24963\/ijcai.2019\/840","DOI":"10.24963\/ijcai.2019\/840"},{"issue":"746\u2013752","key":"7396_CR3","first-page":"2","volume":"1998","author":"C Claus","year":"1998","unstructured":"Claus C, Boutilier C (1998) The dynamics of reinforcement learning in cooperative multiagent systems. 
AAAI\/IAAI 1998(746\u2013752):2","journal-title":"AAAI\/IAAI"},{"key":"7396_CR4","unstructured":"De Giacomo G, Vardi MY (2013) Linear temporal logic and linear dynamic logic on finite traces. In: Twenty-third international joint conference on artificial intelligence"},{"key":"7396_CR5","doi-asserted-by":"crossref","unstructured":"De Giacomo G, Iocchi L, Favorito M, et al (2019) Foundations for restraining bolts: reinforcement learning with ltlf\/ldlf restraining specifications. In: Proceedings of the international conference on automated planning and scheduling, pp 128\u2013136","DOI":"10.1609\/icaps.v29i1.3549"},{"issue":"1","key":"7396_CR6","doi-asserted-by":"publisher","first-page":"59","DOI":"10.1017\/S0269888915000193","volume":"31","author":"YM De Hauwere","year":"2016","unstructured":"De Hauwere YM, Devlin S, Kudenko D et al (2016) Context-sensitive reward shaping for sparse interaction multi-agent systems. Knowl Eng Rev 31(1):59\u201376. https:\/\/doi.org\/10.1017\/S0269888915000193","journal-title":"Knowl Eng Rev"},{"issue":"4","key":"7396_CR7","doi-asserted-by":"publisher","first-page":"104","DOI":"10.3390\/robotics8040104","volume":"8","author":"J De Winter","year":"2019","unstructured":"De Winter J, De Beir A, El Makrini I et al (2019) Accelerating interactive reinforcement learning by human advice for an assembly task by a cobot. Robotics 8(4):104. https:\/\/doi.org\/10.3390\/robotics8040104","journal-title":"Robotics"},{"key":"7396_CR8","unstructured":"Devlin S, Kudenko D (2011) Theoretical considerations of potential-based reward shaping for multi-agent systems. In: The 10th international conference on autonomous agents and multiagent systems. ACM, pp 225\u2013232"},{"issue":"1","key":"7396_CR9","doi-asserted-by":"publisher","first-page":"44","DOI":"10.1017\/S0269888915000181","volume":"31","author":"S Devlin","year":"2016","unstructured":"Devlin S, Kudenko D (2016) Plan-based reward shaping for multi-agent reinforcement learning. 
Knowl Eng Rev 31(1):44\u201358","journal-title":"Knowl Eng Rev"},{"key":"7396_CR10","unstructured":"Devlin SM, Kudenko D (2012) Dynamic potential-based reward shaping. In: Proceedings of the 11th international conference on autonomous agents and multiagent systems. IFAAMAS, pp 433\u2013440"},{"key":"7396_CR11","unstructured":"Giacomo GD, Vardi MY (2015) Synthesis for LTL and LDL on finite traces. In: Yang Q, Wooldridge MJ (eds) Proceedings of the twenty-fourth international joint conference on artificial intelligence, IJCAI 2015, Buenos Aires, Argentina, July 25\u201331, 2015. AAAI Press, pp 1558\u20131564"},{"key":"7396_CR12","unstructured":"Grzes M (2017) Reward shaping in episodic reinforcement learning. In: AAMAS \u201917. International foundation for autonomous agents and multiagent systems, pp 565\u2013573"},{"key":"7396_CR13","doi-asserted-by":"publisher","unstructured":"Grzes M, Kudenko D (2008) Plan-based reward shaping for reinforcement learning. In: 2008 4th international IEEE conference intelligent systems. IEEE, pp 10\u201322. https:\/\/doi.org\/10.1109\/IS.2008.4670492","DOI":"10.1109\/IS.2008.4670492"},{"issue":"1","key":"7396_CR14","doi-asserted-by":"publisher","first-page":"60","DOI":"10.1145\/568438.568455","volume":"32","author":"JE Hopcroft","year":"2001","unstructured":"Hopcroft JE, Motwani R, Ullman JD (2001) Introduction to automata theory, languages, and computation. ACM SIGACT News 32(1):60\u201365. https:\/\/doi.org\/10.1145\/568438.568455","journal-title":"ACM SIGACT News"},{"key":"7396_CR15","unstructured":"Icarte RT, Klassen T, Valenzano R, et al (2018) Using reward machines for high-level task specification and decomposition in reinforcement learning. In: International conference on machine learning. PMLR, pp 2107\u20132116"},{"key":"7396_CR16","doi-asserted-by":"crossref","unstructured":"Icarte RT, Klassen TQ, Valenzano RA, et al (2018) Advice-based exploration in model-based reinforcement learning. 
In: Canadian conference on artificial intelligence. Springer, pp 72\u201383","DOI":"10.1007\/978-3-319-89656-4_6"},{"key":"7396_CR17","doi-asserted-by":"crossref","unstructured":"Judah K, Roy S, Fern A, et al (2010) Reinforcement learning via practice and critique advice. In: Twenty-fourth AAAI conference on artificial intelligence","DOI":"10.1609\/aaai.v24i1.7690"},{"key":"7396_CR18","doi-asserted-by":"publisher","unstructured":"Knox WB, Stone P (2009) Interactively shaping agents via human reinforcement: the TAMER framework. In: Proceedings of the fifth international conference on Knowledge capture, pp 9\u201316. https:\/\/doi.org\/10.1145\/1597735.1597738","DOI":"10.1145\/1597735.1597738"},{"issue":"7540","key":"7396_CR19","doi-asserted-by":"publisher","first-page":"529","DOI":"10.1038\/nature14236","volume":"518","author":"V Mnih","year":"2015","unstructured":"Mnih V, Kavukcuoglu K, Silver D et al (2015) Human-level control through deep reinforcement learning. Nature 518(7540):529\u2013533. https:\/\/doi.org\/10.1038\/nature14236","journal-title":"Nature"},{"key":"7396_CR20","doi-asserted-by":"publisher","unstructured":"Neary C, Xu Z, Wu B, et al (2021) Reward machines for cooperative multi-agent reinforcement learning. In: AAMAS \u201921, p 934\u2013942. https:\/\/doi.org\/10.48448\/cawm-bw32","DOI":"10.48448\/cawm-bw32"},{"key":"7396_CR21","unstructured":"Ng AY, Harada D, Russell S (1999) Policy invariance under reward transformations: theory and application to reward shaping. In: ICML, pp 278\u2013287"},{"key":"7396_CR22","first-page":"1043","volume":"10","author":"R Parr","year":"1997","unstructured":"Parr R, Russell S (1997) Reinforcement learning with hierarchies of machines. Adv Neural Inf Process Syst 10:1043\u20131049","journal-title":"Adv Neural Inf Process Syst"},{"key":"7396_CR23","unstructured":"Peng B, MacGlashan J, Loftin R, et al (2016) A need for speed: adapting agent action speed to improve task learning from non-expert humans. 
In: Proceedings of the international joint conference on autonomous agents and multiagent systems"},{"key":"7396_CR24","doi-asserted-by":"publisher","unstructured":"Pnueli A (1977) The temporal logic of programs. In: 18th annual symposium on foundations of computer science (sfcs 1977). IEEE, pp 46\u201357. https:\/\/doi.org\/10.1109\/SFCS.1977.32","DOI":"10.1109\/SFCS.1977.32"},{"key":"7396_CR25","unstructured":"Randl\u00f8v J, Alstr\u00f8m P (1998) Learning to drive a bicycle using reinforcement learning and shaping. In: ICML, pp 463\u2013471"},{"issue":"3","key":"7396_CR26","doi-asserted-by":"publisher","first-page":"287","DOI":"10.1023\/A:1007678930559","volume":"38","author":"S Singh","year":"2000","unstructured":"Singh S, Jaakkola T, Littman ML et al (2000) Convergence results for single-step on-policy reinforcement-learning algorithms. Mach Learn 38(3):287\u2013308. https:\/\/doi.org\/10.1023\/A:1007678930559","journal-title":"Mach Learn"},{"key":"7396_CR27","doi-asserted-by":"publisher","unstructured":"Suay HB, Chernova S (2011) Effect of human guidance and state space size on interactive reinforcement learning. In: 2011 Ro-Man. IEEE, pp 1\u20136. https:\/\/doi.org\/10.1109\/ROMAN.2011.6005223","DOI":"10.1109\/ROMAN.2011.6005223"},{"key":"7396_CR28","volume-title":"Introduction to reinforcement learning","author":"RS Sutton","year":"1998","unstructured":"Sutton RS, Barto AG et al (1998) Introduction to reinforcement learning, vol 135. MIT Press, Cambridge"},{"key":"7396_CR29","doi-asserted-by":"publisher","DOI":"10.1109\/DEVLRN.2007.4354078","author":"AL Thomaz","year":"2007","unstructured":"Thomaz AL, Breazeal C (2007) Robot learning via socially guided exploration. Dev Learn. 
https:\/\/doi.org\/10.1109\/DEVLRN.2007.4354078","journal-title":"Dev Learn"}],"container-title":["Neural Computing and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00521-022-07396-x.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00521-022-07396-x\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00521-022-07396-x.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,27]],"date-time":"2025-06-27T08:27:37Z","timestamp":1751012857000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00521-022-07396-x"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,6,7]]},"references-count":29,"journal-issue":{"issue":"19","published-print":{"date-parts":[[2025,7]]}},"alternative-id":["7396"],"URL":"https:\/\/doi.org\/10.1007\/s00521-022-07396-x","relation":{},"ISSN":["0941-0643","1433-3058"],"issn-type":[{"type":"print","value":"0941-0643"},{"type":"electronic","value":"1433-3058"}],"subject":[],"published":{"date-parts":[[2022,6,7]]},"assertion":[{"value":"9 December 2021","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"3 May 2022","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"7 June 2022","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors have no competing interests to declare that are relevant to the content of this 
article.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}]}}