{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,22]],"date-time":"2026-04-22T13:20:50Z","timestamp":1776864050910,"version":"3.51.2"},"reference-count":51,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2023,2,18]],"date-time":"2023-02-18T00:00:00Z","timestamp":1676678400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,2,18]],"date-time":"2023-02-18T00:00:00Z","timestamp":1676678400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62106172"],"award-info":[{"award-number":["62106172"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["U1836214"],"award-info":[{"award-number":["U1836214"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100010229","name":"Natural Science Foundation of Tianjin Municipal Science and Technology Commission","doi-asserted-by":"publisher","award":["56917ZXRGGX00150"],"award-info":[{"award-number":["56917ZXRGGX00150"]}],"id":[{"id":"10.13039\/501100010229","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100006606","name":"Natural Science Foundation of Tianjin City","doi-asserted-by":"publisher","award":["19JCYBJC16300"],"award-info":[{"award-number":["19JCYBJC16300"]}],"id":[{"id":"10.13039\/501100006606","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Auton Agent Multi-Agent Syst"],"published-print":{"date-parts":[[2023,6]]},"DOI":"10.1007\/s10458-023-09600-1","type":"journal-article","created":{"date-parts":[[2023,2,19]],"date-time":"2023-02-19T16:50:17Z","timestamp":1676825417000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":5,"title":["Accelerating deep reinforcement learning via knowledge-guided policy network"],"prefix":"10.1007","volume":"37","author":[{"given":"Yuanqiang","family":"Yu","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Peng","family":"Zhang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Kai","family":"Zhao","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2741-058X","authenticated-orcid":false,"given":"Yan","family":"Zheng","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jianye","family":"Hao","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2023,2,18]]},"reference":[{"issue":"2","key":"9600_CR1","doi-asserted-by":"publisher","first-page":"267","DOI":"10.1016\/0888-613X(92)90020-Z","volume":"6","author":"HR Berenji","year":"1992","unstructured":"Berenji, H. R. (1992). A reinforcement learning-based architecture for fuzzy logic control. International Journal of Approximate Reasoning, 6(2), 267\u2013292.","journal-title":"International Journal of Approximate Reasoning"},{"key":"9600_CR2","unstructured":"Brockman, G., Cheung, V., Pettersson, L., Schneider, J., Schulman, J., Tang, J., & Zaremba, W. (2016). Openai gym"},{"issue":"1","key":"9600_CR3","doi-asserted-by":"publisher","first-page":"77","DOI":"10.1007\/s10846-018-0839-z","volume":"95","author":"C Celemin","year":"2019","unstructured":"Celemin, C., & Ruiz-del Solar, J. (2019). An interactive framework for learning continuous actions policies based on corrective feedback. Journal of Intelligent & Robotic Systems, 95(1), 77\u201397.","journal-title":"Journal of Intelligent & Robotic Systems"},{"key":"9600_CR4","unstructured":"Cheng, C.A., Yan, X., Wagener, N., & Boots, B. (2018). Fast policy learning through imitation and reinforcement. arXiv preprint arXiv:1805.10413"},{"key":"9600_CR5","first-page":"2493","volume":"12","author":"R Collobert","year":"2011","unstructured":"Collobert, R., Weston, J., Bottou, L., Karlen, M., Kavukcuoglu, K., & Kuksa, P. (2011). Natural language processing (almost) from scratch. Journal of Machine Learning Research, 12, 2493\u20132537.","journal-title":"Journal of Machine Learning Research"},{"key":"9600_CR6","doi-asserted-by":"crossref","unstructured":"Cruz, F., Twiefel, J., Magg, S., Weber, C., & Wermter, S. (2015). Interactive reinforcement learning through speech guidance in a domestic scenario. In: 2015 international joint conference on neural networks (IJCNN), (pp. 1\u20138). IEEE","DOI":"10.1109\/IJCNN.2015.7280477"},{"issue":"3","key":"9600_CR7","doi-asserted-by":"publisher","first-page":"285","DOI":"10.1109\/TITS.2005.853698","volume":"6","author":"X Dai","year":"2005","unstructured":"Dai, X., Li, C. K., & Rad, A. B. (2005). An approach to tune fuzzy controllers based on reinforcement learning for autonomous vehicle control. IEEE Transactions on Intelligent Transportation Systems, 6(3), 285\u2013293.","journal-title":"IEEE Transactions on Intelligent Transportation Systems"},{"key":"9600_CR8","unstructured":"Eysenbach, B., Salakhutdinov, R.R., & Levine, S. (2019). Search on the replay buffer: Bridging planning and reinforcement learning. Advances in Neural Information Processing Systems 32"},{"key":"9600_CR9","unstructured":"Fischer, M., Balunovic, M., Drachsler-Cohen, D., Gehr, T., Zhang, C., & Vechev, M. (2019). Dl2: Training and querying neural networks with logic. In: Proceedings of international conference on machine learning (pp. 1931\u20131941)."},{"key":"9600_CR10","volume-title":"Neural-symbolic learning systems: Foundations and applications","author":"LAS Garcez","year":"2012","unstructured":"Garcez, A.S.d., Broda, K.B., & Gabbay, D.M. (2012). Neural-symbolic learning systems: Foundations and applications. Berlin: Springer."},{"key":"9600_CR11","unstructured":"Ha, D., Dai, A., & Le, Q.V. (2016). Hypernetworks. arXiv:1609.09106"},{"key":"9600_CR12","unstructured":"Haarnoja, T., Zhou, A., Abbeel, P., & Levine, S. (2018). Soft actor-critic: Off-policy maximum entropy deep reinforcement learning with a stochastic actor. In International conference on machine learning (pp. 1861\u20131870). PMLR."},{"key":"9600_CR13","unstructured":"Ho, J., & Ermon, S. (2016). Generative adversarial imitation learning. In: Advances in neural information processing systems (pp. 4565\u20134573)."},{"key":"9600_CR14","unstructured":"Ho, M.K., Littman, M.L., Cushman, F., & Austerweil, J.L. (2015). Teaching with rewards and punishments: Reinforcement or communication? In: CogSci"},{"key":"9600_CR15","doi-asserted-by":"crossref","unstructured":"Hu, Z., Ma, X., Liu, Z., Hovy, E., & Xing, E. (2016). Harnessing deep neural networks with logic rules. arXiv:1603.06318","DOI":"10.18653\/v1\/P16-1228"},{"issue":"3","key":"9600_CR16","doi-asserted-by":"publisher","first-page":"665","DOI":"10.1109\/21.256541","volume":"23","author":"JS Jang","year":"1993","unstructured":"Jang, J. S. (1993). Anfis: adaptive-network-based fuzzy inference system. IEEE Transactions on Systems, Man, and Cybernetics, 23(3), 665\u2013685.","journal-title":"IEEE Transactions on Systems, Man, and Cybernetics"},{"key":"9600_CR17","unstructured":"Kingma, D.P., & Ba, J. (2014). Adam: A method for stochastic optimization. arXiv:1412.6980"},{"key":"9600_CR18","doi-asserted-by":"crossref","unstructured":"Knox, W.B., & Stone, P. (2009). Interactively shaping agents via human reinforcement: The tamer framework. In Proceedings of the fifth international conference on Knowledge capture (pp. 9\u201316)","DOI":"10.1145\/1597735.1597738"},{"key":"9600_CR19","unstructured":"Konda, V.R., & Tsitsiklis, J.N. (2000). Actor-critic algorithms. In: Advances in neural information processing systems (pp. 1008\u20131014). Citeseer"},{"key":"9600_CR20","unstructured":"Kuhlmann, G., Stone, P., Mooney, R., & Shavlik, J. (2004). Guiding a reinforcement learner with natural language advice: Initial results in Robocup Soccer. In The AAAI-2004 workshop on supervisory control of learning and adaptive systems. San Jose, CA"},{"issue":"7553","key":"9600_CR21","doi-asserted-by":"publisher","first-page":"436","DOI":"10.1038\/nature14539","volume":"521","author":"Y LeCun","year":"2015","unstructured":"LeCun, Y., Bengio, Y., & Hinton, G. (2015). Deep learning. Nature, 521(7553), 436\u2013444.","journal-title":"Nature"},{"key":"9600_CR22","unstructured":"MacGlashan, J., Ho, M.K., Loftin, R., Peng, B., Wang, G., Roberts, D.L., Taylor, M.E., & Littman, M.L. (2017). Interactive learning from policy-dependent human feedback. In International conference on machine learning (pp. 2285\u20132294). PMLR"},{"key":"9600_CR23","unstructured":"Mathewson, K.W., & Pilarski, P.M. (2016). Simultaneous control and human feedback in the training of a robotic agent with actor-critic reinforcement learning. arXiv preprint arXiv:1606.06979"},{"issue":"7540","key":"9600_CR24","doi-asserted-by":"publisher","first-page":"529","DOI":"10.1038\/nature14236","volume":"518","author":"V Mnih","year":"2015","unstructured":"Mnih, V., Kavukcuoglu, K., Silver, D., Rusu, A. A., Veness, J., Bellemare, M. G., Graves, A., Riedmiller, M., Fidjeland, A. K., & Ostrovski, G. (2015). Human-level control through deep reinforcement learning. Nature, 518(7540), 529.","journal-title":"Nature"},{"key":"9600_CR25","doi-asserted-by":"crossref","unstructured":"Najar, A., & Chetouani, M. (2021). Reinforcement learning with human advice: a survey. Frontiers in Robotics and AI 8","DOI":"10.3389\/frobt.2021.584075"},{"issue":"1","key":"9600_CR26","doi-asserted-by":"publisher","first-page":"5","DOI":"10.1007\/s10994-015-5494-z","volume":"100","author":"L De Raedt","year":"2015","unstructured":"De Raedt, L., & Kimmig, A. (2015). Probabilistic (logic) programming concepts. Machine Learning, 100(1), 5\u201347.","journal-title":"Machine Learning"},{"issue":"1","key":"9600_CR27","doi-asserted-by":"publisher","first-page":"107","DOI":"10.1007\/s10994-006-5833-1","volume":"62","author":"M Richardson","year":"2006","unstructured":"Richardson, M., & Domingos, P. (2006). Markov logic networks. Machine Learning, 62(1), 107\u2013136.","journal-title":"Machine Learning"},{"key":"9600_CR28","unstructured":"Rosenstein, M.T., Barto, A.G., Si, J., Barto, A., Powell, W., & Wunsch, D. (2004). Supervised actor-critic reinforcement learning. Learning and approximate dynamic programming: Scaling up to the real world (pp. 359\u2013380)."},{"key":"9600_CR29","unstructured":"Ross, S., Gordon, G., & Bagnell, D. (2011). A reduction of imitation learning and structured prediction to no-regret online learning. In Proceedings of the fourteenth international conference on artificial intelligence and statistics (pp. 627\u2013635). JMLR Workshop and Conference Proceedings."},{"issue":"1","key":"9600_CR30","doi-asserted-by":"publisher","first-page":"131","DOI":"10.1162\/neco.1992.4.1.131","volume":"4","author":"J Schmidhuber","year":"1992","unstructured":"Schmidhuber, J. (1992). Learning to control fast-weight memories: An alternative to dynamic recurrent networks. Neural Computation, 4(1), 131\u2013139.","journal-title":"Neural Computation"},{"key":"9600_CR31","unstructured":"Schulman, J., Levine, S., Abbeel, P., Jordan, M., & Moritz, P. (2015). Trust region policy optimization. In Proceedings of international conference on machine learning (pp. 1889\u20131897)."},{"key":"9600_CR32","unstructured":"Schulman, J., Moritz, P., Levine, S., Jordan, M., & Abbeel, P. (2015). High-dimensional continuous control using generalized advantage estimation. arXiv:1506.02438"},{"key":"9600_CR33","unstructured":"Schulman, J., Wolski, F., Dhariwal, P., Radford, A., & Klimov, O. (2017). Proximal policy optimization algorithms. arXiv:1707.06347"},{"key":"9600_CR34","doi-asserted-by":"crossref","unstructured":"Silva, A., & Gombolay, M. (2021). Encoding human domain knowledge to warm start reinforcement learning. In Proceedings of the AAAI conference on artificial intelligence (vol.\u00a035, pp. 5042\u20135050).","DOI":"10.1609\/aaai.v35i6.16638"},{"issue":"7587","key":"9600_CR35","doi-asserted-by":"publisher","first-page":"484","DOI":"10.1038\/nature16961","volume":"529","author":"D Silver","year":"2016","unstructured":"Silver, D., Huang, A., Maddison, C. J., Guez, A., Sifre, L., & Van, d.D.G., Schrittwieser, J., Antonoglou, I., Panneershelvam, V., & Lanctot, M. (2016). Mastering the game of go with deep neural networks and tree search. Nature, 529(7587), 484\u2013489.","journal-title":"Nature"},{"key":"9600_CR36","unstructured":"Squire, S., Tellex, S., Arumugam, D., & Yang, L. (2015). Grounding English commands to reward functions. In Robotics: Science and systems"},{"key":"9600_CR37","unstructured":"Sun, J., Karray, F., Basir, O., & Kamel, M. (2002). Fuzzy logic-based natural language processing and its application to speech recognition. In 3rd WSES international conference on fuzzy sets and systems (pp 11\u201315)."},{"key":"9600_CR38","volume-title":"Reinforcement learning: an introduction","author":"RS Sutton","year":"2018","unstructured":"Sutton, R. S., & Barto, A. G. (2018). Reinforcement learning: an introduction. Cambridge: MIT Press."},{"key":"9600_CR39","doi-asserted-by":"publisher","first-page":"116","DOI":"10.1109\/TSMC.1985.6313399","volume":"1","author":"T Takagi","year":"1985","unstructured":"Takagi, T., & Sugeno, M. (1985). Fuzzy identification of systems and its applications to modeling and control. IEEE Transactions on Systems, Man, and Cybernetics, 1, 116\u2013132.","journal-title":"IEEE Transactions on Systems, Man, and Cybernetics"},{"key":"9600_CR40","unstructured":"Tasfi, N. (2016). Pygame learning environment. https:\/\/github.com\/ntasfi\/PyGame-Learning-Environment"},{"key":"9600_CR41","unstructured":"Vogel, A., & Jurafsky, D. (2010). Learning to follow navigational directions. In Proceedings of the 48th annual meeting of the association for computational linguistics (pp. 806\u2013814)."},{"issue":"3\u20134","key":"9600_CR42","doi-asserted-by":"publisher","first-page":"279","DOI":"10.1007\/BF00992698","volume":"8","author":"CJ Watkins","year":"1992","unstructured":"Watkins, C. J., & Dayan, P. (1992). Q-learning. Machine learning, 8(3\u20134), 279\u2013292.","journal-title":"Machine learning"},{"key":"9600_CR43","unstructured":"Wu, Y., Mansimov, E., Grosse, R.B., Liao, S., & Ba, J. (2017). Scalable trust-region method for deep reinforcement learning using Kronecker-factored approximation. In Advances in neural information processing systems (pp. 5279\u20135288)."},{"key":"9600_CR44","volume-title":"An introduction to fuzzy logic applications in intelligent systems","author":"RR Yager","year":"2012","unstructured":"Yager, R. R., & Zadeh, L. A. (2012). An introduction to fuzzy logic applications in intelligent systems (Vol. 165). Berlin: Springer."},{"issue":"3","key":"9600_CR45","doi-asserted-by":"publisher","first-page":"338","DOI":"10.1016\/S0019-9958(65)90241-X","volume":"8","author":"LA Zadeh","year":"1965","unstructured":"Zadeh, L. A. (1965). Fuzzy sets. Information and Control, 8(3), 338\u2013353.","journal-title":"Information and Control"},{"issue":"4","key":"9600_CR46","doi-asserted-by":"publisher","first-page":"83","DOI":"10.1109\/2.53","volume":"21","author":"LA Zadeh","year":"1988","unstructured":"Zadeh, L. A. (1988). Fuzzy logic. Computer, 21(4), 83\u201393.","journal-title":"Computer"},{"issue":"2","key":"9600_CR47","doi-asserted-by":"publisher","first-page":"249","DOI":"10.1002\/aaai.12053","volume":"43","author":"S Zhang","year":"2022","unstructured":"Zhang, S., & Sridharan, M. (2022). A survey of knowledge-based sequential decision-making under uncertainty. AI Magazine, 43(2), 249\u2013266.","journal-title":"AI Magazine"},{"key":"9600_CR48","unstructured":"Zhang, P., Hao, J., Wang, W., Tang, H., Ma, Y., Duan, Y., & Zheng, Y. Kogun: Accelerating deep reinforcement learning via integrating human suboptimal knowledge"},{"key":"9600_CR49","doi-asserted-by":"crossref","unstructured":"Zhang, Y., Ren, J., Li, J., Fang, Q., & Xu, X. (2021). Deep q-learning with explainable and transferable domain rules. In International conference on intelligent computing (pp. 259\u2013273). Springer","DOI":"10.1007\/978-3-030-84529-2_22"},{"key":"9600_CR50","doi-asserted-by":"crossref","unstructured":"Zhou, S., Ren, W., Ren, X., Mi, X., & Yi, X. (2021). Kg-rl: A knowledge-guided reinforcement learning for massive battle games. In Pacific rim international conference on artificial intelligence (pp. 83\u201394). Springer","DOI":"10.1007\/978-3-030-89370-5_7"},{"key":"9600_CR51","doi-asserted-by":"crossref","unstructured":"Zhu, Y., Mottaghi, R., Kolve, E., Lim, J.J., Gupta, A., Fei-Fei, L., & Farhadi, A. (2017). Target-driven visual navigation in indoor scenes using deep reinforcement learning. In 2017 IEEE international conference on robotics and automation (ICRA) (pp 3357\u20133364). IEEE","DOI":"10.1109\/ICRA.2017.7989381"}],"container-title":["Autonomous Agents and Multi-Agent Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10458-023-09600-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10458-023-09600-1\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10458-023-09600-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,5,11]],"date-time":"2023-05-11T07:45:15Z","timestamp":1683791115000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10458-023-09600-1"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,2,18]]},"references-count":51,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2023,6]]}},"alternative-id":["9600"],"URL":"https:\/\/doi.org\/10.1007\/s10458-023-09600-1","relation":{},"ISSN":["1387-2532","1573-7454"],"issn-type":[{"value":"1387-2532","type":"print"},{"value":"1573-7454","type":"electronic"}],"subject":[],"published":{"date-parts":[[2023,2,18]]},"assertion":[{"value":"12 January 2023","order":1,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"18 February 2023","order":2,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declared that they have no conflicts of interest to this work.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}],"article-number":"17"}}