{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,5,28]],"date-time":"2025-05-28T09:50:07Z","timestamp":1748425807905,"version":"3.40.3"},"publisher-location":"Cham","reference-count":31,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783030845285"},{"type":"electronic","value":"9783030845292"}],"license":[{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021]]},"DOI":"10.1007\/978-3-030-84529-2_22","type":"book-chapter","created":{"date-parts":[[2021,8,9]],"date-time":"2021-08-09T15:01:42Z","timestamp":1628521302000},"page":"259-273","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":3,"title":["Deep Q-learning with Explainable and Transferable Domain Rules"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-3647-0245","authenticated-orcid":false,"given":"Yichuan","family":"Zhang","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6199-7452","authenticated-orcid":false,"given":"Junkai","family":"Ren","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0978-3649","authenticated-orcid":false,"given":"Junxiang","family":"Li","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5063-6889","authenticated-orcid":false,"given":"Qiang","family":"Fang","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3238-745X","authenticated-orcid":false,"given":"Xin","family":"Xu","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2021,8,9]]},"reference":[{"key":"22_CR1","doi-asserted-by":"publisher","first-page":"529","DOI":"10.1038\/nature14236","volume":"518","author":"V Mnih","year":"2015","unstructured":"Mnih, V., et al.: Human-level control through deep reinforcement learning. Nature 518, 529\u2013533 (2015). https:\/\/doi.org\/10.1038\/nature14236","journal-title":"Nature"},{"key":"22_CR2","doi-asserted-by":"crossref","unstructured":"Mirowski, P., et al.: Learning to navigate in cities without a map. In: NeurIPS (2018)","DOI":"10.1145\/3347450.3357659"},{"key":"22_CR3","unstructured":"Du, Y., Narasimhan, K.: Task-agnostic dynamics priors for deep reinforcement learning. In: International Conference on Machine Learning, pp. 1696\u20131705. PMLR (2019)"},{"key":"22_CR4","doi-asserted-by":"publisher","first-page":"110","DOI":"10.1016\/j.ins.2020.03.105","volume":"532","author":"J Li","year":"2020","unstructured":"Li, J., Yao, L., Xu, X., Cheng, B., Ren, J.: Deep reinforcement learning for pedestrian collision avoidance and human-machine cooperative driving. Inf. Sci. 532, 110\u2013124 (2020). https:\/\/doi.org\/10.1016\/j.ins.2020.03.105","journal-title":"Inf. Sci."},{"key":"22_CR5","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3054912","volume":"50","author":"A Hussein","year":"2017","unstructured":"Hussein, A., Gaber, M.M., Elyan, E., Jayne, C.: Imitation learning: a survey of learning methods. ACM Comput. Surv. (CSUR). 50, 1\u201335 (2017). https:\/\/doi.org\/10.1145\/3054912","journal-title":"ACM Comput. Surv. (CSUR)."},{"key":"22_CR6","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"604","DOI":"10.1007\/978-3-030-01234-2_36","volume-title":"Computer Vision \u2013 ECCV 2018","author":"X Liang","year":"2018","unstructured":"Liang, X., Wang, T., Yang, L., Xing, E.: CIRL: controllable imitative reinforcement learning for vision-based self-driving. In: Ferrari, V., Hebert, M., Sminchisescu, C., Weiss, Y. (eds.) ECCV 2018. LNCS, vol. 11211, pp. 604\u2013620. Springer, Cham (2018). https:\/\/doi.org\/10.1007\/978-3-030-01234-2_36"},{"key":"22_CR7","unstructured":"Bojarski, M., et al.: Explaining how a deep neural network trained with end-to-end learning steers a car. arXiv preprint arXiv:1704.07911 (2017)"},{"key":"22_CR8","doi-asserted-by":"publisher","unstructured":"Xie, X., Li, C., Zhang, C., Zhu, Y., Zhu, S.-C.: Learning virtual grasp with failed demonstrations via Bayesian inverse reinforcement learning. In: 2019 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS), pp. 1812\u20131817. IEEE (2019). https:\/\/doi.org\/10.1109\/IROS40897.2019.8968063","DOI":"10.1109\/IROS40897.2019.8968063"},{"key":"22_CR9","unstructured":"Brockman, G., Cheung, V., Pettersson, L., Schneider, J., Schulman, J., Tang, J., Zaremba, W.: OpenAI Gym. arXiv:1606.01540 [cs] (2016)"},{"key":"22_CR10","unstructured":"Tasfi, N.: PyGame Learning Environment. https:\/\/github.com\/ntasfi\/PyGame-Learning-Environment"},{"key":"22_CR11","unstructured":"Sutton, R.S., Barto, A.G.: Reinforcement Learning: An Introduction. MIT Press, Cambridge (2018)"},{"key":"22_CR12","doi-asserted-by":"crossref","unstructured":"Van Hasselt, H., Guez, A., Silver, D.: Deep reinforcement learning with double q-learning. In: Proceedings of the AAAI Conference on Artificial Intelligence (2016)","DOI":"10.1609\/aaai.v30i1.10295"},{"key":"22_CR13","unstructured":"Wang, Z., Schaul, T., Hessel, M., Hasselt, H., Lanctot, M., Freitas, N.: Dueling network architectures for deep reinforcement learning. In: International Conference on Machine Learning, pp. 1995\u20132003. PMLR (2016)"},{"key":"22_CR14","doi-asserted-by":"crossref","unstructured":"Hessel, M., et al.: Rainbow: combining improvements in deep reinforcement learning. In: Proceedings of the AAAI Conference on Artificial Intelligence (2018)","DOI":"10.1609\/aaai.v32i1.11796"},{"key":"22_CR15","unstructured":"Ross, S., Bagnell, D.: Efficient reductions for imitation learning. In: Proceedings of the thirteenth international conference on artificial intelligence and statistics. In: JMLR Workshop and Conference Proceedings, pp. 661\u2013668 (2010)"},{"key":"22_CR16","unstructured":"Ho, J., Ermon, S.: Generative adversarial imitation learning. In: NIPS (2016)"},{"key":"22_CR17","unstructured":"Goodfellow, I.J., et al.: Generative adversarial nets. In: NIPS (2014)"},{"key":"22_CR18","doi-asserted-by":"publisher","unstructured":"Li, Y., Song, J., Ermon, S.: InfoGAIL: interpretable imitation learning from visual demonstrations. In: Proceedings of the 31st International Conference on Neural Information Processing Systems, pp. 3815\u20133825 (2017). https:\/\/doi.org\/10.1007\/978-3-319-70139-4","DOI":"10.1007\/978-3-319-70139-4"},{"key":"22_CR19","unstructured":"Hausman, K., Chebotar, Y., Schaal, S., Sukhatme, G., Lim, J.J.: Multi-modal imitation learning from unstructured demonstrations using generative adversarial nets. In: Proceedings of the 31st International Conference on Neural Information Processing Systems, pp. 1235\u20131245 (2017)"},{"key":"22_CR20","doi-asserted-by":"crossref","unstructured":"Hester, T., et al.: Deep q-learning from demonstrations. In: Proceedings of the AAAI Conference on Artificial Intelligence (2018)","DOI":"10.1609\/aaai.v32i1.11757"},{"key":"22_CR21","unstructured":"Vecerik, M., et al.: Leveraging demonstrations for deep reinforcement learning on robotics problems with sparse rewards. arXiv preprint arXiv:1707.08817 (2017)"},{"issue":"13","key":"22_CR22","doi-asserted-by":"publisher","first-page":"9761","DOI":"10.1007\/s00521-019-04509-x","volume":"32","author":"C Treesatayapun","year":"2019","unstructured":"Treesatayapun, C.: Knowledge-based reinforcement learning controller with fuzzy-rule network: experimental validation. Neural Comput. Appl. 32(13), 9761\u20139775 (2019). https:\/\/doi.org\/10.1007\/s00521-019-04509-x","journal-title":"Neural Comput. Appl."},{"key":"22_CR23","doi-asserted-by":"publisher","unstructured":"Likmeta, A., Metelli, A.M., Tirinzoni, A., Giol, R., Restelli, M., Romano, D.: Combining reinforcement learning with rule-based controllers for transparent and general decision-making in autonomous driving. Robot. Auton. Syst. 131, 103568 (2020). https:\/\/doi.org\/10.1016\/j.robot.2020.103568","DOI":"10.1016\/j.robot.2020.103568"},{"key":"22_CR24","doi-asserted-by":"publisher","unstructured":"Vincze, D., T\u00f3th, A., Niitsuma, M.: Antecedent redundancy exploitation in fuzzy rule interpolation-based reinforcement learning. In: 2020 IEEE\/ASME International Conference on Advanced Intelligent Mechatronics (AIM), pp. 1316\u20131321. IEEE (2020). https:\/\/doi.org\/10.1109\/AIM43001.2020.9158875","DOI":"10.1109\/AIM43001.2020.9158875"},{"key":"22_CR25","doi-asserted-by":"crossref","unstructured":"Zhang, P., et al.: KoGuN: accelerating deep reinforcement learning via integrating human suboptimal knowledge. arXiv preprint arXiv:2002.07418 (2020)","DOI":"10.24963\/ijcai.2020\/317"},{"key":"22_CR26","unstructured":"Schulman, J., Wolski, F., Dhariwal, P., Radford, A., Klimov, O.: Proximal Policy Optimization Algorithms. arXiv:1707.06347 [cs] (2017)"},{"key":"22_CR27","doi-asserted-by":"crossref","unstructured":"Yang, Q., Zhang, Y., Dai, W., Pan, S.J.: Transfer Learning. Cambridge University Press, Cambridge (2020)","DOI":"10.1017\/9781139061773"},{"key":"22_CR28","doi-asserted-by":"publisher","first-page":"181","DOI":"10.1016\/S0004-3702(99)00052-1","volume":"112","author":"RS Sutton","year":"1999","unstructured":"Sutton, R.S., Precup, D., Singh, S.: Between MDPs and semi-MDPs: a framework for temporal abstraction in reinforcement learning. Artif. Intell. 112, 181\u2013211 (1999). https:\/\/doi.org\/10.1016\/S0004-3702(99)00052-1","journal-title":"Artif. Intell."},{"key":"22_CR29","doi-asserted-by":"crossref","unstructured":"Yin, H., Pan, S.: Knowledge transfer for deep reinforcement learning with hierarchical experience replay. In: Proceedings of the AAAI Conference on Artificial Intelligence (2017)","DOI":"10.1609\/aaai.v31i1.10733"},{"key":"22_CR30","unstructured":"Kingma, D.P., Ba, J.: Adam: a method for stochastic optimization. arXiv preprint arXiv:1412.6980 (2014)"},{"key":"22_CR31","doi-asserted-by":"publisher","unstructured":"Li, S., Xu, X., Zuo, L.: Dynamic path planning of a mobile robot with improved Q-learning algorithm. In: 2015 IEEE International Conference on Information and Automation, pp. 409\u2013414. IEEE (2015). https:\/\/doi.org\/10.1109\/ICInfA.2015.7279322","DOI":"10.1109\/ICInfA.2015.7279322"}],"container-title":["Lecture Notes in Computer Science","Intelligent Computing Theories and Application"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-84529-2_22","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,3,12]],"date-time":"2024-03-12T15:03:44Z","timestamp":1710255824000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-030-84529-2_22"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021]]},"ISBN":["9783030845285","9783030845292"],"references-count":31,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-84529-2_22","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2021]]},"assertion":[{"value":"9 August 2021","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ICIC","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Intelligent Computing","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Shenzhen","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2021","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"12 August 2021","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"15 August 2021","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"17","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"icic2021a","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/www.ic-icc.cn\/2021\/index.htm","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}