{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,28]],"date-time":"2025-03-28T09:28:39Z","timestamp":1743154119699,"version":"3.40.3"},"publisher-location":"Singapore","reference-count":26,"publisher":"Springer Singapore","isbn-type":[{"type":"print","value":"9789811904677"},{"type":"electronic","value":"9789811904684"}],"license":[{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2022]]},"DOI":"10.1007\/978-981-19-0468-4_15","type":"book-chapter","created":{"date-parts":[[2022,2,25]],"date-time":"2022-02-25T14:02:47Z","timestamp":1645797767000},"page":"203-213","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Skill Reward for Safe Deep Reinforcement 
Learning"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-0627-3619","authenticated-orcid":false,"given":"Jiangchang","family":"Cheng","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4812-0549","authenticated-orcid":false,"given":"Fumin","family":"Yu","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0556-7981","authenticated-orcid":false,"given":"Hongliang","family":"Zhang","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5913-8774","authenticated-orcid":false,"given":"Yinglong","family":"Dai","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2022,2,26]]},"reference":[{"issue":"7","key":"15_CR1","doi-asserted-by":"publisher","first-page":"4434","DOI":"10.1007\/s10489-020-02034-2","volume":"51","author":"T Aotani","year":"2021","unstructured":"Aotani, T., Kobayashi, T., Sugimoto, K.: Bottom-up multi-agent reinforcement learning by reward shaping for cooperative-competitive tasks. Appl. Intell. 51(7), 4434\u20134452 (2021). https:\/\/doi.org\/10.1007\/s10489-020-02034-2","journal-title":"Appl. Intell."},{"key":"15_CR2","doi-asserted-by":"crossref","unstructured":"Bacon, P.L., Harb, J., Precup, D.: The option-critic architecture. In: Proceedings of the Thirty-First AAAI Conference on Artificial Intelligence, AAAI 2017, p. 1726\u20131734. AAAI Press (2017)","DOI":"10.1609\/aaai.v31i1.10916"},{"key":"15_CR3","doi-asserted-by":"publisher","unstructured":"Dai, Y., Wang, G., Muhammad, K., Liu, S.: A closed-loop healthcare processing approach based on deep reinforcement learning. Multimedia Tools Appl. 1\u201323 (2020). https:\/\/doi.org\/10.1007\/s11042-020-08896-5","DOI":"10.1007\/s11042-020-08896-5"},{"key":"15_CR4","unstructured":"Dayan, P., Hinton, G.E.: Feudal reinforcement learning. In: Advances in Neural Information Processing Systems, vol. 5, (NIPS Conference), p. 271\u2013278. 
Morgan Kaufmann Publishers Inc., San Francisco (1992)"},{"key":"15_CR5","doi-asserted-by":"publisher","first-page":"83","DOI":"10.1016\/j.neucom.2020.02.008","volume":"393","author":"Y Dong","year":"2020","unstructured":"Dong, Y., Tang, X., Yuan, Y.: Principled reward shaping for reinforcement learning via Lyapunov stability theory. Neurocomputing 393, 83\u201390 (2020)","journal-title":"Neurocomputing"},{"key":"15_CR6","first-page":"100425","volume":"11","author":"NP Farazi","year":"2021","unstructured":"Farazi, N.P., Zou, B., Ahamed, T., Barua, L.: Deep reinforcement learning in transportation research: a review. Transp. Res. Interdisc. Perspect. 11, 100425 (2021)","journal-title":"Transp. Res. Interdisc. Perspect."},{"key":"15_CR7","unstructured":"Fujimoto, S., Hoof, H., Meger, D.: Addressing function approximation error in actor-critic methods. In: International Conference on Machine Learning, pp. 1587\u20131596. PMLR (2018)"},{"key":"15_CR8","doi-asserted-by":"crossref","unstructured":"Gu, S., Holly, E., Lillicrap, T., Levine, S.: Deep reinforcement learning for robotic manipulation with asynchronous off-policy updates. In: 2017 IEEE International Conference on Robotics and Automation (ICRA), pp. 3389\u20133396. IEEE (2017)","DOI":"10.1109\/ICRA.2017.7989385"},{"key":"15_CR9","unstructured":"Haarnoja, T., Zhou, A., Abbeel, P., Levine, S.: Soft actor-critic: off-policy maximum entropy deep reinforcement learning with a stochastic actor. In: International Conference on Machine Learning, pp. 1861\u20131870. PMLR (2018)"},{"key":"15_CR10","unstructured":"Harutyunyan, A., Brys, T., Vrancx, P., Now\u00e9, A.: Off-policy reward shaping with ensembles. arXiv preprint arXiv:1502.03248 (2015)"},{"key":"15_CR11","doi-asserted-by":"publisher","unstructured":"Haydari, A., Yilmaz, Y.: Deep reinforcement learning for intelligent transportation systems: a survey. IEEE Trans. Intell. Transp. Syst. 23(1), 11\u201332 (2022). 
https:\/\/doi.org\/10.1109\/TITS.2020.3008612","DOI":"10.1109\/TITS.2020.3008612"},{"key":"15_CR12","doi-asserted-by":"crossref","unstructured":"Hu, Y.J., Lin, S.J.: Deep reinforcement learning for optimizing finance portfolio management. In: 2019 Amity International Conference on Artificial Intelligence (AICAI), pp. 14\u201320. IEEE (2019)","DOI":"10.1109\/AICAI.2019.8701368"},{"key":"15_CR13","unstructured":"Kimura, D., Chaudhury, S., Tachibana, R., Dasgupta, S.: Internal model from observations for reward shaping. arXiv preprint arXiv:1806.01267 (2018)"},{"key":"15_CR14","doi-asserted-by":"crossref","unstructured":"Liu, X.Y., et al.: FinRL: A deep reinforcement learning library for automated stock trading in quantitative finance. arXiv preprint arXiv:2011.09607 (2020)","DOI":"10.2139\/ssrn.3737859"},{"issue":"4","key":"15_CR15","doi-asserted-by":"publisher","first-page":"2035","DOI":"10.1109\/TITS.2020.3048361","volume":"22","author":"K Manchella","year":"2021","unstructured":"Manchella, K., Umrawal, A.K., Aggarwal, V.: FlexPool: a distributed model-free deep reinforcement learning algorithm for joint passengers and goods transportation. IEEE Trans. Intell. Transp. Syst. 22(4), 2035\u20132047 (2021)","journal-title":"IEEE Trans. Intell. Transp. Syst."},{"key":"15_CR16","unstructured":"Mnih, V., et al.: Asynchronous methods for deep reinforcement learning. In: International Conference on Machine Learning, pp. 1928\u20131937. PMLR (2016)"},{"issue":"7540","key":"15_CR17","doi-asserted-by":"publisher","first-page":"529","DOI":"10.1038\/nature14236","volume":"518","author":"V Mnih","year":"2015","unstructured":"Mnih, V., et al.: Human-level control through deep reinforcement learning. Nature 518(7540), 529\u2013533 (2015)","journal-title":"Nature"},{"key":"15_CR18","doi-asserted-by":"crossref","unstructured":"Pang, Z.J., Liu, R.Z., Meng, Z.Y., Zhang, Y., Yu, Y., Lu, T.: On reinforcement learning for full-length game of starcraft. 
In: Proceedings of the AAAI Conference on Artificial Intelligence, vol. 33, pp. 4691\u20134698 (2019)","DOI":"10.1609\/aaai.v33i01.33014691"},{"key":"15_CR19","unstructured":"Schulman, J., Levine, S., Abbeel, P., Jordan, M., Moritz, P.: Trust region policy optimization. In: International Conference on Machine Learning, pp. 1889\u20131897. PMLR (2015)"},{"key":"15_CR20","unstructured":"Schulman, J., Wolski, F., Dhariwal, P., Radford, A., Klimov, O.: Proximal policy optimization algorithms. arXiv preprint arXiv:1707.06347 (2017)"},{"issue":"1\u20132","key":"15_CR21","doi-asserted-by":"publisher","first-page":"181","DOI":"10.1016\/S0004-3702(99)00052-1","volume":"112","author":"RS Sutton","year":"1999","unstructured":"Sutton, R.S., Precup, D., Singh, S.: Between MDPs and semi-MDPs: a framework for temporal abstraction in reinforcement learning. Artif. Intell. 112(1\u20132), 181\u2013211 (1999)","journal-title":"Artif. Intell."},{"key":"15_CR22","doi-asserted-by":"crossref","unstructured":"Torabi, F., Warnell, G., Stone, P.: Behavioral cloning from observation. In: Proceedings of the 27th International Joint Conference on Artificial Intelligence, pp. 4950\u20134957 (2018)","DOI":"10.24963\/ijcai.2018\/687"},{"key":"15_CR23","unstructured":"Vezhnevets, A.S., et al.: FeUdal networks for hierarchical reinforcement learning. In: Precup, D., Teh, Y.W. (eds.) Proceedings of the 34th International Conference on Machine Learning. Proceedings of Machine Learning Research, vol. 70, pp. 3540\u20133549. PMLR, International Convention Centre, Sydney, Australia, 06\u201311 August 2017"},{"issue":"7782","key":"15_CR24","doi-asserted-by":"publisher","first-page":"350","DOI":"10.1038\/s41586-019-1724-z","volume":"575","author":"O Vinyals","year":"2019","unstructured":"Vinyals, O., Babuschkin, I., Czarnecki, W.M., Mathieu, M., Silver, D.: Grandmaster level in StarCraft II using multi-agent reinforcement learning. 
Nature 575(7782), 350\u2013354 (2019)","journal-title":"Nature"},{"issue":"1","key":"15_CR25","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3477600","volume":"55","author":"C Yu","year":"2021","unstructured":"Yu, C., Liu, J., Nemati, S., Yin, G.: Reinforcement learning in healthcare: a survey. ACM Comput. Surv. (CSUR) 55(1), 1\u201336 (2021)","journal-title":"ACM Comput. Surv. (CSUR)"},{"key":"15_CR26","doi-asserted-by":"crossref","unstructured":"Zhao, W., Queralta, J.P., Westerlund, T.: Sim-to-real transfer in deep reinforcement learning for robotics: a survey. In: 2020 IEEE Symposium Series on Computational Intelligence (SSCI), pp. 737\u2013744. IEEE (2020)","DOI":"10.1109\/SSCI47803.2020.9308468"}],"container-title":["Communications in Computer and Information Science","Ubiquitous Security"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-19-0468-4_15","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,1,27]],"date-time":"2023-01-27T22:44:34Z","timestamp":1674859474000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-19-0468-4_15"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022]]},"ISBN":["9789811904677","9789811904684"],"references-count":26,"URL":"https:\/\/doi.org\/10.1007\/978-981-19-0468-4_15","relation":{},"ISSN":["1865-0929","1865-0937"],"issn-type":[{"type":"print","value":"1865-0929"},{"type":"electronic","value":"1865-0937"}],"subject":[],"published":{"date-parts":[[2022]]},"assertion":[{"value":"26 February 2022","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"UbiSec","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Ubiquitous 
Security","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Guangzhou","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2021","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"28 December 2021","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"31 December 2021","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"1","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ubisec2021","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/ubisecurity.org\/2021\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Single-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"EasyChair","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"96","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review 
Information (provided by the conference organizers)"}},{"value":"26","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"2","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"27% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"4","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"No","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}