{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,27]],"date-time":"2025-03-27T08:08:23Z","timestamp":1743062903717,"version":"3.40.3"},"publisher-location":"Singapore","reference-count":19,"publisher":"Springer Nature Singapore","isbn-type":[{"type":"print","value":"9789819902712"},{"type":"electronic","value":"9789819902729"}],"license":[{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023]]},"DOI":"10.1007\/978-981-99-0272-9_15","type":"book-chapter","created":{"date-parts":[[2023,2,15]],"date-time":"2023-02-15T14:16:09Z","timestamp":1676470569000},"page":"220-232","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Hierarchical Policies of\u00a0Subgoals for\u00a0Safe Deep Reinforcement Learning"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-4812-0549","authenticated-orcid":false,"given":"Fumin","family":"Yu","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1240-100X","authenticated-orcid":false,"given":"Feng","family":"Gao","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1174-940X","authenticated-orcid":false,"given":"Yao","family":"Yuan","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2486-8379","authenticated-orcid":false,"given":"Xiaofei","family":"Xing","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5913-8774","authenticated-orcid":false,"given":"Yinglong","family":"Dai","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2023,2,16]]},"reference":[{"unstructured":"Andrychowicz, M., et al.: Hindsight experience replay. In: Advances in Neural Information Processing Systems 30 (2017)","key":"15_CR1"},{"doi-asserted-by":"publisher","unstructured":"Cheng, J., Yu, F., Zhang, H., Dai, Y.: Skill reward for safe deep reinforcement learning. In: Wang, G., Choo, K.K.R., Ko, R.K.L., Xu, Y., Crispo, B. (eds.) Ubiquitous Security. UbiSec 2021. Communications in Computer and Information Science, vol. 1557, pp 203\u2013213. Springer, Singapore (2022). https:\/\/doi.org\/10.1007\/978-981-19-0468-4_15","key":"15_CR2","DOI":"10.1007\/978-981-19-0468-4_15"},{"unstructured":"Ferreira, E., Avignon, F., Lefevre, F.: On the use of social signal for reward shaping in reinforcement learning for dialogue management. In: SEMDIAL 2013 DialDam, p. 44 (2013)","key":"15_CR3"},{"unstructured":"Horgan, D., et al.: Distributed prioritized experience replay. arXiv preprint arXiv:1803.00933 (2018)","key":"15_CR4"},{"doi-asserted-by":"publisher","unstructured":"Iosif, A.C., Gasiba, T.E., Zhao, T., Lechner, U., Pinto-Albuquerque, M.: A large-scale study on the security vulnerabilities of cloud deployments. In: Wang, G., Choo, K.K.R., Ko, R.K.L., Xu, Y., Crispo, B. (eds.) Ubiquitous Security, UbiSec 2021. CCIS, vol. 1557, pp 171\u2013188. Springer, Singapore (2022). https:\/\/doi.org\/10.1007\/978-981-19-0468-4_13","key":"15_CR5","DOI":"10.1007\/978-981-19-0468-4_13"},{"unstructured":"Jaderberg, M., Mnih, V., Czarnecki, W.M., Schaul, T., Leibo, J.Z., Silver, D., Kavukcuoglu, K.: Reinforcement learning with unsupervised auxiliary tasks. arXiv preprint arXiv:1611.05397 (2016)","key":"15_CR6"},{"doi-asserted-by":"publisher","unstructured":"Koay, A.M.Y., Xie, M., Ko, R.K.L., Sterner, C., Choi, T., Dong, N.: Sdgen: A scalable, reproducible and flexible approach to generate real world cyber security datasets. In: Wang, G., Choo, K.K.R., Ko, R.K.L., Xu, Y., Crispo, B. (eds.) Ubiquitous Security. UbiSec 2021. Communications in Computer and Information Science, vol 1557, pp 102\u2013115. Springer, Singapore (2022). https:\/\/doi.org\/10.1007\/978-981-19-0468-4_8","key":"15_CR7","DOI":"10.1007\/978-981-19-0468-4_8"},{"unstructured":"Lou, P., Xu, K., Jiang, X., Xiao, Z., Yan, J.: Path planning in an unknown environment based on deep reinforcement learning with prior knowledge. J. Intell. Fuzzy Syst. (Preprint), 1\u201317 (2021)","key":"15_CR8"},{"unstructured":"Mirowski, P., Pascanu, R., Viola, F., Soyer, H., Hadsell, R.: Learning to navigate in complex environments. arXiv (2016)","key":"15_CR9"},{"issue":"7540","key":"15_CR10","doi-asserted-by":"publisher","first-page":"529","DOI":"10.1038\/nature14236","volume":"518","author":"V Mnih","year":"2015","unstructured":"Mnih, V., et al.: Human-level control through deep reinforcement learning. Nature 518(7540), 529\u2013533 (2015)","journal-title":"Nature"},{"unstructured":"Ng, A.Y., Russell, S., et al.: Algorithms for inverse reinforcement learning. In: Icml, vol. 1, p. 2 (2000)","key":"15_CR11"},{"unstructured":"Plappert, M., et al.: Multi-goal reinforcement learning: Challenging robotics environments and request for research. arXiv preprint arXiv:1802.09464 (2018)","key":"15_CR12"},{"unstructured":"Riedmiller, M., et al.: Learning by playing solving sparse reward tasks from scratch. In: International Conference On Machine Learning, pp. 4344\u20134353. PMLR (2018)","key":"15_CR13"},{"unstructured":"Schaul, T., Horgan, D., Gregor, K., Silver, D.: Universal value function approximators. In: International Conference on Machine Learning, pp. 1312\u20131320. PMLR (2015)","key":"15_CR14"},{"unstructured":"Schaul, T., Quan, J., Antonoglou, I., Silver, D.: Prioritized experience replay. arXiv preprint arXiv:1511.05952 (2015)","key":"15_CR15"},{"issue":"7587","key":"15_CR16","doi-asserted-by":"publisher","first-page":"484","DOI":"10.1038\/nature16961","volume":"529","author":"D Silver","year":"2016","unstructured":"Silver, D., et al.: Mastering the game of go with deep neural networks and tree search. Nature 529(7587), 484\u2013489 (2016)","journal-title":"Nature"},{"doi-asserted-by":"publisher","unstructured":"Tang, Y., Zhang, D., Liang, W., Li, K.C., Sukhija, N.: Active malicious accounts detection with multimodal fusion machine learning algorithm. In: Wang, G., Choo, K.K.R., Ko, R.K.L., Xu, Y., Crispo, B. (eds.) Ubiquitous Security. UbiSec 2021. CCIS, vol. 1557, pp 38\u201352. Springer, Singapore (2022). https:\/\/doi.org\/10.1007\/978-981-19-0468-4_4","key":"15_CR17","DOI":"10.1007\/978-981-19-0468-4_4"},{"doi-asserted-by":"crossref","unstructured":"Tavakoli, A., Pardo, F., Kormushev, P.: Action branching architectures for deep reinforcement learning. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol. 32 (2018)","key":"15_CR18","DOI":"10.1609\/aaai.v32i1.11798"},{"doi-asserted-by":"crossref","unstructured":"Zhao, X., Zhang, L., Xia, L., Ding, Z., Yin, D., Tang, J.: Deep reinforcement learning for list-wise recommendations. arXiv preprint arXiv:1801.00209 (2017)","key":"15_CR19","DOI":"10.1145\/3240323.3240374"}],"container-title":["Communications in Computer and Information Science","Ubiquitous Security"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-99-0272-9_15","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,6,13]],"date-time":"2023-06-13T17:04:10Z","timestamp":1686675850000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-99-0272-9_15"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023]]},"ISBN":["9789819902712","9789819902729"],"references-count":19,"URL":"https:\/\/doi.org\/10.1007\/978-981-99-0272-9_15","relation":{},"ISSN":["1865-0929","1865-0937"],"issn-type":[{"type":"print","value":"1865-0929"},{"type":"electronic","value":"1865-0937"}],"subject":[],"published":{"date-parts":[[2023]]},"assertion":[{"value":"16 February 2023","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"UbiSec","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Ubiquitous Security","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Zhangjiajie","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2022","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"28 December 2022","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"31 December 2022","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ubisec2022","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/ubisecurity.org\/2022\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Single-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"EasyChair","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"98","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"34","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"4","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"35% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"4","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}