{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,25]],"date-time":"2025-03-25T16:16:17Z","timestamp":1742919377950,"version":"3.40.3"},"publisher-location":"Cham","reference-count":19,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783030003074"},{"type":"electronic","value":"9783030003081"}],"license":[{"start":{"date-parts":[[2018,1,1]],"date-time":"2018-01-01T00:00:00Z","timestamp":1514764800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2018,1,1]],"date-time":"2018-01-01T00:00:00Z","timestamp":1514764800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2018]]},"DOI":"10.1007\/978-3-030-00308-1_16","type":"book-chapter","created":{"date-parts":[[2018,9,6]],"date-time":"2018-09-06T18:43:53Z","timestamp":1536259433000},"page":"190-203","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Concurrent Hierarchical Reinforcement Learning for RoboCup Keepaway"],"prefix":"10.1007","author":[{"given":"Aijun","family":"Bai","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Stuart","family":"Russell","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xiaoping","family":"Chen","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2018,9,7]]},"reference":[{"key":"16_CR1","unstructured":"Andre, D., Russell, S.J.: Programmable reinforcement learning agents. In: Advances in Neural Information Processing Systems, pp. 1019\u20131025 (2001)"},{"key":"16_CR2","unstructured":"Andre, D., Russell, S.J.: State abstraction for programmable reinforcement learning agents. In: Proceedings of the 8th National Conference on Artificial Intelligence and 14th Conference on Innovative Applications of Artificial Intelligence, pp. 119\u2013125 (2002)"},{"key":"16_CR3","doi-asserted-by":"crossref","unstructured":"Bai, A., Russell, S.J.: Efficient reinforcement learning with hierarchies of machines by leveraging internal transitions. In: Proceedings of the Twenty-Fifth International Joint Conference on Artificial Intelligence, IJCAI 2017, Melbourne, Australia, 19\u201325 August 2017","DOI":"10.24963\/ijcai.2017\/196"},{"issue":"4","key":"16_CR4","doi-asserted-by":"publisher","first-page":"45","DOI":"10.1145\/2717316","volume":"6","author":"A Bai","year":"2015","unstructured":"Bai, A., Wu, F., Chen, X.: Online planning for large Markov decision processes with hierarchical decomposition. ACM Trans. Intell. Syst. Technol. 6(4), 45 (2015)","journal-title":"ACM Trans. Intell. Syst. Technol."},{"key":"16_CR5","doi-asserted-by":"publisher","first-page":"341","DOI":"10.1023\/A:1025696116075","volume":"13","author":"A Barto","year":"2003","unstructured":"Barto, A., Mahadevan, S.: Recent advances in hierarchical reinforcement learning. Discret. Event Dyn. Syst. 13, 341\u2013379 (2003)","journal-title":"Discret. Event Dyn. Syst."},{"key":"16_CR6","volume-title":"Dynamic Programming","author":"R Bellman","year":"1957","unstructured":"Bellman, R.: Dynamic Programming. Princeton University Press, Princeton (1957)"},{"issue":"1","key":"16_CR7","first-page":"63","volume":"13","author":"TG Dietterich","year":"1999","unstructured":"Dietterich, T.G.: Hierarchical reinforcement learning with the MAXQ value function decomposition. J. Mach. Learn. Res. 13(1), 63 (1999)","journal-title":"J. Mach. Learn. Res."},{"key":"16_CR8","series-title":"Lecture Notes in Computer Science (Lecture Notes in Artificial Intelligence)","doi-asserted-by":"publisher","first-page":"72","DOI":"10.1007\/978-3-540-74024-7_7","volume-title":"RoboCup 2006: Robot Soccer World Cup X","author":"S Kalyanakrishnan","year":"2007","unstructured":"Kalyanakrishnan, S., Liu, Y., Stone, P.: Half field offense in RoboCup soccer: a multiagent reinforcement learning case study. In: Lakemeyer, G., Sklar, E., Sorrenti, D.G., Takahashi, T. (eds.) RoboCup 2006. LNCS (LNAI), vol. 4434, pp. 72\u201385. Springer, Heidelberg (2007). https:\/\/doi.org\/10.1007\/978-3-540-74024-7_7"},{"key":"16_CR9","series-title":"Lecture Notes in Computer Science (Lecture Notes in Artificial Intelligence)","doi-asserted-by":"publisher","first-page":"153","DOI":"10.1007\/978-3-642-11876-0_14","volume-title":"RoboCup 2009: Robot Soccer World Cup XIII","author":"S Kalyanakrishnan","year":"2010","unstructured":"Kalyanakrishnan, S., Stone, P.: Learning complementary multiagent behaviors: a case study. In: Baltes, J., Lagoudakis, M.G., Naruse, T., Ghidary, S.S. (eds.) RoboCup 2009. LNCS (LNAI), vol. 5949, pp. 153\u2013165. Springer, Heidelberg (2010). https:\/\/doi.org\/10.1007\/978-3-642-11876-0_14"},{"key":"16_CR10","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"62","DOI":"10.1007\/3-540-64473-3_49","volume-title":"RoboCup-97: Robot Soccer World Cup I","author":"H Kitano","year":"1998","unstructured":"Kitano, H., et al.: The RoboCup synthetic agent challenge 97. In: Kitano, H. (ed.) RoboCup 1997. LNCS, vol. 1395, pp. 62\u201373. Springer, Heidelberg (1998). https:\/\/doi.org\/10.1007\/3-540-64473-3_49"},{"key":"16_CR11","unstructured":"Marthi, B., Russell, S.J., Latham, D., Guestrin, C.: Concurrent hierarchical reinforcement learning. In: IJCAI, pp. 779\u2013785 (2005)"},{"issue":"7540","key":"16_CR12","doi-asserted-by":"publisher","first-page":"529","DOI":"10.1038\/nature14236","volume":"518","author":"V Mnih","year":"2015","unstructured":"Mnih, V., et al.: Human-level control through deep reinforcement learning. Nature 518(7540), 529\u2013533 (2015)","journal-title":"Nature"},{"key":"16_CR13","first-page":"129","volume":"34","author":"EF Moore","year":"1956","unstructured":"Moore, E.F.: Gedanken-experiments on sequential machines. Automata Stud. 34, 129\u2013153 (1956)","journal-title":"Automata Stud."},{"key":"16_CR14","unstructured":"Parr, R., Russell, S.: Reinforcement learning with hierarchies of machines. In: Advances in Neural Information Processing Systems, vol. 10 (1998)"},{"issue":"7587","key":"16_CR15","doi-asserted-by":"publisher","first-page":"484","DOI":"10.1038\/nature16961","volume":"529","author":"D Silver","year":"2016","unstructured":"Silver, D., et al.: Mastering the game of go with deep neural networks and tree search. Nature 529(7587), 484\u2013489 (2016)","journal-title":"Nature"},{"issue":"3","key":"16_CR16","doi-asserted-by":"publisher","first-page":"165","DOI":"10.1177\/105971230501300301","volume":"13","author":"P Stone","year":"2005","unstructured":"Stone, P., Sutton, R., Kuhlmann, G.: Reinforcement learning for robocup soccer keepaway. Adapt. Behav. 13(3), 165\u2013188 (2005)","journal-title":"Adapt. Behav."},{"key":"16_CR17","series-title":"Lecture Notes in Computer Science (Lecture Notes in Artificial Intelligence)","doi-asserted-by":"publisher","first-page":"93","DOI":"10.1007\/11780519_9","volume-title":"RoboCup 2005: Robot Soccer World Cup IX","author":"P Stone","year":"2006","unstructured":"Stone, P., Kuhlmann, G., Taylor, M.E., Liu, Y.: Keepaway soccer: from machine learning testbed to benchmark. In: Bredenfeld, A., Jacoff, A., Noda, I., Takahashi, Y. (eds.) RoboCup 2005. LNCS (LNAI), vol. 4020, pp. 93\u2013105. Springer, Heidelberg (2006). https:\/\/doi.org\/10.1007\/11780519_9"},{"key":"16_CR18","volume-title":"Reinforcement Learning: An Introduction","author":"RS Sutton","year":"1998","unstructured":"Sutton, R.S., Barto, A.G.: Reinforcement Learning: An Introduction, vol. 1. MIT Press, Cambridge (1998)"},{"issue":"1","key":"16_CR19","doi-asserted-by":"publisher","first-page":"181","DOI":"10.1016\/S0004-3702(99)00052-1","volume":"112","author":"R Sutton","year":"1999","unstructured":"Sutton, R., Precup, D., Singh, S.: Between MDPs and semi-MDPs: a framework for temporal abstraction in reinforcement learning. Artif. Intell. 112(1), 181\u2013211 (1999)","journal-title":"Artif. Intell."}],"container-title":["Lecture Notes in Computer Science","RoboCup 2017: Robot World Cup XXI"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-00308-1_16","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,9,8]],"date-time":"2022-09-08T00:04:55Z","timestamp":1662595495000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-030-00308-1_16"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018]]},"ISBN":["9783030003074","9783030003081"],"references-count":19,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-00308-1_16","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2018]]},"assertion":[{"value":"7 September 2018","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"RoboCup","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Robot World Cup","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Nagoya","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Japan","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2017","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"27 July 2017","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"31 July 2017","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"11","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"robocup2017","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/www.robocup2017.org\/eng\/index.html","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Single-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"EasyChair","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"58","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"33","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"57% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"2,67","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3,04","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"No","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"9 other papers","order":10,"name":"additional_info_on_review_process","label":"Additional Info on Review Process","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"This content has been made available to all.","name":"free","label":"Free to read"}]}}