{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,28]],"date-time":"2025-03-28T03:22:28Z","timestamp":1743132148002,"version":"3.40.3"},"publisher-location":"Cham","reference-count":22,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783030616083"},{"type":"electronic","value":"9783030616090"}],"license":[{"start":{"date-parts":[[2020,1,1]],"date-time":"2020-01-01T00:00:00Z","timestamp":1577836800000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2020,1,1]],"date-time":"2020-01-01T00:00:00Z","timestamp":1577836800000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2020]]},"DOI":"10.1007\/978-3-030-61609-0_29","type":"book-chapter","created":{"date-parts":[[2020,10,19]],"date-time":"2020-10-19T19:02:59Z","timestamp":1603134179000},"page":"366-378","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":3,"title":["Identifying Critical States by the Action-Based Variance of Expected Return"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-3267-3886","authenticated-orcid":false,"given":"Izumi","family":"Karino","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9158-5360","authenticated-orcid":false,"given":"Yoshiyuki","family":"Ohmura","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8443-4161","authenticated-orcid":false,"given":"Yasuo","family":"Kuniyoshi","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2020,10,14]]},"reference":[{"key":"29_CR1","unstructured":"Bellemare, M., Srinivasan, S., Ostrovski, G., Schaul, T., Saxton, D., Munos, R.: Unifying count-based exploration and intrinsic motivation. In: Advances in Neural Information Processing Systems, pp. 1471\u20131479 (2016)"},{"key":"29_CR2","doi-asserted-by":"publisher","first-page":"253","DOI":"10.1613\/jair.3912","volume":"47","author":"MG Bellemare","year":"2013","unstructured":"Bellemare, M.G., Naddaf, Y., Veness, J., Bowling, M.: The arcade learning environment: an evaluation platform for general agents. J. Artif. Intell. Res. 47, 253\u2013279 (2013)","journal-title":"J. Artif. Intell. Res."},{"key":"29_CR3","unstructured":"Brockman, G., et al.: Openai gym (2016)"},{"key":"29_CR4","unstructured":"Glorot, X., Bengio, Y.: Understanding the difficulty of training deep feedforward neural networks. In: Proceedings of the Thirteenth International Conference on Artificial Intelligence and Statistics, vol. 9, pp. 249\u2013256. PMLR, 13\u201315 May 2010"},{"key":"29_CR5","unstructured":"Goyal, A., et al.: Transfer and exploration via the information bottleneck. In: International Conference on Learning Representations (2019)"},{"key":"29_CR6","unstructured":"Haarnoja, T., Zhou, A., Abbeel, P., Levine, S.: Soft actor-critic: off-policy maximum entropy deep reinforcement learning with a stochastic actor. In: Proceedings of the 35th International Conference on Machine Learning, vol. 80, pp. 1861\u20131870. PMLR, 10\u201315 July 2018"},{"key":"29_CR7","doi-asserted-by":"crossref","unstructured":"Henderson, P., Islam, R., Bachman, P., Pineau, J., Precup, D., Meger, D.: Deep reinforcement learning that matters. In: Thirty-Second AAAI Conference on Artificial Intelligence (2018)","DOI":"10.1609\/aaai.v32i1.11694"},{"key":"29_CR8","unstructured":"Jayaraman, D., Ebert, F., Efros, A., Levine, S.: Time-agnostic prediction: predicting predictable video frames. In: International Conference on Learning Representations (2019)"},{"key":"29_CR9","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"794","DOI":"10.1007\/978-3-642-01507-6_89","volume-title":"Advances in Neural Networks \u2013 ISNN 2009","author":"SJ Kazemitabar","year":"2009","unstructured":"Kazemitabar, S.J., Beigy, H.: Using strongly connected components as a basis for autonomous skill acquisition in reinforcement learning. In: Yu, W., He, H., Zhang, N. (eds.) ISNN 2009. LNCS, vol. 5551, pp. 794\u2013803. Springer, Heidelberg (2009). https:\/\/doi.org\/10.1007\/978-3-642-01507-6_89"},{"key":"29_CR10","unstructured":"Kingma, D.P., Ba, J.: Adam: a method for stochastic optimization. In: 3rd International Conference on Learning Representations, San Diego, CA, USA, 7\u20139 May 2015. Conference Track Proceedings (2015)"},{"issue":"4","key":"29_CR11","doi-asserted-by":"publisher","first-page":"189","DOI":"10.1016\/j.robot.2004.07.004","volume":"48","author":"Y Kuniyoshi","year":"2004","unstructured":"Kuniyoshi, Y., Ohmura, Y., Terada, K., Nagakubo, A., Eitoku, S., Yamamoto, T.: Embodied basis of invariant features in execution and perception of whole-body dynamic actions\u2013knacks and focuses of roll-and-rise motion. Robot. Auton. Syst. 48(4), 189\u2013201 (2004)","journal-title":"Robot. Auton. Syst."},{"key":"29_CR12","series-title":"Lecture Notes in Computer Science (Lecture Notes in Artificial Intelligence)","doi-asserted-by":"publisher","first-page":"414","DOI":"10.1007\/978-3-030-10928-8_25","volume-title":"Machine Learning and Knowledge Discovery in Databases","author":"G Liu","year":"2019","unstructured":"Liu, G., Schulte, O., Zhu, W., Li, Q.: Toward interpretable deep reinforcement learning with linear model U-trees. In: Berlingerio, M., Bonchi, F., G\u00e4rtner, T., Hurley, N., Ifrim, G. (eds.) ECML PKDD 2018. LNCS (LNAI), vol. 11052, pp. 414\u2013429. Springer, Cham (2019). https:\/\/doi.org\/10.1007\/978-3-030-10928-8_25"},{"key":"29_CR13","unstructured":"McGovern, A., Barto, A.G.: Automatic discovery of subgoals in reinforcement learning using diverse density. In: Proceedings of the Eighteenth International Conference on Machine Learning, pp. 361\u2013368 (2001)"},{"issue":"7540","key":"29_CR14","doi-asserted-by":"publisher","first-page":"529","DOI":"10.1038\/nature14236","volume":"518","author":"V Mnih","year":"2015","unstructured":"Mnih, V., et al.: Human-level control through deep reinforcement learning. Nature 518(7540), 529\u2013533 (2015)","journal-title":"Nature"},{"key":"29_CR15","doi-asserted-by":"crossref","unstructured":"Pathak, D., Agrawal, P., Efros, A.A., Darrell, T.: Curiosity-driven exploration by self-supervised prediction. In: International Conference on Machine Learning (ICML), vol. 2017 (2017)","DOI":"10.1109\/CVPRW.2017.70"},{"key":"29_CR16","doi-asserted-by":"crossref","unstructured":"\u015eim\u015fek, \u00d6., Barto, A.G.: Using relative novelty to identify useful temporal abstractions in reinforcement learning. In: Proceedings of the Twenty-First International Conference on Machine Learning, p. 95. ACM (2004)","DOI":"10.1145\/1015330.1015353"},{"key":"29_CR17","unstructured":"\u015eim\u015fek, \u00d6., Barto, A.G.: Skill characterization based on betweenness. In: Advances in Neural Information Processing Systems, pp. 1497\u20131504 (2009)"},{"key":"29_CR18","doi-asserted-by":"crossref","unstructured":"\u015eim\u015fek, \u00d6., Wolfe, A.P., Barto, A.G.: Identifying useful subgoals in reinforcement learning by local graph partitioning. In: Proceedings of the 22nd International Conference on Machine Learning, pp. 816\u2013823. ACM (2005)","DOI":"10.1145\/1102351.1102454"},{"key":"29_CR19","series-title":"Lecture Notes in Computer Science (Lecture Notes in Artificial Intelligence)","doi-asserted-by":"publisher","first-page":"212","DOI":"10.1007\/3-540-45622-8_16","volume-title":"Abstraction, Reformulation, and Approximation","author":"M Stolle","year":"2002","unstructured":"Stolle, M., Precup, D.: Learning options in reinforcement learning. In: Koenig, S., Holte, R.C. (eds.) SARA 2002. LNCS (LNAI), vol. 2371, pp. 212\u2013223. Springer, Heidelberg (2002). https:\/\/doi.org\/10.1007\/3-540-45622-8_16"},{"key":"29_CR20","unstructured":"Verma, A., Murali, V., Singh, R., Kohli, P., Chaudhuri, S.: Programmatically interpretable reinforcement learning. In: Proceedings of the 35th International Conference on Machine Learning, vol. 80, pp. 5045\u20135054. PMLR, 10\u201315 July 2018"},{"issue":"3\u20134","key":"29_CR21","doi-asserted-by":"publisher","first-page":"279","DOI":"10.1007\/BF00992698","volume":"8","author":"CJ Watkins","year":"1992","unstructured":"Watkins, C.J., Dayan, P.: Machine learning. Q-learning 8(3\u20134), 279\u2013292 (1992). https:\/\/doi.org\/10.1007\/BF00992698","journal-title":"Q-learning"},{"issue":"1\u20132","key":"29_CR22","doi-asserted-by":"publisher","first-page":"161","DOI":"10.1016\/0016-0032(76)90138-1","volume":"301","author":"IH Witten","year":"1976","unstructured":"Witten, I.H.: The apparent conflict between estimation and control\u2013a survey of the two-armed bandit problem. J. Franklin Inst. 301(1\u20132), 161\u2013189 (1976)","journal-title":"J. Franklin Inst."}],"container-title":["Lecture Notes in Computer Science","Artificial Neural Networks and Machine Learning \u2013 ICANN 2020"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-61609-0_29","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,11,23]],"date-time":"2022-11-23T22:44:57Z","timestamp":1669243497000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-030-61609-0_29"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020]]},"ISBN":["9783030616083","9783030616090"],"references-count":22,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-61609-0_29","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2020]]},"assertion":[{"value":"14 October 2020","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ICANN","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Artificial Neural Networks","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Bratislava","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Slovakia","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2020","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"15 September 2020","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18 September 2020","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"icann2020","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/e-nns.org\/icann2020\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Single-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"OCS","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"249","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"139","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"56% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"2.5","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"*The conference was postponed to 2021 due to the COVID-19 pandemic.","order":10,"name":"additional_info_on_review_process","label":"Additional Info on Review Process","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}