{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,28]],"date-time":"2025-03-28T03:34:12Z","timestamp":1743132852575,"version":"3.40.3"},"publisher-location":"Cham","reference-count":22,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783031255489"},{"type":"electronic","value":"9783031255496"}],"license":[{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023]]},"DOI":"10.1007\/978-3-031-25549-6_3","type":"book-chapter","created":{"date-parts":[[2023,3,21]],"date-time":"2023-03-21T09:07:44Z","timestamp":1679389664000},"page":"29-44","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Efficient Deep Reinforcement Learning via\u00a0Policy-Extended Successor Feature Approximator"],"prefix":"10.1007","author":[{"given":"Yining","family":"Li","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Tianpei","family":"Yang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jianye","family":"Hao","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yan","family":"Zheng","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Hongyao","family":"Tang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2023,3,22]]},"reference":[{"key":"3_CR1","unstructured":"Alegre, L.N., Bazzan, A.L.C., da Silva, B.C.: Optimistic linear support and successor features as a basis for optimal policy transfer. In: Chaudhuri, K., Jegelka, S., Song, L., Szepesv\u00e1ri, C., Niu, G., Sabato, S. (eds.) International Conference on Machine Learning, ICML 2022, Baltimore, Maryland, USA, 17\u201323 July 2022. Proceedings of Machine Learning Research, vol. 162, pp. 394\u2013413. PMLR (2022)"},{"key":"3_CR2","unstructured":"Alver, S., Precup, D.: Constructing a good behavior basis for transfer using generalized policy updates. In: The Tenth International Conference on Learning Representations, ICLR 2022, Virtual Event, 25\u201329 April 2022. OpenReview.net (2022)"},{"key":"3_CR3","unstructured":"Barreto, A., et al.: Successor features for transfer in reinforcement learning. In: Guyon, I., et al. (eds.) Advances in Neural Information Processing Systems 30: Annual Conference on Neural Information Processing Systems 2017, Long Beach, CA, USA, 4\u20139 December 2017, pp. 4055\u20134065 (2017)"},{"key":"3_CR4","unstructured":"Borsa, D., et al.: Universal successor features approximators. CoRR abs\/1812.07626 (2018)"},{"key":"3_CR5","unstructured":"Ellenberger, B.: Pybullet gymperium (2018\u20132019)"},{"key":"3_CR6","doi-asserted-by":"crossref","unstructured":"Feinberg, A.: Markov decision processes: discrete stochastic dynamic programming (Martin l. Puterman). SIAM Rev. 38(4), 689 (1996)","DOI":"10.1137\/1038137"},{"key":"3_CR7","unstructured":"Filos, A., Lyle, C., Gal, Y., Levine, S., Jaques, N., Farquhar, G.: Psiphi-learning: reinforcement learning with demonstrations using successor features and inverse temporal difference learning. In: Meila, M., Zhang, T. (eds.) Proceedings of the 38th International Conference on Machine Learning, ICML 2021, 18\u201324 July 2021, Virtual Event. Proceedings of Machine Learning Research, vol. 139, pp. 3305\u20133317. PMLR (2021)"},{"key":"3_CR8","unstructured":"Gimelfarb, M., Barreto, A., Sanner, S., Lee, C.: Risk-aware transfer in reinforcement learning using successor features. In: Ranzato, M., Beygelzimer, A., Dauphin, Y.N., Liang, P., Vaughan, J.W. (eds.) Advances in Neural Information Processing Systems 34: Annual Conference on Neural Information Processing Systems 2021, NeurIPS 2021, 6\u201314 December 2021, Virtual, pp. 17298\u201317310 (2021)"},{"key":"3_CR9","doi-asserted-by":"crossref","unstructured":"Han, D., Tschiatschek, S.: Option transfer and SMDP abstraction with successor features. In: Raedt, L.D. (ed.) Proceedings of the Thirty-First International Joint Conference on Artificial Intelligence, IJCAI 2022, Vienna, Austria, 23\u201329 July 2022, pp. 3036\u20133042. ijcai.org (2022)","DOI":"10.24963\/ijcai.2022\/421"},{"key":"3_CR10","unstructured":"Hansen, S., Dabney, W., Barreto, A., Warde-Farley, D., de Wiele, T.V., Mnih, V.: Fast task inference with variational intrinsic successor features. In: 8th International Conference on Learning Representations, ICLR 2020, Addis Ababa, Ethiopia, 26\u201330 April 2020. OpenReview.net (2020)"},{"key":"3_CR11","unstructured":"Lillicrap, T.P., et al.: Continuous control with deep reinforcement learning. In: Bengio, Y., LeCun, Y. (eds.) 4th International Conference on Learning Representations, ICLR 2016, San Juan, Puerto Rico, 2\u20134 May 2016, Conference Track Proceedings (2016)"},{"key":"3_CR12","unstructured":"Liu, H., Abbeel, P.: APS: active pretraining with successor features. In: Meila, M., Zhang, T. (eds.) Proceedings of the 38th International Conference on Machine Learning, ICML 2021, 18\u201324 July 2021, Virtual Event. Proceedings of Machine Learning Research, vol. 139, pp. 6736\u20136747. PMLR (2021)"},{"issue":"7540","key":"3_CR13","doi-asserted-by":"publisher","first-page":"529","DOI":"10.1038\/nature14236","volume":"518","author":"V Mnih","year":"2015","unstructured":"Mnih, V., et al.: Human-level control through deep reinforcement learning. Nat. 518(7540), 529\u2013533 (2015)","journal-title":"Nat."},{"key":"3_CR14","unstructured":"Nemecek, M.W., Parr, R.: Policy caches with successor features. In: Meila, M., Zhang, T. (eds.) Proceedings of the 38th International Conference on Machine Learning, ICML 2021, 18\u201324 July 2021, Virtual Event. Proceedings of Machine Learning Research, vol. 139, pp. 8025\u20138033. PMLR (2021)"},{"key":"3_CR15","unstructured":"Raileanu, R., Goldstein, M., Szlam, A., Fergus, R.: Fast adaptation to new environments via policy-dynamics value functions. In: Proceedings of the 37th International Conference on Machine Learning, ICML 2020, 13\u201318 July 2020, Virtual Event. Proceedings of Machine Learning Research, vol. 119, pp. 7920\u20137931. PMLR (2020)"},{"key":"3_CR16","unstructured":"Schaul, T., Horgan, D., Gregor, K., Silver, D.: Universal value function approximators. In: Bach, F.R., Blei, D.M. (eds.) Proceedings of the 32nd International Conference on Machine Learning, ICML 2015, Lille, France, 6\u201311 July 2015. JMLR Workshop and Conference Proceedings, vol. 37, pp. 1312\u20131320. JMLR.org (2015)"},{"issue":"7587","key":"3_CR17","doi-asserted-by":"publisher","first-page":"484","DOI":"10.1038\/nature16961","volume":"529","author":"D Silver","year":"2016","unstructured":"Silver, D., et al.: Mastering the game of go with deep neural networks and tree search. Nat. 529(7587), 484\u2013489 (2016)","journal-title":"Nat."},{"key":"3_CR18","doi-asserted-by":"crossref","unstructured":"Sutton, R.S., Barto, A.G.: Reinforcement Learning - An Introduction. Adaptive Computation and Machine Learning. MIT Press (1998)","DOI":"10.1109\/TNN.1998.712192"},{"key":"3_CR19","doi-asserted-by":"crossref","unstructured":"Tang, H., et al.: What about inputting policy in value function: policy representation and policy-extended value function approximator. In: Thirty-Sixth AAAI Conference on Artificial Intelligence, AAAI 2022, Thirty-Fourth Conference on Innovative Applications of Artificial Intelligence, IAAI 2022, The Twelveth Symposium on Educational Advances in Artificial Intelligence, EAAI 2022 Virtual Event, 22 February\u20131 March 2022, pp. 8441\u20138449. AAAI Press (2022)","DOI":"10.1609\/aaai.v36i8.20820"},{"key":"3_CR20","unstructured":"Taylor, M.E., Stone, P.: Transfer learning for reinforcement learning domains: a survey. J. Mach. Learn. Res. 10, 1633\u20131685 (2009). https:\/\/dl.acm.org\/doi\/10.5555\/1577069.1755839"},{"key":"3_CR21","doi-asserted-by":"crossref","unstructured":"Yang, T., et al.: Efficient deep reinforcement learning via adaptive policy transfer. In: Proceedings of the Twenty-Ninth International Joint Conference on Artificial Intelligence, pp. 3094\u20133100 (2020)","DOI":"10.24963\/ijcai.2020\/428"},{"key":"3_CR22","unstructured":"Zhu, Z., Lin, K., Zhou, J.: Transfer learning in deep reinforcement learning: a survey. CoRR abs\/2009.07888 (2020)"}],"container-title":["Lecture Notes in Computer Science","Distributed Artificial Intelligence"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-25549-6_3","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,3,21]],"date-time":"2023-03-21T09:08:08Z","timestamp":1679389688000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-25549-6_3"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023]]},"ISBN":["9783031255489","9783031255496"],"references-count":22,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-25549-6_3","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2023]]},"assertion":[{"value":"22 March 2023","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"DAI","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Distributed Artificial Intelligence","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Tianjin","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2022","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"15 December 2022","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"17 December 2022","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"dai22022","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/www.adai.ai\/dai\/2022\/2022.html","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Easychair","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"16","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"5","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"31% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}