{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,10]],"date-time":"2026-02-10T18:16:35Z","timestamp":1770747395967,"version":"3.49.0"},"reference-count":91,"publisher":"Springer Science and Business Media LLC","issue":"2","license":[{"start":{"date-parts":[[2023,6,1]],"date-time":"2023-06-01T00:00:00Z","timestamp":1685577600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,6,1]],"date-time":"2023-06-01T00:00:00Z","timestamp":1685577600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"name":"Primary Research and Development Plan of China","award":["2020YFC2006602"],"award-info":[{"award-number":["2020YFC2006602"]}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62072324"],"award-info":[{"award-number":["62072324"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61876217"],"award-info":[{"award-number":["61876217"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61876121"],"award-info":[{"award-number":["61876121"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61772357"],"award-info":[{"award-number":["61772357"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"name":"University Natural Science Foundation of Jiangsu Province","award":["21KJA520005"],"award-info":[{"award-number":["21KJA520005"]}]},{"DOI":"10.13039\/501100013058","name":"Primary Research and Development Plan of Jiangsu Province","doi-asserted-by":"crossref","award":["BE2020026"],"award-info":[{"award-number":["BE2020026"]}],"id":[{"id":"10.13039\/501100013058","id-type":"DOI","asserted-by":"crossref"}]},{"DOI":"10.13039\/fund","name":"Postgraduate Research & Practice Innovation Program of Jiangsu Province","doi-asserted-by":"publisher","award":["KYCX21_3020"],"award-info":[{"award-number":["KYCX21_3020"]}],"id":[{"id":"10.13039\/fund","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["J Grid Computing"],"published-print":{"date-parts":[[2023,6]]},"DOI":"10.1007\/s10723-023-09663-0","type":"journal-article","created":{"date-parts":[[2023,6,5]],"date-time":"2023-06-05T09:02:55Z","timestamp":1685955775000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":6,"title":["Reinforcement Learning in Few-Shot Scenarios: A Survey"],"prefix":"10.1007","volume":"21","author":[{"given":"Zhechao","family":"Wang","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Qiming","family":"Fu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jianping","family":"Chen","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yunzhe","family":"Wang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"You","family":"Lu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Hongjie","family":"Wu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2023,6,5]]},"reference":[{"issue":"7676","key":"9663_CR1","doi-asserted-by":"publisher","first-page":"354","DOI":"10.1038\/nature24270","volume":"550","author":"D Silver","year":"2017","unstructured":"Silver, D., Schrittwieser, J., Simonyan, K., Antonoglou, I., Huang, A., Guez, A., Hubert, T., Baker, L., Lai, M., Bolton, A., Chen, Y., Lillicrap, T., Hui, F., Sifre, L., van den Driessche, G., Graepel, T., Hassabis, D.: Mastering the game of Go without human knowledge. Nature 550(7676), 354\u2013359 (2017). https:\/\/doi.org\/10.1038\/nature24270","journal-title":"Nature"},{"issue":"7782","key":"9663_CR2","doi-asserted-by":"publisher","first-page":"350","DOI":"10.1038\/s41586-019-1724-z","volume":"575","author":"O Vinyals","year":"2019","unstructured":"Vinyals, O., Babuschkin, I., Czarnecki, W.M., Mathieu, M., Dudzik, A., Chung, J., Choi, D.H., Powell, R., Ewalds, T., Georgiev, P., et al.: Grandmaster level in starcraft ii using multi-agent reinforcement learning. Nature 575(7782), 350\u2013354 (2019). https:\/\/doi.org\/10.1038\/s41586-019-1724-z","journal-title":"Nature"},{"key":"9663_CR3","unstructured":"Finn, C., Abbeel, P., Levine. S.: Model-agnostic meta-learning for fast adaptation of deep networks. In: Proceedings of the international conference on machine learning, pp. 1126\u20131135. (2017)"},{"key":"9663_CR4","unstructured":"Finn, C., Rajeswaran, A., Kakade, S., Levine, S.: Online meta-learning. In: Proceedings of the international conference on machine learning, pp. 1920\u20131930 (2019)"},{"key":"9663_CR5","doi-asserted-by":"publisher","unstructured":"Fe-Fei, L., Fergus, P.: A bayesian approach to unsupervised one-shot learning of object categories. In: Proceedings of the ninth IEEE international conference on computer vision, pp. 1134\u20131141. (2003). https:\/\/doi.org\/10.1109\/iccv.2003.1238476","DOI":"10.1109\/iccv.2003.1238476"},{"issue":"5","key":"9663_CR6","doi-asserted-by":"publisher","first-page":"4740","DOI":"10.1109\/tvt.2020.2979493","volume":"69","author":"S Chen","year":"2020","unstructured":"Chen, S., Wang, M., Song, W., Yang, Y., Li, Y., Fu, M.: Stabilization approaches for reinforcement learning-based end-to-end autonomous driving. IEEE Trans. Vehic. Technol. 69(5), 4740\u20134750 (2020). https:\/\/doi.org\/10.1109\/tvt.2020.2979493","journal-title":"IEEE Trans. Vehic. Technol."},{"key":"9663_CR7","unstructured":"Chua, K., Calandra, R., McAllister, R., Levine, S.: Deep reinforcement learning in a handful of trials using probabilistic dynamics models. In: Proceedings of the advances in neural information processing systems, vol. 31, (2018)"},{"issue":"10","key":"9663_CR8","doi-asserted-by":"publisher","first-page":"1345","DOI":"10.1109\/TKDE.2009.191","volume":"22","author":"SJ Pan","year":"2010","unstructured":"Pan, S.J., Yang, Q.: A survey on transfer learning. IEEE Trans. Knowl. Data Eng. 22(10), 1345\u20131359 (2010). https:\/\/doi.org\/10.1109\/TKDE.2009.191","journal-title":"IEEE Trans. Knowl. Data Eng."},{"key":"9663_CR9","doi-asserted-by":"publisher","unstructured":"Lazaric, A.: Transfer in reinforcement learning: A framework and a survey. In: Adaptation, learning, and optimization, pp. 143\u2013173. Springer, Berlin, (2012). https:\/\/doi.org\/10.1007\/978-3-642-27645-3_5","DOI":"10.1007\/978-3-642-27645-3_5"},{"key":"9663_CR10","unstructured":"Chen, Y., Hoffman, M.W., Colmenarejo, S.G., Denil, M., Lillicrap, T.P., Botvinick, M., de\u00a0Freitas, N.: Learning to learn without gradient descent by gradient descent. In: Proceedings of the 34th international conference on machine learning, vol. 70, pp. 748\u2013756. (2017)"},{"key":"9663_CR11","doi-asserted-by":"publisher","unstructured":"Ji, J., Chen, X., Wang, Q., Yu, L., Li, P.: Learning to learn gradient aggregation by gradient descent. In: Proceedings of the twenty-eighth international joint conference on artificial intelligence, pp. 2614\u20132620. (2019). https:\/\/doi.org\/10.24963\/ijcai.2019\/363","DOI":"10.24963\/ijcai.2019\/363"},{"key":"9663_CR12","doi-asserted-by":"publisher","unstructured":"Liu, B.: Learning on the job: Online lifelong and continual learning. In: Proceedings of the AAAI conference on artificial intelligence, vol.\u00a034, pp. 13544\u201313549. (2020). https:\/\/doi.org\/10.1609\/aaai.v34i09.7079","DOI":"10.1609\/aaai.v34i09.7079"},{"issue":"3","key":"9663_CR13","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3386252","volume":"53","author":"Y Wang","year":"2020","unstructured":"Wang, Y., Yao, Q., Kwok, J.T., Ni, L.M.: Generalizing from a few examples. ACM Comput. Surv. 53(3), 1\u201334 (2020). https:\/\/doi.org\/10.1145\/3386252","journal-title":"ACM Comput. Surv."},{"key":"9663_CR14","doi-asserted-by":"publisher","first-page":"149","DOI":"10.1613\/jair.731","volume":"12","author":"J Baxter","year":"2000","unstructured":"Baxter, J.: A model of inductive bias learning. J. Artif. Intel. Res. 12, 149\u2013198 (2000). https:\/\/doi.org\/10.1613\/jair.731","journal-title":"J. Artif. Intel. Res."},{"key":"9663_CR15","unstructured":"Silver, D.L.: Selective transfer of neural network task knowledge. PhD thesis, University of Western Ontario (2000)"},{"key":"9663_CR16","unstructured":"Sutton, R.S., Barto, A.G.: Reinforcement learning: an introduction. MIT press (2018)"},{"key":"9663_CR17","volume-title":"On-line Q-learning using connectionist systems","author":"GA Rummery","year":"1994","unstructured":"Rummery, G.A., Niranjan, M.: On-line Q-learning using connectionist systems, vol. 37. University of Cambridge, Department of Engineering Cambridge, UK (1994)"},{"key":"9663_CR18","doi-asserted-by":"publisher","unstructured":"van Seijen, H., van Hasselt, H., Whiteson, S., Wiering, M.: A theoretical and empirical analysis of expected sarsa. In: Proceedings of the 2009 IEEE symposium on adaptive dynamic programming and reinforcement learning, (2009). https:\/\/doi.org\/10.1109\/adprl.2009.4927542","DOI":"10.1109\/adprl.2009.4927542"},{"key":"9663_CR19","doi-asserted-by":"publisher","unstructured":"Williams, R.J.: Simple statistical gradient-following algorithms for connectionist reinforcement learning. In: Reinforcement Learning, Springer, US, pp. 5\u201332. (1992). https:\/\/doi.org\/10.1007\/978-1-4615-3618-5_2","DOI":"10.1007\/978-1-4615-3618-5_2"},{"key":"9663_CR20","unstructured":"Schulman, J., Levine, S., Abbeel, P., Jordan, M., Moritz, P.: Trust region policy optimization. In: Proceedings of the international conference on machine learning, pp. 1889\u20131897. (2015)"},{"key":"9663_CR21","volume-title":"Machine learning","author":"TM Mitchell","year":"1997","unstructured":"Mitchell, T.M., et al.: Machine learning. McGraw-hill, New York (1997)"},{"key":"9663_CR22","doi-asserted-by":"publisher","first-page":"153171","DOI":"10.1109\/ACCESS.2021.3126658","volume":"9","author":"E Salvato","year":"2021","unstructured":"Salvato, E., Fenu, G., Medvet, E., Pellegrino, F.A.: Crossing the reality gap: a survey on sim-to-real transferability of robot controllers in reinforcement learning. IEEE Access 9, 153171\u2013153187 (2021). https:\/\/doi.org\/10.1109\/ACCESS.2021.3126658","journal-title":"IEEE Access"},{"issue":"1","key":"9663_CR23","doi-asserted-by":"publisher","first-page":"642","DOI":"10.1109\/LRA.2021.3131698","volume":"7","author":"S Zhou","year":"2022","unstructured":"Zhou, S., Pereida, K., Zhao, W., Schoellig, A.P.: Bridging the model-reality gap with lipschitz network adaptation. IEEE Robotics and Automation Letters 7(1), 642\u2013649 (2022). https:\/\/doi.org\/10.1109\/LRA.2021.3131698","journal-title":"IEEE Robotics and Automation Letters"},{"key":"9663_CR24","doi-asserted-by":"publisher","unstructured":"Tobin, J., Fong, R., Ray, A., Schneider, J., Zaremba, W., Abbeel, P.: Domain randomization for transferring deep neural networks from simulation to the real world. In: Proceedings of the 2017 IEEE\/RSJ international conference on intelligent robots and systems (IROS), (2017). https:\/\/doi.org\/10.1109\/iros.2017.8202133","DOI":"10.1109\/iros.2017.8202133"},{"key":"9663_CR25","unstructured":"Andrychowicz, M., Wolski, F., Ray, A., Schneider, J., Fong, R., Welinder, P., McGrew, B., Tobin, J., Pieter\u00a0Abbeel, O., Zaremba, W.: Hindsight experience replay. In: Proceedings of the advances in neural information processing systems, vol. 30, (2017)"},{"key":"9663_CR26","unstructured":"Clavera, I., Nagabandi, A., Liu, S., Fearing, R.S., Abbeel, P., Levine, S., Finn, C.: Learning to adapt in dynamic, real-world environments through meta-reinforcement learning. In: Proceedings of the international conference on learning representations, (2019)"},{"key":"9663_CR27","doi-asserted-by":"crossref","unstructured":"Hester, T., Vecerik, M., Pietquin, O., Lanctot, M., Schaul, T., Piot, B., Horgan, D., Quan, J., Sendonaris, A., Osband, I., et al.: Deep q-learning from demonstrations. In: Proceedings of the AAAI conference on artificial intelligence, pp. 3223\u20133230 (2018)","DOI":"10.1609\/aaai.v32i1.11757"},{"key":"9663_CR28","unstructured":"Kirsch, L., van Steenkiste, S., Schmidhuber, J.: Improving generalization in meta reinforcement learning using learned objectives. In: Proceedings of the international conference on learning representations, (2020). https:\/\/openreview.net\/forum?id=S1evHerYPr"},{"issue":"3","key":"9663_CR29","doi-asserted-by":"publisher","first-page":"247","DOI":"10.1037\/h0074898","volume":"8","author":"RS Woodworth","year":"1901","unstructured":"Woodworth, R.S., Thorndike, E.: The influence of improvement in one mental function upon the efficiency of other functions.(i). Psychol. Rev. 8(3), 247\u2013261 (1901)","journal-title":"Psychol. Rev."},{"key":"9663_CR30","unstructured":"Anderson, J.R.: Cognitive psychology and its implications (7th Edition) Worth (2009)"},{"issue":"s1","key":"9663_CR31","first-page":"39","volume":"36","author":"H Wang","year":"2008","unstructured":"Wang, H., Gao, Y., Chen, X.: Transfer of reinforcement learning: the state of the art. Acta Electr. Sin. 36(s1), 39\u201343 (2008)","journal-title":"Acta Electr. Sin."},{"key":"9663_CR32","unstructured":"Silver, D.L., Yang, Q., Li, L.: Lifelong machine learning systems: beyond learning algorithms. In: Proceedings of the 2013 AAAI spring symposium series (2013)"},{"key":"9663_CR33","unstructured":"Brunskill, E., Li, L.: Pac-inspired option discovery in lifelong reinforcement learning. In: Proceedings of the 31st international conference on machine learning, vol\u00a032, pp. 316\u2013324. (2014)"},{"key":"9663_CR34","doi-asserted-by":"crossref","unstructured":"Taylor, M.E., Stone, P.: Transfer learning for reinforcement learning domains: a survey. J. Mach. Learn. Res. 10(7), (2009)","DOI":"10.1007\/978-3-642-01882-4_2"},{"key":"9663_CR35","first-page":"168","volume":"2013","author":"Approximate policy iteration with demonstration data","year":"2013","unstructured":"Approximate policy iteration with demonstration data: Kim, B., Farahmand, A.m., Pineau, J., Precup, D. RLDM 2013, 168\u2013172 (2013)","journal-title":"RLDM"},{"key":"9663_CR36","unstructured":"Kim, B., Farahmand, A.m., Pineau, J., Precup, D.: (2013b) Learning from limited demonstrations. In: Proceedings of the advances in neural information processing systems, vol. 26, pp. 2859\u20132867. (2013a)"},{"issue":"3","key":"9663_CR37","doi-asserted-by":"publisher","first-page":"310","DOI":"10.1007\/s11768-011-1005-3","volume":"9","author":"DP Bertsekas","year":"2011","unstructured":"Bertsekas, D.P.: Approximate policy iteration: a survey and some new methods. J. Control Theory Appl. 9(3), 310\u2013335 (2011). https:\/\/doi.org\/10.1007\/s11768-011-1005-3","journal-title":"J. Control Theory Appl."},{"key":"9663_CR38","doi-asserted-by":"crossref","unstructured":"Piot, B., Geist, M., Pietquin, O.: Boosted bellman residual minimization handling expert demonstrations. In: Calders, T., Esposito, F., H\u00fcllermeier, E., Meo, R. (eds) Proceedings of the machine learning and knowledge discovery in databases, Berlin, Heidelberg, pp. 549\u2013564. (2014)","DOI":"10.1007\/978-3-662-44851-9_35"},{"key":"9663_CR39","unstructured":"Chemali, J., Lazaric, A.: Direct policy iteration with demonstrations. In: Proceedings of the 24th international joint conference on artificial intelligence, pp. 3380\u20133386. (2015)"},{"key":"9663_CR40","unstructured":"Gao, Y., Huazhe, X., Lin, J., Yu, F., Levine, S., Darrell, T.: Reinforcement learning from imperfect demonstrations. In: 6th International conference on learning representations, (2018)"},{"key":"9663_CR41","doi-asserted-by":"publisher","unstructured":"Jing, M., Ma, X., Huang, W., Sun, F., Yang, C., Fang, B., Liu, H.: Reinforcement learning from imperfect demonstrations under soft expert guidance. In: Proceedings of the AAAI conference on artificial intelligence, vol. 34, pp. 5109\u20135116. (2020). https:\/\/doi.org\/10.1609\/aaai.v34i04.5953","DOI":"10.1609\/aaai.v34i04.5953"},{"key":"9663_CR42","doi-asserted-by":"publisher","unstructured":"Lazaric, A., Restelli, M., Bonarini, A.: Transfer of samples in batch reinforcement learning. In: Proceedings of the 25th international conference on machine learning, New York, NY, USA, pp. 544\u2013551. https:\/\/doi.org\/10.1145\/1390156.1390225","DOI":"10.1145\/1390156.1390225"},{"key":"9663_CR43","doi-asserted-by":"publisher","unstructured":"Cortes, C., Mohri, M., Riley, M., Rostamizadeh, A.: Sample selection bias correction theory. In: Proceedings of the international conference on algorithmic learning theory, pp. 38\u201353, (2008). https:\/\/doi.org\/10.1007\/978-3-540-87987-9_8","DOI":"10.1007\/978-3-540-87987-9_8"},{"key":"9663_CR44","doi-asserted-by":"crossref","unstructured":"Laroche, R., Barlier, M.: Transfer reinforcement learning with shared dynamics. In: Proceedings of the AAAI conference on artificial intelligence, vol 31. pp. 2147\u20132153. (2017)","DOI":"10.1609\/aaai.v31i1.10796"},{"key":"9663_CR45","unstructured":"Tirinzoni, A., Sessa, A., Pirotta, M., Restelli, M.: Importance weighted transfer of samples in reinforcement learning. In: Proceedings of the 35th international conference on machine learning, vol. 80, pp. 4936\u20134945. (2018)"},{"key":"9663_CR46","first-page":"503","volume":"6","author":"D Ernst","year":"2005","unstructured":"Ernst, D., Geurts, P., Wehenkel, L.: Tree-based batch mode reinforcement learning. J. Mach. Learn. Res. 6, 503\u2013556 (2005)","journal-title":"J. Mach. Learn. Res."},{"key":"9663_CR47","unstructured":"Fakoor, R., Chaudhari, P., Soatto, S., Smola, A.J.: Meta-q-learning. In: Proceedings of the international conference on learning representations (2020)"},{"key":"9663_CR48","doi-asserted-by":"crossref","unstructured":"Harutyunyan, A., Devlin, S., Vrancx, P., Now\u00e9, A.: Expressing arbitrary reward functions as potential-based advice. In: Proceedings of the AAAI conference on artificial intelligence, vol. 29, pp. 2652\u20132658. (2015)","DOI":"10.1609\/aaai.v29i1.9628"},{"key":"9663_CR49","unstructured":"Ng, A.Y., Harada, D., Russell, S.: Policy invariance under reward transformations: theory and application to reward shaping. In: ICML, vol. 99, pp. 278\u2013287. (1999)"},{"key":"9663_CR50","unstructured":"Wiewiora, E., Cottrell, G.W., Elkan, C.: Principled methods for advising reinforcement learning agents. In: Proceedings of the 20th international conference on machine learning (ICML-03), pp. 792\u2013799. (2003)"},{"key":"9663_CR51","unstructured":"Devlin, S.M., Kudenko. D.: Dynamic potential-based reward shaping. In: Proceedings of the 11th international conference on autonomous agents and multiagent systems, pp. 433\u2013440. (2012)"},{"key":"9663_CR52","unstructured":"Brys, T., Harutyunyan, A., Suay, H.B., Chernova, S., Taylor, M.E., Now\u00e9, A.: Reinforcement learning from demonstration through shaping. In: Proceedings of the 24th international joint conference on artificial intelligence, pp. 3352\u20133358. (2015a)"},{"key":"9663_CR53","unstructured":"Brys, T., Harutyunyan, A., Taylor, M.E., Now\u00e9, A.: Policy transfer using reward shaping. In: Proceedings of the AAMAS, pp. 181\u2013188. (2015b)"},{"key":"9663_CR54","doi-asserted-by":"crossref","unstructured":"Marom, O., Rosman, B.: Belief reward shaping in reinforcement learning. In: Proceedings of the AAAI conference on artificial intelligence, vol. 32, pp. 3762\u20133769. (2018)","DOI":"10.1609\/aaai.v32i1.11741"},{"key":"9663_CR55","doi-asserted-by":"crossref","unstructured":"Bengio, S., Bengio, Y., Cloutier, J., Gecsei, J.: On the optimization of a synaptic learning rule. In: Proceedings of the optimality in artificial and biological neural networks. (1992)","DOI":"10.1007\/978-1-4471-2063-6_131"},{"key":"9663_CR56","unstructured":"Andrychowicz, M., Denil, M., G\u00f3mez, S., Hoffman, M.W., Pfau, D., Schaul, T., Shillingford, B., de\u00a0Freitas, N.: Learning to learn by gradient descent by gradient descent. In: Proceedings of the advances in neural information processing systems, vol. 29, pp. 3981\u20133989. (2016)"},{"key":"9663_CR57","unstructured":"Nichol, A., Achiam, J., Schulman, J.: On first-order meta-learning algorithms. (2018). arXiv:1803.02999"},{"key":"9663_CR58","unstructured":"Deleu, T., Bengio, Y.: The effects of negative adaptation in model-agnostic meta-learning. (2018). arXiv:1812.02159"},{"key":"9663_CR59","doi-asserted-by":"publisher","first-page":"45454","DOI":"10.1109\/access.2021.3066513","volume":"9","author":"KI Lee","year":"2021","unstructured":"Lee, K.I., Lee, S., Song, B.C.: Zero-shot knowledge distillation using label-free adversarial perturbation with taylor approximation. IEEE Access. 9, 45454\u201345461 (2021). https:\/\/doi.org\/10.1109\/access.2021.3066513","journal-title":"IEEE Access."},{"key":"9663_CR60","doi-asserted-by":"publisher","first-page":"104096","DOI":"10.1016\/j.imavis.2021.104096","volume":"108","author":"LT Nguyen-Meidine","year":"2021","unstructured":"Nguyen-Meidine, L.T., Belal, A., Kiran, M., Dolz, J., Blais-Morin, L.A., Granger, E.: Knowledge distillation methods for efficient unsupervised adaptation across multiple domains. Image Vis. Comput. 108, 104096 (2021). https:\/\/doi.org\/10.1016\/j.imavis.2021.104096","journal-title":"Image Vis. Comput."},{"key":"9663_CR61","unstructured":"Rusu, A.A., Colmenarejo, S.G., Gulcehre, C., Desjardins, G., Kirkpatrick, J., Pascanu, R., Mnih, V., Kavukcuoglu, K., Hadsell, R.: Policy distillation. In: Proceedings of the international conference on learning representations, (2016)"},{"issue":"1\u20132","key":"9663_CR62","doi-asserted-by":"publisher","first-page":"181","DOI":"10.1016\/s0004-3702(99)00052-1","volume":"112","author":"RS Sutton","year":"1999","unstructured":"Sutton, R.S., Precup, D., Singh, S.: Between MDPs and semi-MDPs: a framework for temporal abstraction in reinforcement learning. Artif. Intell. 112(1\u20132), 181\u2013211 (1999). https:\/\/doi.org\/10.1016\/s0004-3702(99)00052-1","journal-title":"Artif. Intell."},{"key":"9663_CR63","doi-asserted-by":"crossref","unstructured":"Bacon, P.L., Harb, J., Precup, D.: The option-critic architecture. In: Proceedings of the AAAI conference on artificial intelligence, vol. 31, pp. 1726\u20131734. (2017)","DOI":"10.1609\/aaai.v31i1.10916"},{"key":"9663_CR64","unstructured":"Riemer, M., Liu, M., Tesauro, G.: Learning abstract options. In: Bengio, S., Wallach, H., Larochelle, H., Grauman, K., Cesa-Bianchi, N., Garnett, R. (eds) Proceedings of the advances in neural information processing systems, vol. 31, pp. 10445\u201310455. (2018)"},{"key":"9663_CR65","unstructured":"Dayan, P., Hinton, G.E.: Feudal reinforcement learning. In: Proceedings of the advances in neural information processing systems, vol. 5, pp. 271\u2013278. (1993)"},{"key":"9663_CR66","unstructured":"Jothimurugan, K., Bastani, O., Alur, R.: Abstract value iteration for hierarchical reinforcement learning. In: Proceedings of the international conference on artificial intelligence and statistics, vol. 130, pp. 1162\u20131170. (2021)"},{"key":"9663_CR67","doi-asserted-by":"publisher","unstructured":"Rafati, J., Noelle, D.C.: Learning representations in model-free hierarchical reinforcement learning. In: Proceedings of the AAAI conference on artificial intelligence, vol. 33, pp. 10009\u201310010. (2019). https:\/\/doi.org\/10.1609\/aaai.v33i01.330110009","DOI":"10.1609\/aaai.v33i01.330110009"},{"issue":"1\u20132","key":"9663_CR68","doi-asserted-by":"publisher","first-page":"41","DOI":"10.1023\/A:1022140919877","volume":"13","author":"AG Barto","year":"2003","unstructured":"Barto, A.G., Mahadevan, S.: Recent advances in hierarchical reinforcement learning. Discrete Event Dyn. Syst. 13(1\u20132), 41\u201377 (2003)","journal-title":"Discrete Event Dyn. Syst."},{"key":"9663_CR69","doi-asserted-by":"publisher","unstructured":"Abel, D.: A theory of state abstraction for reinforcement learning. In: Proceedings of the AAAI conference on artificial intelligence, vol. 33, pp. 9876\u20139877. (2019) https:\/\/doi.org\/10.1609\/aaai.v33i01.33019876","DOI":"10.1609\/aaai.v33i01.33019876"},{"key":"9663_CR70","unstructured":"Abel, D., Hershkowitz, D., Littman, M.: Near optimal behavior via approximate state abstraction. In: Proceedings of the international conference on machine learning, New York, New York, USA, Proceedings of Machine Learning Research, vol. 48, pp. 2915\u20132923. (2016)"},{"key":"9663_CR71","unstructured":"Li, L., Walsh, T.J., Littma, M.L.: Towards a unified theory of state abstraction for mdps. In: Proceedings of the international symposium on artificial intelligence and mathematics. (2006)"},{"key":"9663_CR72","unstructured":"Abel, D., Arumugam, D., Lehnert, L., Littman, M.: State abstractions for lifelong reinforcement learning. In: Proceedings of the international conference on machine learning, vol. 80, pp. 10\u201319. (2018)"},{"key":"9663_CR73","doi-asserted-by":"publisher","unstructured":"Valiant, L.G.: A theory of the learnable. In: Proceedings of the 16th annual ACM symposium on Theory of computing - STOC \u201984, (1984). https:\/\/doi.org\/10.1145\/800057.808710","DOI":"10.1145\/800057.808710"},{"key":"9663_CR74","unstructured":"Brockman, G., Cheung, V., Pettersson, L., Schneider, J., Schulman, J., Tang, J., Zaremba, W.: Openai gym. (2016). arXiv:1606.01540"},{"key":"9663_CR75","doi-asserted-by":"crossref","unstructured":"van Hasselt, H., Guez, A., Silver, D.: Deep reinforcement learning with double q-learning. In: Proceedings of the AAAI conference on artificial intelligence, (2016) https:\/\/www.aaai.org\/ocs\/index.php\/AAAI\/AAAI16\/paper\/view\/12389","DOI":"10.1609\/aaai.v30i1.10295"},{"key":"9663_CR76","doi-asserted-by":"publisher","unstructured":"Todorov, E., Erez, T., Tassa, Y.: Mujoco: a physics engine for model-based control. In: Proceedings of the 2012 IEEE\/RSJ international conference on intelligent robots and systems, pp. 5026\u20135033. (2012). https:\/\/doi.org\/10.1109\/IROS.2012.6386109","DOI":"10.1109\/IROS.2012.6386109"},{"key":"9663_CR77","doi-asserted-by":"publisher","unstructured":"Yao, H., Zhang, C., Wei, Y., Jiang, M., Wang, S., Huang, J., Chawla, N., Li, Z.: Graph few-shot learning via knowledge transfer. In: Proceedings of the AAAI conference on artificial intelligence, vol. 34, pp. 6656\u20136663. (2020). https:\/\/doi.org\/10.1609\/aaai.v34i04.6142","DOI":"10.1609\/aaai.v34i04.6142"},{"key":"9663_CR78","doi-asserted-by":"publisher","unstructured":"Zhang, C., Yao, H., Huang, C., Jiang, M., Li, Z., Chawla, NV.: Few-shot knowledge graph completion. In: Proceedings of the AAAI conference on artificial intelligence, vol. 34, pp. 3041\u20133048. (2020). https:\/\/doi.org\/10.1609\/aaai.v34i03.5698","DOI":"10.1609\/aaai.v34i03.5698"},{"issue":"2","key":"9663_CR79","doi-asserted-by":"publisher","first-page":"199","DOI":"10.1109\/tnn.2010.2091281","volume":"22","author":"SJ Pan","year":"2011","unstructured":"Pan, S.J., Tsang, I.W., Kwok, J.T., Yang, Q.: Domain adaptation via transfer component analysis. IEEE Trans. Neural Netw. 22(2), 199\u2013210 (2011). https:\/\/doi.org\/10.1109\/tnn.2010.2091281","journal-title":"IEEE Trans. Neural Netw."},{"issue":"1","key":"9663_CR80","first-page":"2096","volume":"17","author":"Y Ganin","year":"2016","unstructured":"Ganin, Y., Ustinova, E., Ajakan, H., Germain, P., Larochelle, H., Laviolette, F., Marchand, M., Lempitsky, V.: Domain-adversarial training of neural networks. J. Mach. Learn. Res. 17(1), 2096\u20132030 (2016)","journal-title":"J. Mach. Learn. Res."},{"key":"9663_CR81","unstructured":"Motiian, S., Jones, Q., Iranmanesh, S., Doretto, G.: Few-shot adversarial domain adaptation. In: Proceedings of the advances in neural information processing systems, vol. 30, pp. 6670\u20136680. (2017)"},{"key":"9663_CR82","doi-asserted-by":"publisher","unstructured":"Zou, H., Zhou, Y., Yang, J., Liu, H., Das, H.P., Spanos, C.J.: Consensus adversarial domain adaptation. Proc. AAAI Conf. Artif. Intell. 33(1), 5997\u20136004 (2019). https:\/\/doi.org\/10.1609\/aaai.v33i01.33015997","DOI":"10.1609\/aaai.v33i01.33015997"},{"key":"9663_CR83","unstructured":"Parisotto, E., Ba, J.L., Salakhutdinov, R.: Actor-mimic: Deep multitask and transfer reinforcement learning. In: Proceedings of the 4th international conference on learning representations, pp. 156\u2013171. (2016)"},{"key":"9663_CR84","doi-asserted-by":"crossref","unstructured":"Nguyen, T., Luu, T., Pham, T., Rakhimkul, S., Yoo, C.D.: Robust maml: prioritization task buffer with adaptive learning process for model-agnostic meta-learning. (2021). arXiv:2103.08233","DOI":"10.1109\/ICASSP39728.2021.9413446"},{"key":"9663_CR85","doi-asserted-by":"publisher","unstructured":"Bengio, Y., Louradour, J., Collobert, R., Weston, J.: Curriculum learning. In: Proceedings of the 26th annual international conference on machine learning, New York, NY, USA, ICML \u201909, pp. 41\u201348. (2009). https:\/\/doi.org\/10.1145\/1553374.1553380","DOI":"10.1145\/1553374.1553380"},{"key":"9663_CR86","doi-asserted-by":"crossref","unstructured":"Lecarpentier, E., Abel, D., Asadi, K., Jinnai, Y., Rachelson, E., Littman, M.L.: Lipschitz lifelong reinforcement learning. 35, 8270\u20138278 (2021)","DOI":"10.1609\/aaai.v35i9.17006"},{"key":"9663_CR87","doi-asserted-by":"crossref","unstructured":"Hester, T., Stone, P.: TEXPLORE: real-time sample-efficient reinforcement learning for\u00a0robots. Mach. Learn. 90(3), 385\u2013429 (2012). https:\/\/doi.org\/10.1007\/s10994-012-5322-7","DOI":"10.1007\/s10994-012-5322-7"},{"key":"9663_CR88","doi-asserted-by":"crossref","unstructured":"Abbeel, P., Coates, A., Quigley, M., Ng, A.: An application of reinforcement learning to aerobatic helicopter flight. In: Proceedings of the advances in neural information processing systems, vol. 19, pp. 1\u20138. (2007)","DOI":"10.7551\/mitpress\/7503.003.0006"},{"issue":"43","key":"9663_CR89","first-page":"1265","volume":"6","author":"G Shani","year":"2005","unstructured":"Shani, G., Heckerman, D., Brafman, R.I.: An mdp-based recommender system. J. Mach. Learn. Res. 6(43), 1265\u20131295 (2005)","journal-title":"J. Mach. Learn. Res."},{"key":"9663_CR90","doi-asserted-by":"publisher","unstructured":"Saleh, A., Jaques, N., Ghandeharioun, A., Shen, J., Picard, R.: Hierarchical reinforcement learning for open-domain dialog. In: Proceedings of the AAAI conference on artificial intelligence, vol.\u00a034, pp. 8741\u20138748. (2020). https:\/\/doi.org\/10.1609\/aaai.v34i05.6400","DOI":"10.1609\/aaai.v34i05.6400"},{"key":"9663_CR91","unstructured":"Sohn, S., Woo, H., Choi, J., Lee, H.: Meta reinforcement learning with autonomous inference of subtask dependencies. In: Proceedings of the international conference on learning representations, (2020)"}],"container-title":["Journal of Grid Computing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10723-023-09663-0.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10723-023-09663-0\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10723-023-09663-0.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,12,14]],"date-time":"2023-12-14T17:43:46Z","timestamp":1702575826000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10723-023-09663-0"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,6]]},"references-count":91,"journal-issue":{"issue":"2","published-print":{"date-parts":[[2023,6]]}},"alternative-id":["9663"],"URL":"https:\/\/doi.org\/10.1007\/s10723-023-09663-0","relation":{},"ISSN":["1570-7873","1572-9184"],"issn-type":[{"value":"1570-7873","type":"print"},{"value":"1572-9184","type":"electronic"}],"subject":[],"published":{"date-parts":[[2023,6]]},"assertion":[{"value":"9 November 2021","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"10 April 2023","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"5 June 2023","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare that they have no known competing financial interests or personal relationships that could have appeared to influence the work reported in this paper.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Competing Interest"}}],"article-number":"30"}}