{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,10]],"date-time":"2026-04-10T17:04:04Z","timestamp":1775840644673,"version":"3.50.1"},"reference-count":22,"publisher":"Springer Science and Business Media LLC","issue":"11","license":[{"start":{"date-parts":[[2020,7,9]],"date-time":"2020-07-09T00:00:00Z","timestamp":1594252800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2020,7,9]],"date-time":"2020-07-09T00:00:00Z","timestamp":1594252800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Appl Intell"],"published-print":{"date-parts":[[2020,11]]},"DOI":"10.1007\/s10489-020-01748-7","type":"journal-article","created":{"date-parts":[[2020,7,9]],"date-time":"2020-07-09T13:12:01Z","timestamp":1594300321000},"page":"4050-4062","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":17,"title":["Asynchronous framework with Reptile+ algorithm to meta learn partially observable Markov decision process"],"prefix":"10.1007","volume":"50","author":[{"given":"Dang Quang","family":"Nguyen","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ngo Anh","family":"Vien","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Viet-Hung","family":"Dang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"TaeChoong","family":"Chung","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2020,7,9]]},"reference":[{"key":"1748_CR1","unstructured":"Antoniou A, Edwards H, Storkey AJ (2018) How to train your MAML. CoRR arXiv:1810.09502"},{"key":"1748_CR2","unstructured":"Babaeizadeh M, Frosio I, Tyree S, Clemons J, Kautz J (2017) Reinforcement learning through asynchronous advantage actor-critic on a GPU. In: 5th international conference on learning representations, ICLR 2017, Toulon, France, April 24-26, 2017, Conference Track Proceedings"},{"key":"1748_CR3","unstructured":"Brockman G, Cheung V, Pettersson L, Schneider J, Schulman J, Tang J, Zaremba W (2016) Openai gym. CoRR arXiv:1606.01540"},{"key":"1748_CR4","unstructured":"Deisenroth MP, Rasmussen CE (2011) PILCO: a model-based and data-efficient approach to policy search. In: Proceedings of the 28th international conference on machine learning, ICML 2011, Bellevue, Washington, USA, June 28 - July 2, 2011, pp 465\u2013472"},{"key":"1748_CR5","unstructured":"Duan Y, Schulman J, Chen X, Bartlett PL, Sutskever I, Abbeel P (2016) Rl$\u02c62$: fast reinforcement learning via slow reinforcement learning. CoRR arXiv:1611.02779"},{"key":"1748_CR6","unstructured":"Espeholt L, Soyer H, Munos R, Simonyan K, Mnih V, Ward T, Doron Y, Firoiu V, Harley T, Dunning I, Legg S, Kavukcuoglu K (2018) IMPALA: scalable distributed deep-rl with importance weighted actor-learner architectures. In: Proceedings of the 35th international conference on machine learning, ICML 2018, Stockholmsm\u00e4ssan, Stockholm, Sweden, July 10-15, 2018, pp 1406\u20131415"},{"key":"1748_CR7","unstructured":"Finn C, Abbeel P, Levine S (2017) Model-agnostic meta-learning for fast adaptation of deep networks. In: Proceedings of the 34th international conference on machine learning, ICML 2017, Sydney, NSW, Australia, 6-11 August 2017 1126\u20131135"},{"key":"1748_CR8","unstructured":"Gruslys A, Dabney W, Azar MG, Piot B, Bellemare MG, Munos R (2018) The reactor: a fast and sample-efficient actor-critic agent for reinforcement learning. In: 6th international conference on learning representations, ICLR 2018, Vancouver, BC, Canada, April 30 - May 3, 2018, Conference Track Proceedings"},{"key":"1748_CR9","unstructured":"Hausknecht MJ, Stone P (2015) Deep recurrent q-learning for partially observable mdps. In: 2015 AAAI fall symposia, Arlington, Virginia, USA, November 12-14,2015, pp 29\u201337"},{"key":"1748_CR10","unstructured":"Heess N, Hunt JJ, Lillicrap TP, Silver D (2015) Memory-based control with recurrent neural networks. CoRR arXiv:1512.04455"},{"key":"1748_CR11","unstructured":"Kapturowski S, Ostrovski G, Dabney W, Munos . (2019) Recurrent experience replay in distributed reinforcement learning. In: ICLR 2019"},{"key":"1748_CR12","unstructured":"Levine S, Abbeel P (2014) Learning neural network policies with guided policy search under unknown dynamics. In: Advances in neural information processing systems 27: annual conference on neural information processing systems 2014, December 8-13 2014, Montreal, Quebec, Canada, pp 1071\u20131079"},{"key":"1748_CR13","unstructured":"Li Z, Zhou F, Chen F, Li H (2017) Meta-sgd: learning to learn quickly for few shot learning. CoRR arXiv:1707.09835"},{"key":"1748_CR14","unstructured":"Lillicrap TP, Hunt JJ, Pritzel A, Heess N, Erez T, Tassa Y, Silver D, Wierstra D (2016) Continuous control with deep reinforcement learning. In: 4th international conference on learning representations, ICLR 2016, San Juan, Puerto Rico, May 2-4, 2016, Conference Track Proceedings. arXiv:1509.02971"},{"key":"1748_CR15","unstructured":"Loshchilov I, Hutter F (2019) Decoupled weight decay regularization. In: International conference on learning representations"},{"key":"1748_CR16","unstructured":"Maron GB, Hoffman MW, Budden D, Dabney W, Horgan D, TB D, Muldal A, Heess N, Lillicrap TP (2018) Distributed distributional deterministic policy gradients. In: 6th international conference on learning representations, ICLR 2018, Vancouver, BC, Canada, April 30 - May 3, 2018, Conference Track Proceedings"},{"key":"1748_CR17","unstructured":"Mishra N, Rohaninejad M, Chen X, Abbeel P (2018) A simple neural attentive meta-learner. In: 6th international conference on learning representations, ICLR 2018, Vancouver, BC, Canada, April 30 - May 3, 2018, Conference Track Proceedings"},{"key":"1748_CR18","unstructured":"Mnih V, Badia AP, Mirza M, Graves A, Lillicrap TP, Harley T, Silver D, Kavukcuoglu K (2016) Asynchronous methods for deep reinforcement learning. In: Proceedings of the 33nd international conference on machine learning, ICML 2016, New York City, NY, USA, June 19-24, 2016 1928\u20131937"},{"key":"1748_CR19","unstructured":"Mnih V, Kavukcuoglu K, Silver D, Graves A, Antonoglou I, Wierstra D, Riedmiller MA (2013) Playing atari with deep reinforcement learning. CoRR arXiv:1312.5602"},{"key":"1748_CR20","unstructured":"Nichol A, Achiam J, Schulman J (2018) On first-order meta-learning algorithms. CoRR arXiv:1803.02999"},{"key":"1748_CR21","unstructured":"Silver D, Lever G, Heess N, Degris T, Wierstra D, Riedmiller MA (2014) Deterministic policy gradient algorithms. In: Proceedings of the 31th international conference on machine learning, ICML 2014, Beijing, China, 21-26 June 2014 387\u2013395"},{"key":"1748_CR22","unstructured":"Stadie BC, Yang G, Houthooft R, Chen X, Duan Y, Wu Y, Abbeel P, Sutskever I (2018) Some considerations on learning to explore via meta-reinforcement learning. CoRR arXiv:1803.01118"}],"container-title":["Applied Intelligence"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10489-020-01748-7.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10489-020-01748-7\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10489-020-01748-7.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2021,7,8]],"date-time":"2021-07-08T23:23:43Z","timestamp":1625786623000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10489-020-01748-7"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020,7,9]]},"references-count":22,"journal-issue":{"issue":"11","published-print":{"date-parts":[[2020,11]]}},"alternative-id":["1748"],"URL":"https:\/\/doi.org\/10.1007\/s10489-020-01748-7","relation":{},"ISSN":["0924-669X","1573-7497"],"issn-type":[{"value":"0924-669X","type":"print"},{"value":"1573-7497","type":"electronic"}],"subject":[],"published":{"date-parts":[[2020,7,9]]},"assertion":[{"value":"9 July 2020","order":1,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}