{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,7,16]],"date-time":"2026-07-16T14:08:14Z","timestamp":1784210894573,"version":"3.55.0"},"reference-count":33,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"1","license":[{"start":{"date-parts":[[2019,1,1]],"date-time":"2019-01-01T00:00:00Z","timestamp":1546300800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2019,1,1]],"date-time":"2019-01-01T00:00:00Z","timestamp":1546300800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2019,1,1]],"date-time":"2019-01-01T00:00:00Z","timestamp":1546300800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["U1613213"],"award-info":[{"award-number":["U1613213"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61375005"],"award-info":[{"award-number":["61375005"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61503383"],"award-info":[{"award-number":["61503383"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61210009"],"award-info":[{"award-number":["61210009"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61627808"],"award-info":[{"award-number":["61627808"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["91648205"],"award-info":[{"award-number":["91648205"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61702516"],"award-info":[{"award-number":["61702516"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61473236"],"award-info":[{"award-number":["61473236"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"name":"National Key Research and Development Plan of China","award":["2017YFB1300202"],"award-info":[{"award-number":["2017YFB1300202"]}]},{"name":"National Key Research and Development Plan of China","award":["2016YFC0300801"],"award-info":[{"award-number":["2016YFC0300801"]}]},{"DOI":"10.13039\/501100002855","name":"Ministry of Science and Technology of the People's Republic of China","doi-asserted-by":"publisher","award":["2015BAK35B00"],"award-info":[{"award-number":["2015BAK35B00"]}],"id":[{"id":"10.13039\/501100002855","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100002855","name":"Ministry of Science and Technology of the People's Republic of China","doi-asserted-by":"publisher","award":["2015BAK35B01"],"award-info":[{"award-number":["2015BAK35B01"]}],"id":[{"id":"10.13039\/501100002855","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100007162","name":"Guangdong Science and Technology Department","doi-asserted-by":"publisher","award":["2016B090910001"],"award-info":[{"award-number":["2016B090910001"]}],"id":[{"id":"10.13039\/501100007162","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Suzhou Science and Technology Program","award":["SYG201712"],"award-info":[{"award-number":["SYG201712"]}]},{"name":"Suzhou Science and Technology Program","award":["SZS201613"],"award-info":[{"award-number":["SZS201613"]}]},{"name":"Strategic Priority Research Program of the Chinese Academy of Science","award":["XDB02080003"],"award-info":[{"award-number":["XDB02080003"]}]},{"name":"Key Program Special Fund in XJTLU","award":["KSF-A-01"],"award-info":[{"award-number":["KSF-A-01"]}]},{"DOI":"10.13039\/501100000266","name":"Engineering and Physical Sciences Research Council","doi-asserted-by":"publisher","award":["EP\/M026981\/1"],"award-info":[{"award-number":["EP\/M026981\/1"]}],"id":[{"id":"10.13039\/501100000266","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Syst. Man Cybern, Syst."],"published-print":{"date-parts":[[2019,1]]},"DOI":"10.1109\/tsmc.2018.2800040","type":"journal-article","created":{"date-parts":[[2018,2,19]],"date-time":"2018-02-19T23:12:17Z","timestamp":1519081937000},"page":"216-226","source":"Crossref","is-referenced-by-count":35,"title":["Guided Policy Search for Sequential Multitask Learning"],"prefix":"10.1109","volume":"49","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-2592-4096","authenticated-orcid":false,"given":"Fangzhou","family":"Xiong","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6677-8673","authenticated-orcid":false,"given":"Biao","family":"Sun","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0553-4581","authenticated-orcid":false,"given":"Xu","family":"Yang","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6384-3687","authenticated-orcid":false,"given":"Hong","family":"Qiao","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3034-9639","authenticated-orcid":false,"given":"Kaizhu","family":"Huang","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8080-082X","authenticated-orcid":false,"given":"Amir","family":"Hussain","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2148-1846","authenticated-orcid":false,"given":"Zhiyong","family":"Liu","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"263","reference":[{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2012.6386109"},{"key":"ref32","first-page":"3303","article-title":"A new upper bound for Kullback&#x2013;Leibler divergence","volume":"5","author":"sayyareh","year":"2011","journal-title":"Appl Math Sci"},{"key":"ref31","article-title":"Revisiting natural gradient for deep networks","author":"pascanu","year":"2014","journal-title":"Proc Int Conf Learn Represent"},{"key":"ref30","first-page":"1889","article-title":"Trust region policy optimization","author":"schulman","year":"2015","journal-title":"Proc Int Conf Mach Learn (ICML)"},{"key":"ref10","first-page":"4008","article-title":"Guided policy search via approximate mirror descent","author":"montgomery","year":"2016","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2017.7989384"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/TSMCC.2011.2106494"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/ACC.2016.7525525"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/TSMC.2016.2608969"},{"key":"ref15","article-title":"Learning from delayed rewards","author":"watkins","year":"1989"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1007\/978-0-585-33656-5_7"},{"key":"ref17","first-page":"720","article-title":"Probabilistic policy reuse in a reinforcement learning agent","author":"fern\u00e1ndez","year":"2006","journal-title":"Proc Int Joint Conf Autonomous Agents and Multiagent Systems"},{"key":"ref18","first-page":"1206","article-title":"Online multi-task learning for policy gradient methods","author":"ammar","year":"2014","journal-title":"Proc Int Conf Mach Learn (ICML)"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1007\/s10994-013-5379-y"},{"key":"ref28","first-page":"1613","article-title":"Large scale online kernel learning","volume":"17","author":"lu","year":"2016","journal-title":"J Mach Learn Res"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2011.6095096"},{"key":"ref27","article-title":"Continuous control with deep reinforcement learning","author":"lillicrap","year":"2016","journal-title":"Proc Int Conf Learn Represent"},{"key":"ref3","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1561\/2300000021","article-title":"A survey on policy search for robotics","volume":"2","author":"deisenroth","year":"2013","journal-title":"Foundations and Trends in Robotics"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1177\/0278364907084980"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1007\/s11023-007-9079-x"},{"key":"ref5","first-page":"1","article-title":"End-to-end training of deep visuomotor policies","volume":"17","author":"levine","year":"2016","journal-title":"J Mach Learn Res"},{"key":"ref8","first-page":"207","article-title":"Variational policy search via trajectory optimization","author":"levine","year":"2013","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref7","first-page":"1","article-title":"Guided policy search","author":"levine","year":"2013","journal-title":"Proc Int Conf Mach Learn (ICML)"},{"key":"ref2","volume":"1","author":"sutton","year":"1998","journal-title":"Reinforcement Learning An Introduction"},{"key":"ref9","first-page":"1071","article-title":"Learning neural network policies with guided policy search under unknown dynamics","author":"levine","year":"2014","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1177\/0278364913495721"},{"key":"ref20","first-page":"643","article-title":"Online learning of multiple tasks and their relationships","author":"saha","year":"2011","journal-title":"Proc 14th Int Conf Artificial Intell"},{"key":"ref22","first-page":"1","article-title":"End-to-end lifelong learning: A framework to achieve plasticities of both the feature and classifier constructions","author":"hao","year":"2017","journal-title":"Cognitive Computation"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4615-5529-2_8"},{"key":"ref24","first-page":"507","article-title":"ELLA: An efficient lifelong learning algorithm","author":"ruvolo","year":"2013","journal-title":"Proc Int Conf Mach Learn"},{"key":"ref23","first-page":"1306","article-title":"Toward an architecture for never-ending language learning","volume":"5","author":"carlson","year":"2010","journal-title":"Proc Assoc Adv Artif Intell"},{"key":"ref26","doi-asserted-by":"crossref","first-page":"3521","DOI":"10.1073\/pnas.1611835114","article-title":"Overcoming catastrophic forgetting in neural networks","volume":"113","author":"kirkpatrick","year":"2017","journal-title":"Proc Nat Acad Sci USA"},{"key":"ref25","first-page":"614","article-title":"Learning without forgetting","author":"li","year":"2016","journal-title":"Proc Eur Conf Comput Vis"}],"container-title":["IEEE Transactions on Systems, Man, and Cybernetics: Systems"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/6221021\/8577049\/08294227.pdf?arnumber=8294227","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,7,13]],"date-time":"2022-07-13T21:06:53Z","timestamp":1657746413000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/8294227\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019,1]]},"references-count":33,"journal-issue":{"issue":"1"},"URL":"https:\/\/doi.org\/10.1109\/tsmc.2018.2800040","relation":{},"ISSN":["2168-2216","2168-2232"],"issn-type":[{"value":"2168-2216","type":"print"},{"value":"2168-2232","type":"electronic"}],"subject":[],"published":{"date-parts":[[2019,1]]}}}