{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,30]],"date-time":"2025-10-30T17:25:22Z","timestamp":1761845122983,"version":"3.37.3"},"reference-count":23,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","license":[{"start":{"date-parts":[[2019,1,1]],"date-time":"2019-01-01T00:00:00Z","timestamp":1546300800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/legalcode"}],"funder":[{"name":"National Key Research and Development Plan","award":["2016YFE0128700"],"award-info":[{"award-number":["2016YFE0128700"]}]},{"DOI":"10.13039\/501100003787","name":"Natural Science Foundation of Hebei Province","doi-asserted-by":"publisher","award":["E2017202270"],"award-info":[{"award-number":["E2017202270"]}],"id":[{"id":"10.13039\/501100003787","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Key Research and Development Plan of Hebei Province","award":["18211816D"],"award-info":[{"award-number":["18211816D"]}]},{"name":"National Key Research and Development Plan","award":["2017YFB1301002"],"award-info":[{"award-number":["2017YFB1301002"]}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Access"],"published-print":{"date-parts":[[2019]]},"DOI":"10.1109\/access.2019.2959876","type":"journal-article","created":{"date-parts":[[2019,12,17]],"date-time":"2019-12-17T00:34:11Z","timestamp":1576542851000},"page":"184457-184467","source":"Crossref","is-referenced-by-count":8,"title":["Enhanced Probabilistic Inference Algorithm Using Probabilistic Neural Networks for Learning Control"],"prefix":"10.1109","volume":"7","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-0840-5796","authenticated-orcid":false,"given":"Yang","family":"Li","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1335-2852","authenticated-orcid":false,"given":"Shijie","family":"Guo","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2840-9356","authenticated-orcid":false,"given":"Lishuang","family":"Zhu","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5667-8054","authenticated-orcid":false,"given":"Toshiharu","family":"Mukai","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2709-6148","authenticated-orcid":false,"given":"Zhongxue","family":"Gan","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref10","doi-asserted-by":"crossref","first-page":"65","DOI":"10.1016\/j.robot.2004.03.001","article-title":"Learning from demonstration","volume":"47","author":"schaal","year":"2004","journal-title":"Robot Automat System"},{"doi-asserted-by":"publisher","key":"ref11","DOI":"10.1016\/j.conb.2008.08.003"},{"doi-asserted-by":"publisher","key":"ref12","DOI":"10.1109\/TPAMI.2013.218"},{"doi-asserted-by":"publisher","key":"ref13","DOI":"10.1016\/j.neucom.2008.12.019"},{"key":"ref14","first-page":"154","article-title":"Bayes meets Bellman: The Gaussian process approach to temporal difference learning","author":"engel","year":"2003","journal-title":"Proc Int Conf Mach Learn"},{"key":"ref15","first-page":"751","article-title":"Gaussian processes in reinforcement learning","author":"rasmussen","year":"2004","journal-title":"Proc Adv Neural Inf Process 
Syst"},{"doi-asserted-by":"publisher","key":"ref16","DOI":"10.1007\/978-3-642-15939-8_30"},{"doi-asserted-by":"publisher","key":"ref17","DOI":"10.1109\/TCYB.2019.2903108"},{"doi-asserted-by":"publisher","key":"ref18","DOI":"10.1109\/9.333786"},{"key":"ref19","first-page":"1047","article-title":"Exploiting model uncertainty estimates for safe dynamic control learning","author":"schneider","year":"1997","journal-title":"Proc Adv Neural Inf Process Syst"},{"doi-asserted-by":"publisher","key":"ref4","DOI":"10.1109\/ROBOT.1997.606886"},{"year":"2016","author":"jaderberg","journal-title":"Reinforcement learning with unsupervised auxiliary tasks","key":"ref3"},{"doi-asserted-by":"publisher","key":"ref6","DOI":"10.1109\/9.580874"},{"year":"2013","author":"mnih","journal-title":"Playing atari with deep reinforcement learning","key":"ref5"},{"key":"ref8","article-title":"Improving PILCO with Bayesian neural network dynamics models","author":"gal","year":"2016","journal-title":"Int Conf Mach Learn"},{"year":"2015","author":"lillicrap","journal-title":"Continuous control with deep reinforcement learning","key":"ref7"},{"doi-asserted-by":"publisher","key":"ref2","DOI":"10.1038\/nature16961"},{"doi-asserted-by":"publisher","key":"ref1","DOI":"10.1038\/nature14236"},{"year":"2015","author":"stadie","journal-title":"Incentivizing Exploration In Reinforcement Learning With Deep Predictive Models","key":"ref9"},{"key":"ref20","first-page":"465","article-title":"PILCO: A model-based and data-efficient approach to policy search","author":"deisenroth","year":"2011","journal-title":"Proc Int Conf Mach Learn"},{"year":"2017","author":"mcallister","article-title":"Bayesian learning for data-efficient control","key":"ref22"},{"doi-asserted-by":"publisher","key":"ref21","DOI":"10.1016\/j.neucom.2009.06.009"},{"key":"ref23","article-title":"Reinforcement learning and the reward engineering principle","author":"dewey","year":"2014","journal-title":"AAAI Spring Symp"}],"container-title":["IEEE Access"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/6287639\/8600701\/08933143.pdf?arnumber=8933143","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2021,8,10]],"date-time":"2021-08-10T19:41:18Z","timestamp":1628624478000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/8933143\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019]]},"references-count":23,"URL":"https:\/\/doi.org\/10.1109\/access.2019.2959876","relation":{},"ISSN":["2169-3536"],"issn-type":[{"type":"electronic","value":"2169-3536"}],"subject":[],"published":{"date-parts":[[2019]]}}}