{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,22]],"date-time":"2025-06-22T04:04:45Z","timestamp":1750565085780,"version":"3.41.0"},"reference-count":28,"publisher":"IEEE","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2017,5]]},"DOI":"10.23919\/acc.2017.7963531","type":"proceedings-article","created":{"date-parts":[[2017,7,10]],"date-time":"2017-07-10T21:39:58Z","timestamp":1499722798000},"page":"3764-3770","source":"Crossref","is-referenced-by-count":5,"title":["Belief space stochastic control under unknown dynamics"],"prefix":"10.23919","author":[{"given":"Yunpeng","family":"Pan","sequence":"first","affiliation":[]},{"given":"Kamil","family":"Saigol","sequence":"additional","affiliation":[]},{"given":"Evangelos A.","family":"Theodorou","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref10","first-page":"3137","article-title":"A generalized path integral control approach to reinforcement learning","volume":"11","author":"theodorou","year":"2010","journal-title":"The Journal of Machine Learning Research"},{"key":"ref11","first-page":"281","article-title":"Path integral policy improvement with covariance matrix adaptation","author":"stulp","year":"2012","journal-title":"Proc of the International Conference on Machine Learning (ICML)"},{"key":"ref12","first-page":"1628","article-title":"Path integral control by reproducing kernel hilbert space embedding","author":"rawlik","year":"2013","journal-title":"Proceedings of the Twenty-Third InternationalJoint Conference on Artificial Intelligence IJCAI'13"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/ADPRL.2014.7010616"},{"key":"ref14","doi-asserted-by":"crossref","first-page":"482","DOI":"10.1007\/978-3-662-44848-9_31","article-title":"Policy search for path integral control","author":"g\u00f3mez","year":"2014","journal-title":"Machine Learning and Knowledge Discovery in Databases"},{"key":"ref15","first-page":"2305","article-title":"Sample efficient path integral control under uncertainty","author":"pan","year":"2015","journal-title":"Advances in neural information processing systems"},{"key":"ref16","doi-asserted-by":"crossref","first-page":"119","DOI":"10.1002\/9781118453988.ch6","article-title":"Linearly solvable optimal control","author":"dvijotham","year":"2012","journal-title":"Reinforcement Learning and Approximate Dynamic Programming for Feedback Control"},{"key":"ref17","first-page":"1865","article-title":"Sparse spectrum gaussian process regression","volume":"99","author":"l\u00e1zaro-gredilla","year":"2010","journal-title":"The Journal of Machine Learning Research"},{"key":"ref18","first-page":"1177","article-title":"Random features for large-scale kernel machines","author":"rahimi","year":"2007","journal-title":"Advances in neural information processing systems"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1016\/j.neunet.2012.08.011"},{"key":"ref28","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1561\/2300000021","article-title":"A survey on policy search for robotics","volume":"2","author":"deisenroth","year":"2013","journal-title":"Foundations and Trends in Robotics"},{"journal-title":"Controlled Markov processes and viscosity solutions Applications of Mathematics","year":"1993","author":"fleming","key":"ref4"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/TRO.2012.2210294"},{"key":"ref3","first-page":"329","article-title":"Exit probabilities and optimal stochastic control","volume":"9","author":"fleming","year":"1971","journal-title":"Applied Math Optim"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1088\/1742-5468\/2005\/11\/P11011"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1103\/PhysRevLett.95.200201"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1103\/PhysRevE.91.032104"},{"key":"ref7","doi-asserted-by":"crossref","DOI":"10.1063\/1.2709596","article-title":"An introduction to stochastic control theory, path integrals and reinforcement learning","volume":"887","author":"kappen","year":"2007","journal-title":"AIP Conference Proceedings"},{"journal-title":"Handbook of Learning and Approximate Dynamic Programming","year":"2004","author":"barto","key":"ref2"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1073\/pnas.0710743106"},{"key":"ref1","first-page":"15","article-title":"Neuro-dynamic programming (optimization and neural computation series, 3)","volume":"7","author":"bertsekas","year":"1996","journal-title":"Athena Scientific"},{"journal-title":"Gaussian Processes for Machine Learning","year":"2006","author":"williams","key":"ref20"},{"journal-title":"Adaptive probabilistic trajectory optimization via efficient approximate inference","year":"2016","author":"pan","key":"ref22"},{"journal-title":"Fourier Analysis on Groups","year":"1962","author":"rudin","key":"ref21"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/CDC.2012.6426381"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2013.218"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1162\/106365601750190398"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.3390\/e17053352"}],"event":{"name":"2017 American Control Conference (ACC)","start":{"date-parts":[[2017,5,24]]},"location":"Seattle, WA, USA","end":{"date-parts":[[2017,5,26]]}},"container-title":["2017 American Control Conference (ACC)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/7951530\/7962914\/07963531.pdf?arnumber=7963531","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,21]],"date-time":"2025-06-21T20:26:31Z","timestamp":1750537591000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/7963531\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2017,5]]},"references-count":28,"URL":"https:\/\/doi.org\/10.23919\/acc.2017.7963531","relation":{},"subject":[],"published":{"date-parts":[[2017,5]]}}}