{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T04:18:10Z","timestamp":1750220290111,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":29,"publisher":"ACM","license":[{"start":{"date-parts":[[2022,4,25]],"date-time":"2022-04-25T00:00:00Z","timestamp":1650844800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/100000001","name":"National Science Foundation","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Defense University Research Instrumentation Program (DURIP), US"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2022,4,25]]},"DOI":"10.1145\/3477314.3507091","type":"proceedings-article","created":{"date-parts":[[2022,5,7]],"date-time":"2022-05-07T00:37:36Z","timestamp":1651883856000},"page":"748-757","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":1,"title":["Learning adaptive control in dynamic environments using reproducing kernel priors with bayesian policy gradients"],"prefix":"10.1145","author":[{"given":"Apan","family":"Dastider","sequence":"first","affiliation":[{"name":"University of Central Florida"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Sayyed Jaffar Ali","family":"Raza","sequence":"additional","affiliation":[{"name":"University of Central Florida"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Mingjie","family":"Lin","sequence":"additional","affiliation":[{"name":"University of Central Florida"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2022,5,6]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"crossref","first-page":"125","DOI":"10.1890\/15-1526.1","article-title":"Quantifying demographic uncertainty: Bayesian methods for integral projection models","volume":"86","author":"Elderd Bret D","year":"2016","unstructured":"Bret D Elderd and Tom EX Miller. 2016. Quantifying demographic uncertainty: Bayesian methods for integral projection models. Ecological Monographs 86, 1 (2016), 125--144.","journal-title":"Ecological Monographs"},{"key":"e_1_3_2_1_2_1","first-page":"457","article-title":"Bayesian policy gradient algorithms","volume":"19","author":"Engel Yaakov","year":"2007","unstructured":"Yaakov Engel and Mohammad Ghavamzadeh. 2007. Bayesian policy gradient algorithms. Advances in NIPS 19 (2007), 457.","journal-title":"Advances in NIPS"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1145\/1160633.1160762"},{"key":"e_1_3_2_1_4_1","first-page":"1","article-title":"Meta-learning shared strategies","volume":"2018","author":"Frans Kevin","year":"2018","unstructured":"Kevin Frans, Henry M Gunn, Jonathan Ho, Xi Chen, Pieter Abbeel, and John Schulman Openai. 2018. Meta-learning shared strategies: OpenAI. Iclr 2018 (2018), 1--11. https:\/\/s3-us-west-2.amazonaws.com\/openai-assets\/MLSH\/mlsh{_}paper.pdf","journal-title":"OpenAI. Iclr"},{"key":"e_1_3_2_1_5_1","first-page":"489","article-title":"Bayesian {Monte Carlo}","volume":"1","author":"Ghahramani Zoubin","year":"2002","unstructured":"Zoubin Ghahramani and Carl E Rasmussen. 2002. Bayesian {Monte Carlo}. Advances in NIPS 1 (2002), 489--496.","journal-title":"Advances in NIPS"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1145\/1273496.1273534"},{"key":"e_1_3_2_1_7_1","volume-title":"NIPS","volume":"17","author":"Ghavamzadeh Mohammad","year":"2016","unstructured":"Mohammad Ghavamzadeh, Yaakov Engel, and Michal Valko. 2016. Bayesian policy gradient. In NIPS, Vol. 17."},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1561\/1100000005"},{"key":"e_1_3_2_1_9_1","volume-title":"Q-prop: Sample-efficient policy gradient with an off-policy critic. arXiv preprint arXiv:1611.02247","author":"Gu Shixiang","year":"2016","unstructured":"Shixiang Gu, Timothy Lillicrap, Zoubin Ghahramani, Richard E Turner, and Sergey Levine. 2016. Q-prop: Sample-efficient policy gradient with an off-policy critic. arXiv preprint arXiv:1611.02247 (2016)."},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1177\/0278364920987859"},{"key":"e_1_3_2_1_11_1","unstructured":"Beomjoon Kim and Joelle Pineau. 2013. Maximum Mean Discrepancy Imitation Learning.. In Robotics: Science and systems."},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1007\/s10514-006-9009-4"},{"key":"e_1_3_2_1_13_1","volume-title":"Proceedings of the Eighteenth International Conference on Artificial Intelligence and Statistics (Proceedings of Machine Learning Research), Guy Lebanon and S. V. N. Vishwanathan (Eds.)","volume":"38","author":"Lever Guy","year":"2015","unstructured":"Guy Lever and Ronnie Stafford. 2015. Modelling Policies in MDPs in Reproducing Kernel Hilbert Space. In Proceedings of the Eighteenth International Conference on Artificial Intelligence and Statistics (Proceedings of Machine Learning Research), Guy Lebanon and S. V. N. Vishwanathan (Eds.), Vol. 38. PMLR, San Diego, California, USA, 590--598. http:\/\/proceedings.mlr.press\/v38\/lever15.html"},{"key":"e_1_3_2_1_14_1","unstructured":"G. Lever and Ronnie Stafford. 2015. Modelling Policies in MDPs in Reproducing Kernel Hilbert Space. In AISTATS."},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.11718"},{"key":"e_1_3_2_1_16_1","volume-title":"Article arXiv:2108.02425 (Aug.","author":"Li Yiming","year":"2021","unstructured":"Yiming Li, Tao Kong, Ruihang Chu, Yifeng Li, Peng Wang, and Lei Li. 2021. Simultaneous Semantic and Collision Learning for 6-DoF Grasp Pose Estimation. arXiv e-prints, Article arXiv:2108.02425 (Aug. 2021), arXiv:2108.02425 pages. arXiv:cs.RO\/2108.02425"},{"key":"e_1_3_2_1_17_1","volume-title":"4th International Conference on Learning Representations (ICLR 2016)","author":"Lillicrap Timothy P.","year":"2016","unstructured":"Timothy P. Lillicrap, Jonathan J. Hunt, Alexander Pritzel, Nicolas Heess, Tom Erez, Yuval Tassa, David Silver, and Daan Wierstra. 2016. Continuous control with deep reinforcement learning. 4th International Conference on Learning Representations (ICLR 2016) (2016). arXiv:1509.02971v6 http:\/\/arxiv.org\/abs\/1509.02971v6"},{"key":"e_1_3_2_1_18_1","volume-title":"Kernel mean embedding of distributions: A review and beyond. arXiv preprint arXiv:1605.09522","author":"Muandet Krikamol","year":"2016","unstructured":"Krikamol Muandet, Kenji Fukumizu, Bharath Sriperumbudur, and Bernhard Sch\u00f6lkopf. 2016. Kernel mean embedding of distributions: A review and beyond. arXiv preprint arXiv:1605.09522 (2016)."},{"key":"e_1_3_2_1_19_1","volume-title":"Monte Carlo is fundamentally unsound. The Statistician","author":"O'Hagan Anthony","year":"1987","unstructured":"Anthony O'Hagan. 1987. Monte Carlo is fundamentally unsound. The Statistician (1987), 247--249."},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1016\/0378-3758(91)90002-V"},{"key":"e_1_3_2_1_21_1","volume-title":"Kernel-based reinforcement learning. Machine learning 49, 2","author":"Ormoneit Dirk","year":"2002","unstructured":"Dirk Ormoneit and \u015aaunak Sen. 2002. Kernel-based reinforcement learning. Machine learning 49, 2 (2002), 161--178."},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"crossref","unstructured":"Kei Ota Devesh K. Jha Tomoaki Oiki Mamoru Miura Takashi Nammoto Daniel Nikovski and Toshisada Mariyama. 2019. Trajectory Optimization for Unknown Constrained Systems using Reinforcement Learning. (2019) 3487--3494. arXiv:1903.05751 http:\/\/arxiv.org\/abs\/1903.05751","DOI":"10.1109\/IROS40897.2019.8968010"},{"key":"e_1_3_2_1_23_1","volume-title":"Juan Andres Bazerque, and Alejandro Ribeiro","author":"Paternain Santiago","year":"2020","unstructured":"Santiago Paternain, Juan Andres Bazerque, and Alejandro Ribeiro. 2020. Policy Gradient for Continuing Tasks in Non-stationary Markov Decision Processes. arXiv:cs.LG\/2010.08443 https:\/\/arxiv.org\/abs\/2010.08443"},{"key":"e_1_3_2_1_24_1","volume-title":"Juan Andres Bazerque, and Alejandro Ribeiro","author":"Paternain Santiago","year":"2021","unstructured":"Santiago Paternain, Juan Andres Bazerque, and Alejandro Ribeiro. 2021. Stochastic policy gradient ascent in reproducing kernel hilbert spaces. Vol. 66. Transactions on Automatic Control. https:\/\/arxiv.org\/abs\/2010.08443"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1109\/CASE49439.2021.9551472"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1109\/IROS40897.2019.8968452"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2019.8793824"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1007\/s10462-017-9579-x"},{"volume-title":"Random gradient-free minimization of convex functions","author":"Nesterov V. Spokoiny Y.","key":"e_1_3_2_1_29_1","unstructured":"V. Spokoiny Y. Nesterov. 2017. Random gradient-free minimization of convex functions. Vol. 17 no. 2. Foundations of Computational Mathematics. 527--566 pages."}],"event":{"name":"SAC '22: The 37th ACM\/SIGAPP Symposium on Applied Computing","sponsor":["SIGAPP ACM Special Interest Group on Applied Computing"],"location":"Virtual Event","acronym":"SAC '22"},"container-title":["Proceedings of the 37th ACM\/SIGAPP Symposium on Applied Computing"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3477314.3507091","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3477314.3507091","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3477314.3507091","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T19:31:28Z","timestamp":1750188688000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3477314.3507091"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,4,25]]},"references-count":29,"alternative-id":["10.1145\/3477314.3507091","10.1145\/3477314"],"URL":"https:\/\/doi.org\/10.1145\/3477314.3507091","relation":{},"subject":[],"published":{"date-parts":[[2022,4,25]]},"assertion":[{"value":"2022-05-06","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}