{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,22]],"date-time":"2026-01-22T08:05:59Z","timestamp":1769069159665,"version":"3.49.0"},"reference-count":33,"publisher":"Informa UK Limited","issue":"6","funder":[{"DOI":"10.13039\/501100001691","name":"JSPS KAKENHI","doi-asserted-by":"publisher","award":["JP21H04875"],"award-info":[{"award-number":["JP21H04875"]}],"id":[{"id":"10.13039\/501100001691","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["www.tandfonline.com"],"crossmark-restriction":true},"short-container-title":["Advanced Robotics"],"published-print":{"date-parts":[[2024,3,18]]},"DOI":"10.1080\/01691864.2024.2309621","type":"journal-article","created":{"date-parts":[[2024,2,7]],"date-time":"2024-02-07T18:24:09Z","timestamp":1707330249000},"page":"398-409","update-policy":"https:\/\/doi.org\/10.1080\/tandf_crossmark_01","source":"Crossref","is-referenced-by-count":2,"title":["Density estimation based soft actor-critic: deep reinforcement learning for static output feedback control with measurement noise"],"prefix":"10.1080","volume":"38","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-2521-8589","authenticated-orcid":false,"given":"Ran","family":"Wang","sequence":"first","affiliation":[{"name":"Graduate School of Informatics, Kyoto University, Kyoto, Japan"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4965-2459","authenticated-orcid":false,"given":"Ye","family":"Tian","sequence":"additional","affiliation":[{"name":"Graduate School of Informatics, Kyoto University, Kyoto, Japan"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2963-2584","authenticated-orcid":false,"given":"Kenji","family":"Kashima","sequence":"additional","affiliation":[{"name":"Graduate School of Informatics, Kyoto University, Kyoto, Japan"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"301","published-online":{"date-parts":[[2024,2,7]]},"reference":[{"key":"e_1_3_3_2_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.neunet.2022.03.037"},{"key":"e_1_3_3_3_1","volume-title":"Reinforcement learning: an introduction","author":"Sutton RS","year":"2018","unstructured":"Sutton RS, Barto AG. Reinforcement learning: an introduction. Cambridge (MA): MIT Press; 2018."},{"key":"e_1_3_3_4_1","unstructured":"Lillicrap TP Hunt JJ Pritzel A et\u00a0al. Continuous control with deep reinforcement learning. Preprint arXiv:150902971. 2015."},{"key":"e_1_3_3_5_1","unstructured":"Fujimoto S Hoof H Meger D. Addressing function approximation error in actor-critic methods. International Conference on Machine Learning; 2018; p. 1582\u20131591."},{"key":"e_1_3_3_6_1","unstructured":"Schulman J Wolski F Dhariwal P et\u00a0al. Proximal policy optimization algorithms. Preprint arXiv:170706347. 2017."},{"key":"e_1_3_3_7_1","unstructured":"Haarnoja T Zhou A Abbeel P et\u00a0al. Soft actor-critic: off-policy maximum entropy deep reinforcement learning with a stochastic actor. International Conference on Machine Learning; PMLR; 2018; p. 1861\u20131870."},{"key":"e_1_3_3_8_1","unstructured":"Liu Q Chung A Szepesv\u00e1ri C et\u00a0al. When is partially observable reinforcement learning not scary?. Conference on Learning Theory; PMLR; 2022; p. 5175\u20135220."},{"issue":"26","key":"e_1_3_3_9_1","first-page":"1","article-title":"POMDPs.jl: a framework for sequential decision making under uncertainty","volume":"18","author":"Egorov M","year":"2017","unstructured":"Egorov M, Sunberg ZN, Balaban E, et\u00a0al. POMDPs.jl: a framework for sequential decision making under uncertainty. J Mach Learn Res. 2017;18(26):1\u20135. Available from: http:\/\/jmlr.org\/papers\/v18\/16-300.html.","journal-title":"J Mach Learn Res"},{"key":"e_1_3_3_10_1","doi-asserted-by":"crossref","unstructured":"Sunberg Z Kochenderfer M. Online algorithms for POMDPs with continuous state action and observation spaces. Proceedings of the International Conference on Automated Planning and Scheduling; 2018; Vol. 28. p. 259\u2013263.","DOI":"10.1609\/icaps.v28i1.13882"},{"key":"e_1_3_3_11_1","doi-asserted-by":"crossref","unstructured":"Takakura S Sato K. Structured output feedback control for linear quadratic regulator using policy gradient method. IEEE Transactions on Automatic Control; 2023.","DOI":"10.1109\/TAC.2023.3264176"},{"key":"e_1_3_3_12_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.conengprac.2022.105366"},{"key":"e_1_3_3_13_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.automatica.2022.110581"},{"key":"e_1_3_3_14_1","doi-asserted-by":"publisher","DOI":"10.1137\/20M1329858"},{"issue":"2","key":"e_1_3_3_15_1","first-page":"205","article-title":"Static output feedback controller design","volume":"37","author":"Vesel\u00e1\u017c\u015f V.","year":"2001","unstructured":"Vesel\u00e1\u017c\u015f V. Static output feedback controller design. Kybernetika. 2001;37(2):205\u2013221.","journal-title":"Kybernetika"},{"key":"e_1_3_3_16_1","first-page":"21024","article-title":"Robust deep reinforcement learning against adversarial perturbations on state observations","volume":"33","author":"Zhang H","year":"2020","unstructured":"Zhang H, Chen H, Xiao C, et\u00a0al. Robust deep reinforcement learning against adversarial perturbations on state observations. Adv Neural Inf Process Syst. 2020;33:21024\u201321037.","journal-title":"Adv Neural Inf Process Syst"},{"key":"e_1_3_3_17_1","doi-asserted-by":"publisher","DOI":"10.1145\/2382559.2382563"},{"key":"e_1_3_3_18_1","volume-title":"Introduction to probability, statistics and random processes","author":"Pishro-Nik H.","year":"2014","unstructured":"Pishro-Nik H. Introduction to probability, statistics and random processes. Cambridge (MA): Kappa Research, LLC; 2014."},{"key":"e_1_3_3_19_1","doi-asserted-by":"publisher","DOI":"10.1002\/wics.2019.11.issue-4"},{"key":"e_1_3_3_20_1","doi-asserted-by":"publisher","DOI":"10.1080\/24709360.2017.1396742"},{"key":"e_1_3_3_21_1","first-page":"2338","article-title":"Masked autoregressive flow for density estimation","volume":"30","author":"Papamakarios G","year":"2017","unstructured":"Papamakarios G, Pavlakou T, Murray I. Masked autoregressive flow for density estimation. Adv Neural Inf Process Syst. 2017;30:2338\u20132347.","journal-title":"Adv Neural Inf Process Syst"},{"key":"e_1_3_3_22_1","unstructured":"Germain M Gregor K Murray I et\u00a0al. Made: Masked autoencoder for distribution estimation. International Conference on Machine Learning; PMLR; 2015; p. 881\u2013889."},{"key":"e_1_3_3_23_1","unstructured":"Ioffe S Szegedy C. Batch normalization: accelerating deep network training by reducing internal covariate shift. International Conference on Machine Learning; PMLR; 2015; p. 448\u2013456."},{"key":"e_1_3_3_24_1","unstructured":"Sutton RS McAllester D Singh S et\u00a0al. Policy gradient methods for reinforcement learning with function approximation. In: Solla S Leen T M\u00fcller K editors. Advances in Neural Information Processing Systems; Vol. 12. Cambridge (MA): MIT Press; 1999."},{"key":"e_1_3_3_25_1","unstructured":"Silver D Lever G Heess N et\u00a0al. Deterministic policy gradient algorithms. International Conference on Machine Learning; PMLR; 2014; p. 387\u2013395."},{"key":"e_1_3_3_26_1","unstructured":"Haarnoja T Zhou A Hartikainen K et\u00a0al. Soft actor-critic algorithms and applications. Preprint arXiv:181205905. 2018."},{"key":"e_1_3_3_27_1","unstructured":"Gu S Holly E Lillicrap TP et\u00a0al. Deep reinforcement learning for robotic manipulation. Preprint arXiv:161000633. 2016;1:1."},{"key":"e_1_3_3_28_1","unstructured":"Rusu AA Ve\u010der\u00edk M Roth\u00f6rl T et\u00a0al. Sim-to-real robot learning from pixels with progressive nets. Conference on Robot Learning; PMLR; 2017; p. 262\u2013270."},{"key":"e_1_3_3_29_1","doi-asserted-by":"publisher","DOI":"10.1177\/0278364919887447"},{"key":"e_1_3_3_30_1","doi-asserted-by":"publisher","DOI":"10.5281\/zenodo.8127026"},{"key":"e_1_3_3_31_1","unstructured":"Coumans E Bai Y. Pybullet a python module for physics simulation for games robotics and machine learning [http:\/\/pybullet.org]; 2016\u20132021."},{"key":"e_1_3_3_32_1","unstructured":"Zhong J Gupta A Power T. Um-arm-lab\/pytorch_kinematics: v0.5.4 ; 2023."},{"issue":"268","key":"e_1_3_3_33_1","first-page":"1","article-title":"Stable-baselines3: reliable reinforcement learning implementations","volume":"22","author":"Raffin A","year":"2021","unstructured":"Raffin A, Hill A, Gleave A, et\u00a0al. Stable-baselines3: reliable reinforcement learning implementations. J Mach Learn Res. 2021;22(268):1\u20138. Available at http:\/\/jmlr.org\/papers\/v22\/20-1364.html.","journal-title":"J Mach Learn Res"},{"key":"e_1_3_3_34_1","unstructured":"Raffin A Kober J Stulp F. Smooth exploration for robotic reinforcement learning. Conference on Robot Learning; PMLR; 2022; p. 1634\u20131644."}],"container-title":["Advanced Robotics"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/www.tandfonline.com\/doi\/pdf\/10.1080\/01691864.2024.2309621","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,9,24]],"date-time":"2024-09-24T19:30:46Z","timestamp":1727206246000},"score":1,"resource":{"primary":{"URL":"https:\/\/www.tandfonline.com\/doi\/full\/10.1080\/01691864.2024.2309621"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,2,7]]},"references-count":33,"journal-issue":{"issue":"6","published-print":{"date-parts":[[2024,3,18]]}},"alternative-id":["10.1080\/01691864.2024.2309621"],"URL":"https:\/\/doi.org\/10.1080\/01691864.2024.2309621","relation":{},"ISSN":["0169-1864","1568-5535"],"issn-type":[{"value":"0169-1864","type":"print"},{"value":"1568-5535","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,2,7]]},"assertion":[{"value":"The publishing and review policy for this title is described in its Aims & Scope.","order":1,"name":"peerreview_statement","label":"Peer Review Statement"},{"value":"http:\/\/www.tandfonline.com\/action\/journalInformation?show=aimsScope&journalCode=tadr20","URL":"http:\/\/www.tandfonline.com\/action\/journalInformation?show=aimsScope&journalCode=tadr20","order":2,"name":"aims_and_scope_url","label":"Aim & Scope"},{"value":"2023-09-25","order":0,"name":"received","label":"Received","group":{"name":"publication_history","label":"Publication History"}},{"value":"2023-12-26","order":1,"name":"revised","label":"Revised","group":{"name":"publication_history","label":"Publication History"}},{"value":"2024-01-16","order":2,"name":"accepted","label":"Accepted","group":{"name":"publication_history","label":"Publication History"}},{"value":"2024-02-07","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}