{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,16]],"date-time":"2025-03-16T04:12:18Z","timestamp":1742098338745,"version":"3.38.0"},"reference-count":24,"publisher":"Informa UK Limited","issue":"5","funder":[{"DOI":"10.13039\/501100001691","name":"JSPS KAKENHI","doi-asserted-by":"publisher","award":["JP21H04875"],"award-info":[{"award-number":["JP21H04875"]}],"id":[{"id":"10.13039\/501100001691","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Nihon Gakujutsu Shinkokai"}],"content-domain":{"domain":["www.tandfonline.com"],"crossmark-restriction":true},"short-container-title":["Advanced Robotics"],"published-print":{"date-parts":[[2025,3,4]]},"DOI":"10.1080\/01691864.2025.2468215","type":"journal-article","created":{"date-parts":[[2025,2,24]],"date-time":"2025-02-24T21:46:46Z","timestamp":1740433606000},"page":"259-272","update-policy":"https:\/\/doi.org\/10.1080\/tandf_crossmark_01","source":"Crossref","is-referenced-by-count":0,"title":["Deep reinforcement learning for static noisy state feedback control with reward estimation"],"prefix":"10.1080","volume":"39","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-2521-8589","authenticated-orcid":false,"given":"Ran","family":"Wang","sequence":"first","affiliation":[{"name":"Kyoto University","place":["Kyoto, Japan"]}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2963-2584","authenticated-orcid":false,"given":"Kenji","family":"Kashima","sequence":"additional","affiliation":[{"name":"Kyoto University","place":["Kyoto, Japan"]}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"301","published-online":{"date-parts":[[2025,2,24]]},"reference":[{"key":"e_1_3_4_2_1","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2022.3207346"},{"key":"e_1_3_4_3_1","doi-asserted-by":"publisher","DOI":"10.1145\/2382559.2382563"},{"key":"e_1_3_4_4_1","volume-title":"Modern control engineering","author":"Ogata K.","year":"2010","unstructured":"Ogata K. Modern control engineering. Upper Saddle River (NJ): Prentice Hall; 2010."},{"issue":"46","key":"e_1_3_4_5_1","first-page":"3736","article-title":"Kalman and extended kalman filters: concept, derivation and properties","volume":"43","author":"Ribeiro MI.","year":"2004","unstructured":"Ribeiro MI. Kalman and extended kalman filters: concept, derivation and properties. Inst Syst Robot. 2004;43(46):3736\u20133741.","journal-title":"Inst Syst Robot"},{"issue":"26","key":"e_1_3_4_6_1","first-page":"1","article-title":"POMDPs.jl: A framework for sequential decision making under uncertainty","volume":"18","author":"Egorov M","year":"2017","unstructured":"Egorov M, Sunberg ZN, Balaban E, et\u00a0al. POMDPs.jl: A framework for sequential decision making under uncertainty. J Mach Learn Res. 2017;18(26):1\u20135. Available from: http:\/\/jmlr.org\/papers\/v18\/16-300.html.","journal-title":"J Mach Learn Res"},{"key":"e_1_3_4_7_1","doi-asserted-by":"publisher","DOI":"10.1016\/B978-1-55860-377-6.50052-9"},{"key":"e_1_3_4_8_1","doi-asserted-by":"publisher","DOI":"10.1609\/icaps.v28i1.13882"},{"key":"e_1_3_4_9_1","first-page":"1639","volume-title":"29th Annual Conference on Learning Theory","author":"Azizzadenesheli K","year":"2016","unstructured":"Azizzadenesheli K, Lazaric A, Anandkumar A. Open problem: Approximate planning of POMDPs in the class of memoryless policies. 29th Annual Conference on Learning Theory. New York, USA: PMLR; 2016. p. 1639\u20131642. Available from: https:\/\/proceedings.mlr.press\/v49\/azizzadenesheli16b.html."},{"key":"e_1_3_4_10_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.ejor.2010.12.014"},{"key":"e_1_3_4_11_1","doi-asserted-by":"publisher","DOI":"10.1080\/01691864.2024.2309621"},{"key":"e_1_3_4_12_1","volume-title":"Multi-agent reinforcement learning: foundations and modern approaches","author":"Albrecht SV","year":"2024","unstructured":"Albrecht SV, Christianos F, Sch\u00e4fer L. Multi-agent reinforcement learning: foundations and modern approaches. Cambridge, MA; London: MIT Press; 2024. Available from: https:\/\/www.marl-book.com."},{"key":"e_1_3_4_13_1","first-page":"5175","volume-title":"Proceedings of Thirty Fifth Conference on Learning Theory","author":"Liu Q","year":"2022","unstructured":"Liu Q, Chung A, Szepesv\u00e1ri C, et al. When is partially observable reinforcement learning not scary? Proceedings of Thirty Fifth Conference on Learning Theory. London, UK: PMLR; 2022. p. 5175\u20135220. Available from: https:\/\/proceedings.mlr.press\/v178\/liu22f.html."},{"key":"e_1_3_4_14_1","doi-asserted-by":"publisher","DOI":"10.1080\/24709360.2017.1396742"},{"key":"e_1_3_4_15_1","unstructured":"Brockman G Cheung V Pettersson L et\u00a0al.OpenAI Gym; 2016. https:\/\/arxiv.org\/abs\/1606.01540."},{"key":"e_1_3_4_16_1","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2023.3270034"},{"key":"e_1_3_4_17_1","doi-asserted-by":"publisher","DOI":"10.2307\/2331932"},{"key":"e_1_3_4_18_1","volume-title":"Reinforcement learning: an introduction","author":"Sutton RS","year":"2018","unstructured":"Sutton RS, Barto AG. Reinforcement learning: an introduction. Cambridge, MA, USA: A Bradford Book; 2018."},{"key":"e_1_3_4_19_1","doi-asserted-by":"publisher","DOI":"10.1038\/nature14236"},{"key":"e_1_3_4_20_1","unstructured":"Schulman J Levine S Abbeel P et\u00a0al. Trust region policy optimization. In: Bach F Blei D editors. Proceedings of the 32nd International Conference on Machine Learning; Jul 07\u201309; Lille France: PMLR; 2015. p.\u00a01889\u20131897. (Proceedings of Machine Learning Research; Vol. 37). https:\/\/proceedings.mlr.press\/v37\/schulman15.html."},{"issue":"268","key":"e_1_3_4_21_1","first-page":"1","article-title":"Stable-baselines3: reliable reinforcement learning implementations","volume":"22","author":"Raffin A","year":"2021","unstructured":"Raffin A, Hill A, Gleave A, et\u00a0al. Stable-baselines3: reliable reinforcement learning implementations. J Mach Learn Res. 2021;22(268):1\u20138. Available from: http:\/\/jmlr.org\/papers\/v22\/20-1364.html","journal-title":"J Mach Learn Res"},{"key":"e_1_3_4_22_1","first-page":"1928","volume-title":"Proceedings of The 33rd International Conference on Machine Learning","author":"Mnih V","year":"2016","unstructured":"Mnih V, Badia AP, Mirza M, et al. Asynchronous methods for deep reinforcement learning. In: Balcan MF, Weinberger KQ, editors. Proceedings of The 33rd International Conference on Machine Learning. New York, USA: PMLR; 2016. p. 1928\u20131937. Available from: https:\/\/proceedings.mlr.press\/v48\/mniha16.html."},{"key":"e_1_3_4_23_1","unstructured":"Schulman J Wolski F Dhariwal P et\u00a0al. Proximal policy optimization algorithms. preprint 2017. arXiv:170706347."},{"key":"e_1_3_4_24_1","unstructured":"Lillicrap TP Hunt JJ Pritzel A et\u00a0al. Continuous control with deep reinforcement learning. preprint 2015. arXiv:150902971."},{"key":"e_1_3_4_25_1","first-page":"1861","volume-title":"Proceedings of the 35th International Conference on Machine Learning","author":"Haarnoja T","year":"2018","unstructured":"Haarnoja T, Zhou A, Abbeel P, et al.\u00a0Soft actor-critic: Off-policy maximum entropy deep reinforcement learning with a stochastic actor. In: Dy J, Krause A, editors. Proceedings of the 35th International Conference on Machine Learning. Stockholmsm\u00e4ssan, Stockholm: PMLR; 2018. p. 1861\u20131870. Available from: https:\/\/proceedings.mlr.press\/v80\/haarnoja18b.html."}],"container-title":["Advanced Robotics"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/www.tandfonline.com\/doi\/pdf\/10.1080\/01691864.2025.2468215","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,3,15]],"date-time":"2025-03-15T09:36:11Z","timestamp":1742031371000},"score":1,"resource":{"primary":{"URL":"https:\/\/www.tandfonline.com\/doi\/full\/10.1080\/01691864.2025.2468215"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,2,24]]},"references-count":24,"journal-issue":{"issue":"5","published-print":{"date-parts":[[2025,3,4]]}},"alternative-id":["10.1080\/01691864.2025.2468215"],"URL":"https:\/\/doi.org\/10.1080\/01691864.2025.2468215","relation":{},"ISSN":["0169-1864","1568-5535"],"issn-type":[{"type":"print","value":"0169-1864"},{"type":"electronic","value":"1568-5535"}],"subject":[],"published":{"date-parts":[[2025,2,24]]},"assertion":[{"value":"The publishing and review policy for this title is described in its Aims & Scope.","order":1,"name":"peerreview_statement","label":"Peer Review Statement"},{"value":"http:\/\/www.tandfonline.com\/action\/journalInformation?show=aimsScope&journalCode=tadr20","URL":"http:\/\/www.tandfonline.com\/action\/journalInformation?show=aimsScope&journalCode=tadr20","order":2,"name":"aims_and_scope_url","label":"Aim & Scope"},{"value":"2024-10-15","order":0,"name":"received","label":"Received","group":{"name":"publication_history","label":"Publication History"}},{"value":"2025-01-19","order":1,"name":"revised","label":"Revised","group":{"name":"publication_history","label":"Publication History"}},{"value":"2025-01-30","order":2,"name":"accepted","label":"Accepted","group":{"name":"publication_history","label":"Publication History"}},{"value":"2025-02-24","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}