{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,4]],"date-time":"2025-06-04T04:23:24Z","timestamp":1749011004743,"version":"3.37.3"},"reference-count":67,"publisher":"Informa UK Limited","issue":"12","funder":[{"DOI":"10.13039\/501100009023","name":"Precursory Research for Embryonic Science and Technology","doi-asserted-by":"publisher","award":["JPMJPR20C3"],"award-info":[{"award-number":["JPMJPR20C3"]}],"id":[{"id":"10.13039\/501100009023","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["www.tandfonline.com"],"crossmark-restriction":true},"short-container-title":["Advanced Robotics"],"published-print":{"date-parts":[[2023,6,18]]},"DOI":"10.1080\/01691864.2023.2208634","type":"journal-article","created":{"date-parts":[[2023,5,13]],"date-time":"2023-05-13T18:08:32Z","timestamp":1684001312000},"page":"719-736","update-policy":"https:\/\/doi.org\/10.1080\/tandf_crossmark_01","source":"Crossref","is-referenced-by-count":4,"title":["Design of restricted normalizing flow towards arbitrary stochastic policy with computational efficiency"],"prefix":"10.1080","volume":"37","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-3760-249X","authenticated-orcid":false,"given":"Taisuke","family":"Kobayashi","sequence":"first","affiliation":[{"name":"Principles of Informatics Research Division, National Institute of Informatics, Tokyo, Japan"},{"name":"School of Multidisciplinary Sciences, Department of Informatics, The Graduate University for Advanced Studies (SOKENDAI), Tokyo, Japan"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5774-767X","authenticated-orcid":false,"given":"Takumi","family":"Aotani","sequence":"additional","affiliation":[{"name":"Department of Mechanical Engineering Informatics, School of Science and Technology, Meiji University, Kanagawa, Japan"}]}],"member":"301","published-online":{"date-parts":[[2023,5,13]]},"reference":[{"key":"CIT0001","doi-asserted-by":"publisher","DOI":"10.1109\/TCYB.2015.2412554"},{"key":"CIT0002","first-page":"2","volume":"4","author":"Kobayashi T","year":"2021","journal-title":"Adv Intell Syst"},{"key":"CIT0003","doi-asserted-by":"publisher","DOI":"10.1109\/TRO.2015.2426451"},{"key":"CIT0004","doi-asserted-by":"publisher","DOI":"10.1002\/rob.21887"},{"key":"CIT0005","doi-asserted-by":"publisher","DOI":"10.1016\/j.robot.2018.11.004"},{"issue":"30","key":"CIT0006","first-page":"1","volume":"22","author":"Kroemer O","year":"2021","journal-title":"J\u00a0Mach Learn Res"},{"volume-title":"Reinforcement learning: an introduction","year":"2018","author":"Sutton RS","key":"CIT0007"},{"key":"CIT0008","doi-asserted-by":"publisher","DOI":"10.1145\/3054912"},{"key":"CIT0009","doi-asserted-by":"publisher","DOI":"10.1038\/nature14539"},{"key":"CIT0010","doi-asserted-by":"publisher","DOI":"10.1214\/aoms\/1177729586"},{"key":"CIT0011","unstructured":"Kingma DP, Ba J. Adam: a method for stochastic optimization. arXiv preprint arXiv:14126980. 2014."},{"key":"CIT0012","doi-asserted-by":"publisher","DOI":"10.2139\/ssrn.4294401"},{"key":"CIT0013","first-page":"14","author":"Kakade SM.","year":"2001","journal-title":"Adv Neural Inf Process Syst"},{"key":"CIT0014","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2007.11.026"},{"key":"CIT0015","doi-asserted-by":"publisher","DOI":"10.1007\/s10489-019-01510-8"},{"key":"CIT0016","unstructured":"Chou PW, Maturana D, Scherer S. Improving stochastic policy gradients in continuous control with deep reinforcement learning using the beta distribution. In: International Conference on Machine Learning; PMLR; 2017. p.\u00a0834\u2013843."},{"key":"CIT0017","unstructured":"Haarnoja T, Tang H, Abbeel P, et\u00a0al. Reinforcement learning with deep energy-based policies. In: International Conference on Machine Learning; PMLR; 2017. p.\u00a01352\u20131361."},{"key":"CIT0018","doi-asserted-by":"publisher","DOI":"10.1162\/neco.1991.3.2.246"},{"key":"CIT0019","doi-asserted-by":"publisher","DOI":"10.1162\/neco.1993.5.2.305"},{"key":"CIT0020","unstructured":"Baram N, Tennenholtz G, Mannor S. Maximum entropy reinforcement learning with mixture policies. arXiv preprint arXiv:210310176. 2021."},{"key":"CIT0021","doi-asserted-by":"publisher","DOI":"10.1016\/j.neunet.2021.06.010"},{"issue":"57","key":"CIT0022","first-page":"1","volume":"22","author":"Papamakarios G","year":"2021","journal-title":"J\u00a0Mach Learn Res"},{"key":"CIT0023","first-page":"3362","volume":"33","author":"Teshima T","year":"2020","journal-title":"Adv Neural Inf Process Syst"},{"key":"CIT0024","unstructured":"Kong Z, Chaudhuri K. The expressive power of a class of normalizing flow models. In: International Conference on Artificial Intelligence and Statistics; PMLR; 2020. p.\u00a03599\u20133609."},{"key":"CIT0025","unstructured":"Ward PN, Smofsky A, Bose AJ. Improving exploration in soft-actor-critic with normalizing flows policies. arXiv preprint arXiv:190602771. 2019."},{"key":"CIT0026","unstructured":"Mazoure B, Doan T, Durand A, et\u00a0al. Leveraging exploration in off-policy algorithms via normalizing flows. In: Conference on Robot Learning; PMLR; 2020. p.\u00a0430\u2013444."},{"key":"CIT0027","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-64793-3_15"},{"key":"CIT0028","unstructured":"Gambardella A, Baydin AG, Torr PH. Transflow learning: repurposing flow models without retraining. arXiv preprint arXiv:191113270. 2019."},{"key":"CIT0029","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2018.8460487"},{"key":"CIT0030","doi-asserted-by":"publisher","DOI":"10.1145\/3447648"},{"key":"CIT0031","unstructured":"Dolatabadi HM, Erfani S, Leckie C. Invertible generative modeling using linear rational splines. In: International Conference on Artificial Intelligence and Statistics; PMLR; 2020. p.\u00a04236\u20134246."},{"key":"CIT0032","unstructured":"Dinh L, Sohl-Dickstein J, Bengio S. Density estimation using real nvp.\u00a0arXiv preprint arXiv:160508803. 2016."},{"key":"CIT0033","unstructured":"Brockman G, Cheung V, Pettersson L, et\u00a0al. Openai gym. arXiv preprint arXiv:160601540. 2016."},{"key":"CIT0034","unstructured":"Coumans E, Bai Y. Pybullet, a python module for physics simulation for games, robotics and machine learning. GitHub repository; 2016."},{"key":"CIT0035","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2011.5980457"},{"key":"CIT0036","doi-asserted-by":"publisher","DOI":"10.1038\/nature14236"},{"key":"CIT0037","volume":"32","author":"Janner M","year":"2019","journal-title":"Adv Neural Inf Process Syst"},{"key":"CIT0038","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2021.3125000"},{"key":"CIT0039","unstructured":"Mnih V, Badia AP, Mirza M, et\u00a0al. Asynchronous methods for deep reinforcement learning. In: International Conference on Machine Learning; PMLR; 2016. p.\u00a01928\u20131937."},{"key":"CIT0040","unstructured":"Haarnoja T, Zhou A, Abbeel P, et\u00a0al. Soft actor-critic: off-policy maximum entropy deep reinforcement learning with a stochastic actor. In: International Conference on Machine Learning; PMLR; 2018. p.\u00a01861\u20131870."},{"key":"CIT0041","unstructured":"Behrmann J, Vicol P, Wang KC, et\u00a0al. Understanding and mitigating exploding inverses in invertible neural networks. In: International Conference on Artificial Intelligence and Statistics; PMLR; 2021. p.\u00a01792\u20131800."},{"key":"CIT0042","doi-asserted-by":"publisher","DOI":"10.1016\/j.fss.2020.12.025"},{"key":"CIT0043","doi-asserted-by":"publisher","DOI":"10.1007\/s00032-008-0087-y"},{"key":"CIT0044","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA48506.2021.9560856"},{"key":"CIT0045","volume":"10","author":"Kobayashi T.","year":"2022","journal-title":"Results Control Optim"},{"key":"CIT0046","unstructured":"Schaul T, Quan J, Antonoglou I, et\u00a0al. Prioritized experience replay. arXiv preprint arXiv:151105952. 2015."},{"key":"CIT0047","doi-asserted-by":"publisher","DOI":"10.1016\/j.neunet.2020.12.023"},{"key":"CIT0048","doi-asserted-by":"publisher","DOI":"10.1007\/s10994-019-05788-0"},{"key":"CIT0049","unstructured":"Paszke A, Gross S, Chintala S, et\u00a0al. Automatic differentiation in pytorch. In: Advances in Neural Information Processing Systems Workshop; 2017."},{"key":"CIT0050","unstructured":"Ba JL, Kiros JR, Hinton GE. Layer normalization. arXiv preprint arXiv:160706450. 2016."},{"key":"CIT0051","first-page":"4026","volume":"29","author":"Osband I","year":"2016","journal-title":"Adv Neural Inf Process Syst"},{"key":"CIT0052","doi-asserted-by":"publisher","DOI":"10.1109\/SII46433.2020.9026303"},{"key":"CIT0053","unstructured":"Okada M, Taniguchi T. Variational inference MPC for bayesian model-based reinforcement learning. In: Conference on Robot Learning; PMLR; 2020. p.\u00a0258\u2013272."},{"key":"CIT0054","doi-asserted-by":"publisher","DOI":"10.1109\/IROS45743.2020.9340873"},{"key":"CIT0055","doi-asserted-by":"publisher","DOI":"10.15607\/RSS.2022.XVIII.027"},{"key":"CIT0056","unstructured":"Hafner D, Lillicrap T, Fischer I, et\u00a0al. Learning latent dynamics for planning from pixels. In: International Conference on Machine Learning; PMLR; 2019. p.\u00a02555\u20132565."},{"key":"CIT0057","doi-asserted-by":"crossref","unstructured":"Bhalodia R, Lee I, Elhabian S. dpvaes: fixing sample generation for regularized vaes. In: Proceedings of the Asian Conference on Computer Vision; 2020.","DOI":"10.1007\/978-3-030-69538-5_39"},{"key":"CIT0058","doi-asserted-by":"publisher","DOI":"10.1080\/01691864.2020.1844797"},{"key":"CIT0059","volume":"33","author":"Mohaghegh Dolatabadi H","year":"2020","journal-title":"Adv Neural Inf Process Syst"},{"key":"CIT0060","doi-asserted-by":"publisher","DOI":"10.1146\/annurev-control-042920-092451"},{"key":"CIT0061","unstructured":"Pinto L, Davidson J, Sukthankar R, et\u00a0al. Robust adversarial reinforcement learning. In: International Conference on Machine Learning; PMLR; 2017. p.\u00a02817\u20132826."},{"key":"CIT0062","unstructured":"Mattheakis M, Protopapas P, Sondak D, et\u00a0al. Physical symmetries embedded in neural networks. arXiv preprint arXiv:190408991. 2019."},{"key":"CIT0063","unstructured":"Barron JT. Squareplus: a softplus-like algebraic rectifier. arXiv preprint arXiv:211211687. 2021."},{"key":"CIT0064","doi-asserted-by":"publisher","DOI":"10.1016\/j.neunet.2017.12.012"},{"key":"CIT0065","first-page":"31","author":"Kanai S","year":"2018","journal-title":"Adv Neural Inf Process Syst"},{"key":"CIT0066","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2012.6386109"},{"key":"CIT0067","doi-asserted-by":"publisher","DOI":"10.1186\/s40648-022-00232-w"}],"container-title":["Advanced Robotics"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/www.tandfonline.com\/doi\/pdf\/10.1080\/01691864.2023.2208634","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,10,20]],"date-time":"2024-10-20T09:45:20Z","timestamp":1729417520000},"score":1,"resource":{"primary":{"URL":"https:\/\/www.tandfonline.com\/doi\/full\/10.1080\/01691864.2023.2208634"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,5,13]]},"references-count":67,"journal-issue":{"issue":"12","published-print":{"date-parts":[[2023,6,18]]}},"alternative-id":["10.1080\/01691864.2023.2208634"],"URL":"https:\/\/doi.org\/10.1080\/01691864.2023.2208634","relation":{},"ISSN":["0169-1864","1568-5535"],"issn-type":[{"type":"print","value":"0169-1864"},{"type":"electronic","value":"1568-5535"}],"subject":[],"published":{"date-parts":[[2023,5,13]]},"assertion":[{"value":"The publishing and review policy for this title is described in its Aims & Scope.","order":1,"name":"peerreview_statement","label":"Peer Review Statement"},{"value":"http:\/\/www.tandfonline.com\/action\/journalInformation?show=aimsScope&journalCode=tadr20","URL":"http:\/\/www.tandfonline.com\/action\/journalInformation?show=aimsScope&journalCode=tadr20","order":2,"name":"aims_and_scope_url","label":"Aim & Scope"},{"value":"2022-04-04","order":0,"name":"received","label":"Received","group":{"name":"publication_history","label":"Publication History"}},{"value":"2023-01-08","order":1,"name":"revised","label":"Revised","group":{"name":"publication_history","label":"Publication History"}},{"value":"2023-03-05","order":2,"name":"accepted","label":"Accepted","group":{"name":"publication_history","label":"Publication History"}},{"value":"2023-05-13","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}