{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,9]],"date-time":"2026-04-09T08:19:25Z","timestamp":1775722765294,"version":"3.50.1"},"reference-count":40,"publisher":"Springer Science and Business Media LLC","issue":"12","license":[{"start":{"date-parts":[[2019,6,15]],"date-time":"2019-06-15T00:00:00Z","timestamp":1560556800000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2019,6,15]],"date-time":"2019-06-15T00:00:00Z","timestamp":1560556800000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Appl Intell"],"published-print":{"date-parts":[[2019,12]]},"DOI":"10.1007\/s10489-019-01510-8","type":"journal-article","created":{"date-parts":[[2019,6,15]],"date-time":"2019-06-15T05:20:27Z","timestamp":1560576027000},"page":"4335-4347","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":27,"title":["Student-t policy in reinforcement learning to acquire global optimum of robot control"],"prefix":"10.1007","volume":"49","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-3760-249X","authenticated-orcid":false,"given":"Taisuke","family":"Kobayashi","sequence":"first","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2019,6,15]]},"reference":[{"key":"1510_CR1","unstructured":"Achiam J, Held D, Tamar A, Abbeel P (2017) Constrained policy optimization. In: International conference on machine learning, pp 22\u201331"},{"key":"1510_CR2","doi-asserted-by":"crossref","unstructured":"Aeschliman C, Park J, Kak AC (2010) A novel parameter estimation algorithm for the multivariate t-distribution and its application to computer vision. In: European conference on computer vision, pp 594\u2013607. Springer","DOI":"10.1007\/978-3-642-15552-9_43"},{"issue":"2","key":"1510_CR3","doi-asserted-by":"publisher","first-page":"251","DOI":"10.1162\/089976698300017746","volume":"10","author":"SI Amari","year":"1998","unstructured":"Amari SI (1998) Natural gradient works efficiently in learning. Neural Comput 10(2):251\u2013276","journal-title":"Neural Comput"},{"issue":"3","key":"1510_CR4","doi-asserted-by":"publisher","first-page":"371","DOI":"10.1007\/BF03263545","volume":"68","author":"RB Arellano-Valle","year":"2010","unstructured":"Arellano-Valle RB (2010) On the information matrix of the multivariate skew-t model. Metron 68(3):371\u2013386","journal-title":"Metron"},{"issue":"5","key":"1510_CR5","doi-asserted-by":"publisher","first-page":"834","DOI":"10.1109\/TSMC.1983.6313077","volume":"13","author":"AG Barto","year":"1983","unstructured":"Barto AG, Sutton RS, Anderson CW (1983) Neuronlike adaptive elements that can solve difficult learning control problems. IEEE Trans Syst Man Cybern 13(5):834\u2013846","journal-title":"IEEE Trans Syst Man Cybern"},{"issue":"11","key":"1510_CR6","doi-asserted-by":"publisher","first-page":"3078","DOI":"10.1890\/04-1806","volume":"86","author":"F Bartumeus","year":"2005","unstructured":"Bartumeus F, da Luz ME, Viswanathan G, Catalan J (2005) Animal search strategies: A quantitative random-walk analysis. Ecology 86(11):3078\u20133087","journal-title":"Ecology"},{"key":"1510_CR7","unstructured":"Bellemare M, Srinivasan S, Ostrovski G, Schaul T, Saxton D, Munos R (2016) Unifying count-based exploration and intrinsic motivation. 
In: Advances in neural information processing systems, pp 1471\u20131479"},{"issue":"4","key":"1510_CR8","doi-asserted-by":"publisher","first-page":"803","DOI":"10.1016\/j.csda.2004.04.001","volume":"48","author":"L Canal","year":"2005","unstructured":"Canal L (2005) A normal approximation for the chi-square distribution. Comput Stat Data Anal 48(4):803\u2013808","journal-title":"Comput Stat Data Anal"},{"key":"1510_CR9","unstructured":"Chentanez N, Barto AG, Singh SP (2005) Intrinsically motivated reinforcement learning. In: Advances in neural information processing systems, pp 1281\u20131288"},{"key":"1510_CR10","unstructured":"Chou PW, Maturana D, Scherer S (2017) Improving stochastic policy gradients in continuous control with deep reinforcement learning using the beta distribution. In: International conference on machine learning, pp 834\u2013843"},{"key":"1510_CR11","doi-asserted-by":"publisher","first-page":"200","DOI":"10.1016\/j.physa.2013.10.035","volume":"395","author":"JE Contreras-Reyes","year":"2014","unstructured":"Contreras-Reyes JE (2014) Asymptotic form of the Kullback\u2013Leibler divergence for multivariate asymmetric heavy-tailed distributions. Physica A: Statistical Mechanics and its Applications 395:200\u2013208","journal-title":"Physica A: Statistical Mechanics and its Applications"},{"key":"1510_CR12","doi-asserted-by":"publisher","first-page":"13","DOI":"10.1016\/j.neunet.2017.06.007","volume":"94","author":"Y Cui","year":"2017","unstructured":"Cui Y, Matsubara T, Sugimoto K (2017) Kernel dynamic policy programming: Applicable reinforcement learning to robot systems with high dimensional states. Neural Netw 94:13\u201323","journal-title":"Neural Netw"},{"issue":"93","key":"1510_CR13","first-page":"1","volume":"17","author":"C Daniel","year":"2016","unstructured":"Daniel C, Neumann G, Kroemer O, Peters J (2016) Hierarchical relative entropy policy search. J Mach Learn Res 17(93):1\u201350","journal-title":"J Mach Learn Res"},{"key":"1510_CR14","unstructured":"Gu S, Lillicrap T, Turner RE, Ghahramani Z, Sch\u00f6lkopf B., Levine S (2017) Interpolated policy gradient: Merging on-policy and off-policy gradient estimation for deep reinforcement learning. In: Advances in neural information processing systems, pp 3849\u20133858"},{"key":"1510_CR15","unstructured":"Haarnoja T, Zhou A, Abbeel P, Levine S (2018) Soft actor-critic: Off-policy maximum entropy deep reinforcement learning with a stochastic actor. arXiv:\n1801.01290"},{"key":"1510_CR16","unstructured":"Heess N, Sriram S, Lemmon J, Merel J, Wayne G, Tassa Y, Erez T, Wang Z, Eslami A, Riedmiller M et al (2017) Emergence of locomotion behaviours in rich environments. arXiv:\n1707.02286"},{"key":"1510_CR17","unstructured":"Hirai K, Hirose M, Haikawa Y, Takenaka T (1998) The development of honda humanoid robot. In: IEEE international conference on robotics and automation, vol 2, pp 1321\u20131326. IEEE"},{"key":"1510_CR18","unstructured":"Houthooft R, Chen X, Duan Y, Schulman J, De Turck F, Abbeel P (2016) VIME: Variational information maximizing exploration. In: Advances in neural information processing systems, pp 1109\u20131117"},{"issue":"26","key":"1510_CR19","doi-asserted-by":"publisher","first-page":"eaau5872","DOI":"10.1126\/scirobotics.aau5872","volume":"4","author":"J Hwangbo","year":"2019","unstructured":"Hwangbo J, Lee J, Dosovitskiy A, Bellicoso D, Tsounis V, Koltun V, Hutter M (2019) Learning agile and dynamic motor skills for legged robots. 
Sci Robot 4(26):eaau5872","journal-title":"Sci Robot"},{"key":"1510_CR20","unstructured":"Kakade SM (2002) A natural policy gradient. In: Advances in neural information processing systems, pp 1531\u20131538"},{"key":"1510_CR21","unstructured":"Kingma D, Ba J (2015) Adam: A method for stochastic optimization. In: International conference for learning representations, pp 1\u201315"},{"issue":"3","key":"1510_CR22","doi-asserted-by":"publisher","first-page":"750","DOI":"10.1109\/TRO.2015.2426451","volume":"31","author":"T Kobayashi","year":"2015","unstructured":"Kobayashi T, Aoyama T, Sekiyama K, Fukuda T (2015) Selection algorithm for locomotion based on the evaluation of falling risk. IEEE Trans Robot 31(3):750\u2013765","journal-title":"IEEE Trans Robot"},{"issue":"408","key":"1510_CR23","first-page":"881","volume":"84","author":"KL Lange","year":"1989","unstructured":"Lange KL, Little RJ, Taylor JM (1989) Robust statistical modeling using the t distribution. J Am Stat Assoc 84(408):881\u2013896","journal-title":"J Am Stat Assoc"},{"key":"1510_CR24","unstructured":"Lillicrap TP, Hunt JJ, Pritzel A, Heess N, Erez T, Tassa Y, Silver D, Wierstra D (2015) Continuous control with deep reinforcement learning. arXiv:\n1509.02971"},{"key":"1510_CR25","first-page":"2579","volume":"9","author":"LVD Maaten","year":"2008","unstructured":"Maaten LVD, Hinton G (2008) Visualizing data using t-sne. J Mach Learn Res 9:2579\u20132605","journal-title":"J Mach Learn Res"},{"key":"1510_CR26","unstructured":"Mnih V, Badia AP, Mirza M, Graves A, Lillicrap T, Harley T, Silver D, Kavukcuoglu K (2016) Asynchronous methods for deep reinforcement learning. In: International conference on machine learning, pp 1928\u20131937"},{"issue":"7540","key":"1510_CR27","doi-asserted-by":"publisher","first-page":"529","DOI":"10.1038\/nature14236","volume":"518","author":"V Mnih","year":"2015","unstructured":"Mnih V, Kavukcuoglu K, Silver D, Rusu AA, Veness J, Bellemare MG, Graves A, Riedmiller M, Fidjeland AK, Ostrovski G et al (2015) Human-level control through deep reinforcement learning. Nature 518(7540):529\u2013533","journal-title":"Nature"},{"key":"1510_CR28","unstructured":"Ng AY, Harada D, Russell S (1999) Policy invariance under reward transformations: Theory and application to reward shaping. In: International conference on machine learning, vol 99, pp 278\u2013287"},{"key":"1510_CR29","doi-asserted-by":"crossref","unstructured":"Rohmer E, Singh SP, Freese M (2013) V-rep: A versatile and scalable robot simulation framework. In: IEEE\/RSJ international conference on intelligent robots and systems, pp 1321\u20131326. IEEE","DOI":"10.1109\/IROS.2013.6696520"},{"key":"1510_CR30","unstructured":"Schulman J, Moritz P, Levine S, Jordan M, Abbeel P (2016) High-dimensional continuous control using generalized advantage estimation. In: International conference for learning representations, pp 1\u201314"},{"key":"1510_CR31","unstructured":"Schulman J, Wolski F, Dhariwal P, Radford A, Klimov O (2017) Proximal policy optimization algorithms. arXiv:\n1707.06347"},{"key":"1510_CR32","unstructured":"Shah A, Wilson A, Ghahramani Z (2014) Student-t processes as alternatives to gaussian processes. In: Artificial intelligence and statistics, pp 877\u2013885"},{"key":"1510_CR33","unstructured":"Silver D, Lever G, Heess N, Degris T, Wierstra D, Riedmiller M (2014) Deterministic policy gradient algorithms. 
In: International conference on machine learning, pp 387\u2013395"},{"key":"1510_CR34","volume-title":"Reinforcement learning: An introduction","author":"RS Sutton","year":"1998","unstructured":"Sutton RS, Barto AG (1998) Reinforcement learning: An introduction. MIT Press, Cambridge"},{"key":"1510_CR35","doi-asserted-by":"publisher","first-page":"235","DOI":"10.1016\/j.neucom.2004.11.018","volume":"64","author":"M Svens\u00e9n","year":"2005","unstructured":"Svens\u00e9n M, Bishop CM (2005) Robust bayesian mixture modelling. Neurocomputing 64:235\u2013252","journal-title":"Neurocomputing"},{"key":"1510_CR36","unstructured":"Thomas P (2014) Bias in natural actor-critic algorithms. In: International conference on machine learning, pp 441\u2013448"},{"key":"1510_CR37","doi-asserted-by":"publisher","first-page":"72","DOI":"10.1016\/j.robot.2018.11.004","volume":"112","author":"Y Tsurumine","year":"2019","unstructured":"Tsurumine Y, Cui Y, Uchibe E, Matsubara T (2019) Deep reinforcement learning with smooth policy update: Application to robotic cloth manipulation. Robot Auton Syst 112:72\u201383","journal-title":"Robot Auton Syst"},{"issue":"145","key":"1510_CR38","first-page":"1","volume":"17","author":"H Van Seijen","year":"2016","unstructured":"Van Seijen H, Mahmood AR, Pilarski PM, Machado MC, Sutton RS (2016) True online temporal-difference learning. J Mach Learn Res 17(145):1\u201340","journal-title":"J Mach Learn Res"},{"issue":"3-4","key":"1510_CR39","doi-asserted-by":"publisher","first-page":"229","DOI":"10.1007\/BF00992696","volume":"8","author":"RJ Williams","year":"1992","unstructured":"Williams RJ (1992) Simple statistical gradient-following algorithms for connectionist reinforcement learning. Mach Learn 8(3-4):229\u2013256","journal-title":"Mach Learn"},{"issue":"2","key":"1510_CR40","doi-asserted-by":"publisher","first-page":"581","DOI":"10.1007\/s10489-018-1296-x","volume":"49","author":"X Zhao","year":"2019","unstructured":"Zhao X, Ding S, An Y, Jia W (2019) Applications of asynchronous deep reinforcement learning based on dynamic updating weights. 
Appl Intell 49(2):581\u2013591","journal-title":"Appl Intell"}],"container-title":["Applied Intelligence"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10489-019-01510-8.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s10489-019-01510-8\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10489-019-01510-8.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2020,6,13]],"date-time":"2020-06-13T23:28:10Z","timestamp":1592090890000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s10489-019-01510-8"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019,6,15]]},"references-count":40,"journal-issue":{"issue":"12","published-print":{"date-parts":[[2019,12]]}},"alternative-id":["1510"],"URL":"https:\/\/doi.org\/10.1007\/s10489-019-01510-8","relation":{},"ISSN":["0924-669X","1573-7497"],"issn-type":[{"value":"0924-669X","type":"print"},{"value":"1573-7497","type":"electronic"}],"subject":[],"published":{"date-parts":[[2019,6,15]]},"assertion":[{"value":"15 June 2019","order":1,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}
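
The record above is a raw response from the public Crossref REST API (message-type "work") for DOI 10.1007/s10489-019-01510-8. Below is a minimal sketch of how such a record can be fetched and its main fields read back, using only the Python standard library; the endpoint pattern and field names match the record above, while the contact address in the User-Agent header is a placeholder to replace with your own:

```python
import json
from urllib.request import Request, urlopen

DOI = "10.1007/s10489-019-01510-8"
URL = f"https://api.crossref.org/works/{DOI}"

# Crossref asks "polite" clients to identify themselves; the mailto is a placeholder.
req = Request(URL, headers={"User-Agent": "metadata-example/0.1 (mailto:you@example.org)"})
with urlopen(req, timeout=30) as resp:
    payload = json.load(resp)

# Top-level envelope: "status" and "message-type" wrap the actual work record.
assert payload["status"] == "ok" and payload["message-type"] == "work"
work = payload["message"]

print(work["title"][0])                               # article title
print(work["DOI"], "in", work["container-title"][0])  # DOI and journal
print("cited by:", work["is-referenced-by-count"])
for author in work.get("author", []):
    print(author.get("given", ""), author.get("family", ""), author.get("ORCID", ""))

# Each reference carries a "key"; "unstructured" holds the free-text citation,
# and "DOI" is present only when Crossref matched the reference to a record.
for ref in work.get("reference", [])[:5]:
    print(ref["key"], ref.get("DOI", "(unmatched)"), ref.get("unstructured", "")[:70])
```

Note that the record contains both "reference-count" and "references-count" (40 each); the former appears to be a deprecated alias retained for backward compatibility, so new code should prefer the latter.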