{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,17]],"date-time":"2026-01-17T21:51:17Z","timestamp":1768686677393,"version":"3.49.0"},"reference-count":47,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"3","license":[{"start":{"date-parts":[[2022,3,1]],"date-time":"2022-03-01T00:00:00Z","timestamp":1646092800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/legalcode"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Neural Netw. Learning Syst."],"published-print":{"date-parts":[[2022,3]]},"DOI":"10.1109\/tnnls.2020.3041755","type":"journal-article","created":{"date-parts":[[2020,12,17]],"date-time":"2020-12-17T01:56:05Z","timestamp":1608170165000},"page":"1324-1337","source":"Crossref","is-referenced-by-count":50,"title":["Robust Stochastic Gradient Descent With Student-t Distribution Based First-Order Momentum"],"prefix":"10.1109","volume":"33","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-1518-2519","authenticated-orcid":false,"given":"Wendyam Eric Lionel","family":"Ilboudo","sequence":"first","affiliation":[{"name":"Division of Information Science, Nara Institute of Science and Technology, Nara, Japan"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3760-249X","authenticated-orcid":false,"given":"Taisuke","family":"Kobayashi","sequence":"additional","affiliation":[{"name":"Division of Information Science, Nara Institute of Science and Technology, Nara, Japan"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5149-5643","authenticated-orcid":false,"given":"Kenji","family":"Sugimoto","sequence":"additional","affiliation":[{"name":"Division of Information Science, Nara Institute of Science and Technology, Nara, 
Japan"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-7908-2604-3_16"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1214\/aoms\/1177729586"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1038\/nature14539"},{"key":"ref4","first-page":"9094","article-title":"Robot learning in homes: Improving generalization and reducing dataset bias","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Gupta"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2019.8793917"},{"key":"ref6","volume-title":"Reinforcement Learning: An Introduction","author":"Sutton","year":"2018"},{"key":"ref7","article-title":"Adam: A method for stochastic optimization","author":"Kingma","year":"2014","journal-title":"arXiv:1412.6980"},{"key":"ref8","article-title":"On the convergence of adam and beyond","author":"Reddi","year":"2019","journal-title":"arXiv:1904.09237"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1007\/s10994-019-05802-5"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1007\/978-0-387-45528-0"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1016\/0041-5553(64)90137-5"},{"key":"ref12","first-page":"543","article-title":"A method for unconstrained convex minimization problem with the rate of convergence $O(1\/k^{2})$","volume-title":"Proc. Doklady","volume":"269","author":"Nesterov"},{"key":"ref13","first-page":"2663","article-title":"A stochastic gradient method with an exponential convergence rate for finite training sets","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Roux"},{"key":"ref14","first-page":"315","article-title":"Accelerating stochastic gradient descent using predictive variance reduction","volume-title":"Proc. Adv. Neural Inf. Process. 
Syst.","author":"Johnson"},{"key":"ref15","first-page":"2121","article-title":"Adaptive subgradient methods for online learning and stochastic optimization","volume":"12","author":"Duchi","year":"2011","journal-title":"J. Mach. Learn. Res."},{"key":"ref16","article-title":"ADADELTA: An adaptive learning rate method","author":"Zeiler","year":"2012","journal-title":"arXiv:1212.5701"},{"issue":"2","key":"ref17","first-page":"26","article-title":"Lecture 6.5-RMSPROP: Divide the gradient by a running average of its recent magnitude","volume":"4","author":"Tieleman","year":"2012","journal-title":"Neural Netw. Mach. Learn."},{"key":"ref18","article-title":"Adaptive gradient methods with dynamic bound of learning rate","author":"Luo","year":"2019","journal-title":"arXiv:1902.09843"},{"key":"ref19","article-title":"Adaptive learning rates and parallelization for stochastic, sparse, non-smooth gradients","author":"Schaul","year":"2013","journal-title":"arXiv:1301.3764"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/IJCNN.2017.7965845"},{"key":"ref21","article-title":"Robust and adaptive online time series prediction with long short-term memory","volume":"2017","author":"Haimin","year":"2017","journal-title":"Comput. Intell. Neurosci."},{"key":"ref22","article-title":"Robust empirical mean estimators","author":"Lerasle","year":"2011","journal-title":"arXiv:1112.3914"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.3150\/14-BEJ645"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.4171\/jems\/937"},{"issue":"1","key":"ref25","first-page":"543","article-title":"Loss minimization and parameter estimation with heavy tails","volume":"17","author":"Hsu","year":"2016","journal-title":"J. Mach. Learn. 
Res."},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1214\/15-aos1350"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1145\/3308809.3308857"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1111\/rssb.12364"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1080\/03610929508831664"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1080\/03610926.2018.1445861"},{"key":"ref31","article-title":"Decoupled weight decay regularization","author":"Loshchilov","year":"2017","journal-title":"arXiv:1711.05101"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2019.2955777"},{"key":"ref33","article-title":"On the variance of the adaptive learning rate and beyond","author":"Liu","year":"2019","journal-title":"arXiv:1908.03265"},{"key":"ref34","article-title":"Closing the generalization gap of adaptive gradient methods in training deep neural networks","author":"Chen","year":"2018","journal-title":"arXiv:1806.06763"},{"key":"ref35","first-page":"9793","article-title":"Adaptive methods for nonconvex optimization","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Zaheer"},{"key":"ref36","article-title":"LaProp: Separating momentum and adaptivity in adam","author":"Ziyin","year":"2020","journal-title":"arXiv:2002.04839"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1007\/s10462-010-9156-z"},{"key":"ref38","first-page":"2217","article-title":"Understanding and improving convolutional neural networks via concatenated rectified linear units","volume-title":"Proc. Int. Conf. Mach. 
Learn.","author":"Shang"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"ref40","article-title":"Train faster, generalize better: Stability of stochastic gradient descent","author":"Hardt","year":"2015","journal-title":"arXiv:1509.01240"},{"key":"ref41","article-title":"Proximal policy optimization algorithms","author":"Schulman","year":"2017","journal-title":"arXiv:1707.06347"},{"key":"ref42","first-page":"1587","article-title":"Addressing function approximation error in actor-critic methods","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Fujimoto"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1016\/j.robot.2018.11.004"},{"key":"ref44","volume-title":"Pybullet, a Python Module for Physics Simulation for Games, Robotics and Machine Learning","author":"Coumans","year":"2016"},{"key":"ref45","article-title":"Rlpyt: A research code base for deep reinforcement learning in PyTorch","author":"Stooke","year":"2019","journal-title":"arXiv:1909.01500"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1038\/174270a0"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1080\/01621459.1958.10501454"}],"container-title":["IEEE Transactions on Neural Networks and Learning 
Systems"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/5962385\/9722951\/09296551.pdf?arnumber=9296551","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,1,9]],"date-time":"2024-01-09T23:36:01Z","timestamp":1704843361000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9296551\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,3]]},"references-count":47,"journal-issue":{"issue":"3"},"URL":"https:\/\/doi.org\/10.1109\/tnnls.2020.3041755","relation":{},"ISSN":["2162-237X","2162-2388"],"issn-type":[{"value":"2162-237X","type":"print"},{"value":"2162-2388","type":"electronic"}],"subject":[],"published":{"date-parts":[[2022,3]]}}}