{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,10,29]],"date-time":"2024-10-29T12:25:14Z","timestamp":1730204714754,"version":"3.28.0"},"reference-count":30,"publisher":"IEEE","license":[{"start":{"date-parts":[[2022,12,6]],"date-time":"2022-12-06T00:00:00Z","timestamp":1670284800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2022,12,6]],"date-time":"2022-12-06T00:00:00Z","timestamp":1670284800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2022,12,6]]},"DOI":"10.1109\/cdc51059.2022.9992486","type":"proceedings-article","created":{"date-parts":[[2023,1,10]],"date-time":"2023-01-10T19:26:56Z","timestamp":1673378816000},"page":"3359-3364","source":"Crossref","is-referenced-by-count":0,"title":["Sampling matters: SGD smoothing through importance sampling"],"prefix":"10.1109","author":[{"given":"Luca","family":"Zancato","sequence":"first","affiliation":[{"name":"University of Padova,Department of Information Engineering,Padova,Italy,35131"}]},{"given":"Alessandro","family":"Chiuso","sequence":"additional","affiliation":[{"name":"University of Padova,Department of Information Engineering,Padova,Italy,35131"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/ita.2018.8503149"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1090\/s0002-9947-1950-0051437-7"},{"key":"ref3","first-page":"316","article-title":"Exponentially many local minima for single neurons","volume-title":"NeurIPS 1996","author":"Auer","year":"1996"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1137\/16m1080173"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/ITA.2018.8503224"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1007\/BF02551274"},{"key":"ref7","first-page":"1019","article-title":"Sharp minima can generalize for deep nets","volume-title":"34th ICML","volume":"70","author":"Dinh"},{"article-title":"Essentially no barriers in neural network energy landscape","volume-title":"Proceedings of the 35th ICML","author":"Draxler","key":"ref8"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1137\/0710036"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1162\/neco.1997.9.1.1"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.4310\/amsa.2019.v4.n1.a1"},{"article-title":"Batch normalization: Accelerating deep network training by reducing internal covariate shift","volume-title":"Proceedings of the 32nd ICML","author":"Ioffe","key":"ref13"},{"article-title":"How to escape saddle points efficiently","volume-title":"Proceedings of the 34th ICML","author":"Jin","key":"ref14"},{"key":"ref15","article-title":"On the local minima of the empirical risk","author":"Jin","year":"2018","journal-title":"NeurIPS 31"},{"key":"ref16","article-title":"Not all samples are created equal: Deep learning with importance sampling","author":"Katharopoulos","year":"2018","journal-title":"ICML"},{"volume-title":"Learning multiple layers of features from tiny images","year":"2012","author":"Krizhevsky","key":"ref17"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1145\/3065386"},{"article-title":"Stochastic modified equations and adaptive stochastic gradient algorithms","volume-title":"Proceedings of the 34th ICML 2017","author":"Qianxiao","key":"ref19"},{"key":"ref20","article-title":"Visualizing the loss landscape of neural nets","author":"Li","year":"2018","journal-title":"NeurIPS"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1073\/pnas.1806579115"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1007\/s10444-004-7634-z"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1137\/1116025"},{"key":"ref24","article-title":"Theory II: landscape of the empirical risk in deep learning","author":"Poggio","year":"2017","journal-title":"CoRR"},{"key":"ref25","article-title":"Theory of deep learning III: explaining the non-overfitting puzzle","author":"Poggio","year":"2018","journal-title":"CoRR"},{"volume-title":"Monte Carlo Statistical Methods (Springer Texts in Statistics)","year":"2005","author":"Robert","key":"ref26"},{"key":"ref27","first-page":"1929","article-title":"Dropout: A simple way to prevent neural networks from overfitting","volume":"15","author":"Srivastava","year":"2014","journal-title":"JMLR"},{"volume-title":"Statistical Learning Theory","year":"1998","author":"Vapnik","key":"ref28"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1145\/3446776"},{"article-title":"Stochastic optimization with importance sampling for regularized loss minimization","volume-title":"Proceedings of the 32nd ICML, Proceedings of Machine Learning Research","author":"Zhao","key":"ref30"}],"event":{"name":"2022 IEEE 61st Conference on Decision and Control (CDC)","start":{"date-parts":[[2022,12,6]]},"location":"Cancun, Mexico","end":{"date-parts":[[2022,12,9]]}},"container-title":["2022 IEEE 61st Conference on Decision and Control (CDC)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9992315\/9992317\/09992486.pdf?arnumber=9992486","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,3,2]],"date-time":"2024-03-02T09:28:26Z","timestamp":1709371706000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9992486\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,12,6]]},"references-count":30,"URL":"https:\/\/doi.org\/10.1109\/cdc51059.2022.9992486","relation":{},"subject":[],"published":{"date-parts":[[2022,12,6]]}}}