{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,4]],"date-time":"2025-06-04T06:06:21Z","timestamp":1749017181526,"version":"3.28.0"},"reference-count":32,"publisher":"IEEE","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2016,12]]},"DOI":"10.1109\/icdm.2016.0022","type":"proceedings-article","created":{"date-parts":[[2017,2,7]],"date-time":"2017-02-07T15:39:50Z","timestamp":1486481990000},"page":"111-120","source":"Crossref","is-referenced-by-count":19,"title":["Efficient Distributed SGD with Variance Reduction"],"prefix":"10.1109","author":[{"given":"Soham","family":"De","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Tom","family":"Goldstein","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref32","volume":"87","author":"nesterov","year":"2013","journal-title":"Introductory Lectures on Convex Optimization A Basic Course"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1038\/ncomms5308"},{"key":"ref30","article-title":"The million song dataset","author":"bertin-mahieux","year":"2011","journal-title":"Proceedings of the 12th International Conference on Music Information Retrieval (ISMIR 2011)"},{"key":"ref10","first-page":"2595","article-title":"Parallelized stochastic gradient descent","author":"zinkevich","year":"2010","journal-title":"Advances in neural information processing systems"},{"key":"ref11","first-page":"685","article-title":"Deep learning with elastic averaging sgd","author":"zhang","year":"2015","journal-title":"Advances in neural information processing systems"},{"key":"ref12","first-page":"1","article-title":"Dsa: Decentralized double stochastic averaging gradient algorithm","volume":"17","author":"mokhtari","year":"2016","journal-title":"Journal of Machine Learning Research"},{"key":"ref13","first-page":"315","article-title":"Accelerating stochastic gradient descent using predictive variance reduction","author":"johnson","year":"2013","journal-title":"Advances in neural information processing systems"},{"key":"ref14","first-page":"1646","article-title":"Saga: A fast incremental gradient method with support for non-strongly convex composite objectives","author":"defazio","year":"2014","journal-title":"Advances in neural information processing systems"},{"key":"ref15","first-page":"2629","article-title":"On variance reduction in stochastic gradient descent and its asynchronous variants","author":"reddi","year":"2015","journal-title":"Advances in neural information processing systems"},{"key":"ref16","first-page":"2663","article-title":"A stochastic gradient method with an exponential convergence _rate for finite training sets","author":"roux","year":"2012","journal-title":"Advances in neural information processing systems"},{"key":"ref17","first-page":"1125","article-title":"Finito: A faster, permutable incremental gradient method for big data problems","author":"defazio","year":"2014","journal-title":"Proceedings of the 31st International Conference on Machine Learning"},{"key":"ref18","article-title":"ms2gd: Mini-batch semi-stochastic gradient descent in the proximal setting","author":"kone?n\u00fd","year":"2014","journal-title":"arXiv preprint arXiv 1410 4744"},{"key":"ref19","article-title":"Semi-stochastic gradient descent methods","author":"kone?n\u00fd","year":"2013","journal-title":"arXiv preprint arXiv 1312 1666"},{"key":"ref28","article-title":"Without-replacement sampling for stochastic gradient methods: Convergence results and application to distributed optimization","author":"shamir","year":"2016","journal-title":"arXiv preprint arXiv 1603 02895"},{"key":"ref4","first-page":"2719","article-title":"Asynchronous parallel stochastic gradient for nonconvex optimization","author":"lian","year":"2015","journal-title":"Advances in neural information processing systems"},{"key":"ref27","article-title":"Why random reshuffling beats stochastic gradient descent","author":"g\u00fcrb\u00fczbalaban","year":"2015","journal-title":"arXiv preprint arXiv 1510 08560"},{"key":"ref3","first-page":"1223","article-title":"Large scale distributed deep networks","author":"dean","year":"2012","journal-title":"Advances in neural information processing systems"},{"key":"ref6","first-page":"19","article-title":"Communication efficient distributed machine learning with the parameter server","author":"li","year":"2014","journal-title":"Advances in neural information processing systems"},{"key":"ref29","article-title":"Ijcnn 2001 neural network competition","volume":"1","author":"prokhorov","year":"2001","journal-title":"Slide Presentation in IJCNN 01"},{"key":"ref5","first-page":"873","article-title":"Distributed delayed stochastic optimization","author":"agarwal","year":"2011","journal-title":"Advances in neural information processing systems"},{"key":"ref8","first-page":"2331","article-title":"Slow learners are fast","author":"zinkevich","year":"2009","journal-title":"Advances in neural information processing systems"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/ALLERTON.2014.7028543"},{"key":"ref2","first-page":"693","article-title":"Hogwild: A lock-free approach to parallelizing stochastic gradient descent","author":"recht","year":"2011","journal-title":"Advances in neural information processing systems"},{"journal-title":"Parallel and Distributed Computation Numerical Methods","year":"1989","author":"bertsekas","key":"ref9"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1214\/aoms\/1177729586"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1137\/140961791"},{"key":"ref22","first-page":"2242","article-title":"Stop wasting my gradients: Practical svrg","author":"harikandeh","year":"2015","journal-title":"Advances in neural information processing systems"},{"key":"ref21","first-page":"181","article-title":"Variance reduction for stochastic gradient optimization","author":"wang","year":"2013","journal-title":"Advances in neural information processing systems"},{"key":"ref24","article-title":"Cyclades: Conflict-free asynchronous machine learning","author":"pan","year":"2016","journal-title":"arXiv preprint arXiv 1605 01584"},{"key":"ref23","article-title":"Perturbed iterate analysis for asynchronous stochastic optimization","author":"mania","year":"2015","journal-title":"arXiv preprint arXiv 1507 06970"},{"key":"ref26","first-page":"421","article-title":"Stochastic gradient descent tricks","year":"2012","journal-title":"Neural Networks Tricks of the Trade"},{"key":"ref25","article-title":"Curiously fast convergence of some stochastic gradient descent algorithms","author":"bottou","year":"2009","journal-title":"Proceedings of the symposium on learning and data science Paris"}],"event":{"name":"2016 IEEE 16th International Conference on Data Mining (ICDM)","start":{"date-parts":[[2016,12,12]]},"location":"Barcelona, Spain","end":{"date-parts":[[2016,12,15]]}},"container-title":["2016 IEEE 16th International Conference on Data Mining (ICDM)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/7837023\/7837813\/07837835.pdf?arnumber=7837835","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2017,12,13]],"date-time":"2017-12-13T16:00:46Z","timestamp":1513180846000},"score":1,"resource":{"primary":{"URL":"http:\/\/ieeexplore.ieee.org\/document\/7837835\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2016,12]]},"references-count":32,"URL":"https:\/\/doi.org\/10.1109\/icdm.2016.0022","relation":{},"subject":[],"published":{"date-parts":[[2016,12]]}}}