{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,2]],"date-time":"2026-01-02T07:32:14Z","timestamp":1767339134279,"version":"3.41.0"},"publisher-location":"New York, New York, USA","reference-count":35,"publisher":"ACM Press","license":[{"start":{"date-parts":[[2017,1,1]],"date-time":"2017-01-01T00:00:00Z","timestamp":1483228800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2017]]},"DOI":"10.1145\/3041021.3051099","type":"proceedings-article","created":{"date-parts":[[2018,1,11]],"date-time":"2018-01-11T18:39:25Z","timestamp":1515695965000},"page":"913-915","source":"Crossref","is-referenced-by-count":16,"title":["Distributed Machine Learning"],"prefix":"10.1145","author":[{"given":"Tie-Yan","family":"Liu","sequence":"first","affiliation":[{"name":"Microsoft Research, Beijing, China"}]},{"given":"Wei","family":"Chen","sequence":"additional","affiliation":[{"name":"Microsoft Research, Beijing, China"}]},{"given":"Taifeng","family":"Wang","sequence":"additional","affiliation":[{"name":"Microsoft Research, Beijing, China"}]}],"member":"320","reference":[{"key":"key-10.1145\/3041021.3051099-1","doi-asserted-by":"crossref","unstructured":"Ghoting Amol, et al. SystemML: Declarative machine learning on MapReduce. ICDE 2011.","DOI":"10.1109\/ICDE.2011.5767930"},{"key":"key-10.1145\/3041021.3051099-2","unstructured":"Martín Abadi, et al. Tensorflow: Large-scale machine learning on heterogeneous distributed systems. arXiv:1603.04467 (2016)."},{"key":"key-10.1145\/3041021.3051099-3","unstructured":"Olivier Bousquet and Leon Bottou. The tradeoffs of large scale learning. NIPS 2008."},{"key":"key-10.1145\/3041021.3051099-4","doi-asserted-by":"crossref","unstructured":"Stephen Boyd, et al. 
Distributed optimization and statistical learning via the alternating direction method of multipliers. Foundations and Trends in Machine Learning, 2011.","DOI":"10.1561\/2200000016"},{"key":"key-10.1145\/3041021.3051099-5","unstructured":"Jianmin Chen, Rajat Monga, Samy Bengio, and Rafal Jozefowicz. Revisiting distributed synchronous SGD. arXiv:1604.00981 (2016)."},{"key":"key-10.1145\/3041021.3051099-6","doi-asserted-by":"crossref","unstructured":"Kai Chen and Qiang Huo, Scalable training of deep learning machines by incremental block training with intra-block parallel optimization and blockwise model-update filtering, ICASSP 2016","DOI":"10.1109\/ICASSP.2016.7472805"},{"key":"key-10.1145\/3041021.3051099-7","unstructured":"Jeffrey Dean, et al. Large scale distributed deep networks. NIPS 2012."},{"key":"key-10.1145\/3041021.3051099-8","unstructured":"Aaron Defazio, et al. SAGA: A fast incremental gradient method with support for non-strongly convex composite objectives. NIPS 2014."},{"key":"key-10.1145\/3041021.3051099-9","unstructured":"Fei Gao, et al. http:\/\/www.dmtk.io."},{"key":"key-10.1145\/3041021.3051099-10","unstructured":"Qirong Ho, et al. More effective distributed ml via a stale synchronous parallel parameter server. NIPS 2013."},{"key":"key-10.1145\/3041021.3051099-11","unstructured":"Rie Johnson and Tong Zhang. Accelerating stochastic gradient descent using predictive variance reduction. NIPS 2013."},{"key":"key-10.1145\/3041021.3051099-12","unstructured":"Guolin Ke, et al. A Communication-Efficient Parallel Algorithm for Decision Tree, AAAI 2017."},{"key":"key-10.1145\/3041021.3051099-13","unstructured":"John Langford, et al. Sparse online learning via truncated gradient. NIPS 2009."},{"key":"key-10.1145\/3041021.3051099-14","unstructured":"Jason Lee, et al. Distributed stochastic variance reduced gradient methods. arXiv:1507.07595 (2015)"},{"key":"key-10.1145\/3041021.3051099-15","unstructured":"Mu Li, et al. 
Parameter server for distributed machine learning. Big Learning Workshop, 2013."},{"key":"key-10.1145\/3041021.3051099-16","unstructured":"Xiangrui Meng, et al. Mllib: Machine learning in apache spark. JMLR 2016."},{"key":"key-10.1145\/3041021.3051099-17","unstructured":"Qi Meng, et al. Asynchronous Accelerated Stochastic Gradient Descent, IJCAI 2016."},{"key":"key-10.1145\/3041021.3051099-18","doi-asserted-by":"crossref","unstructured":"Qi Meng, et al. Asynchronous Stochastic Proximal Optimization Algorithms with Variance Reduction, AAAI 2017.","DOI":"10.1609\/aaai.v31i1.10910"},{"key":"key-10.1145\/3041021.3051099-19","doi-asserted-by":"crossref","unstructured":"Arkadi Nemirovski, et al. Robust stochastic approximation approach to stochastic programming. In SIAM Journal on Optimization, 2009.","DOI":"10.1137\/070704277"},{"key":"key-10.1145\/3041021.3051099-20","doi-asserted-by":"crossref","unstructured":"Yurii Nesterov. Introductory lectures on convex optimization, Springer Science & Business Media, 2004.","DOI":"10.1007\/978-1-4419-8853-9"},{"key":"key-10.1145\/3041021.3051099-21","doi-asserted-by":"crossref","unstructured":"Yurii Nesterov. Efficiency of coordinate descent methods on huge-scale optimization problems. SIAM Journal on Optimization, 2012.","DOI":"10.1137\/100802001"},{"key":"key-10.1145\/3041021.3051099-22","unstructured":"Alexander Rakhlin, et al. Making gradient descent optimal for strongly convex stochastic optimization. ICML 2012."},{"key":"key-10.1145\/3041021.3051099-23","doi-asserted-by":"crossref","unstructured":"Peter Richtarik and Martin Takac. Iteration complexity of randomized block coordinate descent methods for minimizing a composite function. Mathematical Programming, 2014.","DOI":"10.1007\/s10107-012-0614-z"},{"key":"key-10.1145\/3041021.3051099-24","unstructured":"Shizhao Sun, et al. 
Ensemble-Compression: A New Method for Parallel Training of Deep Neural Networks, arXiv:1606.00575 (2016)"},{"key":"key-10.1145\/3041021.3051099-25","unstructured":"Shuxin Zheng, et al. Asynchronous Stochastic Gradient Descent with Delay Compensation for Distributed Deep Learning, arXiv preprint (2016)"},{"key":"key-10.1145\/3041021.3051099-26","doi-asserted-by":"crossref","unstructured":"Eric P. Xing, et al. Petuum: A new platform for distributed machine learning on big data. IEEE Transactions on Big Data, 2015.","DOI":"10.1145\/2783258.2783323"},{"key":"key-10.1145\/3041021.3051099-27","doi-asserted-by":"crossref","unstructured":"D. Gabay and B. Mercier, A dual algorithm for the solution of nonlinear variational problems via finite element approximation, Computers & Mathematics with Applications, 1976.","DOI":"10.1016\/0898-1221(76)90003-1"},{"key":"key-10.1145\/3041021.3051099-28","doi-asserted-by":"crossref","unstructured":"DC Liu and Jorge Nocedal. On the limited memory BFGS method for large scale optimization. Mathematical programming, 1989.","DOI":"10.1007\/BF01589116"},{"key":"key-10.1145\/3041021.3051099-29","unstructured":"R. H. Byrd, S.L. Hansen Jorge Nocedal, Y. Singer, A Stochastic Quasi-Newton Method for Large-Scale Optimization, SIAM Journal on Optimization."},{"key":"key-10.1145\/3041021.3051099-30","unstructured":"M. Frank, P. Wolfe, An algorithm for quadratic programming, Naval Research Logistics Quarterly, 1952."},{"key":"key-10.1145\/3041021.3051099-31","unstructured":"Jaggi, Martin, Revisiting Frank--Wolfe: Projection-Free Sparse Convex Optimization, Journal of Machine Learning Research, 2013."},{"key":"key-10.1145\/3041021.3051099-32","unstructured":"Sutskever, Ilya, et al. On the importance of initialization and momentum in deep learning. ICML 2013."},{"key":"key-10.1145\/3041021.3051099-33","unstructured":"Ruiliang Zhang, James T. 
Kwok, Asynchronous Distributed ADMM for Consensus Optimization, ICML 2014."},{"key":"key-10.1145\/3041021.3051099-34","unstructured":"Reddi, Sashank J., et al. On variance reduction in stochastic gradient descent and its asynchronous variants. NIPS 2015."},{"key":"key-10.1145\/3041021.3051099-35","unstructured":"Jason Lee, et al. Distributed stochastic variance reduced gradient methods. arXiv:1507.07595 (2015)"}],"event":{"number":"26","sponsor":["SIGWEB, ACM Special Interest Group on Hypertext, Hypermedia, and Web","IW3C2, International World Wide Web Conference Committee"],"acronym":"WWW '17 Companion","name":"the 26th International Conference","start":{"date-parts":[[2017,4,3]]},"location":"Perth, Australia","end":{"date-parts":[[2017,4,7]]}},"container-title":["Proceedings of the 26th International Conference on World Wide Web Companion - WWW '17 Companion"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3041021.3051099","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/dl.acm.org\/ft_gateway.cfm?id=3051099&ftid=1865257&dwn=1","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T19:04:58Z","timestamp":1750273498000},"score":1,"resource":{"primary":{"URL":"http:\/\/dl.acm.org\/citation.cfm?doid=3041021.3051099"}},"subtitle":["Foundations, Trends, and Practices"],"proceedings-subject":"World Wide Web Companion","short-title":[],"issued":{"date-parts":[[2017]]},"references-count":35,"URL":"https:\/\/doi.org\/10.1145\/3041021.3051099","relation":{},"subject":[],"published":{"date-parts":[[2017]]}}}