{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,26]],"date-time":"2026-02-26T15:30:17Z","timestamp":1772119817754,"version":"3.50.1"},"reference-count":51,"publisher":"Springer Science and Business Media LLC","issue":"4","license":[{"start":{"date-parts":[[2021,4,15]],"date-time":"2021-04-15T00:00:00Z","timestamp":1618444800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2021,4,15]],"date-time":"2021-04-15T00:00:00Z","timestamp":1618444800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["The VLDB Journal"],"published-print":{"date-parts":[[2021,7]]},"DOI":"10.1007\/s00778-021-00664-7","type":"journal-article","created":{"date-parts":[[2021,4,15]],"date-time":"2021-04-15T23:13:03Z","timestamp":1618528383000},"page":"693-712","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":13,"title":["Model averaging in distributed machine learning: a case study with Apache Spark"],"prefix":"10.1007","volume":"30","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-1935-0946","authenticated-orcid":false,"given":"Yunyan","family":"Guo","sequence":"first","affiliation":[]},{"given":"Zhipeng","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"Jiawei","family":"Jiang","sequence":"additional","affiliation":[]},{"given":"Wentao","family":"Wu","sequence":"additional","affiliation":[]},{"given":"Ce","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"Bin","family":"Cui","sequence":"additional","affiliation":[]},{"given":"Jianzhong","family":"Li","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2021,4,15]]},"reference":[{"key":"664_CR1","unstructured":"Amazon EC2 on-Demand Instance Pricing. https:\/\/aws.amazon.com\/ec2\/pricing\/on-demand\/"},{"key":"664_CR2","unstructured":"Abadi, M., Barham, P., Chen, J., Chen, Z., Davis, A., Dean, J., Devin, M., Ghemawat, S., Irving, G., Isard, M., et\u00a0al.: Tensorflow: a system for large-scale machine learning. In: OSDI, pp. 265\u2013283 (2016)"},{"issue":"1","key":"664_CR3","first-page":"1111","volume":"15","author":"A Agarwal","year":"2014","unstructured":"Agarwal, A., Chapelle, O., Dud\u00edk, M., Langford, J.: A reliable effective terascale linear learning system. J. Mach. Learn. Res. 15(1), 1111\u20131133 (2014)","journal-title":"J. Mach. Learn. Res."},{"key":"664_CR4","doi-asserted-by":"crossref","unstructured":"Ahmed, A., Aly, M., Gonzalez, J., Narayanamurthy, S., Smola, A.J.: Scalable inference in latent variable models. In: WSDM, pp. 123\u2013132. ACM (2012)","DOI":"10.1145\/2124295.2124312"},{"key":"664_CR5","unstructured":"Alistarh, D., Allen-Zhu, Z., Li, J.: Byzantine stochastic gradient descent. In: Advances in Neural Information Processing Systems, pp. 4613\u20134623 (2018)"},{"issue":"8","key":"664_CR6","doi-asserted-by":"publisher","first-page":"901","DOI":"10.14778\/3090163.3090168","volume":"10","author":"M Anderson","year":"2017","unstructured":"Anderson, M., Smith, S., Sundaram, N., Capot\u0103, M., Zhao, Z., Dulloor, S., Satish, N., Willke, T.L.: Bridging the gap between HPC and big data frameworks. Proc. VLDB Endow. 10(8), 901\u2013912 (2017)","journal-title":"Proc. VLDB Endow."},{"issue":"3","key":"664_CR7","doi-asserted-by":"publisher","first-page":"315","DOI":"10.2307\/2347257","volume":"25","author":"JM Bernardo","year":"1976","unstructured":"Bernardo, J.M., et al.: Psi (digamma) function. Appl. Stat. 25(3), 315\u2013317 (1976)","journal-title":"Appl. Stat."},{"key":"664_CR8","doi-asserted-by":"crossref","unstructured":"Boden, C., Spina, A., Rabl, T., Markl, V.: Benchmarking data flow systems for scalable machine learning. In: Proceedings of the 4th ACM SIGMOD Workshop on Algorithms and Systems for MapReduce and Beyond, pp. 1\u201310 (2017)","DOI":"10.1145\/3070607.3070612"},{"key":"664_CR9","doi-asserted-by":"crossref","unstructured":"Bottou, L.: Large-scale machine learning with stochastic gradient descent, pp. 177\u2013186 (2010)","DOI":"10.1007\/978-3-7908-2604-3_16"},{"key":"664_CR10","doi-asserted-by":"crossref","unstructured":"Bottou, L.: Stochastic gradient descent tricks. In: Neural Networks: Tricks of the Trade, pp. 421\u2013436. Springer (2012)","DOI":"10.1007\/978-3-642-35289-8_25"},{"key":"664_CR11","doi-asserted-by":"crossref","unstructured":"Chen, T., Guestrin, C.: Xgboost: a scalable tree boosting system. In: SIGKDD, pp. 785\u2013794 (2016)","DOI":"10.1145\/2939672.2939785"},{"key":"664_CR12","unstructured":"Chen, W., Wang, Z., Zhou, J.: Large-scale l-BFGS using mapreduce. In: Advances in Neural Information Processing Systems, pp. 1332\u20131340 (2014)"},{"key":"664_CR13","doi-asserted-by":"crossref","unstructured":"Dai, J., Wang, Y., Qiu, X., Ding, D., Zhang, Y., Wang, Y., Jia, X., Zhang, C., Wan, Y., Li, Z., et\u00a0al.: Bigdl: a distributed deep learning framework for big data. Preprint arXiv:1804.05839 (2018)","DOI":"10.1145\/3357223.3362707"},{"issue":"Jan.","key":"664_CR14","first-page":"165","volume":"13","author":"O Dekel","year":"2012","unstructured":"Dekel, O., Gilad-Bachrach, R., Shamir, O., Xiao, L.: Optimal distributed online prediction using mini-batches. Journal of Machine Learning Research 13(Jan.), 165\u2013202 (2012)","journal-title":"Journal of Machine Learning Research"},{"key":"664_CR15","doi-asserted-by":"crossref","unstructured":"Fan, W., Xu, J., Wu, Y., Yu, W., Jiang, J., Zheng, Z., Zhang, B., Cao, Y., Tian, C.: Parallelizing sequential graph computations. In: SIGMOD, pp. 495\u2013510 (2017)","DOI":"10.1145\/3035918.3035942"},{"key":"664_CR16","doi-asserted-by":"crossref","unstructured":"Foulds, J., Boyles, L., DuBois, C., Smyth, P., Welling, M.: Stochastic collapsed variational Bayesian inference for latent Dirichlet allocation. In: SIGKDD, pp. 446\u2013454. ACM (2013)","DOI":"10.1145\/2487575.2487697"},{"key":"664_CR17","unstructured":"Hoffman, M., Bach, F.R., Blei, D.M.: Online learning for latent Dirichlet allocation. In: NIPS, pp. 856\u2013864 (2010)"},{"key":"664_CR18","unstructured":"Hsieh, K., Harlap, A., Vijaykumar, N., Konomis, D., Ganger, G.R., Gibbons, P.B., Mutlu, O.: Gaia: Geo-distributed machine learning approaching $$\\{$$LAN$$\\}$$ speeds. In: NSDI, pp. 629\u2013647 (2017)"},{"issue":"5","key":"664_CR19","doi-asserted-by":"publisher","first-page":"566","DOI":"10.1145\/3187009.3177734","volume":"11","author":"Y Huang","year":"2018","unstructured":"Huang, Y., Jin, T., Wu, Y., Cai, Z., Yan, X., Yang, F., Li, J., Guo, Y., Cheng, J.: Flexps: flexible parallelism control in parameter server architecture. Proc. VLDB Endow. 11(5), 566\u2013579 (2018)","journal-title":"Proc. VLDB Endow."},{"key":"664_CR20","doi-asserted-by":"crossref","unstructured":"Jiang, J., Cui, B., Zhang, C., Yu, L.: Heterogeneity-aware distributed parameter servers. In: SIGMOD, pp. 463\u2013478 (2017)","DOI":"10.1145\/3035918.3035933"},{"key":"664_CR21","doi-asserted-by":"crossref","unstructured":"Jiang, J., Fu, F., Yang, T., Cui, B.: Sketchml: accelerating distributed machine learning with data sketches. In: Proceedings of the 2018 International Conference on Management of Data, pp. 1269\u20131284 (2018)","DOI":"10.1145\/3183713.3196894"},{"issue":"2","key":"664_CR22","doi-asserted-by":"publisher","first-page":"216","DOI":"10.1093\/nsr\/nwx018","volume":"5","author":"J Jiang","year":"2017","unstructured":"Jiang, J., Yu, L., Jiang, J., Liu, Y., Cui, B.: Angel: a new large-scale machine learning system. Natl. Sci. Rev. 5(2), 216\u2013236 (2017)","journal-title":"Natl. Sci. Rev."},{"key":"664_CR23","unstructured":"Jiang, P., Agrawal, G.: A linear speedup analysis of distributed deep learning with sparse and quantized communication. In: Advances in Neural Information Processing Systems, pp. 2525\u20132536 (2018)"},{"key":"664_CR24","doi-asserted-by":"crossref","unstructured":"Kaoudi, Z., Quian\u00e9-Ruiz, J.A., Thirumuruganathan, S., Chawla, S., Agrawal, D.: A cost-based optimizer for gradient descent optimization. In: SIGMOD, pp. 977\u2013992. ACM (2017)","DOI":"10.1145\/3035918.3064042"},{"key":"664_CR25","unstructured":"Kucukelbir, A., Ranganath, R., Gelman, A., Blei, D.: Automatic variational inference in Stan. In: NIPS, pp. 568\u2013576 (2015)"},{"key":"664_CR26","doi-asserted-by":"crossref","unstructured":"Li, F., Chen, L., Zeng, Y., Kumar, A., Wu, X., Naughton, J.F., Patel, J.M.: Tuple-oriented compression for large-scale mini-batch stochastic gradient descent. In: Proceedings of the 2019 International Conference on Management of Data, pp. 1517\u20131534 (2019)","DOI":"10.1145\/3299869.3300070"},{"key":"664_CR27","doi-asserted-by":"crossref","unstructured":"Li, M., Anderson, D.G., Park, J.W., Smola, A.J., Ahmed, A., Josifovski, V., Long, J., Shekita, E.J., Su, B.Y.: Scaling distributed machine learning with the parameter server. In: OSDI, pp. 583\u2013598 (2014)","DOI":"10.1145\/2640087.2644155"},{"issue":"1\u20133","key":"664_CR28","doi-asserted-by":"publisher","first-page":"503","DOI":"10.1007\/BF01589116","volume":"45","author":"DC Liu","year":"1989","unstructured":"Liu, D.C., Nocedal, J.: On the limited memory BFGS method for large scale optimization. Math. Program. 45(1\u20133), 503\u2013528 (1989)","journal-title":"Math. Program."},{"key":"664_CR29","doi-asserted-by":"crossref","unstructured":"Liu, X., Zeng, J., Yang, X., Yan, J., Yang, Q.: Scalable parallel EM algorithms for latent Dirichlet allocation in multi-core systems. In: WWW, pp. 669\u2013679 (2015)","DOI":"10.1145\/2736277.2741106"},{"key":"664_CR30","unstructured":"McSherry, F., Isard, M., Murray, D.G.: Scalability! but at what $$\\{$$COST$$\\}$$? In: HotOS (2015)"},{"issue":"1","key":"664_CR31","first-page":"1235","volume":"17","author":"X Meng","year":"2016","unstructured":"Meng, X., Bradley, J., Yavuz, B., Sparks, E., Venkataraman, S., Liu, D., Freeman, J., Tsai, D., Amde, M., Owen, S., et al.: Mllib: machine learning in Apache Spark. J. Mach. Learn. Res. 17(1), 1235\u20131241 (2016)","journal-title":"J. Mach. Learn. Res."},{"issue":"1","key":"664_CR32","doi-asserted-by":"publisher","first-page":"94","DOI":"10.1007\/s41019-017-0034-4","volume":"2","author":"M Onizuka","year":"2017","unstructured":"Onizuka, M., Fujimori, T., Shiokawa, H.: Graph partitioning for distributed graph processing. Data Sci. Eng. 2(1), 94\u2013105 (2017)","journal-title":"Data Sci. Eng."},{"key":"664_CR33","unstructured":"Paszke, A., Gross, S., Massa, F., Lerer, A., Bradbury, J., Chanan, G., Killeen, T., Lin, Z., Gimelshein, N., Antiga, L., et\u00a0al.: Pytorch: an imperative style, high-performance deep learning library. In: Advances in Neural Information Processing Systems, pp. 8024\u20138035 (2019)"},{"key":"664_CR34","unstructured":"\u0158eh\u016f\u0159ek, R., Sojka, P.: Software framework for topic modelling with large corpora. In: Proceedings of the LREC 2010 Workshop on New Challenges for NLP Frameworks, pp. 45\u201350. ELRA, Valletta, Malta (2010). http:\/\/is.muni.cz\/publication\/884893\/en"},{"key":"664_CR35","doi-asserted-by":"publisher","first-page":"400","DOI":"10.1214\/aoms\/1177729586","volume":"22","author":"H Robbins","year":"1951","unstructured":"Robbins, H., Monro, S.: A stochastic approximation method. Ann. Math. Stat. 22, 400\u2013407 (1951)","journal-title":"Ann. Math. Stat."},{"key":"664_CR36","unstructured":"Stich, S.U.: Local SGD converges fast and communicates little. In: ICLR 2019 International Conference on Learning Representations, CONF (2019)"},{"issue":"1","key":"664_CR37","doi-asserted-by":"publisher","first-page":"49","DOI":"10.1177\/1094342005051521","volume":"19","author":"R Thakur","year":"2005","unstructured":"Thakur, R., Rabenseifner, R., Gropp, W.: Optimization of collective communication operations in MPICH. Int. J. High Perform. Comput. Appl. 19(1), 49\u201366 (2005)","journal-title":"Int. J. High Perform. Comput. Appl."},{"issue":"1","key":"664_CR38","doi-asserted-by":"publisher","first-page":"22","DOI":"10.1007\/s41019-016-0024-y","volume":"2","author":"K Ueno","year":"2017","unstructured":"Ueno, K., Suzumura, T., Maruyama, N., Fujisawa, K., Matsuoka, S.: Efficient breadth-first search on massively parallel and distributed-memory machines. Data Sci. Eng. 2(1), 22\u201335 (2017)","journal-title":"Data Sci. Eng."},{"key":"664_CR39","unstructured":"Xie, C., Koyejo, S., Gupta, I.: Zeno: Distributed stochastic gradient descent with suspicion-based fault-tolerance. In: International Conference on Machine Learning, pp. 6893\u20136901 (2019)"},{"issue":"2","key":"664_CR40","doi-asserted-by":"publisher","first-page":"49","DOI":"10.1109\/TBDATA.2015.2472014","volume":"1","author":"EP Xing","year":"2015","unstructured":"Xing, E.P., Ho, Q., Dai, W., Kim, J.K., Wei, J., Lee, S., Zheng, X., Xie, P., Kumar, A., Yu, Y.: Petuum: a new platform for distributed machine learning on big data. IEEE Trans. Big Data 1(2), 49\u201367 (2015)","journal-title":"IEEE Trans. Big Data"},{"issue":"14","key":"664_CR41","doi-asserted-by":"publisher","first-page":"1917","DOI":"10.14778\/2733085.2733097","volume":"7","author":"N Xu","year":"2014","unstructured":"Xu, N., Chen, L., Cui, B.: LogGP: a log-based dynamic graph partitioning method. Proc. VLDB Endow. 7(14), 1917\u20131928 (2014)","journal-title":"Proc. VLDB Endow."},{"key":"664_CR42","doi-asserted-by":"crossref","unstructured":"Yuan, J., Gao, F., Ho, Q., Dai, W., Wei, J., Zheng, X., Xing, E.P., Liu, T.Y., Ma, W.Y.: Lightlda: big topic models on modest computer clusters. In: World Wide Web, pp. 1351\u20131361 (2015)","DOI":"10.1145\/2736277.2741115"},{"issue":"11","key":"664_CR43","doi-asserted-by":"publisher","first-page":"1406","DOI":"10.14778\/3137628.3137649","volume":"10","author":"L Yut","year":"2017","unstructured":"Yut, L., Zhang, C., Shao, Y., Cui, B.: LDA*: a robust and large-scale topic modeling system. Proc. VLDB Endow. 10(11), 1406\u20131417 (2017)","journal-title":"Proc. VLDB Endow."},{"issue":"10\u201310","key":"664_CR44","first-page":"95","volume":"10","author":"M Zaharia","year":"2010","unstructured":"Zaharia, M., Chowdhury, M., Franklin, M.J., Shenker, S., Stoica, I.: Spark: cluster computing with working sets. HotCloud 10(10\u201310), 95 (2010)","journal-title":"HotCloud"},{"key":"664_CR45","unstructured":"Zaheer, M., Wick, M., Tristan, J.B., Smola, A., Steele, G.: Exponential stochastic cellular automata for massively parallel inference. In: Artificial Intelligence and Statistics, pp. 966\u2013975 (2016)"},{"issue":"12","key":"664_CR46","doi-asserted-by":"publisher","first-page":"11","DOI":"10.14778\/2732977.2733001","volume":"7","author":"C Zhang","year":"2014","unstructured":"Zhang, C., R\u00e9, C.: Dimmwitted: a study of main-memory statistical analytics. Proc. VLDB Endow. 7(12), 11 (2014)","journal-title":"Proc. VLDB Endow."},{"key":"664_CR47","doi-asserted-by":"crossref","unstructured":"Zhang, H., Zeng, L., Wu, W., Zhang, C.: How good are machine learning clouds for binary classification with good features? In: SoCC, p. 649 (2017)","DOI":"10.1145\/3127479.3132570"},{"key":"664_CR48","unstructured":"Zhang, J., De\u00a0Sa, C., Mitliagkas, I., R\u00e9, C.: Parallel SGD: When does averaging help? Preprint arXiv:1606.07365 (2016)"},{"key":"664_CR49","doi-asserted-by":"crossref","unstructured":"Zhang, K., Alqahtani, S., Demirbas, M.: A comparison of distributed machine learning platforms. In: ICCCN, pp. 1\u20139 (2017)","DOI":"10.1109\/ICCCN.2017.8038464"},{"key":"664_CR50","unstructured":"Zhang, Y., Jordan, M.I.: Splash: User-friendly programming interface for parallelizing stochastic algorithms. Preprint arXiv:1506.07552 (2015)"},{"key":"664_CR51","unstructured":"Zinkevich, M., Weimer, M., Li, L., Smola, A.J.: Parallelized stochastic gradient descent. In: NIPS, pp. 2595\u20132603 (2010)"}],"container-title":["The VLDB Journal"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00778-021-00664-7.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00778-021-00664-7\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00778-021-00664-7.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2021,6,29]],"date-time":"2021-06-29T08:20:37Z","timestamp":1624954837000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00778-021-00664-7"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,4,15]]},"references-count":51,"journal-issue":{"issue":"4","published-print":{"date-parts":[[2021,7]]}},"alternative-id":["664"],"URL":"https:\/\/doi.org\/10.1007\/s00778-021-00664-7","relation":{},"ISSN":["1066-8888","0949-877X"],"issn-type":[{"value":"1066-8888","type":"print"},{"value":"0949-877X","type":"electronic"}],"subject":[],"published":{"date-parts":[[2021,4,15]]},"assertion":[{"value":"3 December 2019","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"26 July 2020","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"2 September 2020","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"15 April 2021","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}