{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,10,29]],"date-time":"2024-10-29T11:17:45Z","timestamp":1730200665237,"version":"3.28.0"},"reference-count":40,"publisher":"IEEE","license":[{"start":{"date-parts":[[2022,12,17]],"date-time":"2022-12-17T00:00:00Z","timestamp":1671235200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2022,12,17]],"date-time":"2022-12-17T00:00:00Z","timestamp":1671235200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2022,12,17]]},"DOI":"10.1109\/bigdata55660.2022.10021133","type":"proceedings-article","created":{"date-parts":[[2023,1,26]],"date-time":"2023-01-26T19:35:23Z","timestamp":1674761723000},"page":"1401-1408","source":"Crossref","is-referenced-by-count":3,"title":["PaddleBox: Communication-Efficient TeraByte-Scale Model Training Framework for Online Advertising"],"prefix":"10.1109","author":[{"given":"Weijie","family":"Zhao","sequence":"first","affiliation":[{"name":"Baidu Research Baidu Search Ads (Phoenix Nest), Baidu Inc.,Cognitive Computing Lab,Washington,USA,98004"}]},{"given":"Xuewu","family":"Jiao","sequence":"additional","affiliation":[{"name":"Baidu Research Baidu Search Ads (Phoenix Nest), Baidu Inc.,Cognitive Computing Lab,Washington,USA,98004"}]},{"given":"Mingqing","family":"Hu","sequence":"additional","affiliation":[{"name":"Baidu Research Baidu Search Ads (Phoenix Nest), Baidu Inc.,Cognitive Computing Lab,Washington,USA,98004"}]},{"given":"Xiaoyun","family":"Li","sequence":"additional","affiliation":[{"name":"Baidu Research Baidu Search Ads (Phoenix Nest), Baidu Inc.,Cognitive Computing Lab,Washington,USA,98004"}]},{"given":"Xiangyu","family":"Zhang","sequence":"additional","affiliation":[{"name":"Baidu Research Baidu Search Ads (Phoenix Nest), Baidu Inc.,Cognitive Computing Lab,Washington,USA,98004"}]},{"given":"Ping","family":"Li","sequence":"additional","affiliation":[{"name":"Baidu Research Baidu Search Ads (Phoenix Nest), Baidu Inc.,Cognitive Computing Lab,Washington,USA,98004"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1145\/792550.792552"},{"article-title":"On the convergence of decentralized adaptive gradient methods","volume-title":"Proceedings of The 14th Asian Conference on Machine Learning (ACML)","author":"Chen","key":"ref2"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1145\/3412815.3416891"},{"article-title":"On the convergence of A class of adam-type algorithms for non-convex optimization","volume-title":"Proceedings of the 7th International Conference on Learning Representations (ICLR)","author":"Chen","key":"ref4"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1145\/2988450.2988454"},{"article-title":"Project adam: Building an efficient and scalable deep learning training system","volume-title":"Proceedings of the 11th USENIX Symposium on Operating Systems Design and Implementation (OSDI)","author":"Chilimbi","key":"ref6"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1145\/2959100.2959190"},{"key":"ref8","first-page":"37","article-title":"Exploiting bounded staleness to speed up big data analytics","volume-title":"Proceedings of the 2014 USENIX Annual Technical Conference (USENIX ATC)","author":"Cui"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1145\/2901318.2901323"},{"key":"ref10","first-page":"2121","article-title":"Adaptive subgradient methods for online learning and stochastic optimization","volume":"12","author":"Duchi","year":"2011","journal-title":"J. Mach. Learn. Res"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1257\/aer.97.1.242"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1002\/bult.1720320206"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1145\/3292500.3330651"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1145\/3404835.3463116"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/MM.2017.37"},{"article-title":"Web-scale bayesian click-through rate prediction for sponsored search advertising in microsoft\u2019s bing search engine","volume-title":"Proceedings of the 27th International Conference on Machine Learning (ICML)","author":"Graepel","key":"ref16"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2017\/239"},{"key":"ref18","first-page":"1223","article-title":"More effective distributed ml via a stale synchronous parallel parameter server","volume-title":"Advances in Neural Information Processing Systems (NIPS)","author":"Ho"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/TKDE.2005.50"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1145\/3187009.3177734"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.1406.3269"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.5555\/2685048.2685095"},{"article-title":"On distributed adaptive optimization with gradient compression","volume-title":"Proceedings of the Tenth International Conference on Learning Representations (ICLR)","author":"Li","key":"ref23"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1145\/3219819.3220023"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1145\/3267809.3267840"},{"key":"ref26","first-page":"1273","article-title":"Communication-efficient learning of deep networks from decentralized data","volume-title":"Proceedings of the 20th International Conference on Artificial Intelligence and Statistics (AISTATS)","author":"McMahan"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/ICDM.2016.0151"},{"article-title":"Adaptive federated optimization","volume-title":"Proceedings of the 9th International Conference on Learning Representations (ICLR)","author":"Reddi","key":"ref28"},{"article-title":"On the convergence of Adam and beyond","volume-title":"Proceedings of the 6th International Conference on Learning Representations (ICLR)","author":"Reddi","key":"ref29"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1145\/2939672.2939704"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1145\/79173.79181"},{"article-title":"An optimistic acceleration of amsgrad for nonconvex optimization","volume-title":"Proceedings of the Asian Conference on Machine Learning (ACML)","author":"Wang","key":"ref32"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1145\/3448016.3457236"},{"key":"ref34","first-page":"7184","article-title":"On the linear speedup analysis of communication efficient momentum SGD for distributed non-convex optimization","volume-title":"Proceedings of the 36th International Conference on Machine Learning (ICML)","author":"Yu"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1145\/3394486.3403297"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/BigData55660.2022.10021016"},{"article-title":"Distributed hierarchical GPU parameter server for massive scale deep learning ads systems","volume-title":"Proceedings of Machine Learning and Systems 2020 (MLSys)","author":"Zhao","key":"ref37"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1145\/3357384.3358045"},{"article-title":"On the convergence of adaptive gradient methods for nonconvex optimization","year":"2018","author":"Zhou","key":"ref39"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1145\/3219819.3219823"}],"event":{"name":"2022 IEEE International Conference on Big Data (Big Data)","start":{"date-parts":[[2022,12,17]]},"location":"Osaka, Japan","end":{"date-parts":[[2022,12,20]]}},"container-title":["2022 IEEE International Conference on Big Data (Big Data)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/10020192\/10020156\/10021133.pdf?arnumber=10021133","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,2,13]],"date-time":"2024-02-13T06:08:16Z","timestamp":1707804496000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10021133\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,12,17]]},"references-count":40,"URL":"https:\/\/doi.org\/10.1109\/bigdata55660.2022.10021133","relation":{},"subject":[],"published":{"date-parts":[[2022,12,17]]}}}