{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,5,15]],"date-time":"2025-05-15T11:45:15Z","timestamp":1747309515479,"version":"3.37.3"},"reference-count":57,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"9","license":[{"start":{"date-parts":[[2022,9,1]],"date-time":"2022-09-01T00:00:00Z","timestamp":1661990400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2022,9,1]],"date-time":"2022-09-01T00:00:00Z","timestamp":1661990400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2022,9,1]],"date-time":"2022-09-01T00:00:00Z","timestamp":1661990400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"name":"National Key Research and Development Program of China","award":["2018YFB1004403"],"award-info":[{"award-number":["2018YFB1004403"]}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61832001","61972004","61702015","U1936104","61702016"],"award-info":[{"award-number":["61832001","61972004","61702015","U1936104","61702016"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100012226","name":"Fundamental Research Funds for the Central Universities","doi-asserted-by":"publisher","award":["2020RC25"],"award-info":[{"award-number":["2020RC25"]}],"id":[{"id":"10.13039\/501100012226","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Beijing Academy of Artificial Intelligence"},{"name":"PKU-Baidu Fund","award":["2019BD006"],"award-info":[{"award-number":["2019BD006"]}]},{"name":"PKU-Tencent Joint research Lab"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Knowl. Data Eng."],"published-print":{"date-parts":[[2022,9,1]]},"DOI":"10.1109\/tkde.2020.3038109","type":"journal-article","created":{"date-parts":[[2020,11,16]],"date-time":"2020-11-16T21:08:52Z","timestamp":1605560932000},"page":"4119-4132","source":"Crossref","is-referenced-by-count":8,"title":["CuWide: Towards Efficient Flow-Based Training for Sparse Wide Models on GPUs"],"prefix":"10.1109","volume":"34","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-9371-8358","authenticated-orcid":false,"given":"Xupeng","family":"Miao","sequence":"first","affiliation":[{"name":"Key Lab of High Confidence Software Technologies (MOE), School of EECS, Peking University, Beijing, China"}]},{"given":"Lingxiao","family":"Ma","sequence":"additional","affiliation":[{"name":"Key Lab of High Confidence Software Technologies (MOE), School of EECS, Peking University, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8219-4499","authenticated-orcid":false,"given":"Zhi","family":"Yang","sequence":"additional","affiliation":[{"name":"Key Lab of High Confidence Software Technologies (MOE), School of EECS, Peking University, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8559-2628","authenticated-orcid":false,"given":"Yingxia","family":"Shao","sequence":"additional","affiliation":[{"name":"School of Computer Science, Beijing University of Posts and Telecommunications, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1681-4677","authenticated-orcid":false,"given":"Bin","family":"Cui","sequence":"additional","affiliation":[{"name":"Key Lab of High Confidence Software Technologies (MOE), School of EECS, Institute of Computational Social Science, Peking University, Beijing, China"}]},{"given":"Lele","family":"Yu","sequence":"additional","affiliation":[{"name":"Tencent Inc., Shenzhen, China"}]},{"given":"Jiawei","family":"Jiang","sequence":"additional","affiliation":[{"name":"ETH Zurich, Zurich, Switzerland"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1145\/2988450.2988454"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1145\/2723372.2723713"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/ICDE.2019.00194"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/BigComp.2018.00050"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1145\/2487575.2488200"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1145\/3240323.3240396"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2017\/239"},{"key":"ref8","first-page":"13","article-title":"Web-scale Bayesian click-through rate prediction for sponsored search advertising in Microsoft\u2019s bing search engine","volume-title":"Proc. 27th Int. Conf. Mach. Learn.","author":"Graepel"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1007\/s41019-019-00115-y"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2010.128"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.14778\/2732977.2733001"},{"key":"ref12","first-page":"265","article-title":"TensorFlow: A system for large-scale machine learning","volume-title":"Proc. 12th USENIX Conf. Operating Syst. Des. Implementation","author":"Abadi"},{"article-title":"Automatic differentiation in PyTorch","year":"2017","author":"Paszke","key":"ref13"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1016\/J.ENG.2016.02.008"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1145\/2038916.2038929"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1145\/2168752.2168771"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1145\/3320060"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1145\/3065386"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1007\/s41019-019-0088-6"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.14778\/2212351.2212354"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1145\/2901318.2901331"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/ICDE.2012.148"},{"key":"ref25","article-title":"MonetDB: Two decades of research in column-oriented database","author":"Idreos","year":"2012","journal-title":"IEEE Data Eng. Bull."},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1145\/2939672.2939785"},{"year":"2019","key":"ref27","article-title":"Profiler :: Cuda toolkit documentation"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1145\/3379500"},{"key":"ref29","first-page":"250","article-title":"Snap ML: A hierarchical framework for machine learning","volume-title":"Proc. 32nd Int. Conf. Neural Inf. Process. Syst.","author":"D\u00fcnner"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/Allerton.2011.6120320"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1145\/3368089.3417050"},{"key":"ref32","article-title":"Salus: Fine-grained GPU sharing primitives for deep learning applications","author":"Yu","year":"2019","journal-title":"CoRR"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/BigData.2016.7840590"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-35289-8_26"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.5555\/2685048.2685095"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1145\/3035918.3035933"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1145\/3299869.3314038"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1145\/3302424.3303957"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1007\/s10107-014-0846-1"},{"key":"ref40","first-page":"2737","article-title":"Asynchronous parallel stochastic gradient for nonconvex optimization","volume-title":"Proc. 28th Int. Conf. Neural Inf. Process. Syst.","author":"Lian"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1007\/s00778-019-00596-3"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1109\/ICDE48307.2020.00134"},{"key":"ref43","first-page":"802","article-title":"Coupled group lasso for web-scale CTR prediction in display advertising","volume-title":"Proc. 31st Int. Conf. Mach. Learn.","author":"Yan"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1007\/s11432-018-9832-y"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1007\/s11432-018-9567-8"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1145\/3196959.3196960"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1145\/3209889.3209896"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1145\/3037697.3037740"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1145\/3078597.3078602"},{"article-title":"Deep learning with H2O","year":"2016","author":"Candel","key":"ref50"},{"issue":"Feb","key":"ref52","first-page":"567","article-title":"Stochastic dual coordinate ascent methods for regularized loss minimization","volume":"14","author":"Shalev-Shwartz","year":"2013","journal-title":"J. Mach. Learn. Res."},{"key":"ref53","first-page":"21:1","article-title":"ThunderSVM: A fast SVM library on GPUs and CPUs","volume":"19","author":"Wen","year":"2018","journal-title":"J. Mach. Learn. Res."},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.1109\/TKDE.2018.2866097"},{"article-title":"BIDMach: Large-scale learning with zero memory allocation","volume-title":"Proc. BigLearn Workshop NeurIPS","author":"Canny","key":"ref55"},{"article-title":"Mini-batch serialization: CNN training with inter-layer data reuse","volume-title":"Proc. Conf. Mach. Learn. Syst.","author":"Lym","key":"ref56"},{"key":"ref57","first-page":"2024","article-title":"Persistent RNNs: Stashing recurrent weights on-chip","volume-title":"Proc. 33rd Int. Conf. Mach. Learn.","author":"Diamos"},{"article-title":"Sparse persistent RNNs: Squeezing large recurrent networks on-chip","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Zhu","key":"ref58"},{"key":"ref59","doi-asserted-by":"publisher","DOI":"10.1109\/TPDS.2012.319"},{"key":"ref60","first-page":"195","article-title":"Garaph: Efficient GPU-accelerated graph processing on a single machine with balanced replication","volume-title":"Proc. USENIX Conf. Usenix Annu. Tech. Conf.","author":"Ma"}],"container-title":["IEEE Transactions on Knowledge and Data Engineering"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/69\/9851540\/09261124.pdf?arnumber=9261124","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,1,9]],"date-time":"2024-01-09T23:43:02Z","timestamp":1704843782000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9261124\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,9,1]]},"references-count":57,"journal-issue":{"issue":"9"},"URL":"https:\/\/doi.org\/10.1109\/tkde.2020.3038109","relation":{},"ISSN":["1041-4347","1558-2191","2326-3865"],"issn-type":[{"type":"print","value":"1041-4347"},{"type":"electronic","value":"1558-2191"},{"type":"electronic","value":"2326-3865"}],"subject":[],"published":{"date-parts":[[2022,9,1]]}}}