{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,2]],"date-time":"2026-04-02T09:38:43Z","timestamp":1775122723290,"version":"3.50.1"},"reference-count":115,"publisher":"Springer Science and Business Media LLC","issue":"3","license":[{"start":{"date-parts":[[2025,3,5]],"date-time":"2025-03-05T00:00:00Z","timestamp":1741132800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,3,5]],"date-time":"2025-03-05T00:00:00Z","timestamp":1741132800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["The VLDB Journal"],"published-print":{"date-parts":[[2025,5]]},"DOI":"10.1007\/s00778-025-00908-w","type":"journal-article","created":{"date-parts":[[2025,3,5]],"date-time":"2025-03-05T02:24:32Z","timestamp":1741141472000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":3,"title":["Efficient and scalable huge embedding model training via distributed cache management"],"prefix":"10.1007","volume":"34","author":[{"given":"Xupeng","family":"Miao","sequence":"first","affiliation":[]},{"given":"Hailin","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"Yining","family":"Shi","sequence":"additional","affiliation":[]},{"given":"Xiaonan","family":"Nie","sequence":"additional","affiliation":[]},{"given":"Zhi","family":"Yang","sequence":"additional","affiliation":[]},{"given":"Yangyu","family":"Tao","sequence":"additional","affiliation":[]},{"given":"Jie","family":"Jiang","sequence":"additional","affiliation":[]},{"given":"Bin","family":"Cui","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,3,5]]},"reference":[{"key":"908_CR1","unstructured":"Criteo Terabyte Click Logs. (2013). https:\/\/labs.criteo.com\/2013\/12\/download-terabyte-click-logs\/"},{"key":"908_CR2","unstructured":"Criteo kaggle ad. (2014) https:\/\/www.kaggle.com\/c\/criteo-display-ad-challenge"},{"key":"908_CR3","unstructured":"Mlperf benchmark. https:\/\/mlperf.org (2020)"},{"key":"908_CR4","unstructured":"Fae (2021). https:\/\/github.com\/STAR-Laboratory\/Accelerating-RecSys-Training"},{"key":"908_CR5","unstructured":"Het appendix (2021). https:\/\/github.com\/Hsword\/HET\/blob\/main\/vldb2021_het_appendix.pdf"},{"key":"908_CR6","unstructured":"Hetu (2021). https:\/\/github.com\/PKU-DAIR\/Hetu"},{"key":"908_CR7","unstructured":"Intel mkl dnn (onednn). https:\/\/github.com\/oneapi-src\/oneDNN (2021)"},{"key":"908_CR8","unstructured":"Nvidia collective communications library (nccl). https:\/\/developer.nvidia.com\/nccl (2021)"},{"key":"908_CR9","unstructured":"Nvidia hugectr. https:\/\/github.com\/NVIDIA\/HugeCTR (2021)"},{"key":"908_CR10","unstructured":"Ps-lite. https:\/\/github.com\/dmlc\/ps-lite (2021)"},{"key":"908_CR11","unstructured":"Persia (2022). https:\/\/github.com\/PersiaML\/Persia"},{"key":"908_CR12","unstructured":"PICASSO (2022). https:\/\/github.com\/alibaba\/HybridBackend"},{"key":"908_CR13","unstructured":"Bagpipe (2023). https:\/\/github.com\/uw-mad-dash\/bagpipe"},{"key":"908_CR14","unstructured":"EmbedX (2023). https:\/\/github.com\/Tencent\/embedx"},{"key":"908_CR15","unstructured":"FEC (2023). https:\/\/github.com\/kaihaoma\/FEC"},{"key":"908_CR16","unstructured":"GRACE (2023). https:\/\/github.com\/Linestro\/GRACE"},{"key":"908_CR17","unstructured":"UGACHE (2023). https:\/\/github.com\/SJTU-IPADS\/ugache"},{"key":"908_CR18","unstructured":"Herald (2024). https:\/\/github.com\/HKUST-SING\/herald"},{"key":"908_CR19","unstructured":"PetPS (2024). https:\/\/github.com\/thustorage\/PetPS"},{"key":"908_CR20","unstructured":"RAP (2024). https:\/\/github.com\/Ash-Zheng\/RAP-artifacts"},{"key":"908_CR21","unstructured":"Abadi, M., Barham, P., Chen, J., Chen, Z., Davis, A., Dean, J., Devin, M., Ghemawat, S., Irving, G., Isard, M., Kudlur, M., Levenberg, J., Monga, R., Moore, S., Murray, D.G., Steiner, B., Tucker, P.A., Vasudevan, V., Warden, P., Wicke, M., Yu, Y., Zheng, X. (2016): Tensorflow: A system for large-scale machine learning. In: OSDI, pp. 265\u2013283"},{"key":"908_CR22","doi-asserted-by":"crossref","unstructured":"Adnan, M., Maboud, Y.E., Mahajan, D., Nair, P.J. (2021): Accelerating recommendation system training by leveraging popular choices. Proceedings of the VLDB Endowment 15(1) pp 127\u2013140","DOI":"10.14778\/3485450.3485462"},{"issue":"1","key":"908_CR23","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/2151163.2151166","volume":"3","author":"G Adomavicius","year":"2012","unstructured":"Adomavicius, G., Zhang, J.: Impact of data characteristics on recommender systems performance. ACM Trans. Manag. Inf. Syst. 3(1), 1\u201317 (2012)","journal-title":"ACM Trans. Manag. Inf. Syst."},{"key":"908_CR24","doi-asserted-by":"crossref","unstructured":"Agarwal, S., Yan, C., Zhang, Z., Venkataraman, S. (2023): Bagpipe: Accelerating deep recommendation model training. In: Proceedings of the 29th Symposium on Operating Systems Principles, pp. 348\u2013363","DOI":"10.1145\/3600006.3613142"},{"key":"908_CR25","doi-asserted-by":"crossref","unstructured":"Anand, A., Khosla, M., Singh, J., Zab, J., Zhang, Z. (2019): Asynchronous training of word embeddings for large text corpora. In: WSDM, pp. 168\u2013176","DOI":"10.1145\/3289600.3291011"},{"key":"908_CR26","unstructured":"Berg, B., Berger, D.S., McAllister, S., Grosof, I., Gunasekar, S., Lu, J., Uhlar, M., Carrig, J., Beckmann, N., Harchol-Balter, M., et\u00a0al.: The CacheLib caching engine: Design and experiences at scale. In: 14th USENIX Symposium on Operating Systems Design and Implementation (OSDI 20), pp. 753\u2013768 (2020)"},{"key":"908_CR27","unstructured":"Carlson, J.: Redis in action. Simon and Schuster (2013)"},{"key":"908_CR28","doi-asserted-by":"crossref","unstructured":"Cheng, H., Koc, L., Harmsen, J., Shaked, T., Chandra, T., Aradhye, H., Anderson, G., Corrado, G., Chai, W., Ispir, M., Anil, R., Haque, Z., Hong, L., Jain, V., Liu, X., Shah, H. (2016): Wide & deep learning for recommender systems. In: DLRS@RecSys, pp. 7\u201310","DOI":"10.1145\/2988450.2988454"},{"key":"908_CR29","unstructured":"Chetlur, S., Woolley, C., Vandermersch, P., Cohen, J., Tran, J., Catanzaro, B., Shelhamer, E. (2014): cudnn: Efficient primitives for deep learning. arXiv preprint arXiv:1410.0759"},{"key":"908_CR30","doi-asserted-by":"crossref","unstructured":"Chiang, W.L., Liu, X., Si, S., Li, Y., Bengio, S., Hsieh, C.J. (2019): Cluster-gcn: An efficient algorithm for training deep and large graph convolutional networks. In: SIGKDD, pp. 257\u2013266 doi 10.1145\/3292500.3330925","DOI":"10.1145\/3292500.3330925"},{"key":"908_CR31","doi-asserted-by":"crossref","unstructured":"Covington, P., Adams, J., Sargin, E. (2016) : Deep neural networks for youtube recommendations. In: RecSys, pp. 191\u2013198","DOI":"10.1145\/2959100.2959190"},{"key":"908_CR32","unstructured":"Dai, A.M., Olah, C., Le, Q.V. (2015): Document embedding with paragraph vectors. CoRR abs\/1507.07998"},{"key":"908_CR33","doi-asserted-by":"crossref","unstructured":"Dong, S., Miao, X., Liu, P., Wang, X., Cui, B., Li, J. (2022): Het-kg: Communication-efficient knowledge graph embedding training via hotness-aware cache. In: 2022 IEEE 38th International Conference on Data Engineering (ICDE), pp. 1754\u20131766. IEEE","DOI":"10.1109\/ICDE53745.2022.00177"},{"issue":"4","key":"908_CR34","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3149371","volume":"13","author":"G Einziger","year":"2017","unstructured":"Einziger, G., Friedman, R., Manes, B.: Tinylfu: a highly efficient cache admission policy. ACM Trans. Storage (ToS) 13(4), 1\u201331 (2017)","journal-title":"ACM Trans. Storage (ToS)"},{"issue":"124","key":"908_CR35","first-page":"5","volume":"2004","author":"B Fitzpatrick","year":"2004","unstructured":"Fitzpatrick, B.: Distributed caching with memcached. Linux journal 2004(124), 5 (2004)","journal-title":"Distributed caching with memcached. Linux journal"},{"key":"908_CR36","unstructured":"Forum, M.P.: Mpi: A message-passing interface standard. Tech. rep, USA (1994)"},{"issue":"1\u20132","key":"908_CR37","doi-asserted-by":"publisher","first-page":"267","DOI":"10.1007\/s10107-014-0846-1","volume":"155","author":"S Ghadimi","year":"2016","unstructured":"Ghadimi, S., Lan, G., Zhang, H.: Mini-batch stochastic approximation methods for nonconvex stochastic composite optimization. Math. Program. 155(1\u20132), 267\u2013305 (2016)","journal-title":"Math. Program."},{"issue":"1","key":"908_CR38","doi-asserted-by":"publisher","first-page":"12","DOI":"10.1007\/s41019-019-00115-y","volume":"5","author":"Z Gharibshah","year":"2020","unstructured":"Gharibshah, Z., Zhu, X., Hainline, A., Conway, M.: Deep learning for user interest and response prediction in online display advertising. Data Sci. Eng. 5(1), 12\u201326 (2020)","journal-title":"Data Sci. Eng."},{"key":"908_CR39","doi-asserted-by":"crossref","unstructured":"Guo, H., Tang, R., Ye, Y., Li, Z., He, X. (2017): Deepfm: A factorization-machine based neural network for CTR prediction. In: IJCAI, pp. 1725\u20131731","DOI":"10.24963\/ijcai.2017\/239"},{"issue":"5","key":"908_CR40","doi-asserted-by":"publisher","DOI":"10.1007\/s11432-019-2740-1","volume":"64","author":"Q Guo","year":"2021","unstructured":"Guo, Q., Qiu, X., Xue, X., Zhang, Z.: Syntax-guided text generation via graph neural network. Sci. China Inf. Sci. 64(5), 152102 (2021)","journal-title":"Sci. China Inf. Sci."},{"key":"908_CR41","first-page":"1024","volume":"30","author":"WL Hamilton","year":"2017","unstructured":"Hamilton, W.L., Ying, Z., Leskovec, J.: Inductive representation learning on large graphs. NeurIPS 30, 1024\u20131034 (2017)","journal-title":"NeurIPS"},{"issue":"2","key":"908_CR42","doi-asserted-by":"publisher","DOI":"10.1007\/S11432-022-3793-7","volume":"67","author":"H He","year":"2024","unstructured":"He, H., Chen, G., Chen, C.Y.: Integrating sequence and graph information for enhanced drug-target affinity prediction. Sci. China Inf. Sci. 67(2), 129101 (2024). https:\/\/doi.org\/10.1007\/S11432-022-3793-7","journal-title":"Sci. China Inf. Sci."},{"issue":"1","key":"908_CR43","doi-asserted-by":"publisher","first-page":"27","DOI":"10.1007\/s41019-019-00113-0","volume":"5","author":"J He","year":"2020","unstructured":"He, J., Liu, H., Zheng, Y., Tang, S., He, W., Du, X.: Bi-labeled LDA: inferring interest tags for non-famous users in social network. Data Sci. Eng. 5(1), 27\u201347 (2020)","journal-title":"Data Sci. Eng."},{"key":"908_CR44","first-page":"1223","volume":"26","author":"Q Ho","year":"2013","unstructured":"Ho, Q., Cipar, J., Cui, H., Lee, S., Kim, J.K., Gibbons, P.B., Gibson, G.A., Ganger, G.R., Xing, E.P.: More effective distributed ML via a stale synchronous parallel parameter server. In: NeurIPS 26, 1223\u20131231 (2013)","journal-title":"In: NeurIPS"},{"key":"908_CR45","first-page":"22118","volume":"33","author":"W Hu","year":"2020","unstructured":"Hu, W., Fey, M., Zitnik, M., Dong, Y., Ren, H., Liu, B., Catasta, M., Leskovec, J.: Open graph benchmark: datasets for machine learning on graphs. NeurIPS 33, 22118\u201322133 (2020)","journal-title":"NeurIPS"},{"issue":"3","key":"908_CR46","doi-asserted-by":"publisher","first-page":"264","DOI":"10.1007\/S41019-024-00249-8","volume":"9","author":"Y Hu","year":"2024","unstructured":"Hu, Y., Chen, C., Deng, B., Lai, Y., Lin, H., Zheng, Z., Bian, J.: Decoupling anomaly discrimination and representation learning: self-supervised learning for anomaly detection on attributed graph. Data Sci. Eng. 9(3), 264\u2013277 (2024). https:\/\/doi.org\/10.1007\/S41019-024-00249-8","journal-title":"Data Sci. Eng."},{"key":"908_CR47","doi-asserted-by":"crossref","unstructured":"Jiang, J., Cui, B., Zhang, C., Yu, L. (2017): Heterogeneity-aware distributed parameter servers. In: SIGMOD, pp. 463\u2013478","DOI":"10.1145\/3035918.3035933"},{"key":"908_CR48","doi-asserted-by":"crossref","unstructured":"Kang, W., Cheng, D.Z., Chen, T., Yi, X., Lin, D., Hong, L., Chi, E.H. (2020): Learning multi-granular quantized embeddings for large-vocab categorical features in recommender systems. In: WWW, pp. 562\u2013566. ACM \/ IW3C2","DOI":"10.1145\/3366424.3383416"},{"issue":"3","key":"908_CR49","doi-asserted-by":"publisher","first-page":"38","DOI":"10.1109\/2.268884","volume":"27","author":"R Karedla","year":"1994","unstructured":"Karedla, R., Love, J.S., Wherry, B.G.: Caching strategies to improve disk system performance. Computer 27(3), 38\u201346 (1994)","journal-title":"Computer"},{"key":"908_CR50","first-page":"1","volume":"5","author":"JK Kim","year":"2016","unstructured":"Kim, J.K., Ho, Q., Lee, S., Zheng, X., Dai, W., Gibson, G.A., Xing, E.P.: STRADS: a distributed framework for scheduled model parallel machine learning. EuroSys 5, 1\u201316 (2016)","journal-title":"EuroSys"},{"key":"908_CR51","first-page":"1","volume":"43","author":"S Kim","year":"2019","unstructured":"Kim, S., Yu, G., Park, H., Cho, S., Jeong, E., Ha, H., Lee, S., Jeong, J.S., Chun, B.: Parallax: sparsity-aware data parallel training of deep neural networks. EuroSys 43, 1\u201315 (2019)","journal-title":"EuroSys"},{"key":"908_CR52","unstructured":"Kipf, T.N., Welling, M. (2017): Semi-supervised classification with graph convolutional networks. In: ICLR"},{"key":"908_CR53","doi-asserted-by":"crossref","unstructured":"Kurniawan, D.H., Wang, R., Zulkifli, K.S., Wiranata, F.A., Bent, J., Vigfusson, Y., Gunawi, H.S. (2023): Evstore: Storage and caching capabilities for scaling embedding tables in deep recommendation systems. In: Proceedings of the 28th ACM International Conference on Architectural Support for Programming Languages and Operating Systems, 2, pp. 281\u2013294","DOI":"10.1145\/3575693.3575718"},{"key":"908_CR54","unstructured":"Lai, F., Zhang, W., Liu, R., Tsai, W., Wei, X., Hu, Y., Devkota, S., Huang, J., Park, J., Liu, X., et\u00a0al. (2023): $$\\{$$AdaEmbed$$\\}$$: Adaptive embedding for $$\\{$$Large-Scale$$\\}$$ recommendation models. In: 17th USENIX Symposium on Operating Systems Design and Implementation (OSDI 23), pp. 817\u2013831"},{"issue":"7","key":"908_CR55","doi-asserted-by":"publisher","first-page":"558","DOI":"10.1145\/359545.359563","volume":"21","author":"L Lamport","year":"1978","unstructured":"Lamport, L.: Time, clocks, and the ordering of events in a distributed system. Commun. ACM 21(7), 558\u2013565 (1978)","journal-title":"Commun. ACM"},{"key":"908_CR56","unstructured":"Lerer, A., Wu, L., Shen, J., Lacroix, T., Wehrstedt, L., Bose, A., Peysakhovich, A. (2019): Pytorch-biggraph: A large scale graph embedding system. In: MLSys"},{"key":"908_CR57","doi-asserted-by":"crossref","unstructured":"Li, M., Andersen, D.G., Park, J.W., Smola, A.J., Ahmed, A., Josifovski, V., Long, J., Shekita, E.J., Su, B. (2014): Scaling distributed machine learning with the parameter server. In: OSDI, pp. 583\u2013598","DOI":"10.1145\/2640087.2644155"},{"issue":"5","key":"908_CR58","doi-asserted-by":"publisher","first-page":"1248","DOI":"10.1109\/TC.2024.3365939","volume":"73","author":"S Li","year":"2024","unstructured":"Li, S., Wang, Y., Hanson, E., Chang, A., Ki, Y.S., Li, H.H., Chen, Y.: Ndrec: a near-data processing system for training large-scale recommendation models. IEEE Trans. Computers 73(5), 1248\u20131261 (2024)","journal-title":"IEEE Trans. Computers"},{"issue":"12","key":"908_CR59","first-page":"3005","volume":"13","author":"S Li","year":"2020","unstructured":"Li, S., Zhao, Y., Varma, R., Salpekar, O., Noordhuis, P., Li, T., Paszke, A., Smith, J., Vaughan, B., Damania, P., Chintala, S.: Pytorch distributed: experiences on accelerating data parallel training. PVLDB 13(12), 3005\u20133018 (2020)","journal-title":"PVLDB"},{"key":"908_CR60","doi-asserted-by":"crossref","unstructured":"Lian, J., Zhou, X., Zhang, F., Chen, Z., Xie, X., Sun, G. (2018): xdeepfm: Combining explicit and implicit feature interactions for recommender systems. In: SIGKDD, pp. 1754\u20131763","DOI":"10.1145\/3219819.3220023"},{"key":"908_CR61","first-page":"2737","volume":"28","author":"X Lian","year":"2015","unstructured":"Lian, X., Huang, Y., Li, Y., Liu, J.: Asynchronous parallel stochastic gradient for nonconvex optimization. In: NeurIPS 28, 2737\u20132745 (2015)","journal-title":"In: NeurIPS"},{"key":"908_CR62","doi-asserted-by":"crossref","unstructured":"Lian, X., Yuan, B., Zhu, X., Wang, Y., He, Y., Wu, H., Sun, L., Lyu, H., Liu, C., Dong, X., et\u00a0al. (2022): Persia: An open, hybrid system scaling deep learning-based recommenders up to 100 trillion parameters. In: Proceedings of the 28th ACM SIGKDD Conference on Knowledge Discovery and Data Mining, pp. 3288\u20133298","DOI":"10.1145\/3534678.3539070"},{"key":"908_CR63","first-page":"3049","volume":"80","author":"X Lian","year":"2018","unstructured":"Lian, X., Zhang, W., Zhang, C., Liu, J.: Asynchronous decentralized parallel stochastic gradient descent. ICML 80, 3049\u20133058 (2018)","journal-title":"ICML"},{"issue":"8","key":"908_CR64","doi-asserted-by":"publisher","first-page":"716","DOI":"10.14778\/2212351.2212354","volume":"5","author":"Y Low","year":"2012","unstructured":"Low, Y., Gonzalez, J., Kyrola, A., Bickson, D., Guestrin, C., Hellerstein, J.M.: Distributed graphlab: a framework for machine learning in the cloud. Proc. VLDB Endow. 5(8), 716\u2013727 (2012)","journal-title":"Proc. VLDB Endow."},{"key":"908_CR65","unstructured":"Luo, L., Zhang, B., Tsang, M., Ma, Y., Chu, C.H., Chen, Y., Li, S., Hao, Y., Zhao, Y., Lakshminarayanan, G., et al.: Disaggregated multi-tower: Topology-aware modeling technique for efficient large scale recommendation. Proceedings of Machine Learning and Systems 6, 266\u2013278 (2024)"},{"key":"908_CR66","doi-asserted-by":"crossref","unstructured":"Ma, K., Yan, X., Cai, Z., Huang, Y., Wu, Y., Cheng, J.: Fec: Efficient deep recommendation model training with flexible embedding communication. Proceedings of the ACM on Management of Data 1(2), 1\u201321 (2023)","DOI":"10.1145\/3589310"},{"key":"908_CR67","unstructured":"Matam, K.K., Ramezani, H., Wang, F., Chen, Z., Dong, Y., Ding, M., Zhao, Z., Zhang, Z., Wen, E., Eisenman, A. : QuickUpdate: a Real-Time personalization system for Large-Scale recommendation models. In: 21st USENIX Symposium on Networked Systems Design and Implementation (NSDI 24), pp. 731\u2013744"},{"key":"908_CR68","doi-asserted-by":"crossref","unstructured":"Miao, X., G\u00fcrel, N.M., Zhang, W., Han, Z., Li, B., Min, W., Rao, S.X., Ren, H., Shan, Y., Shao, Y., Wang, Y., Wu, F., Xue, H., Yang, Y., Zhang, Z., Zhao, Y., Zhang, S., Wang, Y., Cui, B., Zhang, C.: Degnn: Improving graph neural networks with graph decomposition. In: KDD, 28: 1223\u20131233. (2021)","DOI":"10.1145\/3447548.3467312"},{"key":"908_CR69","doi-asserted-by":"publisher","unstructured":"Miao, X., Ma, L., Yang, Z., Shao, Y., Cui, B., Yu, L., Jiang, J.: Cuwide: Towards efficient flow-based training for sparse wide models on gpus. TKDE pp. 1\u20131 (2020). https:\/\/doi.org\/10.1109\/TKDE.2020.3038109","DOI":"10.1109\/TKDE.2020.3038109"},{"key":"908_CR70","doi-asserted-by":"crossref","unstructured":"Miao, X., Nie, X., Shao, Y., Yang, Z., Jiang, J., Ma, L., Cui, B. (2021): Heterogeneity-aware distributed machine learning training via partial reduce. In: SIGMOD, pp. 2262\u20132270. ACM","DOI":"10.1145\/3448016.3452773"},{"issue":"1","key":"908_CR71","doi-asserted-by":"publisher","DOI":"10.1007\/s11432-022-3581-9","volume":"66","author":"X Miao","year":"2023","unstructured":"Miao, X., Nie, X., Zhang, H., Zhao, T., Cui, B.: Hetu: a highly efficient automatic parallel distributed deep learning system. Sci China. Inf Sci 66(1), 117101 (2023)","journal-title":"Sci China. Inf Sci"},{"key":"908_CR72","unstructured":"Miao, X., Oliaro, G., Zhang, Z., Cheng, X., Jin, H., Chen, T., Jia, Z. (2023): Towards efficient generative large language model serving: A survey from algorithms to systems. arXiv preprint arXiv:2312.15234"},{"key":"908_CR73","doi-asserted-by":"crossref","unstructured":"Miao, X., Shi, Y., Zhang, H., Zhang, X., Nie, X., Yang, Z., Cui, B. (2022): Het-gmp: A graph-based system approach to scaling large embedding model training. In: Proceedings of the 2022 International Conference on Management of Data, pp. 470\u2013480","DOI":"10.1145\/3514221.3517902"},{"key":"908_CR74","doi-asserted-by":"crossref","unstructured":"Miao, X., Zhang, H., Shi, Y., Nie, X., Yang, Z., Tao, Y., Cui, B.: Het: scaling out huge embedding model training via cache-enabled distributed framework. Proceedings of the VLDB Endowment 15(2), 312\u2013320 (2021)","DOI":"10.14778\/3489496.3489511"},{"issue":"2","key":"908_CR75","first-page":"1721","volume":"35","author":"X Miao","year":"2021","unstructured":"Miao, X., Zhang, W., Shao, Y., Cui, B., Chen, L., Zhang, C., Jiang, J.: Lasagne: a multi-layer graph convolutional network framework via node-aware deep architecture. IEEE Trans. Knowl. Data Eng. 35(2), 1721\u20131733 (2021)","journal-title":"IEEE Trans. Knowl. Data Eng."},{"key":"908_CR76","unstructured":"Mikolov, T., Chen, K., Corrado, G., Dean, J. (2013): Efficient estimation of word representations in vector space. In: ICLR Workshop"},{"key":"908_CR77","unstructured":"Naumov, M., Mudigere, D., Shi, H.M., Huang, J., Sundaraman, N., Park, J., Wang, X., Gupta, U., Wu, C., Azzolini, A.G., Dzhulgakov, D., Mallevich, A., Cherniavskii, I., Lu, Y., Krishnamoorthi, R., Yu, A., Kondratenko, V., Pereira, S., Chen, X., Chen, W., Rao, V., Jia, B., Xiong, L., Smelyanskiy, M. (2019): Deep learning recommendation model for personalization and recommendation systems. CoRR abs\/1906.00091"},{"issue":"10","key":"908_CR78","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1007\/s11432-018-9943-9","volume":"63","author":"H Peng","year":"2020","unstructured":"Peng, H., Li, J., Yan, H., Gong, Q., Wang, S., Liu, L., Wang, L., Ren, X.: Dynamic network embedding via incremental skip-gram with negative sampling. Sci. China Inf. Sci. 63(10), 1\u201319 (2020)","journal-title":"Sci. China Inf. Sci."},{"key":"908_CR79","doi-asserted-by":"crossref","unstructured":"Peng, Y., Zhu, Y., Chen, Y., Bao, Y., Yi, B., Lan, C., Wu, C., Guo, C.: A generic communication scheduler for distributed DNN training acceleration. In: SOSP, pp. 16\u201329 (2019)","DOI":"10.1145\/3341301.3359642"},{"key":"908_CR80","doi-asserted-by":"crossref","unstructured":"Perozzi, B., Al-Rfou, R., Skiena, S. (2014): Deepwalk: online learning of social representations. In: SIGKDD, pp. 701\u2013710","DOI":"10.1145\/2623330.2623732"},{"key":"908_CR81","first-page":"693","volume":"24","author":"B Recht","year":"2011","unstructured":"Recht, B., R\u00e9, C., Wright, S.J., Niu, F.: Hogwild: a lock-free approach to parallelizing stochastic gradient descent. NeurIPS 24, 693\u2013701 (2011)","journal-title":"NeurIPS"},{"key":"908_CR82","doi-asserted-by":"crossref","unstructured":"Renz-Wieland, A., Gemulla, R., Kaoudi, Z., Markl, V. (2022): Nups: A parameter server for machine learning with non-uniform parameter access. In: Proceedings of the 2022 International Conference on Management of Data, pp. 481\u2013495","DOI":"10.1145\/3514221.3517860"},{"key":"908_CR83","doi-asserted-by":"crossref","unstructured":"Renz-Wieland, A., Gemulla, R., Zeuch, S., Markl, V. (2020): Dynamic parameter allocation in parameter servers. Proceedings of the VLDB Endowment 13(12), 1877\u20131890","DOI":"10.14778\/3407790.3407796"},{"key":"908_CR84","doi-asserted-by":"crossref","unstructured":"Renz-Wieland, A., Kieslinger, A., Gericke, R., Gemulla, R., Kaoudi, Z., Markl, V. (2023): Good intentions: adaptive parameter management via intent signaling. In: Proceedings of the 32nd ACM International Conference on Information and Knowledge Management, pp. 2156\u20132166","DOI":"10.1145\/3583780.3614895"},{"key":"908_CR85","unstructured":"Sergeev, A., Balso, M.D. (2018): Horovod: fast and easy distributed deep learning in tensorflow. CoRR abs\/1802.05799"},{"key":"908_CR86","doi-asserted-by":"crossref","unstructured":"Sethi, G., Acun, B., Agarwal, N., Kozyrakis, C., Trippel, C., Wu, C.J. (2022): Recshard: statistical feature-based memory optimization for industry-scale neural recommendation. In: Proceedings of the 27th ACM International Conference on Architectural Support for Programming Languages and Operating Systems, pp. 344\u2013358","DOI":"10.1145\/3503222.3507777"},{"issue":"8","key":"908_CR87","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3648358","volume":"56","author":"Y Shao","year":"2024","unstructured":"Shao, Y., Li, H., Gu, X., Yin, H., Li, Y., Miao, X., Zhang, W., Cui, B., Chen, L.: Distributed graph neural network training: a survey. ACM Computing Surv. 56(8), 1\u201339 (2024)","journal-title":"ACM Computing Surv."},{"key":"908_CR88","unstructured":"Sima, C., Fu, Y., Sit, M.K., Guo, L., Gong, X., Lin, F., Wu, J., Li, Y., Rong, H., Aublin, P.L., et\u00a0al. (2022): Ekko: A $$\\{$$Large-Scale$$\\}$$ deep learning recommender system with $$\\{$$Low-Latency$$\\}$$ model update. In: 16th USENIX Symposium on Operating Systems Design and Implementation (OSDI 22), pp. 821\u2013839"},{"key":"908_CR89","doi-asserted-by":"crossref","unstructured":"Song, X., Zhang, Y., Chen, R., Chen, H. (2023): Ugache: A unified gpu cache for embedding-based deep learning. In: Proceedings of the 29th Symposium on Operating Systems Principles, pp. 627\u2013641","DOI":"10.1145\/3600006.3613169"},{"key":"908_CR90","first-page":"1","volume":"12","author":"R Wang","year":"2017","unstructured":"Wang, R., Fu, B., Fu, G., Wang, M.: Deep & cross network for ad click predictions. ADKDD. 12, 1\u20137 (2017)","journal-title":"ADKDD."},{"key":"908_CR91","doi-asserted-by":"crossref","unstructured":"Wang, X., He, X., Wang, M., Feng, F., Chua, T. (2019): Sigir. pp. 165\u2013174","DOI":"10.1145\/3331184.3331267"},{"key":"908_CR92","doi-asserted-by":"crossref","unstructured":"Wang, Z., Wang, Y., Deng, J., Zheng, D., Li, A., Ding, Y. (2024): Rap: Resource-aware automated gpu sharing for multi-gpu recommendation model training and input preprocessing. In: Proceedings of the 29th ACM International Conference on Architectural Support for Programming Languages and Operating Systems, 2: 964\u2013979","DOI":"10.1145\/3620665.3640406"},{"key":"908_CR93","doi-asserted-by":"crossref","unstructured":"Wang, Z., Wang, Y., Feng, B., Mudigere, D., Muthiah, B., Ding, Y. (2022): El-rec: Efficient large-scale recommendation model training via tensor-train embedding table. In: SC22: International Conference for High Performance Computing, Networking, Storage and Analysis, pp. 1\u201314. IEEE","DOI":"10.1109\/SC41404.2022.00075"},{"key":"908_CR94","doi-asserted-by":"crossref","unstructured":"Xie, M., Lu, Y., Lin, J., Wang, Q., Gao, J., Ren, K., Shu, J. (2022): Fleche: an efficient gpu embedding cache for personalized recommendations. In: Proceedings of the Seventeenth European Conference on Computer Systems, pp. 402\u2013416","DOI":"10.1145\/3492321.3519554"},{"key":"908_CR95","doi-asserted-by":"crossref","unstructured":"Xie, M., Lu, Y., Wang, Q., Feng, Y., Liu, J., Ren, K., Shu, J. (2023): Petps: Supporting huge embedding models with persistent memory. Proceedings of the VLDB Endowment 16(5): 1013\u20131022","DOI":"10.14778\/3579075.3579077"},{"key":"908_CR96","doi-asserted-by":"crossref","unstructured":"Xie, M., Ren, K., Lu, Y., Yang, G., Xu, Q., Wu, B., Lin, J., Ao, H., Xu, W., Shu, J. (2020): Kraken: memory-efficient continual learning for large-scale real-time recommendations. In: SC, pp. 1\u201317","DOI":"10.1109\/SC41405.2020.00025"},{"issue":"2","key":"908_CR97","doi-asserted-by":"publisher","first-page":"179","DOI":"10.1016\/J.ENG.2016.02.008","volume":"2","author":"EP Xing","year":"2016","unstructured":"Xing, E.P., Ho, Q., Xie, P., Wei, D.: Strategies and principles of distributed machine learning on big data. Engineering 2(2), 179\u2013195 (2016)","journal-title":"Engineering"},{"key":"908_CR98","unstructured":"Yang, J., Yue, Y., Vinayak, R. (2021): Segcache: a memory-efficient and scalable in-memory key-value cache for small objects. In: 18th USENIX Symposium on Networked Systems Design and Implementation (NSDI 21), pp. 503\u2013518"},{"key":"908_CR99","doi-asserted-by":"crossref","unstructured":"Yang, J., Zhang, Y., Qiu, Z., Yue, Y., Vinayak, R. (2023): Fifo queues are all you need for cache eviction. In: Proceedings of the 29th Symposium on Operating Systems Principles, pp. 130\u2013149","DOI":"10.1145\/3600006.3613147"},{"key":"908_CR100","doi-asserted-by":"crossref","unstructured":"Ye, H., Vedula, S., Chen, Y., Yang, Y., Bronstein, A., Dreslinski, R., Mudge, T., Talati, N. (2023): Grace: A scalable graph-based approach to accelerating recommendation model inference. In: Proceedings of the 28th ACM International Conference on Architectural Support for Programming Languages and Operating Systems, 3: 282\u2013301","DOI":"10.1145\/3582016.3582029"},{"key":"908_CR101","doi-asserted-by":"crossref","unstructured":"Ying, R., He, R., Chen, K., Eksombatchai, P., Hamilton, W.L., Leskovec, J. (2018): Graph convolutional neural networks for web-scale recommender systems. In: SIGKDD, pp. 974\u2013983","DOI":"10.1145\/3219819.3219890"},{"key":"908_CR102","doi-asserted-by":"crossref","unstructured":"Yu, H., Yang, S., Zhu, S.: Parallel restarted SGD with faster convergence and less communication: Demystifying why model averaging works for deep learning. In: AAAI, pp. 5693\u20135700 (2019)","DOI":"10.1609\/aaai.v33i01.33015693"},{"key":"908_CR103","doi-asserted-by":"crossref","unstructured":"Yu, L., Cui, B., Zhang, C., Shao, Y.: Lda*: A robust and large-scale topic modeling system. Proc. VLDB Endow. 10(11), 1406\u20131417 (2017)","DOI":"10.14778\/3137628.3137649"},{"key":"908_CR104","unstructured":"Zeng, C., Liao, X., Cheng, X., Tian, H., Wan, X., Wang, H., Chen, K. (2024): Accelerating neural recommendation training with embedding scheduling. In: 21st USENIX Symposium on Networked Systems Design and Implementation (NSDI 24), pp. 1141\u20131156"},{"key":"908_CR105","doi-asserted-by":"crossref","unstructured":"Zhang, H., Liu, Z., Chen, B., Zhao, Y., Zhao, T., Yang, T., Cui, B. (2024): Cafe: Towards compact, adaptive, and fast embedding for large-scale recommendation models. Proceedings of the ACM on Management of Data 2(1), 1\u201328","DOI":"10.1145\/3639306"},{"key":"908_CR106","doi-asserted-by":"crossref","unstructured":"Zhang, J., Bai, B., Lin, Y., Liang, J., Bai, K., Wang, F. (2020): General-purpose user embeddings based on mobile app usage. In: SIGKDD, 2831\u20132840","DOI":"10.1145\/3394486.3403334"},{"key":"908_CR107","doi-asserted-by":"crossref","unstructured":"Zhang, J., Chow, C. (2015): Geosoca: Exploiting geographical, social and categorical correlations for point-of-interest recommendations. In: SIGIR, pp. 443\u2013452","DOI":"10.1145\/2766462.2767711"},{"key":"908_CR108","doi-asserted-by":"crossref","unstructured":"Zhang, Y., Chen, L., Yang, S., Yuan, M., Yi, H., Zhang, J., Wang, J., Dong, J., Xu, Y., Song, Y., et\u00a0al. (2022): Picasso: Unleashing the potential of gpu-centric training for wide-and-deep recommender systems. In: 2022 IEEE 38th International Conference on Data Engineering (ICDE), pp. 3453\u20133466. IEEE","DOI":"10.1109\/ICDE53745.2022.00324"},{"key":"908_CR109","doi-asserted-by":"crossref","unstructured":"Zhang, Z., Cui, B., Shao, Y., Yu, L., Jiang, J., Miao, X. (2019): PS2: parameter server on spark. In: SIGMOD, pp. 376\u2013388","DOI":"10.1145\/3299869.3314038"},{"issue":"2","key":"908_CR110","doi-asserted-by":"publisher","first-page":"133","DOI":"10.1007\/S41019-023-00238-3","volume":"9","author":"Z Zhang","year":"2024","unstructured":"Zhang, Z., Jia, Y., Hou, Y., Yu, X.: Explicit behavior interaction with heterogeneous graph for multi-behavior recommendation. Data Sci. Eng. 9(2), 133\u2013151 (2024). https:\/\/doi.org\/10.1007\/S41019-023-00238-3","journal-title":"Data Sci. Eng."},{"key":"908_CR111","doi-asserted-by":"crossref","unstructured":"Zhao, Q., Chen, J., Chen, M., Jain, S., Beutel, A., Belletti, F., Chi, E.H. (2018): Categorical-attributes-based item classification for recommender systems. In: RecSys, pp. 320\u2013328. ACM","DOI":"10.1145\/3240323.3240367"},{"key":"908_CR112","unstructured":"Zhao, W., Xie, D., Jia, R., Qian, Y., Ding, R., Sun, M., Li, P. (2020): Distributed hierarchical GPU parameter server for massive scale deep learning ads systems. In: MLSys"},{"key":"908_CR113","doi-asserted-by":"crossref","unstructured":"Zhou, G., Zhu, X., Song, C., Fan, Y., Zhu, H., Ma, X., Yan, Y., Jin, J., Li, H., Gai, K. (2018): Deep interest network for click-through rate prediction. In: SIGKDD, pp. 1059\u20131068","DOI":"10.1145\/3219819.3219823"},{"issue":"2","key":"908_CR114","doi-asserted-by":"publisher","DOI":"10.1007\/S11432-021-3529-9","volume":"66","author":"Y Zhou","year":"2023","unstructured":"Zhou, Y., Chen, C., Wang, Y., Han, T., Chen, T.: Context-aware API recommendation using tensor factorization. Sci. China Inf. Sci. 66(2), 122101 (2023). https:\/\/doi.org\/10.1007\/S11432-021-3529-9","journal-title":"Sci. China Inf. Sci."},{"key":"908_CR115","doi-asserted-by":"crossref","unstructured":"Zou, Y., Ding, Z., Shi, J., Guo, S., Su, C., Zhang, Y.: Embedx: A versatile, efficient and scalable platform to embed both graphs and high-dimensional sparse data. Proceedings of the VLDB Endowment 16(12), 3543\u20133556 (2023)","DOI":"10.14778\/3611540.3611546"}],"container-title":["The VLDB Journal"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00778-025-00908-w.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00778-025-00908-w\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00778-025-00908-w.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,5,15]],"date-time":"2025-05-15T11:29:34Z","timestamp":1747308574000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00778-025-00908-w"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,3,5]]},"references-count":115,"journal-issue":{"issue":"3","published-print":{"date-parts":[[2025,5]]}},"alternative-id":["908"],"URL":"https:\/\/doi.org\/10.1007\/s00778-025-00908-w","relation":{},"ISSN":["1066-8888","0949-877X"],"issn-type":[{"value":"1066-8888","type":"print"},{"value":"0949-877X","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,3,5]]},"assertion":[{"value":"30 June 2024","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"11 January 2025","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"14 February 2025","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"5 March 2025","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}],"article-number":"27"}}