{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,22]],"date-time":"2025-06-22T04:03:28Z","timestamp":1750565008472,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":44,"publisher":"ACM","content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,6,21]]},"DOI":"10.1145\/3695053.3731105","type":"proceedings-article","created":{"date-parts":[[2025,6,20]],"date-time":"2025-06-20T16:46:17Z","timestamp":1750437977000},"page":"1880-1893","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["TRACI: Network Acceleration of Input-Dynamic Communication for Large-Scale Deep Learning Recommendation Model"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-1280-4781","authenticated-orcid":false,"given":"Guyue","family":"Huang","sequence":"first","affiliation":[{"name":"University of California, Santa Barbara, Santa Clara, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-4716-1374","authenticated-orcid":false,"given":"Hao","family":"Li","sequence":"additional","affiliation":[{"name":"University of Minnesota, Minneapolis, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-9861-0469","authenticated-orcid":false,"given":"Le","family":"Qin","sequence":"additional","affiliation":[{"name":"Hong Kong University of Science and Technology (Guangzhou), Guangzhou, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4011-6668","authenticated-orcid":false,"given":"Jiayi","family":"Huang","sequence":"additional","affiliation":[{"name":"Hong Kong University of Science and Technology (Guangzhou), Guangzhou, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-9536-1894","authenticated-orcid":false,"given":"Yangwook","family":"Kang","sequence":"additional","affiliation":[{"name":"Samsung Electronics, San Jose, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8716-5793","authenticated-orcid":false,"given":"Yufei","family":"Ding","sequence":"additional","affiliation":[{"name":"University of California, San Diego, San Diego, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2093-1788","authenticated-orcid":false,"given":"Yuan","family":"Xie","sequence":"additional","affiliation":[{"name":"Hong Kong University of Science and Technology, Hong Kong, China"}]}],"member":"320","published-online":{"date-parts":[[2025,6,20]]},"reference":[{"key":"e_1_3_3_2_2_2","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA51647.2021.00072"},{"key":"e_1_3_3_2_3_2","doi-asserted-by":"crossref","unstructured":"Muhammad Adnan Yassaman\u00a0Ebrahimzadeh Maboud Divya Mahajan and Prashant\u00a0J Nair. 2021. Accelerating recommendation system training by leveraging popular choices. Proceedings of the VLDB Endowment 15 1 (2021) 127\u2013140.","DOI":"10.14778\/3485450.3485462"},{"key":"e_1_3_3_2_4_2","doi-asserted-by":"publisher","DOI":"10.1109\/ISPASS.2009.4919636"},{"key":"e_1_3_3_2_5_2","unstructured":"Ehsan\u00a0K Ardestani Changkyu Kim Seung\u00a0Jae Lee Luoshang Pan Valmiki Rampersad Jens Axboe Banit Agrawal Fuxun Yu Ansha Yu Trung Le et\u00a0al. 2021. Supporting Massive DLRM Inference Through Software Defined Memory. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2110.11489 (2021)."},{"key":"e_1_3_3_2_6_2","unstructured":"Ge Chen Gaoxiong Zeng and Li Chen. 2021. P4com: In-network computation with programmable switches. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2107.13694 (2021)."},{"key":"e_1_3_3_2_7_2","unstructured":"Li Chen Ge Chen Justinas Lingys and Kai Chen. 2018. Programmable switch as a parallel computing device. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1803.01491 (2018)."},{"key":"e_1_3_3_2_8_2","doi-asserted-by":"publisher","DOI":"10.1145\/2988450.2988454"},{"key":"e_1_3_3_2_9_2","doi-asserted-by":"publisher","DOI":"10.1145\/2959100.2959190"},{"key":"e_1_3_3_2_10_2","doi-asserted-by":"publisher","DOI":"10.1145\/3458817.3476178"},{"key":"e_1_3_3_2_11_2","unstructured":"Nadeen Gebara Manya Ghobadi and Paolo Costa. 2021. In-network aggregation for shared machine learning clusters. Proceedings of Machine Learning and Systems 3 (2021) 829\u2013844."},{"key":"e_1_3_3_2_12_2","doi-asserted-by":"crossref","unstructured":"Carlos\u00a0A Gomez-Uribe and Neil Hunt. 2015. The netflix recommender system: Algorithms business value and innovation. ACM Transactions on Management Information Systems (TMIS) 6 4 (2015) 1\u201319.","DOI":"10.1145\/2843948"},{"key":"e_1_3_3_2_13_2","doi-asserted-by":"publisher","DOI":"10.1145\/3404835.3462976"},{"key":"e_1_3_3_2_14_2","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA47549.2020.00047"},{"key":"e_1_3_3_2_15_2","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2018.00059"},{"key":"e_1_3_3_2_16_2","doi-asserted-by":"publisher","DOI":"10.1109\/HCS55958.2022.9895480"},{"key":"e_1_3_3_2_17_2","doi-asserted-by":"publisher","DOI":"10.1145\/3326937.3341255"},{"key":"e_1_3_3_2_18_2","doi-asserted-by":"publisher","DOI":"10.1145\/3132747.3132764"},{"key":"e_1_3_3_2_19_2","doi-asserted-by":"publisher","DOI":"10.1145\/3579371.3589350"},{"key":"e_1_3_3_2_20_2","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA52012.2021.00010"},{"key":"e_1_3_3_2_21_2","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA52012.2021.00059"},{"key":"e_1_3_3_2_22_2","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA45697.2020.00070"},{"key":"e_1_3_3_2_23_2","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA45697.2020.00085"},{"key":"e_1_3_3_2_24_2","doi-asserted-by":"publisher","DOI":"10.1145\/3352460.3358284"},{"key":"e_1_3_3_2_25_2","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA51647.2021.00029"},{"key":"e_1_3_3_2_26_2","doi-asserted-by":"publisher","DOI":"10.1145\/3470496.3527386"},{"key":"e_1_3_3_2_27_2","volume-title":"CIDR","author":"Lerner Alberto","year":"2019","unstructured":"Alberto Lerner, Rana Hussein, Philippe Cudr\u00e9-Mauroux, and U eXascale Infolab. 2019. The Case for Network Accelerated Query Processing.. In CIDR."},{"key":"e_1_3_3_2_28_2","first-page":"387","volume-title":"14th USENIX Symposium on Operating Systems Design and Implementation (OSDI 20)","author":"Li Jialin","year":"2020","unstructured":"Jialin Li, Jacob Nelson, Ellis Michael, Xin Jin, and Dan\u00a0RK Ports. 2020. Pegasus: Tolerating Skewed Workloads in Distributed Storage with { In-Network} Coherence Directories. In 14th USENIX Symposium on Operating Systems Design and Implementation (OSDI 20). 387\u2013406."},{"key":"e_1_3_3_2_29_2","doi-asserted-by":"publisher","DOI":"10.1145\/3307650.3322259"},{"key":"e_1_3_3_2_30_2","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2018.00023"},{"key":"e_1_3_3_2_31_2","doi-asserted-by":"publisher","DOI":"10.1145\/3037697.3037731"},{"key":"e_1_3_3_2_32_2","first-page":"143","volume-title":"17th USENIX Conference on File and Storage Technologies (FAST 19)","author":"Liu Zaoxing","year":"2019","unstructured":"Zaoxing Liu, Zhihao Bai, Zhenming Liu, Xiaozhou Li, Changhoon Kim, Vladimir Braverman, Xin Jin, and Ion Stoica. 2019. { DistCache} : Provable Load Balancing for { Large-Scale} Storage Systems with Distributed Caching. In 17th USENIX Conference on File and Storage Technologies (FAST 19). 143\u2013157."},{"key":"e_1_3_3_2_33_2","unstructured":"Meta. 2022. TorchRec. github.com\/pytorch\/torchrec"},{"key":"e_1_3_3_2_34_2","doi-asserted-by":"publisher","DOI":"10.1145\/3470496.3533727"},{"key":"e_1_3_3_2_35_2","unstructured":"Maxim Naumov John Kim Dheevatsa Mudigere Srinivas Sridharan Xiaodong Wang Whitney Zhao Serhat Yilmaz Changkyu Kim Hector Yuen Mustafa Ozdal et\u00a0al. 2020. Deep learning training in facebook data centers: Design of scale-up and scale-out systems. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2003.09518 (2020)."},{"key":"e_1_3_3_2_36_2","unstructured":"Maxim Naumov Dheevatsa Mudigere Hao-Jun\u00a0Michael Shi Jianyu Huang Narayanan Sundaraman Jongsoo Park Xiaodong Wang Udit Gupta Carole-Jean Wu Alisson\u00a0G Azzolini et\u00a0al. 2019. Deep learning recommendation model for personalization and recommendation systems. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1906.00091 (2019)."},{"key":"e_1_3_3_2_37_2","unstructured":"Nvidia. [n. d.]. Nvidia Merlin HugeCTR. https:\/\/developer.nvidia.com\/nvidia-merlin\/hugectr"},{"key":"e_1_3_3_2_38_2","unstructured":"NVIDIA. 2022. NVIDIA H100 Whitepaper. https:\/\/resources.nvidia.com\/en-us-tensor-core"},{"key":"e_1_3_3_2_39_2","doi-asserted-by":"publisher","DOI":"10.1109\/ISPASS48437.2020.00018"},{"key":"e_1_3_3_2_40_2","doi-asserted-by":"publisher","DOI":"10.1145\/3503222.3507777"},{"key":"e_1_3_3_2_41_2","doi-asserted-by":"crossref","unstructured":"Brent Smith and Greg Linden. 2017. Two decades of recommender systems at Amazon. com. Ieee internet computing 21 3 (2017) 12\u201318.","DOI":"10.1109\/MIC.2017.72"},{"key":"e_1_3_3_2_42_2","doi-asserted-by":"publisher","DOI":"10.1145\/3318464.3389698"},{"key":"e_1_3_3_2_43_2","first-page":"277","volume-title":"19th USENIX Conference on File and Storage Technologies (FAST 21)","author":"Wang Qing","year":"2021","unstructured":"Qing Wang, Youyou Lu, Erci Xu, Junru Li, Youmin Chen, and Jiwu Shu. 2021. Concordia: Distributed Shared Memory with In-Network Cache Coherence. In 19th USENIX Conference on File and Storage Technologies (FAST 21). 277\u2013292."},{"key":"e_1_3_3_2_44_2","doi-asserted-by":"publisher","DOI":"10.1145\/3357384.3358045"},{"key":"e_1_3_3_2_45_2","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.33015941"}],"event":{"name":"ISCA '25: Proceedings of the 52nd Annual International Symposium on Computer Architecture","sponsor":["SIGARCH ACM Special Interest Group on Computer Architecture"],"location":"Tokyo Japan","acronym":"SIGARCH '25"},"container-title":["Proceedings of the 52nd Annual International Symposium on Computer Architecture"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3695053.3731105","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,21]],"date-time":"2025-06-21T11:02:18Z","timestamp":1750503738000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3695053.3731105"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,6,20]]},"references-count":44,"alternative-id":["10.1145\/3695053.3731105","10.1145\/3695053"],"URL":"https:\/\/doi.org\/10.1145\/3695053.3731105","relation":{},"subject":[],"published":{"date-parts":[[2025,6,20]]},"assertion":[{"value":"2025-06-20","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}