{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,5]],"date-time":"2026-03-05T15:34:14Z","timestamp":1772724854972,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":54,"publisher":"ACM","license":[{"start":{"date-parts":[[2025,3,30]],"date-time":"2025-03-30T00:00:00Z","timestamp":1743292800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/501100006374","name":"NSF (National Science Foundation)","doi-asserted-by":"publisher","award":["221108,1931531, 1955815, 1763681, 2116962, 2122155, 2028929"],"award-info":[{"award-number":["221108,1931531, 1955815, 1763681, 2116962, 2122155, 2028929"]}],"id":[{"id":"10.13039\/501100006374","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,3,30]]},"DOI":"10.1145\/3676641.3716003","type":"proceedings-article","created":{"date-parts":[[2025,3,27]],"date-time":"2025-03-27T16:47:32Z","timestamp":1743094052000},"page":"589-603","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":2,"title":["Load and MLP-Aware Thread Orchestration for Recommendation Systems Inference on CPUs"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0003-4017-2093","authenticated-orcid":false,"given":"Rishabh","family":"Jain","sequence":"first","affiliation":[{"name":"The Pennsylvania State University, University Park, PA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7033-336X","authenticated-orcid":false,"given":"Teyuh","family":"Chou","sequence":"additional","affiliation":[{"name":"Advanced Micro Devices, Inc., Santa Clara, CA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-4482-3115","authenticated-orcid":false,"given":"Onur","family":"Kayiran","sequence":"additional","affiliation":[{"name":"Advanced Micro Devices, Inc., Bristol, RI, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-9835-1254","authenticated-orcid":false,"given":"John","family":"Kalamatianos","sequence":"additional","affiliation":[{"name":"Advanced Micro Devices, Inc., Boxborough, MA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4616-0144","authenticated-orcid":false,"given":"Gabriel H.","family":"Loh","sequence":"additional","affiliation":[{"name":"Advanced Micro Devices, Inc., Bellevue, WA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9940-9951","authenticated-orcid":false,"given":"Mahmut T.","family":"Kandemir","sequence":"additional","affiliation":[{"name":"The Pennsylvania State University, University Park, PA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4746-7578","authenticated-orcid":false,"given":"Chita R.","family":"Das","sequence":"additional","affiliation":[{"name":"The Pennsylvania State University, University Park, PA, USA"}]}],"member":"320","published-online":{"date-parts":[[2025,3,30]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA59077.2024.00081"},{"key":"e_1_3_2_1_2_1","unstructured":"Amazon. 2024. Amazon Recommendation System. \"https:\/\/aws. amazon.com\/personalize\/\"."},{"key":"e_1_3_2_1_3_1","unstructured":"AMD. 2022. AMD Epyc 9654 CPU code-named Genoa. \"https:\/\/www. amd.com\/en\/product\/12191\"."},{"key":"e_1_3_2_1_4_1","volume-title":"Understanding scaling laws for recommendation models. arXiv preprint arXiv:2208.08489","author":"Ardalani Newsha","year":"2022","unstructured":"Newsha Ardalani, Carole-JeanWu, Zeliang Chen, Bhargav Bhushanam, and Adnan Aziz. 2022. Understanding scaling laws for recommendation models. arXiv preprint arXiv:2208.08489 (2022)."},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1145\/3652892.3700760"},{"key":"e_1_3_2_1_6_1","volume-title":"Yusen Li, and Pavan Balaji.","author":"Chen Sitian","year":"2024","unstructured":"Sitian Chen, Haobin Tan, Amelie Chi Zhou, Yusen Li, and Pavan Balaji. 2024. UpDLRM: Accelerating Personalized Recommendation using Real-World PIM Architecture. arXiv preprint arXiv:2406.13941 (2024)."},{"key":"e_1_3_2_1_7_1","unstructured":"Chips and Cheese. 2024. Ryzen-9950x. \"https:\/\/chipsandcheese.com\/ p\/amds-ryzen-9950x-zen-5-on-desktop\"."},{"key":"e_1_3_2_1_8_1","volume-title":"ElasticRec: A Microservice-based Model Serving Architecture Enabling Elastic Resource Scaling for Recommendation Models. arXiv preprint arXiv:2406.06955","author":"Choi Yujeong","year":"2024","unstructured":"Yujeong Choi, Jiin Kim, and Minsoo Rhu. 2024. ElasticRec: A Microservice-based Model Serving Architecture Enabling Elastic Resource Scaling for Recommendation Models. arXiv preprint arXiv:2406.06955 (2024)."},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1145\/3579371.3589348"},{"key":"e_1_3_2_1_10_1","unstructured":"Google. 2024. Google Q2 2024 Financial Report. \"https:\/\/abc.xyz\/assets\/ ae\/e9\/753110054014b6de4d620a2853f6\/goog-10-q-q2--2024.pdf\"."},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA45697.2020.00084"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1145\/3466752.3480127"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA47549.2020.00047"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA47549.2020.00047"},{"key":"e_1_3_2_1_15_1","unstructured":"GVR. 2024. Recommendation Engine Market Size. \"https: \/\/www.grandviewresearch.com\/industry-analysis\/recommendationengine- market-report\"."},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1109\/IISWC50251.2020.00024"},{"key":"e_1_3_2_1_17_1","unstructured":"Hulu. 2024. Hulu Recommendation System. https:\/\/help.hulu.com\/ article\/hulu-personalized-recommendations#: :text=While%20you' re%20looking%20for get%20to%20know%20you%20better."},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA45697.2020.00083"},{"key":"e_1_3_2_1_19_1","unstructured":"Intel. 2024. Intel Extension For Pytorch. \"https:\/\/github.com\/intel\/intelextension- for-pytorch\"."},{"key":"e_1_3_2_1_20_1","volume-title":"Pushing the Performance Envelope of DNN-based Recommendation Systems Inference on GPUs. In 2024 57th IEEE\/ACM International Symposium on Microarchitecture (MICRO). IEEE, 1217--1232","author":"Jain Rishabh","year":"2024","unstructured":"Rishabh Jain, Vivek M Bhasi, Adwait Jog, Anand Sivasubramaniam, Mahmut T Kandemir, and Chita R Das. 2024. Pushing the Performance Envelope of DNN-based Recommendation Systems Inference on GPUs. In 2024 57th IEEE\/ACM International Symposium on Microarchitecture (MICRO). IEEE, 1217--1232."},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1145\/3579371.3589112"},{"key":"e_1_3_2_1_22_1","volume-title":"Asian Conference on Machine Learning. PMLR, 518--533","author":"Jha Gopi Krishna","year":"2024","unstructured":"Gopi Krishna Jha, Anthony Thomas, Nilesh Jain, Sameh Gobriel, Tajana Rosing, and Ravi Iyer. 2024. Mem-Rec: Memory Efficient Recommendation System using Alternative Representation. In Asian Conference on Machine Learning. PMLR, 518--533."},{"key":"e_1_3_2_1_23_1","volume-title":"SPACE: Locality-Aware Processing in Heterogeneous Memory for Personalized Recommendations. In 2021 ACM\/IEEE 48th Annual International Symposium on Computer Architecture (ISCA). 679--691","author":"Kal Hongju","year":"2021","unstructured":"Hongju Kal, Seokmin Lee, Gun Ko, and Won Woo Ro. 2021. SPACE: Locality-Aware Processing in Heterogeneous Memory for Personalized Recommendations. In 2021 ACM\/IEEE 48th Annual International Symposium on Computer Architecture (ISCA). 679--691. doi:10.1109\/ ISCA52012.2021.00059"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA45697.2020.00070"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA53966.2022.00019"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1109\/MM.2021.3097700"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1145\/3352460.3358284"},{"key":"e_1_3_2_1_28_1","volume-title":"17th USENIX Symposium on Operating Systems Design and Implementation (OSDI 23)","author":"Lai Fan","year":"2023","unstructured":"Fan Lai, Wei Zhang, Rui Liu, William Tsai, Xiaohan Wei, Yuxi Hu, Sabin Devkota, Jianyu Huang, Jongsoo Park, Xing Liu, Zeliang Chen, EllieWen, Paul Rivera, Jie You, Chun cheng Jason Chen, and Mosharaf Chowdhury. 2023. {AdaEmbed}: Adaptive Embedding for {Large- Scale} Recommendation Models. In 17th USENIX Symposium on Operating Systems Design and Implementation (OSDI 23). USENIX Association, Boston, MA, 817--831. https:\/\/www.usenix.org\/conference\/ osdi23\/presentation\/lai"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1145\/3445814.3446717"},{"key":"e_1_3_2_1_30_1","volume-title":"21st USENIX Symposium on Networked Systems Design and Implementation (NSDI 24)","author":"Matam Kiran Kumar","year":"2024","unstructured":"Kiran Kumar Matam, Hani Ramezani, Fan Wang, Zeliang Chen, Yue Dong, Maomao Ding, Zhiwei Zhao, Zhengyu Zhang, Ellie Wen, and Assaf Eisenman. 2024. {QuickUpdate}: a {Real-Time} Personalization System for {Large-Scale} Recommendation Models. In 21st USENIX Symposium on Networked Systems Design and Implementation (NSDI 24). 731--744."},{"key":"e_1_3_2_1_31_1","unstructured":"Meta. 2023. Embedding lookup Production dataset. \"https:\/\/github. com\/facebookresearch\/dlrm_datasets\"."},{"key":"e_1_3_2_1_32_1","unstructured":"Meta. 2024. DLRM using PyTorch. \"https:\/\/github.com\/ facebookresearch\/dlrm\"."},{"key":"e_1_3_2_1_33_1","unstructured":"Meta. 2024. Facebook Recommendation System. \"https: \/\/ai.meta.com\/blog\/ai-unconnected-content-recommendationsfacebook- instagram\/\"."},{"key":"e_1_3_2_1_34_1","unstructured":"Meta. 2024. Instagram Recommendation System. \"https: \/\/engineering.fb.com\/2023\/08\/09\/ml-applications\/scaling-instagramexplore- recommendations-system\/\"."},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"crossref","unstructured":"Meta. 2024. Meta Q1 2024 Financial Report. \"https: \/\/s21.q4cdn.com\/399680738\/files\/doc_financials\/2024\/q1\/EarningsPresentation-Q1--2024.pdf\".","DOI":"10.1016\/j.fopow.2024.05.046"},{"key":"e_1_3_2_1_36_1","unstructured":"Meta. 2024. Multi-threading in PyTorch. \"https:\/\/pytorch.org\/docs\/ stable\/notes\/cpu_threading_torchscript_inference.html\"."},{"key":"e_1_3_2_1_37_1","unstructured":"Meta. 2024. PyTorch DLRM. \"https:\/\/github.com\/facebookresearch\/ dlrm\/\"."},{"key":"e_1_3_2_1_38_1","unstructured":"Meta. 2024. PyTorch Embedding Bag Operator. \"https: \/\/github.com\/pytorch\/pytorch\/blob\/main\/aten\/src\/ATen\/native\/ EmbeddingBag.cpp\"."},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1145\/3470496.3533727"},{"key":"e_1_3_2_1_40_1","unstructured":"Krishnakumar Nair Avinash-Chandra Pandey Siddappa Karabannavar Meena Arunachalam John Kalamatianos Varun Agrawal Saurabh Gupta Ashish Sirasao Elliott Delaye Steve Reinhardt et al. 2024. Parallelization Strategies for DLRM Embedding Bag Operator on AMD CPUs. IEEE Micro (2024)."},{"key":"e_1_3_2_1_41_1","unstructured":"Maxim Naumov Dheevatsa Mudigere Hao-Jun Michael Shi Jianyu Huang Narayanan Sundaraman Jongsoo Park Xiaodong Wang Udit Gupta Carole-Jean Wu Alisson G. Azzolini Dmytro Dzhulgakov Andrey Mallevich Ilia Cherniavskii Yinghai Lu Raghuraman Krishnamoorthi Ansha Yu Volodymyr Kondratenko Stephanie Pereira Xianjie Chen Wenlin Chen Vijay Rao Bill Jia Liang Xiong and Misha Smelyanskiy. 2019. Deep Learning Recommendation Model for Personalization and Recommendation Systems. arXiv:1906.00091 [cs.IR]"},{"key":"e_1_3_2_1_42_1","unstructured":"Netflix. 2024. Netflix Recommendation System. \"https:\/\/research. netflix.com\/research-area\/recommendations\"."},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISCAS58744.2024.10558132"},{"key":"e_1_3_2_1_44_1","unstructured":"SemiAnalysis. 2024. The Memory Wall. \"https:\/\/www.semianalysis. com\/p\/the-memory-wall\"."},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1145\/3503222.3507777"},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.1145\/3620665.3640405"},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA53966.2022.00081"},{"key":"e_1_3_2_1_48_1","unstructured":"TikTok. 2024. TikTok Recommendation System. \"https:\/\/www.tiktok. com\/transparency\/en-us\/recommendation-system\/\"."},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"publisher","DOI":"10.1145\/3445814.3446763"},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"publisher","DOI":"10.1145\/3582016.3582029"},{"key":"e_1_3_2_1_51_1","first-page":"15190","article-title":"Dreamshard: Generalizable embedding table placement for recommender systems","volume":"35","author":"Zha Daochen","year":"2022","unstructured":"Daochen Zha, Louis Feng, Qiaoyu Tan, Zirui Liu, Kwei-Herng Lai, Bhargav Bhushanam, Yuandong Tian, Arun Kejariwal, and Xia Hu. 2022. Dreamshard: Generalizable embedding table placement for recommender systems. Advances in Neural Information Processing Systems 35 (2022), 15190--15203.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_52_1","doi-asserted-by":"publisher","DOI":"10.1145\/1880037.1880038"},{"key":"e_1_3_2_1_53_1","doi-asserted-by":"publisher","DOI":"10.1109\/HiPC.2012.6507475"},{"key":"e_1_3_2_1_54_1","volume-title":"Managing Memory Tiers with CXL in Virtualized Environments. In Symposium on Operating Systems Design and Implementation.","author":"Zhong Yuhong","year":"2024","unstructured":"Yuhong Zhong, Daniel S Berger, Carl Waldspurger, Ishwar Agarwal, Rajat Agarwal, Frank Hady, Karthik Kumar, Mark D Hill, Mosharaf Chowdhury, and Asaf Cidon. 2024. Managing Memory Tiers with CXL in Virtualized Environments. In Symposium on Operating Systems Design and Implementation."}],"event":{"name":"ASPLOS '25: 30th ACM International Conference on Architectural Support for Programming Languages and Operating Systems","location":"Rotterdam Netherlands","acronym":"ASPLOS '25","sponsor":["SIGPLAN ACM Special Interest Group on Programming Languages","SIGOPS ACM Special Interest Group on Operating Systems","SIGARCH ACM Special Interest Group on Computer Architecture"]},"container-title":["Proceedings of the 30th ACM International Conference on Architectural Support for Programming Languages and Operating Systems, Volume 2"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3676641.3716003","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3676641.3716003","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,21]],"date-time":"2025-08-21T11:08:07Z","timestamp":1755774487000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3676641.3716003"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,3,30]]},"references-count":54,"alternative-id":["10.1145\/3676641.3716003","10.1145\/3676641"],"URL":"https:\/\/doi.org\/10.1145\/3676641.3716003","relation":{},"subject":[],"published":{"date-parts":[[2025,3,30]]},"assertion":[{"value":"2025-03-30","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}