{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,6]],"date-time":"2026-06-06T01:14:10Z","timestamp":1780708450082,"version":"3.54.1"},"publisher-location":"New York, NY, USA","reference-count":76,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,6,17]],"date-time":"2023-06-17T00:00:00Z","timestamp":1686960000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/100000001","name":"National Science Foundation","doi-asserted-by":"publisher","award":["#1931531"],"award-info":[{"award-number":["#1931531"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000001","name":"National Science Foundation","doi-asserted-by":"publisher","award":["#2028929"],"award-info":[{"award-number":["#2028929"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000001","name":"National Science Foundation","doi-asserted-by":"publisher","award":["#2116962"],"award-info":[{"award-number":["#2116962"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000001","name":"National Science Foundation","doi-asserted-by":"publisher","award":["#2211018"],"award-info":[{"award-number":["#2211018"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000001","name":"National Science Foundation","doi-asserted-by":"publisher","award":["#1912495"],"award-info":[{"award-number":["#1912495"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000001","name":"National Science Foundation","doi-asserted-by":"publisher","award":["#1909004"],"award-info":[{"award-number":["#1909004"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000001","name":"National Science Foundation","doi-asserted-by":"publisher","award":["#1714389"],"award-info":[{"award-number":["#1714389"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]},{"name":"NSF (National Science Foundation) Chameleon Cloud","award":["CH-819640"],"award-info":[{"award-number":["CH-819640"]}]},{"name":"NSF (National Science Foundation) Chameleon Cloud","award":["CHI-220948"],"award-info":[{"award-number":["CHI-220948"]}]},{"DOI":"10.13039\/100000185","name":"Defense Advanced Research Projects Agency","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100000185","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,6,17]]},"DOI":"10.1145\/3579371.3589112","type":"proceedings-article","created":{"date-parts":[[2023,6,16]],"date-time":"2023-06-16T20:25:28Z","timestamp":1686947128000},"page":"1-15","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":27,"title":["Optimizing CPU Performance for Recommendation Systems At-Scale"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0003-4017-2093","authenticated-orcid":false,"given":"Rishabh","family":"Jain","sequence":"first","affiliation":[{"name":"Pennsylvania State University, University Park, PA, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9954-7986","authenticated-orcid":false,"given":"Scott","family":"Cheng","sequence":"additional","affiliation":[{"name":"Pennsylvania State University, University Park, PA, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-3757-9904","authenticated-orcid":false,"given":"Vishwas","family":"Kalagi","sequence":"additional","affiliation":[{"name":"Pennsylvania State University, University Park, PA, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9886-7419","authenticated-orcid":false,"given":"Vrushabh","family":"Sanghavi","sequence":"additional","affiliation":[{"name":"Intel, Portland, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-1932-8997","authenticated-orcid":false,"given":"Samvit","family":"Kaul","sequence":"additional","affiliation":[{"name":"Intel, Folsom, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3155-6269","authenticated-orcid":false,"given":"Meena","family":"Arunachalam","sequence":"additional","affiliation":[{"name":"Intel, Portland, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0321-8406","authenticated-orcid":false,"given":"Kiwan","family":"Maeng","sequence":"additional","affiliation":[{"name":"Pennsylvania State University, University Park, PA, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5525-7204","authenticated-orcid":false,"given":"Adwait","family":"Jog","sequence":"additional","affiliation":[{"name":"William &amp; Mary, Williamsburg, VA, USA"},{"name":"University of Virginia, Charlottesville, VA, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6173-687X","authenticated-orcid":false,"given":"Anand","family":"Sivasubramaniam","sequence":"additional","affiliation":[{"name":"Pennsylvania State University, University Park, PA, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9940-9951","authenticated-orcid":false,"given":"Mahmut Taylan","family":"Kandemir","sequence":"additional","affiliation":[{"name":"Pennsylvania State University, University Park, PA, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4746-7578","authenticated-orcid":false,"given":"Chita R.","family":"Das","sequence":"additional","affiliation":[{"name":"Pennsylvania State University, University Park, PA, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2023,6,17]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"2021 IEEE International Symposium on High-Performance Computer Architecture (HPCA). IEEE, 802--814","author":"Acun Bilge","year":"2021","unstructured":"Bilge Acun , Matthew Murphy , Xiaodong Wang , Jade Nie , Carole-Jean Wu , and Kim Hazelwood . 2021 . Understanding training efficiency of deep learning recommendation models at scale . In 2021 IEEE International Symposium on High-Performance Computer Architecture (HPCA). IEEE, 802--814 . Bilge Acun, Matthew Murphy, Xiaodong Wang, Jade Nie, Carole-Jean Wu, and Kim Hazelwood. 2021. Understanding training efficiency of deep learning recommendation models at scale. In 2021 IEEE International Symposium on High-Performance Computer Architecture (HPCA). IEEE, 802--814."},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1109\/CGO.2017.7863749"},{"key":"e_1_3_2_1_3_1","unstructured":"AMD. 2021. AMD EPYC 7763. \"https:\/\/www.amd.com\/en\/products\/cpu\/amdepyc-7763\".  AMD. 2021. AMD EPYC 7763. \"https:\/\/www.amd.com\/en\/products\/cpu\/amdepyc-7763\"."},{"key":"e_1_3_2_1_4_1","unstructured":"AMD. 2022. AMD Zen3 3D V-Cache. \"https:\/\/www.amd.com\/en\/pressreleases\/2022-03-21-3rd-gen-amd-epyc-processors-amd-3d-v-cachetechnology-deliver-outstanding\".  AMD. 2022. AMD Zen3 3D V-Cache. \"https:\/\/www.amd.com\/en\/pressreleases\/2022-03-21-3rd-gen-amd-epyc-processors-amd-3d-v-cachetechnology-deliver-outstanding\"."},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2019.00051"},{"key":"e_1_3_2_1_6_1","volume-title":"Proceedings of the 13th Symposium on Cloud Computing. 257--272","author":"Bhasi Vivek M","year":"2022","unstructured":"Vivek M Bhasi , Jashwant Raj Gunasekaran , Aakash Sharma , Mahmut Taylan Kandemir , and Chita Das . 2022 . Cypress: input size-sensitive container provisioning and request scheduling for serverless platforms . In Proceedings of the 13th Symposium on Cloud Computing. 257--272 . Vivek M Bhasi, Jashwant Raj Gunasekaran, Aakash Sharma, Mahmut Taylan Kandemir, and Chita Das. 2022. Cypress: input size-sensitive container provisioning and request scheduling for serverless platforms. In Proceedings of the 13th Symposium on Cloud Computing. 257--272."},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1145\/3472883.3486992"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1145\/106975.106979"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1145\/2988450.2988454"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1145\/2959100.2959190"},{"key":"e_1_3_2_1_11_1","first-page":"156","article-title":"Accelerating slide deep learning on modern cpus: Vectorization, quantizations, memory optimizations, and more","volume":"3","author":"Daghaghi Shabnam","year":"2021","unstructured":"Shabnam Daghaghi , Nicholas Meisburger , Mengnan Zhao , and Anshumali Shrivastava . 2021 . Accelerating slide deep learning on modern cpus: Vectorization, quantizations, memory optimizations, and more . Proceedings of Machine Learning and Systems 3 (2021), 156 -- 166 . Shabnam Daghaghi, Nicholas Meisburger, Mengnan Zhao, and Anshumali Shrivastava. 2021. Accelerating slide deep learning on modern cpus: Vectorization, quantizations, memory optimizations, and more. Proceedings of Machine Learning and Systems 3 (2021), 156--166.","journal-title":"Proceedings of Machine Learning and Systems"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1145\/2408776.2408794"},{"key":"e_1_3_2_1_13_1","first-page":"40","article-title":"Bandana: Using non-volatile memory for storing deep learning models","volume":"1","author":"Eisenman Assaf","year":"2019","unstructured":"Assaf Eisenman , Maxim Naumov , Darryl Gardner , Misha Smelyanskiy , Sergey Pupyrev , Kim Hazelwood , Asaf Cidon , and Sachin Katti . 2019 . Bandana: Using non-volatile memory for storing deep learning models . Proceedings of Machine Learning and Systems 1 (2019), 40 -- 52 . Assaf Eisenman, Maxim Naumov, Darryl Gardner, Misha Smelyanskiy, Sergey Pupyrev, Kim Hazelwood, Asaf Cidon, and Sachin Katti. 2019. Bandana: Using non-volatile memory for storing deep learning models. Proceedings of Machine Learning and Systems 1 (2019), 40--52.","journal-title":"Proceedings of Machine Learning and Systems"},{"key":"e_1_3_2_1_14_1","unstructured":"GCC. 2022. GCC Data Prefetch Support. \"https:\/\/gcc.gnu.org\/projects\/prefetch.html\".  GCC. 2022. GCC Data Prefetch Support. \"https:\/\/gcc.gnu.org\/projects\/prefetch.html\"."},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1145\/2843948"},{"key":"e_1_3_2_1_16_1","unstructured":"Udit Gupta. 2020. DLRM configuration in DeepRecSys RMC3. \"https:\/\/github.com\/harvard-acc\/DeepRecSys\/blob\/master\/models\/configs\/dlrm_rm3.json\".  Udit Gupta. 2020. DLRM configuration in DeepRecSys RMC3. \"https:\/\/github.com\/harvard-acc\/DeepRecSys\/blob\/master\/models\/configs\/dlrm_rm3.json\"."},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA45697.2020.00084"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1145\/3466752.3480127"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA47549.2020.00047"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1109\/AINAW.2007.345"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2018.00059"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1109\/IISWC50251.2020.00024"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA45697.2020.00083"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1145\/3488423.3519317"},{"key":"e_1_3_2_1_25_1","unstructured":"Intel. 2017. Intel Xeon Gold 6136 Processor. \"https:\/\/www.intel.com\/content\/www\/us\/en\/products\/sku\/120479\/intel-xeon-gold-6136-processor-24-75m-cache-3-00-ghz\/specifications.html\".  Intel. 2017. Intel Xeon Gold 6136 Processor. \"https:\/\/www.intel.com\/content\/www\/us\/en\/products\/sku\/120479\/intel-xeon-gold-6136-processor-24-75m-cache-3-00-ghz\/specifications.html\"."},{"key":"e_1_3_2_1_26_1","unstructured":"Intel. 2019. Intel Cascade Lake Architecture. \"https:\/\/www.intel.com\/content\/www\/us\/en\/products\/platforms\/details\/cascade-lake.html\".  Intel. 2019. Intel Cascade Lake Architecture. \"https:\/\/www.intel.com\/content\/www\/us\/en\/products\/platforms\/details\/cascade-lake.html\"."},{"key":"e_1_3_2_1_27_1","unstructured":"Intel. 2021. Intel Ice Lake Architecture. \"https:\/\/ark.intel.com\/content\/www\/us\/en\/ark\/products\/codename\/74979\/products-formerly-ice-lake.html\".  Intel. 2021. Intel Ice Lake Architecture. \"https:\/\/ark.intel.com\/content\/www\/us\/en\/ark\/products\/codename\/74979\/products-formerly-ice-lake.html\"."},{"key":"e_1_3_2_1_28_1","unstructured":"Intel. 2021. Intel Xeon Silver 4314 Processor. \"https:\/\/www.intel.com\/content\/www\/us\/en\/products\/sku\/215269\/intel-xeon-silver-4314-processor-24m-cache-2-40-ghz\/specifications.html\".  Intel. 2021. Intel Xeon Silver 4314 Processor. \"https:\/\/www.intel.com\/content\/www\/us\/en\/products\/sku\/215269\/intel-xeon-silver-4314-processor-24m-cache-2-40-ghz\/specifications.html\"."},{"key":"e_1_3_2_1_29_1","unstructured":"Intel. 2022. Hardware Prefetchers in Intel CPU. \"https:\/\/www.intel.com\/content\/www\/us\/en\/developer\/articles\/technical\/intel-sdm.html\".  Intel. 2022. Hardware Prefetchers in Intel CPU. \"https:\/\/www.intel.com\/content\/www\/us\/en\/developer\/articles\/technical\/intel-sdm.html\"."},{"key":"e_1_3_2_1_30_1","unstructured":"Intel. 2022. Intel C++ Compiler Classic Developer Guide and Reference. \"https:\/\/www.intel.com\/content\/www\/us\/en\/develop\/documentation\/cpp-compiler-developer-guide-and-reference\/top\/compiler-reference\/compiler-options\/advanced-optimization-options\/qopt-prefetch-qopt-prefetch.html\".  Intel. 2022. Intel C++ Compiler Classic Developer Guide and Reference. \"https:\/\/www.intel.com\/content\/www\/us\/en\/develop\/documentation\/cpp-compiler-developer-guide-and-reference\/top\/compiler-reference\/compiler-options\/advanced-optimization-options\/qopt-prefetch-qopt-prefetch.html\"."},{"key":"e_1_3_2_1_31_1","unstructured":"Intel. 2022. Intel Extension for PyTorch. \"https:\/\/www.intel.com\/content\/www\/us\/en\/developer\/tools\/oneapi\/vtune-profiler.html\".  Intel. 2022. Intel Extension for PyTorch. \"https:\/\/www.intel.com\/content\/www\/us\/en\/developer\/tools\/oneapi\/vtune-profiler.html\"."},{"key":"e_1_3_2_1_32_1","unstructured":"Intel. 2022. Intel VTune Profiler. \"https:\/\/github.com\/intel\/intel-extension-for-pytorch\".  Intel. 2022. Intel VTune Profiler. \"https:\/\/github.com\/intel\/intel-extension-for-pytorch\"."},{"key":"e_1_3_2_1_33_1","unstructured":"Intel. 2022. Pin - A Dynamic Binary Instrumentation Tool. \"https:\/\/www.intel.com\/content\/www\/us\/en\/developer\/articles\/tool\/pin-a-dynamic-binary-instrumentation-tool.html\".  Intel. 2022. Pin - A Dynamic Binary Instrumentation Tool. \"https:\/\/www.intel.com\/content\/www\/us\/en\/developer\/articles\/tool\/pin-a-dynamic-binary-instrumentation-tool.html\"."},{"key":"e_1_3_2_1_34_1","unstructured":"Intel. 2022. Prefetch Intrinsic. \"https:\/\/www.intel.com\/content\/www\/us\/en\/docs\/intrinsics-guide\/index.html#text=_mm_prefetch\".  Intel. 2022. Prefetch Intrinsic. \"https:\/\/www.intel.com\/content\/www\/us\/en\/docs\/intrinsics-guide\/index.html#text=_mm_prefetch\"."},{"key":"e_1_3_2_1_35_1","unstructured":"Intel. 2023. Intel Xeon Platinum 8480+ Processor. \"https:\/\/ark.intel.com\/content\/www\/us\/en\/ark\/products\/231746\/intel-xeon-platinum-8480-processor-105m-cache-2-00-ghz.html\".  Intel. 2023. Intel Xeon Platinum 8480+ Processor. \"https:\/\/ark.intel.com\/content\/www\/us\/en\/ark\/products\/231746\/intel-xeon-platinum-8480-processor-105m-cache-2-00-ghz.html\"."},{"key":"e_1_3_2_1_36_1","volume-title":"Proceedings of the Seventeenth European Conference on Computer Systems. 747--764","author":"Jamilan Saba","year":"2022","unstructured":"Saba Jamilan , Tanvir Ahmed Khan , Grant Ayers , Baris Kasikci , and Heiner Litz . 2022 . Apt-get: Profile-guided timely software prefetching . In Proceedings of the Seventeenth European Conference on Computer Systems. 747--764 . Saba Jamilan, Tanvir Ahmed Khan, Grant Ayers, Baris Kasikci, and Heiner Litz. 2022. Apt-get: Profile-guided timely software prefetching. In Proceedings of the Seventeenth European Conference on Computer Systems. 747--764."},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA52012.2021.00059"},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1145\/3302424.3303958"},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA45697.2020.00070"},{"key":"e_1_3_2_1_40_1","volume-title":"Hercules: Heterogeneity-Aware Inference Serving for At-Scale Personalized Recommendation. In 2022 IEEE International Symposium on High-Performance Computer Architecture (HPCA). IEEE, 141--144","author":"Ke Liu","year":"2022","unstructured":"Liu Ke , Udit Gupta , Mark Hempsteadis , Carole-Jean Wu , Hsien-Hsin S Lee , and Xuan Zhang . 2022 . Hercules: Heterogeneity-Aware Inference Serving for At-Scale Personalized Recommendation. In 2022 IEEE International Symposium on High-Performance Computer Architecture (HPCA). IEEE, 141--144 . Liu Ke, Udit Gupta, Mark Hempsteadis, Carole-Jean Wu, Hsien-Hsin S Lee, and Xuan Zhang. 2022. Hercules: Heterogeneity-Aware Inference Serving for At-Scale Personalized Recommendation. In 2022 IEEE International Symposium on High-Performance Computer Architecture (HPCA). IEEE, 141--144."},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1145\/3534056.3534935"},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1145\/3352460.3358284"},{"key":"e_1_3_2_1_43_1","volume-title":"Proceedings of the 49th Annual International Symposium on Computer Architecture. 860--873","author":"Kwon Youngeun","year":"2022","unstructured":"Youngeun Kwon and Minsoo Rhu . 2022 . Training personalized recommendation systems from (GPU) scratch: look forward not backwards . In Proceedings of the 49th Annual International Symposium on Computer Architecture. 860--873 . Youngeun Kwon and Minsoo Rhu. 2022. Training personalized recommendation systems from (GPU) scratch: look forward not backwards. In Proceedings of the 49th Annual International Symposium on Computer Architecture. 860--873."},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1145\/2133382.2133384"},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1145\/3445814.3446717"},{"key":"e_1_3_2_1_46_1","volume-title":"2019 USENIX Annual Technical Conference (USENIX ATC 19)","author":"Liu Yizhi","year":"2019","unstructured":"Yizhi Liu , Yao Wang , Ruofei Yu , Mu Li , Vin Sharma , and Yida Wang . 2019 . Optimizing {CNN} Model Inference on {CPUs} . In 2019 USENIX Annual Technical Conference (USENIX ATC 19) . 1025--1040. Yizhi Liu, Yao Wang, Ruofei Yu, Mu Li, Vin Sharma, and Yida Wang. 2019. Optimizing {CNN} Model Inference on {CPUs}. In 2019 USENIX Annual Technical Conference (USENIX ATC 19). 1025--1040."},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISPASS51385.2021.00033"},{"key":"e_1_3_2_1_48_1","article-title":"Hyper-Threading Technology Architecture and Microarchitecture","volume":"6","author":"Marr Deborah T","year":"2002","unstructured":"Deborah T Marr , Frank Binns , David L Hill , Glenn Hinton , David A Koufaty , J Alan Miller , and Michael Upton . 2002 . Hyper-Threading Technology Architecture and Microarchitecture . Intel Technology Journal 6 , 1 (2002). Deborah T Marr, Frank Binns, David L Hill, Glenn Hinton, David A Koufaty, J Alan Miller, and Michael Upton. 2002. Hyper-Threading Technology Architecture and Microarchitecture. Intel Technology Journal 6, 1 (2002).","journal-title":"Intel Technology Journal"},{"key":"e_1_3_2_1_49_1","unstructured":"MLPerf. 2022. MLPerf benchmarking on CPUs using Intel Extension for PyTorch. \"https:\/\/github.com\/mlcommons\/inference_results_v2.1\/tree\/master\/closed\/Intel\/code\/dlrm-99.9\/pytorch-cpu\".  MLPerf. 2022. MLPerf benchmarking on CPUs using Intel Extension for PyTorch. \"https:\/\/github.com\/mlcommons\/inference_results_v2.1\/tree\/master\/closed\/Intel\/code\/dlrm-99.9\/pytorch-cpu\"."},{"key":"e_1_3_2_1_50_1","unstructured":"MLPerf. 2022. MLPerf Datacenter Inference Submissions v2.1. \"https:\/\/mlcommons.org\/en\/inference-datacenter-21\/\".  MLPerf. 2022. MLPerf Datacenter Inference Submissions v2.1. \"https:\/\/mlcommons.org\/en\/inference-datacenter-21\/\"."},{"key":"e_1_3_2_1_51_1","volume-title":"Proceedings of the 49th Annual International Symposium on Computer Architecture. 993--1011","author":"Mudigere Dheevatsa","year":"2022","unstructured":"Dheevatsa Mudigere , Yuchen Hao , Jianyu Huang , Zhihao Jia , Andrew Tulloch , Srinivas Sridharan , Xing Liu , Mustafa Ozdal , Jade Nie , Jongsoo Park , 2022 . Software-hardware co-design for fast and scalable training of deep learning recommendation models . In Proceedings of the 49th Annual International Symposium on Computer Architecture. 993--1011 . Dheevatsa Mudigere, Yuchen Hao, Jianyu Huang, Zhihao Jia, Andrew Tulloch, Srinivas Sridharan, Xing Liu, Mustafa Ozdal, Jade Nie, Jongsoo Park, et al. 2022. Software-hardware co-design for fast and scalable training of deep learning recommendation models. In Proceedings of the 49th Annual International Symposium on Computer Architecture. 993--1011."},{"key":"e_1_3_2_1_52_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2003.1183532"},{"key":"e_1_3_2_1_53_1","volume-title":"2021 ACM\/IEEE 48th Annual International Symposium on Computer Architecture (ISCA). IEEE, 195--208","author":"Naithani Ajeya","year":"2021","unstructured":"Ajeya Naithani , Sam Ainsworth , Timothy M Jones , and Lieven Eeckhout . 2021 . Vector runahead . In 2021 ACM\/IEEE 48th Annual International Symposium on Computer Architecture (ISCA). IEEE, 195--208 . Ajeya Naithani, Sam Ainsworth, Timothy M Jones, and Lieven Eeckhout. 2021. Vector runahead. In 2021 ACM\/IEEE 48th Annual International Symposium on Computer Architecture (ISCA). IEEE, 195--208."},{"key":"e_1_3_2_1_54_1","volume-title":"Jianyu Huang, Narayanan Sundaraman, Jongsoo Park, Xiaodong Wang, Udit Gupta, Carole-Jean Wu, Alisson G Azzolini, et al.","author":"Naumov Maxim","year":"2019","unstructured":"Maxim Naumov , Dheevatsa Mudigere , Hao-Jun Michael Shi , Jianyu Huang, Narayanan Sundaraman, Jongsoo Park, Xiaodong Wang, Udit Gupta, Carole-Jean Wu, Alisson G Azzolini, et al. 2019 . Deep learning recommendation model for personalization and recommendation systems. arXiv preprint arXiv:1906.00091 (2019). Maxim Naumov, Dheevatsa Mudigere, Hao-Jun Michael Shi, Jianyu Huang, Narayanan Sundaraman, Jongsoo Park, Xiaodong Wang, Udit Gupta, Carole-Jean Wu, Alisson G Azzolini, et al. 2019. Deep learning recommendation model for personalization and recommendation systems. arXiv preprint arXiv:1906.00091 (2019)."},{"key":"e_1_3_2_1_55_1","unstructured":"Jongsoo Park Maxim Naumov Protonu Basu Summer Deng Aravind Kalaiah Daya Khudia James Law Parth Malani Andrey Malevich Satish Nadathur etal 2018. Deep learning inference in facebook data centers: Characterization performance optimizations and hardware implications. arXiv preprint arXiv:1811.09886 (2018).  Jongsoo Park Maxim Naumov Protonu Basu Summer Deng Aravind Kalaiah Daya Khudia James Law Parth Malani Andrey Malevich Satish Nadathur et al. 2018. Deep learning inference in facebook data centers: Characterization performance optimizations and hardware implications. arXiv preprint arXiv:1811.09886 (2018)."},{"key":"e_1_3_2_1_56_1","unstructured":"Meta Research. 2021. Embedding Lookup Synthetic Dataset. \"https:\/\/github.com\/facebookresearch\/dlrm_datasets\".  Meta Research. 2021. Embedding Lookup Synthetic Dataset. \"https:\/\/github.com\/facebookresearch\/dlrm_datasets\"."},{"key":"e_1_3_2_1_57_1","unstructured":"Meta Research. 2022. DLRM configuration for Criteo Kaggle Training. \"https:\/\/github.com\/facebookresearch\/dlrm\/blob\/main\/bench\/dlrm_s_criteo_kaggle.sh\".  Meta Research. 2022. DLRM configuration for Criteo Kaggle Training. \"https:\/\/github.com\/facebookresearch\/dlrm\/blob\/main\/bench\/dlrm_s_criteo_kaggle.sh\"."},{"key":"e_1_3_2_1_58_1","doi-asserted-by":"publisher","DOI":"10.1145\/3357526.3357536"},{"key":"e_1_3_2_1_59_1","doi-asserted-by":"publisher","DOI":"10.1145\/3472883.3486981"},{"key":"e_1_3_2_1_60_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA51647.2021.00061"},{"key":"e_1_3_2_1_61_1","volume-title":"31st USENIX Security Symposium (USENIX Security 22)","author":"Taram Mohammadkazem","year":"2022","unstructured":"Mohammadkazem Taram , Xida Ren , Ashish Venkat , and Dean Tullsen . 2022 . {SecSMT} : Securing {SMT} Processors against {Contention-Based} Covert Channels . In 31st USENIX Security Symposium (USENIX Security 22) . 3165--3182. Mohammadkazem Taram, Xida Ren, Ashish Venkat, and Dean Tullsen. 2022. {SecSMT} : Securing {SMT} Processors against {Contention-Based} Covert Channels. In 31st USENIX Security Symposium (USENIX Security 22). 3165--3182."},{"key":"e_1_3_2_1_62_1","volume-title":"Proceedings. 34th ACM\/IEEE International Symposium on Microarchitecture. MICRO-34","author":"Tullsen Dean M","year":"2001","unstructured":"Dean M Tullsen and Jeffery A Brown . 2001 . Handling long-latency loads in a simultaneous multithreading processor . In Proceedings. 34th ACM\/IEEE International Symposium on Microarchitecture. MICRO-34 . IEEE, 318--327. Dean M Tullsen and Jeffery A Brown. 2001. Handling long-latency loads in a simultaneous multithreading processor. In Proceedings. 34th ACM\/IEEE International Symposium on Microarchitecture. MICRO-34. IEEE, 318--327."},{"key":"e_1_3_2_1_63_1","doi-asserted-by":"publisher","DOI":"10.1145\/223982.224449"},{"key":"e_1_3_2_1_64_1","doi-asserted-by":"publisher","DOI":"10.1145\/3394486.3403284"},{"key":"e_1_3_2_1_65_1","doi-asserted-by":"publisher","DOI":"10.1145\/3124749.3124754"},{"key":"e_1_3_2_1_66_1","doi-asserted-by":"publisher","DOI":"10.1145\/3442381.3450078"},{"key":"e_1_3_2_1_67_1","volume-title":"19th USENIX Symposium on Networked Systems Design and Implementation (NSDI 22)","author":"Weng Qizhen","year":"2022","unstructured":"Qizhen Weng , Wencong Xiao , Yinghao Yu , Wei Wang , Cheng Wang , Jian He , Yong Li , Liping Zhang , Wei Lin , and Yu Ding . 2022 . {MLaaS} in the Wild: Workload Analysis and Scheduling in {Large-Scale} Heterogeneous {GPU} Clusters . In 19th USENIX Symposium on Networked Systems Design and Implementation (NSDI 22) . 945--960. Qizhen Weng, Wencong Xiao, Yinghao Yu, Wei Wang, Cheng Wang, Jian He, Yong Li, Liping Zhang, Wei Lin, and Yu Ding. 2022. {MLaaS} in the Wild: Workload Analysis and Scheduling in {Large-Scale} Heterogeneous {GPU} Clusters. In 19th USENIX Symposium on Networked Systems Design and Implementation (NSDI 22). 945--960."},{"key":"e_1_3_2_1_68_1","doi-asserted-by":"publisher","DOI":"10.1145\/3445814.3446763"},{"key":"e_1_3_2_1_69_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2019.00048"},{"key":"e_1_3_2_1_70_1","volume-title":"Proceedings of the 28th ACM SIGKDD Conference on Knowledge Discovery and Data Mining. 4461--4471","author":"Zha Daochen","year":"2022","unstructured":"Daochen Zha , Louis Feng , Bhargav Bhushanam , Dhruv Choudhary , Jade Nie , Yuandong Tian , Jay Chae , Yinbin Ma , Arun Kejariwal , and Xia Hu . 2022 . Autoshard: Automated embedding table sharding for recommender systems . In Proceedings of the 28th ACM SIGKDD Conference on Knowledge Discovery and Data Mining. 4461--4471 . Daochen Zha, Louis Feng, Bhargav Bhushanam, Dhruv Choudhary, Jade Nie, Yuandong Tian, Jay Chae, Yinbin Ma, Arun Kejariwal, and Xia Hu. 2022. Autoshard: Automated embedding table sharding for recommender systems. In Proceedings of the 28th ACM SIGKDD Conference on Knowledge Discovery and Data Mining. 4461--4471."},{"key":"e_1_3_2_1_71_1","volume-title":"Dreamshard: Generalizable embedding table placement for recommender systems. arXiv preprint arXiv:2210.02023","author":"Zha Daochen","year":"2022","unstructured":"Daochen Zha , Louis Feng , Qiaoyu Tan , Zirui Liu , Kwei-Herng Lai , Bhargav Bhushanam , Yuandong Tian , Arun Kejariwal , and Xia Hu . 2022 . Dreamshard: Generalizable embedding table placement for recommender systems. arXiv preprint arXiv:2210.02023 (2022). Daochen Zha, Louis Feng, Qiaoyu Tan, Zirui Liu, Kwei-Herng Lai, Bhargav Bhushanam, Yuandong Tian, Arun Kejariwal, and Xia Hu. 2022. Dreamshard: Generalizable embedding table placement for recommender systems. arXiv preprint arXiv:2210.02023 (2022)."},{"key":"e_1_3_2_1_72_1","volume-title":"Proceedings of the 49th Annual International Symposium on Computer Architecture. 1042--1057","author":"Zhao Mark","year":"2022","unstructured":"Mark Zhao , Niket Agarwal , Aarti Basant , Bu\u011fra Gedik , Satadru Pan , Mustafa Ozdal , Rakesh Komuravelli , Jerry Pan , Tianshu Bao , Haowei Lu , 2022 . Understanding data storage and ingestion for large-scale deep recommendation model training: industrial product . In Proceedings of the 49th Annual International Symposium on Computer Architecture. 1042--1057 . Mark Zhao, Niket Agarwal, Aarti Basant, Bu\u011fra Gedik, Satadru Pan, Mustafa Ozdal, Rakesh Komuravelli, Jerry Pan, Tianshu Bao, Haowei Lu, et al. 2022. Understanding data storage and ingestion for large-scale deep recommendation model training: industrial product. In Proceedings of the 49th Annual International Symposium on Computer Architecture. 1042--1057."},{"key":"e_1_3_2_1_73_1","doi-asserted-by":"publisher","DOI":"10.1145\/3357384.3358045"},{"key":"e_1_3_2_1_74_1","doi-asserted-by":"publisher","DOI":"10.1145\/3298689.3346997"},{"key":"e_1_3_2_1_75_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.33015941"},{"key":"e_1_3_2_1_76_1","doi-asserted-by":"publisher","DOI":"10.1145\/3219819.3219823"}],"event":{"name":"ISCA '23: 50th Annual International Symposium on Computer Architecture","location":"Orlando FL USA","acronym":"ISCA '23","sponsor":["SIGARCH ACM Special Interest Group on Computer Architecture","IEEE"]},"container-title":["Proceedings of the 50th Annual International Symposium on Computer Architecture"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3579371.3589112","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T16:46:40Z","timestamp":1750178800000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3579371.3589112"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,6,17]]},"references-count":76,"alternative-id":["10.1145\/3579371.3589112","10.1145\/3579371"],"URL":"https:\/\/doi.org\/10.1145\/3579371.3589112","relation":{},"subject":[],"published":{"date-parts":[[2023,6,17]]},"assertion":[{"value":"2023-06-17","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}