{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,16]],"date-time":"2026-06-16T15:05:05Z","timestamp":1781622305688,"version":"3.54.5"},"publisher-location":"New York, NY, USA","reference-count":71,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,6,17]],"date-time":"2023-06-17T00:00:00Z","timestamp":1686960000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/501100012166","name":"National Key Research and Development Program of China","doi-asserted-by":"publisher","award":["2022YFB4501403"],"award-info":[{"award-number":["2022YFB4501403"]}],"id":[{"id":"10.13039\/501100012166","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62072195"],"award-info":[{"award-number":["62072195"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61825202"],"award-info":[{"award-number":["61825202"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61832006"],"award-info":[{"award-number":["61832006"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Zhejiang Lab","award":["2022P10AC02"],"award-info":[{"award-number":["2022P10AC02"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,6,17]]},"DOI":"10.1145\/3579371.3589101","type":"proceedings-article","created":{"date-parts":[[2023,6,16]],"date-time":"2023-06-16T20:25:28Z","timestamp":1686947128000},"page":"1-13","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":28,"title":["Accelerating Personalized Recommendation with Cross-level Near-Memory Processing"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-3319-254X","authenticated-orcid":false,"given":"Haifeng","family":"Liu","sequence":"first","affiliation":[{"name":"Huazhong University of Science and Technology, Wuhan, China"},{"name":"Zhejiang Lab, Hangzhou, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7903-2061","authenticated-orcid":false,"given":"Long","family":"Zheng","sequence":"additional","affiliation":[{"name":"Huazhong University of Science and Technology, Wuhan, China"},{"name":"Zhejiang Lab, Hangzhou, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3927-1102","authenticated-orcid":false,"given":"Yu","family":"Huang","sequence":"additional","affiliation":[{"name":"Huazhong University of Science and Technology, Wuhan, China"},{"name":"Zhejiang Lab, Hangzhou, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2180-5428","authenticated-orcid":false,"given":"Chaoqiang","family":"Liu","sequence":"additional","affiliation":[{"name":"Huazhong University of Science and Technology, Wuhan, China"},{"name":"Zhejiang Lab, Hangzhou, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5383-4741","authenticated-orcid":false,"given":"Xiangyu","family":"Ye","sequence":"additional","affiliation":[{"name":"Huazhong University of Science and Technology, Wuhan, China"},{"name":"Zhejiang Lab, Hangzhou, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-1444-2910","authenticated-orcid":false,"given":"Jingrui","family":"Yuan","sequence":"additional","affiliation":[{"name":"Huazhong University of Science and Technology, Wuhan, China"},{"name":"Zhejiang Lab, Hangzhou, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6302-813X","authenticated-orcid":false,"given":"Xiaofei","family":"Liao","sequence":"additional","affiliation":[{"name":"Huazhong University of Science and Technology, Wuhan, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3934-7605","authenticated-orcid":false,"given":"Hai","family":"Jin","sequence":"additional","affiliation":[{"name":"Huazhong University of Science and Technology, Wuhan, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0380-3506","authenticated-orcid":false,"given":"Jingling","family":"Xue","sequence":"additional","affiliation":[{"name":"University of New South Wales, Sydney, Australia"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2023,6,17]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"2013. Criteo AI Labs Ad Terabyte. https:\/\/labs.criteo.com\/2013\/12\/download-terabyte-click-logs\/.  2013. Criteo AI Labs Ad Terabyte. https:\/\/labs.criteo.com\/2013\/12\/download-terabyte-click-logs\/."},{"key":"e_1_3_2_1_2_1","unstructured":"2014. Criteo AI Labs Ad Kaggle. https:\/\/labs.criteo.com\/2014\/02\/kaggle-display-advertising-challenge-dataset.  2014. Criteo AI Labs Ad Kaggle. https:\/\/labs.criteo.com\/2014\/02\/kaggle-display-advertising-challenge-dataset."},{"key":"e_1_3_2_1_3_1","unstructured":"2023. Gurobi Optimizer Reference Manual. https:\/\/www.gurobi.com.  2023. Gurobi Optimizer Reference Manual. https:\/\/www.gurobi.com."},{"key":"e_1_3_2_1_4_1","volume-title":"Proceedings of the IEEE International Symposium on High-Performance Computer Architecture, HPCA","author":"Acun Bilge","year":"2021","unstructured":"Bilge Acun , Matthew Murphy , Xiaodong Wang , Jade Nie , Carole-Jean Wu , and Kim M. Hazelwood . 2021. Understanding Training Efficiency of Deep Learning Recommendation Models at Scale . In Proceedings of the IEEE International Symposium on High-Performance Computer Architecture, HPCA 2021 . IEEE, 802--814. Bilge Acun, Matthew Murphy, Xiaodong Wang, Jade Nie, Carole-Jean Wu, and Kim M. Hazelwood. 2021. Understanding Training Efficiency of Deep Learning Recommendation Models at Scale. In Proceedings of the IEEE International Symposium on High-Performance Computer Architecture, HPCA 2021. IEEE, 802--814."},{"key":"e_1_3_2_1_5_1","volume-title":"Proceedings of the IEEE International Symposium on High-Performance Computer Architecture, HPCA","author":"Asgari Bahar","year":"2021","unstructured":"Bahar Asgari , Ramyad Hadidi , Jiashen Cao , Da Eun Shim , Sung Kyu Lim , and Hyesoon Kim . 2021 . FAFNIR: Accelerating Sparse Gathering by Using Efficient Near-Memory Intelligent Reduction . In Proceedings of the IEEE International Symposium on High-Performance Computer Architecture, HPCA 2021. IEEE, 908--920. Bahar Asgari, Ramyad Hadidi, Jiashen Cao, Da Eun Shim, Sung Kyu Lim, and Hyesoon Kim. 2021. FAFNIR: Accelerating Sparse Gathering by Using Efficient Near-Memory Intelligent Reduction. In Proceedings of the IEEE International Symposium on High-Performance Computer Architecture, HPCA 2021. IEEE, 908--920."},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1287\/mnsc.1110.1371"},{"key":"e_1_3_2_1_7_1","volume-title":"Proceedings of the IEEE International Symposium on High Performance Computer Architecture, HPCA","author":"Kai-Wei Chang Kevin","year":"2014","unstructured":"Kevin Kai-Wei Chang , Donghyuk Lee , Zeshan Chishti , Alaa R. Alameldeen , Chris Wilkerson , Yoongu Kim , and Onur Mutlu . 2014 . Improving DRAM performance by parallelizing refreshes with accesses . In Proceedings of the IEEE International Symposium on High Performance Computer Architecture, HPCA 2014. IEEE, 356--367. Kevin Kai-Wei Chang, Donghyuk Lee, Zeshan Chishti, Alaa R. Alameldeen, Chris Wilkerson, Yoongu Kim, and Onur Mutlu. 2014. Improving DRAM performance by parallelizing refreshes with accesses. In Proceedings of the IEEE International Symposium on High Performance Computer Architecture, HPCA 2014. IEEE, 356--367."},{"key":"e_1_3_2_1_8_1","volume-title":"Proceedings of the IEEE International Parallel and Distributed Processing Symposium, IPDPS","author":"Chen Dan","year":"2022","unstructured":"Dan Chen , Hai Jin , Long Zheng , Yu Huang , Pengcheng Yao , Chuangyi Gui , Qinggang Wang , Haifeng Liu , Haiheng He , Xiaofei Liao , and Ran Zheng . 2022 . A General Offloading Approach for Near-DRAM Processing-In-Memory Architectures . In Proceedings of the IEEE International Parallel and Distributed Processing Symposium, IPDPS 2022. IEEE, 246--257. Dan Chen, Hai Jin, Long Zheng, Yu Huang, Pengcheng Yao, Chuangyi Gui, Qinggang Wang, Haifeng Liu, Haiheng He, Xiaofei Liao, and Ran Zheng. 2022. A General Offloading Approach for Near-DRAM Processing-In-Memory Architectures. In Proceedings of the IEEE International Parallel and Distributed Processing Symposium, IPDPS 2022. IEEE, 246--257."},{"key":"e_1_3_2_1_9_1","volume-title":"Proceedings of the 43rd ACM\/IEEE Annual International Symposium on Computer Architecture, ISCA 2016. IEEE, 27--39","author":"Chi Ping","year":"2016","unstructured":"Ping Chi , Shuangchen Li , Cong Xu , Tao Zhang , Jishen Zhao , Yongpan Liu , Yu Wang , and Yuan Xie . 2016 . PRIME: A Novel Processing-in-Memory Architecture for Neural Network Computation in ReRAM-Based Main Memory . In Proceedings of the 43rd ACM\/IEEE Annual International Symposium on Computer Architecture, ISCA 2016. IEEE, 27--39 . Ping Chi, Shuangchen Li, Cong Xu, Tao Zhang, Jishen Zhao, Yongpan Liu, Yu Wang, and Yuan Xie. 2016. PRIME: A Novel Processing-in-Memory Architecture for Neural Network Computation in ReRAM-Based Main Memory. In Proceedings of the 43rd ACM\/IEEE Annual International Symposium on Computer Architecture, ISCA 2016. IEEE, 27--39."},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1145\/2959100.2959190"},{"key":"e_1_3_2_1_11_1","volume-title":"Proceedings of the 19th USENIX Symposium on Networked Systems Design and Implementation, NSDI 2022. USENIX Association, 929--943","author":"Eisenman Assaf","year":"2022","unstructured":"Assaf Eisenman , Kiran Kumar Matam , Steven Ingram , Dheevatsa Mudigere , Raghuraman Krishnamoorthi , Krishnakumar Nair , Misha Smelyanskiy , and Murali Annavaram . 2022 . Check-N-Run: a Checkpointing System for Training Deep Learning Recommendation Models . In Proceedings of the 19th USENIX Symposium on Networked Systems Design and Implementation, NSDI 2022. USENIX Association, 929--943 . Assaf Eisenman, Kiran Kumar Matam, Steven Ingram, Dheevatsa Mudigere, Raghuraman Krishnamoorthi, Krishnakumar Nair, Misha Smelyanskiy, and Murali Annavaram. 2022. Check-N-Run: a Checkpointing System for Training Deep Learning Recommendation Models. In Proceedings of the 19th USENIX Symposium on Networked Systems Design and Implementation, NSDI 2022. USENIX Association, 929--943."},{"key":"e_1_3_2_1_12_1","volume-title":"Proceedings of the IEEE International Symposium on High Performance Computer Architecture, HPCA","author":"Farahani Amin Farmahini","year":"2015","unstructured":"Amin Farmahini Farahani , Jung Ho Ahn , Katherine Morrow , and Nam Sung Kim . 2015 . NDA: Near-DRAM acceleration architecture leveraging commodity DRAM devices and standard memory modules . In Proceedings of the IEEE International Symposium on High Performance Computer Architecture, HPCA 2015. IEEE, 283--295. Amin Farmahini Farahani, Jung Ho Ahn, Katherine Morrow, and Nam Sung Kim. 2015. NDA: Near-DRAM acceleration architecture leveraging commodity DRAM devices and standard memory modules. In Proceedings of the IEEE International Symposium on High Performance Computer Architecture, HPCA 2015. IEEE, 283--295."},{"key":"e_1_3_2_1_13_1","volume-title":"Proceedings of the IEEE International Symposium on Information Theory, ISIT","author":"Ginart Antonio A.","year":"2021","unstructured":"Antonio A. Ginart , Maxim Naumov , Dheevatsa Mudigere , Jiyan Yang , and James Zou . 2021 . Mixed Dimension Embeddings with Application to Memory-Efficient Recommendation Systems . In Proceedings of the IEEE International Symposium on Information Theory, ISIT 2021. IEEE, 2786--2791. Antonio A. Ginart, Maxim Naumov, Dheevatsa Mudigere, Jiyan Yang, and James Zou. 2021. Mixed Dimension Embeddings with Application to Memory-Efficient Recommendation Systems. In Proceedings of the IEEE International Symposium on Information Theory, ISIT 2021. IEEE, 2786--2791."},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1145\/2843948"},{"key":"e_1_3_2_1_15_1","unstructured":"Huifeng Guo Ruiming Tang Yunming Ye Zhenguo Li Xiuqiang He and Zhenhua Dong. 2018. DeepFM: An End-to-End Wide & Deep Learning Framework for CTR Prediction. CoRR abs\/1804.04950 (2018).  Huifeng Guo Ruiming Tang Yunming Ye Zhenguo Li Xiuqiang He and Zhenhua Dong. 2018. DeepFM: An End-to-End Wide & Deep Learning Framework for CTR Prediction. CoRR abs\/1804.04950 (2018)."},{"key":"e_1_3_2_1_16_1","volume-title":"Proceedings of the 47th ACM\/IEEE Annual International Symposium on Computer Architecture, ISCA 2020. IEEE, 982--995","author":"Gupta Udit","year":"2020","unstructured":"Udit Gupta , Samuel Hsia , Vikram Saraph , Xiaodong Wang , Brandon Reagen , Gu-Yeon Wei , Hsien-Hsin S. Lee , David Brooks , and Carole-Jean Wu . 2020 . Deep-RecSys: A System for Optimizing End-To-End At-Scale Neural Recommendation Inference . In Proceedings of the 47th ACM\/IEEE Annual International Symposium on Computer Architecture, ISCA 2020. IEEE, 982--995 . Udit Gupta, Samuel Hsia, Vikram Saraph, Xiaodong Wang, Brandon Reagen, Gu-Yeon Wei, Hsien-Hsin S. Lee, David Brooks, and Carole-Jean Wu. 2020. Deep-RecSys: A System for Optimizing End-To-End At-Scale Neural Recommendation Inference. In Proceedings of the 47th ACM\/IEEE Annual International Symposium on Computer Architecture, ISCA 2020. IEEE, 982--995."},{"key":"e_1_3_2_1_17_1","volume-title":"Proceedings of the 54th Annual IEEE\/ACM International Symposium on Microarchitecture, MICRO 2021. ACM, 870--884","author":"Gupta Udit","year":"2021","unstructured":"Udit Gupta , Samuel Hsia , Jeff Zhang , Mark Wilkening , Javin Pombra , Hsien-Hsin Sean Lee , Gu-Yeon Wei , Carole-Jean Wu , and David Brooks . 2021 . RecPipe: Co-designing Models and Hardware to Jointly Optimize Recommendation Quality and Performance . In Proceedings of the 54th Annual IEEE\/ACM International Symposium on Microarchitecture, MICRO 2021. ACM, 870--884 . Udit Gupta, Samuel Hsia, Jeff Zhang, Mark Wilkening, Javin Pombra, Hsien-Hsin Sean Lee, Gu-Yeon Wei, Carole-Jean Wu, and David Brooks. 2021. RecPipe: Co-designing Models and Hardware to Jointly Optimize Recommendation Quality and Performance. In Proceedings of the 54th Annual IEEE\/ACM International Symposium on Microarchitecture, MICRO 2021. ACM, 870--884."},{"key":"e_1_3_2_1_18_1","volume-title":"Proceedings of the IEEE International Symposium on High Performance Computer Architecture, HPCA","author":"Gupta Udit","year":"2020","unstructured":"Udit Gupta , Carole-Jean Wu , Xiaodong Wang , Maxim Naumov , Brandon Reagen , David Brooks , Bradford Cottel , Kim M. Hazelwood , Mark Hempstead , Bill Jia , Hsien-Hsin S. Lee , Andrey Malevich , Dheevatsa Mudigere , Mikhail Smelyanskiy , Liang Xiong , and Xuan Zhang . 2020 . The Architectural Implications of Facebook's DNN-Based Personalized Recommendation . In Proceedings of the IEEE International Symposium on High Performance Computer Architecture, HPCA 2020. IEEE, 488--501. Udit Gupta, Carole-Jean Wu, Xiaodong Wang, Maxim Naumov, Brandon Reagen, David Brooks, Bradford Cottel, Kim M. Hazelwood, Mark Hempstead, Bill Jia, Hsien-Hsin S. Lee, Andrey Malevich, Dheevatsa Mudigere, Mikhail Smelyanskiy, Liang Xiong, and Xuan Zhang. 2020. The Architectural Implications of Facebook's DNN-Based Personalized Recommendation. In Proceedings of the IEEE International Symposium on High Performance Computer Architecture, HPCA 2020. IEEE, 488--501."},{"key":"e_1_3_2_1_19_1","volume-title":"Proceedings of the IEEE International Symposium on High Performance Computer Architecture, HPCA","author":"Hazelwood Kim M.","year":"2018","unstructured":"Kim M. Hazelwood , Sarah Bird , David M. Brooks , Soumith Chintala , Utku Diril , Dmytro Dzhulgakov , Mohamed Fawzy , Bill Jia , Yangqing Jia , Aditya Kalro , James Law , Kevin Lee , Jason Lu , Pieter Noordhuis , Misha Smelyanskiy , Liang Xiong , and Xiaodong Wang . 2018 . Applied Machine Learning at Facebook: A Datacenter Infrastructure Perspective . In Proceedings of the IEEE International Symposium on High Performance Computer Architecture, HPCA 2018. IEEE, 620--629. Kim M. Hazelwood, Sarah Bird, David M. Brooks, Soumith Chintala, Utku Diril, Dmytro Dzhulgakov, Mohamed Fawzy, Bill Jia, Yangqing Jia, Aditya Kalro, James Law, Kevin Lee, Jason Lu, Pieter Noordhuis, Misha Smelyanskiy, Liang Xiong, and Xiaodong Wang. 2018. Applied Machine Learning at Facebook: A Datacenter Infrastructure Perspective. In Proceedings of the IEEE International Symposium on High Performance Computer Architecture, HPCA 2018. IEEE, 620--629."},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.2753\/MIS0742-1222270402"},{"key":"e_1_3_2_1_21_1","volume-title":"Proceedings of the IEEE International Symposium on High-Performance Computer Architecture, HPCA","author":"Huang Yu","year":"2022","unstructured":"Yu Huang , Long Zheng , Pengcheng Yao , Qinggang Wang , Xiaofei Liao , Hai Jin , and Jingling Xue . 2022 . Accelerating Graph Convolutional Networks Using Crossbar-based Processing-In-Memory Architectures . In Proceedings of the IEEE International Symposium on High-Performance Computer Architecture, HPCA 2022. IEEE, 1029--1042. Yu Huang, Long Zheng, Pengcheng Yao, Qinggang Wang, Xiaofei Liao, Hai Jin, and Jingling Xue. 2022. Accelerating Graph Convolutional Networks Using Crossbar-based Processing-In-Memory Architectures. In Proceedings of the IEEE International Symposium on High-Performance Computer Architecture, HPCA 2022. IEEE, 1029--1042."},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1109\/TCAD.2022.3199152"},{"key":"e_1_3_2_1_23_1","volume-title":"Proceedings of the 47th ACM\/IEEE Annual International Symposium on Computer Architecture, ISCA 2020. IEEE, 968--981","author":"Hwang Ranggi","year":"2020","unstructured":"Ranggi Hwang , Taehun Kim , Youngeun Kwon , and Minsoo Rhu . 2020 . Centaur: A Chiplet-based, Hybrid Sparse-Dense Accelerator for Personalized Recommendations . In Proceedings of the 47th ACM\/IEEE Annual International Symposium on Computer Architecture, ISCA 2020. IEEE, 968--981 . Ranggi Hwang, Taehun Kim, Youngeun Kwon, and Minsoo Rhu. 2020. Centaur: A Chiplet-based, Hybrid Sparse-Dense Accelerator for Personalized Recommendations. In Proceedings of the 47th ACM\/IEEE Annual International Symposium on Computer Architecture, ISCA 2020. IEEE, 968--981."},{"key":"e_1_3_2_1_24_1","volume-title":"VLSI Memory Chip Design","author":"Itoh Kiyoo","unstructured":"Kiyoo Itoh . 2013. VLSI Memory Chip Design . Vol. 5 . Springer Science & Business Media . Kiyoo Itoh. 2013. VLSI Memory Chip Design. Vol. 5. Springer Science & Business Media."},{"key":"e_1_3_2_1_25_1","unstructured":"JEDEC. 2013. High Bandwidth Memory (HBM) DRAM.  JEDEC. 2013. High Bandwidth Memory (HBM) DRAM."},{"key":"e_1_3_2_1_26_1","unstructured":"JEDEC. 2017. DDR4 SDRAM Standard.  JEDEC. 2017. DDR4 SDRAM Standard."},{"key":"e_1_3_2_1_27_1","unstructured":"JEDEC. 2021. DDR5 SDRAM Standard.  JEDEC. 2021. DDR5 SDRAM Standard."},{"key":"e_1_3_2_1_28_1","volume-title":"Context-aware Deep Model for Entity Recommendation in Search Engine at Alibaba. CoRR abs\/1909.04493","author":"Jia Qianghuai","year":"2019","unstructured":"Qianghuai Jia , Ningyu Zhang , and Nengwei Hua . 2019. Context-aware Deep Model for Entity Recommendation in Search Engine at Alibaba. CoRR abs\/1909.04493 ( 2019 ). Qianghuai Jia, Ningyu Zhang, and Nengwei Hua. 2019. Context-aware Deep Model for Entity Recommendation in Search Engine at Alibaba. CoRR abs\/1909.04493 (2019)."},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1109\/TVLSI.2014.2334635"},{"key":"e_1_3_2_1_30_1","volume-title":"Proceedings of the 48th ACM\/IEEE Annual International Symposium on Computer Architecture, ISCA 2021. IEEE, 679--691","author":"Kal Hongju","year":"2021","unstructured":"Hongju Kal , Seokmin Lee , Gun Ko , and Won Woo Ro . 2021 . SPACE: Locality-Aware Processing in Heterogeneous Memory for Personalized Recommendations . In Proceedings of the 48th ACM\/IEEE Annual International Symposium on Computer Architecture, ISCA 2021. IEEE, 679--691 . Hongju Kal, Seokmin Lee, Gun Ko, and Won Woo Ro. 2021. SPACE: Locality-Aware Processing in Heterogeneous Memory for Personalized Recommendations. In Proceedings of the 48th ACM\/IEEE Annual International Symposium on Computer Architecture, ISCA 2021. IEEE, 679--691."},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.5555\/3433701.3433758"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1145\/800057.808695"},{"key":"e_1_3_2_1_33_1","volume-title":"DRAM Circuit Design: Fundamental and High-speed Topics","author":"Keeth Brent","unstructured":"Brent Keeth , R. Jacob Baker , Brian Johnson , and Feng Lin . 2007. DRAM Circuit Design: Fundamental and High-speed Topics . Vol. 13 . John Wiley & Sons . Brent Keeth, R. Jacob Baker, Brian Johnson, and Feng Lin. 2007. DRAM Circuit Design: Fundamental and High-speed Topics. Vol. 13. John Wiley & Sons."},{"key":"e_1_3_2_1_34_1","volume-title":"Proceedings of the IEEE International Solid-State Circuits Conference, ISSCC","author":"Kim Dongkyun","year":"2019","unstructured":"Dongkyun Kim , Minsu Park , Sungchun Jang , Jun-Yong Song , Hankyu Chi , Geunho Choi , Sunmyung Choi , Jaeil Kim , Changhyun Kim , Kyung Whan Kim , Kibong Koo , Seonghwi Song , Yongmi Kim , Dong-Uk Lee , Jaejin Lee , Dae Suk Kim , Ki Hun Kwon , Minsik Han , Byeongchan Choi , Hongjung Kim , Sanghyun Ku , Yeonuk Kim , Jong-Sam Kim , Sanghui Kim , Youngsuk Seo , Seungwook Oh , Dain Im , Haksong Kim , Jonghyuck Choi , Jinil Chung , Changhyun Lee , Yongsung Lee , Joo-Hwan Cho , Junhyun Chun , and Jonghoon Oh . 2019 . 23.2A 1.1V 1ynm 6.4Gb\/s\/pin 16Gb DDR5 SDRAM with a Phase-Rotator-Based DLL, High-Speed SerDes and RX\/TX Equalization Scheme . In Proceedings of the IEEE International Solid-State Circuits Conference, ISSCC 2019. IEEE, 380--382. Dongkyun Kim, Minsu Park, Sungchun Jang, Jun-Yong Song, Hankyu Chi, Geunho Choi, Sunmyung Choi, Jaeil Kim, Changhyun Kim, Kyung Whan Kim, Kibong Koo, Seonghwi Song, Yongmi Kim, Dong-Uk Lee, Jaejin Lee, Dae Suk Kim, Ki Hun Kwon, Minsik Han, Byeongchan Choi, Hongjung Kim, Sanghyun Ku, Yeonuk Kim, Jong-Sam Kim, Sanghui Kim, Youngsuk Seo, Seungwook Oh, Dain Im, Haksong Kim, Jonghyuck Choi, Jinil Chung, Changhyun Lee, Yongsung Lee, Joo-Hwan Cho, Junhyun Chun, and Jonghoon Oh. 2019. 23.2A 1.1V 1ynm 6.4Gb\/s\/pin 16Gb DDR5 SDRAM with a Phase-Rotator-Based DLL, High-Speed SerDes and RX\/TX Equalization Scheme. In Proceedings of the IEEE International Solid-State Circuits Conference, ISSCC 2019. IEEE, 380--382."},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.5555\/2337159.2337202"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1109\/LCA.2015.2414456"},{"key":"e_1_3_2_1_37_1","unstructured":"Neal Koyle and Sujeet Ayyapureddi. 2020. Micron \u00ae DDR5: Key Module Features.  Neal Koyle and Sujeet Ayyapureddi. 2020. Micron \u00ae DDR5: Key Module Features."},{"key":"e_1_3_2_1_38_1","volume-title":"Proceedings of the 52nd Annual IEEE\/ACM International Symposium on Microarchitecture, MICRO 2019. ACM, 740--753","author":"Kwon Youngeun","year":"2019","unstructured":"Youngeun Kwon , Yunjae Lee , and Minsoo Rhu . 2019 . TensorDIMM: A Practical Near-Memory Processing Architecture for Embeddings and Tensor Operations in Deep Learning . In Proceedings of the 52nd Annual IEEE\/ACM International Symposium on Microarchitecture, MICRO 2019. ACM, 740--753 . Youngeun Kwon, Yunjae Lee, and Minsoo Rhu. 2019. TensorDIMM: A Practical Near-Memory Processing Architecture for Embeddings and Tensor Operations in Deep Learning. In Proceedings of the 52nd Annual IEEE\/ACM International Symposium on Microarchitecture, MICRO 2019. ACM, 740--753."},{"key":"e_1_3_2_1_39_1","volume-title":"Proceedings of the IEEE International Symposium on High-Performance Computer Architecture, HPCA","author":"Kwon Youngeun","year":"2021","unstructured":"Youngeun Kwon , Yunjae Lee , and Minsoo Rhu . 2021 . Tensor Casting: Co-Designing Algorithm-Architecture for Personalized Recommendation Training . In Proceedings of the IEEE International Symposium on High-Performance Computer Architecture, HPCA 2021. IEEE, 235--248. Youngeun Kwon, Yunjae Lee, and Minsoo Rhu. 2021. Tensor Casting: Co-Designing Algorithm-Architecture for Personalized Recommendation Training. In Proceedings of the IEEE International Symposium on High-Performance Computer Architecture, HPCA 2021. IEEE, 235--248."},{"key":"e_1_3_2_1_40_1","volume-title":"Proceedings of the 49th Annual International Symposium on Computer Architecture, ISCA 2022. ACM, 860--873","author":"Kwon Youngeun","year":"2022","unstructured":"Youngeun Kwon and Minsoo Rhu . 2022 . Training personalized recommendation systems from (GPU) scratch: look forward not backwards . In Proceedings of the 49th Annual International Symposium on Computer Architecture, ISCA 2022. ACM, 860--873 . Youngeun Kwon and Minsoo Rhu. 2022. Training personalized recommendation systems from (GPU) scratch: look forward not backwards. In Proceedings of the 49th Annual International Symposium on Computer Architecture, ISCA 2022. ACM, 860--873."},{"key":"e_1_3_2_1_41_1","volume-title":"Proceedings of the 48th ACM\/IEEE Annual International Symposium on Computer Architecture, ISCA 2021. IEEE, 43--56","author":"Lee Suk Han","year":"2021","unstructured":"Suk Han Lee , Shinhaeng Kang , Jaehoon Lee , Hyeonsu Kim , Eojin Lee , Seungwoo Seo , Hosang Yoon , Seungwon Lee , Kyounghwan Lim , Hyunsung Shin , Jinhyun Kim , Seongil O, Anand Iyer , David Wang , Kyomin Sohn , and Nam Sung Kim . 2021 . Hardware Architecture and Software Stack for PIM Based on Commercial DRAM Technology : Industrial Product . In Proceedings of the 48th ACM\/IEEE Annual International Symposium on Computer Architecture, ISCA 2021. IEEE, 43--56 . Suk Han Lee, Shinhaeng Kang, Jaehoon Lee, Hyeonsu Kim, Eojin Lee, Seungwoo Seo, Hosang Yoon, Seungwon Lee, Kyounghwan Lim, Hyunsung Shin, Jinhyun Kim, Seongil O, Anand Iyer, David Wang, Kyomin Sohn, and Nam Sung Kim. 2021. Hardware Architecture and Software Stack for PIM Based on Commercial DRAM Technology : Industrial Product. In Proceedings of the 48th ACM\/IEEE Annual International Symposium on Computer Architecture, ISCA 2021. IEEE, 43--56."},{"key":"e_1_3_2_1_42_1","volume-title":"Proceedings of the 47th ACM\/IEEE Annual International Symposium on Computer Architecture, ISCA 2020. IEEE, 790--803","author":"Liu Ke","year":"2020","unstructured":"Ke Liu , Udit Gupta , Benjamin Youngjae Cho , David Brooks , Vikas Chandra , Utku Diril , Amin Firoozshahian , Kim M. Hazelwood , Bill Jia , Hsien-Hsin S. Lee , Meng Li , Bert Maher , Dheevatsa Mudigere , Maxim Naumov , Martin Schatz , Mikhail Smelyanskiy , Xiaodong Wang , Brandon Reagen , Carole-Jean Wu , Mark Hempstead , and Xuan Zhang . 2020 . RecNMP: Accelerating Personalized Recommendation with Near-Memory Processing . In Proceedings of the 47th ACM\/IEEE Annual International Symposium on Computer Architecture, ISCA 2020. IEEE, 790--803 . Ke Liu, Udit Gupta, Benjamin Youngjae Cho, David Brooks, Vikas Chandra, Utku Diril, Amin Firoozshahian, Kim M. Hazelwood, Bill Jia, Hsien-Hsin S. Lee, Meng Li, Bert Maher, Dheevatsa Mudigere, Maxim Naumov, Martin Schatz, Mikhail Smelyanskiy, Xiaodong Wang, Brandon Reagen, Carole-Jean Wu, Mark Hempstead, and Xuan Zhang. 2020. RecNMP: Accelerating Personalized Recommendation with Near-Memory Processing. In Proceedings of the 47th ACM\/IEEE Annual International Symposium on Computer Architecture, ISCA 2020. IEEE, 790--803."},{"key":"e_1_3_2_1_43_1","volume-title":"Proceedings of the IEEE International Symposium on High-Performance Computer Architecture, HPCA","author":"Liu Ke","year":"2022","unstructured":"Ke Liu , Udit Gupta , Mark Hempstead , Carole-Jean Wu , Hsien-Hsin S. Lee , and Xuan Zhang . 2022 . Hercules: Heterogeneity-Aware Inference Serving for At-Scale Personalized Recommendation . In Proceedings of the IEEE International Symposium on High-Performance Computer Architecture, HPCA 2022. IEEE, 141--144. Ke Liu, Udit Gupta, Mark Hempstead, Carole-Jean Wu, Hsien-Hsin S. Lee, and Xuan Zhang. 2022. Hercules: Heterogeneity-Aware Inference Serving for At-Scale Personalized Recommendation. In Proceedings of the IEEE International Symposium on High-Performance Computer Architecture, HPCA 2022. IEEE, 141--144."},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"crossref","first-page":"116","DOI":"10.1109\/MM.2021.3097700","article-title":"Near-Memory Processing in Action: Accelerating Personalized Recommendation With AxDIMM","volume":"42","author":"Liu Ke","year":"2022","unstructured":"Ke Liu , Xuan Zhang , Jinin So , Jong-Geon Lee , Shinhaeng Kang , Sukhan Lee , Songyi Han , YeonGon Cho , Jin Hyun Kim , Yongsuk Kwon , KyungSoo Kim , Jin Jung , IlKwon Yun , Sung Joo Park , Hyunsun Park , Joon-Ho Song , Jeonghyeon Cho , Kyomin Sohn , Nam Sung Kim , and Hsien-Hsin S. Lee . 2022 . Near-Memory Processing in Action: Accelerating Personalized Recommendation With AxDIMM . IEEE Micro 42 , 1 (2022), 116 -- 127 . Ke Liu, Xuan Zhang, Jinin So, Jong-Geon Lee, Shinhaeng Kang, Sukhan Lee, Songyi Han, YeonGon Cho, Jin Hyun Kim, Yongsuk Kwon, KyungSoo Kim, Jin Jung, IlKwon Yun, Sung Joo Park, Hyunsun Park, Joon-Ho Song, Jeonghyeon Cho, Kyomin Sohn, Nam Sung Kim, and Hsien-Hsin S. Lee. 2022. Near-Memory Processing in Action: Accelerating Personalized Recommendation With AxDIMM. IEEE Micro 42, 1 (2022), 116--127.","journal-title":"IEEE Micro"},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"crossref","DOI":"10.1147\/JRD.2015.2429031","article-title":"The IBM z13 memory subsystem for big data","volume":"59","author":"Meaney P. J.","year":"2015","unstructured":"P. J. Meaney , L. D. Curley , G. D. Gilda , M. R. Hodges , D. J. Buerkle , R. D. Siegl , and R. K. Dong . 2015 . The IBM z13 memory subsystem for big data . IBM Journal of Research and Development 59 , 4\/5 (2015). P. J. Meaney, L. D. Curley, G. D. Gilda, M. R. Hodges, D. J. Buerkle, R. D. Siegl, and R. K. Dong. 2015. The IBM z13 memory subsystem for big data. IBM Journal of Research and Development 59, 4\/5 (2015).","journal-title":"IBM Journal of Research and Development"},{"key":"e_1_3_2_1_46_1","volume-title":"Micron: System Power Calculator (DDR4).","year":"2017","unstructured":"Micron. 2017 . Micron: System Power Calculator (DDR4). Micron. 2017. Micron: System Power Calculator (DDR4)."},{"key":"e_1_3_2_1_47_1","first-page":"1","article-title":"Chameleon: Versatile and practical near-DRAM acceleration architecture for large memory systems. In Proceedings of the 49th Annual IEEE\/ACM International Symposium on Microarchitecture, MICRO 2016","volume":"50","author":"Moghaddam Hadi Asghari","year":"2016","unstructured":"Hadi Asghari Moghaddam , Young Hoon Son , Jung Ho Ahn , and Nam Sung Kim . 2016 . Chameleon: Versatile and practical near-DRAM acceleration architecture for large memory systems. In Proceedings of the 49th Annual IEEE\/ACM International Symposium on Microarchitecture, MICRO 2016 . IEEE , 50 : 1 -- 50 :13. Hadi Asghari Moghaddam, Young Hoon Son, Jung Ho Ahn, and Nam Sung Kim. 2016. Chameleon: Versatile and practical near-DRAM acceleration architecture for large memory systems. In Proceedings of the 49th Annual IEEE\/ACM International Symposium on Microarchitecture, MICRO 2016. IEEE, 50:1--50:13.","journal-title":"IEEE"},{"key":"e_1_3_2_1_48_1","volume-title":"Proceedings of the 49th Annual International Symposium on Computer Architecture, ISCA 2022. ACM, 993--1011","author":"Mudigere Dheevatsa","year":"2022","unstructured":"Dheevatsa Mudigere , Yuchen Hao , Jianyu Huang , Zhihao Jia , Andrew Tulloch , Srinivas Sridharan , Xing Liu , Mustafa Ozdal , Jade Nie , Jongsoo Park , Liang Luo , Jie Amy Yang , Leon Gao , Dmytro Ivchenko , Aarti Basant , Yuxi Hu , Jiyan Yang , Ehsan K. Ardestani , Xiaodong Wang , Rakesh Komuravelli , Ching-Hsiang Chu , Serhat Yilmaz , Huayu Li , Jiyuan Qian , Zhuobo Feng , Yinbin Ma , Junjie Yang , Ellie Wen , Hong Li , Lin Yang , Chonglin Sun , Whitney Zhao , Dimitry Melts , Krishna Dhulipala , K. R. Kishore , Tyler Graf , Assaf Eisenman , Kiran Kumar Matam , Adi Gangidi , Guoqiang Jerry Chen , Manoj Krishnan , Avinash Nayak , Krishnakumar Nair , Bharath Muthiah , Mahmoud khorashadi, Pallab Bhattacharya , Petr Lapukhov , Maxim Naumov , Ajit Mathews , Lin Qiao , Mikhail Smelyanskiy , Bill Jia , and Vijay Rao . 2022 . Software-hardware co-design for fast and scalable training of deep learning recommendation models . In Proceedings of the 49th Annual International Symposium on Computer Architecture, ISCA 2022. ACM, 993--1011 . Dheevatsa Mudigere, Yuchen Hao, Jianyu Huang, Zhihao Jia, Andrew Tulloch, Srinivas Sridharan, Xing Liu, Mustafa Ozdal, Jade Nie, Jongsoo Park, Liang Luo, Jie Amy Yang, Leon Gao, Dmytro Ivchenko, Aarti Basant, Yuxi Hu, Jiyan Yang, Ehsan K. Ardestani, Xiaodong Wang, Rakesh Komuravelli, Ching-Hsiang Chu, Serhat Yilmaz, Huayu Li, Jiyuan Qian, Zhuobo Feng, Yinbin Ma, Junjie Yang, Ellie Wen, Hong Li, Lin Yang, Chonglin Sun, Whitney Zhao, Dimitry Melts, Krishna Dhulipala, K. R. Kishore, Tyler Graf, Assaf Eisenman, Kiran Kumar Matam, Adi Gangidi, Guoqiang Jerry Chen, Manoj Krishnan, Avinash Nayak, Krishnakumar Nair, Bharath Muthiah, Mahmoud khorashadi, Pallab Bhattacharya, Petr Lapukhov, Maxim Naumov, Ajit Mathews, Lin Qiao, Mikhail Smelyanskiy, Bill Jia, and Vijay Rao. 2022. Software-hardware co-design for fast and scalable training of deep learning recommendation models. In Proceedings of the 49th Annual International Symposium on Computer Architecture, ISCA 2022. ACM, 993--1011."},{"key":"e_1_3_2_1_49_1","volume-title":"A Modern Primer on Processing in Memory. CoRR abs\/2012.03112","author":"Mutlu Onur","year":"2020","unstructured":"Onur Mutlu , Saugata Ghose , Juan G\u00f3mez-Luna , and Rachata Ausavarungnirun . 2020. A Modern Primer on Processing in Memory. CoRR abs\/2012.03112 ( 2020 ). Onur Mutlu, Saugata Ghose, Juan G\u00f3mez-Luna, and Rachata Ausavarungnirun. 2020. A Modern Primer on Processing in Memory. CoRR abs\/2012.03112 (2020)."},{"key":"e_1_3_2_1_50_1","unstructured":"Maxim Naumov Dheevatsa Mudigere Hao-Jun Michael Shi Jianyu Huang Narayanan Sundaraman Jongsoo Park Xiaodong Wang Udit Gupta Carole-Jean Wu Alisson G. Azzolini Dmytro Dzhulgakov Andrey Mallevich Ilia Cherniavskii Yinghai Lu Raghuraman Krishnamoorthi Ansha Yu Volodymyr Kondratenko Stephanie Pereira Xianjie Chen Wenlin Chen Vijay Rao Bill Jia Liang Xiong and Misha Smelyanskiy. 2019. Deep Learning Recommendation Model for Personalization and Recommendation Systems. CoRR abs\/1906.00091 (2019).  Maxim Naumov Dheevatsa Mudigere Hao-Jun Michael Shi Jianyu Huang Narayanan Sundaraman Jongsoo Park Xiaodong Wang Udit Gupta Carole-Jean Wu Alisson G. Azzolini Dmytro Dzhulgakov Andrey Mallevich Ilia Cherniavskii Yinghai Lu Raghuraman Krishnamoorthi Ansha Yu Volodymyr Kondratenko Stephanie Pereira Xianjie Chen Wenlin Chen Vijay Rao Bill Jia Liang Xiong and Misha Smelyanskiy. 2019. Deep Learning Recommendation Model for Personalization and Recommendation Systems. CoRR abs\/1906.00091 (2019)."},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"crossref","first-page":"113120","DOI":"10.1016\/j.dss.2019.113120","article-title":"The superhit effect and long tail phenomenon in the context of electronic word of mouth","volume":"125","author":"Olmedilla Mar\u00eda","year":"2019","unstructured":"Mar\u00eda Olmedilla , M. Roc\u00edo Mart\u00ednez-Torres , and Sergio L. Toral Mar\u00edn . 2019 . The superhit effect and long tail phenomenon in the context of electronic word of mouth . Decision Support Systems 125 (2019), 113120 . Mar\u00eda Olmedilla, M. Roc\u00edo Mart\u00ednez-Torres, and Sergio L. Toral Mar\u00edn. 2019. The superhit effect and long tail phenomenon in the context of electronic word of mouth. Decision Support Systems 125 (2019), 113120.","journal-title":"Decision Support Systems"},{"key":"e_1_3_2_1_52_1","volume-title":"Proceedings of the 54th Annual IEEE\/ACM International Symposium on Microarchitecture, MICRO 2021. ACM, 268--281","author":"Park Jaehyun","year":"2021","unstructured":"Jaehyun Park , Byeongho Kim , Sungmin Yun , Eojin Lee , Minsoo Rhu , and Jung Ho Ahn . 2021 . TRiM: Enhancing Processor-Memory Interfaces with Scalable Tensor Reduction in Memory . In Proceedings of the 54th Annual IEEE\/ACM International Symposium on Microarchitecture, MICRO 2021. ACM, 268--281 . Jaehyun Park, Byeongho Kim, Sungmin Yun, Eojin Lee, Minsoo Rhu, and Jung Ho Ahn. 2021. TRiM: Enhancing Processor-Memory Interfaces with Scalable Tensor Reduction in Memory. In Proceedings of the 54th Annual IEEE\/ACM International Symposium on Microarchitecture, MICRO 2021. ACM, 268--281."},{"key":"e_1_3_2_1_53_1","unstructured":"Jongsoo Park Maxim Naumov Protonu Basu Summer Deng Aravind Kalaiah Daya Shanker Khudia James Law Parth Malani Andrey Malevich Nadathur Satish Juan Miguel Pino Martin Schatz Alexander Sidorov Viswanath Sivakumar Andrew Tulloch Xiaodong Wang Yiming Wu Hector Yuen Utku Diril Dmytro Dzhulgakov Kim M. Hazelwood Bill Jia Yangqing Jia Lin Qiao Vijay Rao Nadav Rotem Sungjoo Yoo and Mikhail Smelyanskiy. 2018. Deep Learning Inference in Facebook Data Centers: Characterization Performance Optimizations and Hardware Implications. CoRR abs\/1811.09886 (2018).  Jongsoo Park Maxim Naumov Protonu Basu Summer Deng Aravind Kalaiah Daya Shanker Khudia James Law Parth Malani Andrey Malevich Nadathur Satish Juan Miguel Pino Martin Schatz Alexander Sidorov Viswanath Sivakumar Andrew Tulloch Xiaodong Wang Yiming Wu Hector Yuen Utku Diril Dmytro Dzhulgakov Kim M. Hazelwood Bill Jia Yangqing Jia Lin Qiao Vijay Rao Nadav Rotem Sungjoo Yoo and Mikhail Smelyanskiy. 2018. Deep Learning Inference in Facebook Data Centers: Characterization Performance Optimizations and Hardware Implications. CoRR abs\/1811.09886 (2018)."},{"key":"e_1_3_2_1_54_1","volume-title":"Proceedings of the IEEE Hot Chips 23 Symposium, HCS 2011. IEEE, 1--24","author":"Pawlowski J. Thomas","year":"2011","unstructured":"J. Thomas Pawlowski . 2011 . Hybrid memory cube (HMC) . In Proceedings of the IEEE Hot Chips 23 Symposium, HCS 2011. IEEE, 1--24 . J. Thomas Pawlowski. 2011. Hybrid memory cube (HMC). In Proceedings of the IEEE Hot Chips 23 Symposium, HCS 2011. IEEE, 1--24."},{"key":"e_1_3_2_1_55_1","volume-title":"Amazon Product Recommender System. CoRR abs\/2102.04238","author":"Rezaei Mohammad R.","year":"2021","unstructured":"Mohammad R. Rezaei . 2021. Amazon Product Recommender System. CoRR abs\/2102.04238 ( 2021 ). Mohammad R. Rezaei. 2021. Amazon Product Recommender System. CoRR abs\/2102.04238 (2021)."},{"key":"e_1_3_2_1_56_1","volume-title":"Proceedings of the 27th International Symposium on Computer Architecture, ISCA 2000. IEEE, 128--138","author":"Rixner Scott","unstructured":"Scott Rixner , William J. Dally , Ujval J. Kapasi , Peter R. Mattson , and John D. Owens . 2000. Memory access scheduling . In Proceedings of the 27th International Symposium on Computer Architecture, ISCA 2000. IEEE, 128--138 . Scott Rixner, William J. Dally, Ujval J. Kapasi, Peter R. Mattson, and John D. Owens. 2000. Memory access scheduling. In Proceedings of the 27th International Symposium on Computer Architecture, ISCA 2000. IEEE, 128--138."},{"key":"e_1_3_2_1_57_1","volume-title":"Proceedings of the 27th ACM International Conference on Architectural Support for Programming Languages and Operating Systems, ASPLOS 2022. ACM, 344--358","author":"Sethi Geet","year":"2022","unstructured":"Geet Sethi , Bilge Acun , Niket Agarwal , Christos Kozyrakis , Caroline Trippel , and Carole-Jean Wu . 2022 . RecShard: statistical feature-based memory optimization for industry-scale neural recommendation . In Proceedings of the 27th ACM International Conference on Architectural Support for Programming Languages and Operating Systems, ASPLOS 2022. ACM, 344--358 . Geet Sethi, Bilge Acun, Niket Agarwal, Christos Kozyrakis, Caroline Trippel, and Carole-Jean Wu. 2022. RecShard: statistical feature-based memory optimization for industry-scale neural recommendation. In Proceedings of the 27th ACM International Conference on Architectural Support for Programming Languages and Operating Systems, ASPLOS 2022. ACM, 344--358."},{"key":"e_1_3_2_1_58_1","volume-title":"Proceedings of the 43rd ACM\/IEEE Annual International Symposium on Computer Architecture, ISCA 2016. IEEE, 14--26","author":"Shafiee Ali","year":"2016","unstructured":"Ali Shafiee , Anirban Nag , Naveen Muralimanohar , Rajeev Balasubramonian , John Paul Strachan , Miao Hu , R. Stanley Williams , and Vivek Srikumar . 2016 . ISAAC: A Convolutional Neural Network Accelerator with In-Situ Analog Arithmetic in Crossbars . In Proceedings of the 43rd ACM\/IEEE Annual International Symposium on Computer Architecture, ISCA 2016. IEEE, 14--26 . Ali Shafiee, Anirban Nag, Naveen Muralimanohar, Rajeev Balasubramonian, John Paul Strachan, Miao Hu, R. Stanley Williams, and Vivek Srikumar. 2016. ISAAC: A Convolutional Neural Network Accelerator with In-Situ Analog Arithmetic in Crossbars. In Proceedings of the 43rd ACM\/IEEE Annual International Symposium on Computer Architecture, ISCA 2016. IEEE, 14--26."},{"key":"e_1_3_2_1_59_1","volume-title":"Proceedings of the 26th ACM SIGKDD Conference on Knowledge Discovery and Data Mining, SIGKDD","author":"Michael Shi Hao-Jun","year":"2020","unstructured":"Hao-Jun Michael Shi , Dheevatsa Mudigere , Maxim Naumov , and Jiyan Yang . 2020 . Compositional Embeddings Using Complementary Partitions for Memory-Efficient Recommendation Systems . In Proceedings of the 26th ACM SIGKDD Conference on Knowledge Discovery and Data Mining, SIGKDD , 2020. ACM, 165--175. Hao-Jun Michael Shi, Dheevatsa Mudigere, Maxim Naumov, and Jiyan Yang. 2020. Compositional Embeddings Using Complementary Partitions for Memory-Efficient Recommendation Systems. In Proceedings of the 26th ACM SIGKDD Conference on Knowledge Discovery and Data Mining, SIGKDD, 2020. ACM, 165--175."},{"key":"e_1_3_2_1_60_1","volume-title":"Linear and Integer Optimization: Theory and Practice","author":"Sierksma Gerard","unstructured":"Gerard Sierksma and Yori Zwols . 2015. Linear and Integer Optimization: Theory and Practice . CRC Press . Gerard Sierksma and Yori Zwols. 2015. Linear and Integer Optimization: Theory and Practice. CRC Press."},{"key":"e_1_3_2_1_61_1","volume-title":"Proceedings of the IEEE International Symposium on High-Performance Computer Architecture, HPCA","author":"Sun Xuan","year":"2022","unstructured":"Xuan Sun , Hu Wan , Qiao Li , Chia-Lin Yang , Tei-Wei Kuo , and Chun Jason Xue . 2022 . RM-SSD: In-Storage Computing for Large-Scale Recommendation Inference . In Proceedings of the IEEE International Symposium on High-Performance Computer Architecture, HPCA 2022. IEEE, 1056--1070. Xuan Sun, Hu Wan, Qiao Li, Chia-Lin Yang, Tei-Wei Kuo, and Chun Jason Xue. 2022. RM-SSD: In-Storage Computing for Large-Scale Recommendation Inference. In Proceedings of the IEEE International Symposium on High-Performance Computer Architecture, HPCA 2022. IEEE, 1056--1070."},{"key":"e_1_3_2_1_62_1","volume-title":"Proceedings of the 37th ACM\/IEEE Annual International Symposium on Computer Architecture, ISCA 2010. ACM, 175--186","author":"Udipi Aniruddha N.","unstructured":"Aniruddha N. Udipi , Naveen Muralimanohar , Niladrish Chatterjee , Rajeev Balasubramonian , Al Davis , and Norman P. Jouppi . 2010. Rethinking DRAM design and organization for energy-constrained multi-cores . In Proceedings of the 37th ACM\/IEEE Annual International Symposium on Computer Architecture, ISCA 2010. ACM, 175--186 . Aniruddha N. Udipi, Naveen Muralimanohar, Niladrish Chatterjee, Rajeev Balasubramonian, Al Davis, and Norman P. Jouppi. 2010. Rethinking DRAM design and organization for energy-constrained multi-cores. In Proceedings of the 37th ACM\/IEEE Annual International Symposium on Computer Architecture, ISCA 2010. ACM, 175--186."},{"key":"e_1_3_2_1_63_1","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2010.42"},{"key":"e_1_3_2_1_64_1","volume-title":"Proceedings of the 24th ACM SIGKDD International Conference on Knowledge Discovery & Data Mining, SIGKDD 2018. ACM, 839--848","author":"Wang Jizhe","year":"2018","unstructured":"Jizhe Wang , Pipei Huang , Huan Zhao , Zhibo Zhang , Binqiang Zhao , and Dik Lun Lee . 2018 . Billion-scale Commodity Embedding for E-commerce Recommendation in Alibaba . In Proceedings of the 24th ACM SIGKDD International Conference on Knowledge Discovery & Data Mining, SIGKDD 2018. ACM, 839--848 . Jizhe Wang, Pipei Huang, Huan Zhao, Zhibo Zhang, Binqiang Zhao, and Dik Lun Lee. 2018. Billion-scale Commodity Embedding for E-commerce Recommendation in Alibaba. In Proceedings of the 24th ACM SIGKDD International Conference on Knowledge Discovery & Data Mining, SIGKDD 2018. ACM, 839--848."},{"key":"e_1_3_2_1_65_1","volume-title":"Proceedings of the 26th ACM International Conference on Architectural Support for Programming Languages and Operating Systems, ASPLOS 2021. ACM, 717--729","author":"Wilkening Mark","year":"2021","unstructured":"Mark Wilkening , Udit Gupta , Samuel Hsia , Caroline Trippel , Carole-Jean Wu , David Brooks , and Gu-Yeon Wei . 2021 . RecSSD: near data processing for solid state drive based recommendation inference . In Proceedings of the 26th ACM International Conference on Architectural Support for Programming Languages and Operating Systems, ASPLOS 2021. ACM, 717--729 . Mark Wilkening, Udit Gupta, Samuel Hsia, Caroline Trippel, Carole-Jean Wu, David Brooks, and Gu-Yeon Wei. 2021. RecSSD: near data processing for solid state drive based recommendation inference. In Proceedings of the 26th ACM International Conference on Architectural Support for Programming Languages and Operating Systems, ASPLOS 2021. ACM, 717--729."},{"key":"e_1_3_2_1_66_1","volume-title":"Joseph A. Konstan, Julian J. McAuley, Yves Raimond, and Hao Zhang.","author":"Wu Carole-Jean","year":"2020","unstructured":"Carole-Jean Wu , Robin Burke , Ed H. Chi , Joseph A. Konstan, Julian J. McAuley, Yves Raimond, and Hao Zhang. 2020 . Developing a Recommendation Benchmark for MLPerf Training and Inference. CoRR abs\/2003.07336 (2020). Carole-Jean Wu, Robin Burke, Ed H. Chi, Joseph A. Konstan, Julian J. McAuley, Yves Raimond, and Hao Zhang. 2020. Developing a Recommendation Benchmark for MLPerf Training and Inference. CoRR abs\/2003.07336 (2020)."},{"key":"e_1_3_2_1_67_1","volume-title":"Proceedings of the IEEE International Symposium on High-Performance Computer Architecture, HPCA","author":"Xie Xinfeng","year":"2021","unstructured":"Xinfeng Xie , Zheng Liang , Peng Gu , Abanti Basak , Lei Deng , Ling Liang , Xing Hu , and Yuan Xie . 2021 . SpaceA: Sparse Matrix Vector Multiplication on Processing-in-Memory Accelerator . In Proceedings of the IEEE International Symposium on High-Performance Computer Architecture, HPCA 2021. IEEE, 570--583. Xinfeng Xie, Zheng Liang, Peng Gu, Abanti Basak, Lei Deng, Ling Liang, Xing Hu, and Yuan Xie. 2021. SpaceA: Sparse Matrix Vector Multiplication on Processing-in-Memory Accelerator. In Proceedings of the IEEE International Symposium on High-Performance Computer Architecture, HPCA 2021. IEEE, 570--583."},{"key":"e_1_3_2_1_68_1","volume-title":"Ping Tak Peter Tang, and Andrew Tulloch","author":"Yang Jie Amy","year":"2020","unstructured":"Jie Amy Yang , Jianyu Huang , Jongsoo Park , Ping Tak Peter Tang, and Andrew Tulloch . 2020 . Mixed-Precision Embedding Using a Cache. CoRR abs\/2010.11305 (2020). Jie Amy Yang, Jianyu Huang, Jongsoo Park, Ping Tak Peter Tang, and Andrew Tulloch. 2020. Mixed-Precision Embedding Using a Cache. CoRR abs\/2010.11305 (2020)."},{"key":"e_1_3_2_1_69_1","volume-title":"Proceedings of the 27th International Conference on Parallel Architectures and Compilation Techniques, PACT 2018. ACM, 34:1--34:14","author":"Yazdanbakhsh Amir","year":"2018","unstructured":"Amir Yazdanbakhsh , Choungki Song , Jacob Sacks , Pejman Lotfi-Kamran , Hadi Esmaeilzadeh , and Nam Sung Kim . 2018 . In-DRAM near-data approximate acceleration for GPUs . In Proceedings of the 27th International Conference on Parallel Architectures and Compilation Techniques, PACT 2018. ACM, 34:1--34:14 . Amir Yazdanbakhsh, Choungki Song, Jacob Sacks, Pejman Lotfi-Kamran, Hadi Esmaeilzadeh, and Nam Sung Kim. 2018. In-DRAM near-data approximate acceleration for GPUs. In Proceedings of the 27th International Conference on Parallel Architectures and Compilation Techniques, PACT 2018. ACM, 34:1--34:14."},{"key":"e_1_3_2_1_70_1","volume-title":"Proceedings of the 49th Annual International Symposium on Computer Architecture, ISCA 2022. ACM, 1042--1057","author":"Zhao Mark","year":"2022","unstructured":"Mark Zhao , Niket Agarwal , Aarti Basant , Bugra Gedik , Satadru Pan , Mustafa Ozdal , Rakesh Komuravelli , Jerry Pan , Tianshu Bao , Haowei Lu , Sundaram Narayanan , Jack Langman , Kevin Wilfong , Harsha Rastogi , Carole-Jean Wu , Christos Kozyrakis , and Parik Pol . 2022 . Understanding data storage and ingestion for large-scale deep recommendation model training: industrial product . In Proceedings of the 49th Annual International Symposium on Computer Architecture, ISCA 2022. ACM, 1042--1057 . Mark Zhao, Niket Agarwal, Aarti Basant, Bugra Gedik, Satadru Pan, Mustafa Ozdal, Rakesh Komuravelli, Jerry Pan, Tianshu Bao, Haowei Lu, Sundaram Narayanan, Jack Langman, Kevin Wilfong, Harsha Rastogi, Carole-Jean Wu, Christos Kozyrakis, and Parik Pol. 2022. Understanding data storage and ingestion for large-scale deep recommendation model training: industrial product. In Proceedings of the 49th Annual International Symposium on Computer Architecture, ISCA 2022. ACM, 1042--1057."},{"key":"e_1_3_2_1_71_1","volume-title":"Proceedings of the Thirty-Third AAAI Conference on Artificial Intelligence, AAAI","author":"Zhou Guorui","year":"2019","unstructured":"Guorui Zhou , Na Mou , Ying Fan , Qi Pi , Weijie Bian , Chang Zhou , Xiaoqiang Zhu , and Kun Gai . 2019 . Deep Interest Evolution Network for Click-Through Rate Prediction . In Proceedings of the Thirty-Third AAAI Conference on Artificial Intelligence, AAAI 2019. AAAI Press, 5941--5948. Guorui Zhou, Na Mou, Ying Fan, Qi Pi, Weijie Bian, Chang Zhou, Xiaoqiang Zhu, and Kun Gai. 2019. Deep Interest Evolution Network for Click-Through Rate Prediction. In Proceedings of the Thirty-Third AAAI Conference on Artificial Intelligence, AAAI 2019. AAAI Press, 5941--5948."}],"event":{"name":"ISCA '23: 50th Annual International Symposium on Computer Architecture","location":"Orlando FL USA","acronym":"ISCA '23","sponsor":["SIGARCH ACM Special Interest Group on Computer Architecture","IEEE"]},"container-title":["Proceedings of the 50th Annual International Symposium on Computer Architecture"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3579371.3589101","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T16:46:40Z","timestamp":1750178800000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3579371.3589101"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,6,17]]},"references-count":71,"alternative-id":["10.1145\/3579371.3589101","10.1145\/3579371"],"URL":"https:\/\/doi.org\/10.1145\/3579371.3589101","relation":{},"subject":[],"published":{"date-parts":[[2023,6,17]]},"assertion":[{"value":"2023-06-17","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}