{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,18]],"date-time":"2025-12-18T14:15:46Z","timestamp":1766067346639,"version":"3.37.3"},"reference-count":20,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"5","license":[{"start":{"date-parts":[[2021,9,1]],"date-time":"2021-09-01T00:00:00Z","timestamp":1630454400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2021,9,1]],"date-time":"2021-09-01T00:00:00Z","timestamp":1630454400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2021,9,1]],"date-time":"2021-09-01T00:00:00Z","timestamp":1630454400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Micro"],"published-print":{"date-parts":[[2021,9,1]]},"DOI":"10.1109\/mm.2021.3081981","type":"journal-article","created":{"date-parts":[[2021,5,19]],"date-time":"2021-05-19T21:49:00Z","timestamp":1621460940000},"page":"93-100","source":"Crossref","is-referenced-by-count":7,"title":["Low-Precision Hardware Architectures Meet Recommendation Model Inference at Scale"],"prefix":"10.1109","volume":"41","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-6946-5357","authenticated-orcid":false,"given":"Zhaoxia","family":"Deng","sequence":"first","affiliation":[{"name":"Facebook, Menlo Park, CA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4750-9440","authenticated-orcid":false,"given":"Jongsoo","family":"Park","sequence":"additional","affiliation":[{"name":"Facebook, Menlo Park, CA, USA"}]},{"given":"Ping Tak Peter","family":"Tang","sequence":"additional","affiliation":[{"name":"Facebook, Menlo Park, CA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5160-9276","authenticated-orcid":false,"given":"Haixin","family":"Liu","sequence":"additional","affiliation":[{"name":"Facebook, Menlo Park, CA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5294-3460","authenticated-orcid":false,"given":"Jie","family":"Yang","sequence":"additional","affiliation":[{"name":"Facebook, Menlo Park, CA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3429-8338","authenticated-orcid":false,"given":"Hector","family":"Yuen","sequence":"additional","affiliation":[{"name":"Facebook, Menlo Park, CA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7595-5539","authenticated-orcid":false,"given":"Jianyu","family":"Huang","sequence":"additional","affiliation":[{"name":"Facebook, Menlo Park, CA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6706-5074","authenticated-orcid":false,"given":"Daya","family":"Khudia","sequence":"additional","affiliation":[{"name":"Facebook, Menlo Park, CA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9997-0469","authenticated-orcid":false,"given":"Xiaohan","family":"Wei","sequence":"additional","affiliation":[{"name":"Facebook, Menlo Park, CA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1999-401X","authenticated-orcid":false,"given":"Ellie","family":"Wen","sequence":"additional","affiliation":[{"name":"Facebook, Menlo Park, CA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4520-765X","authenticated-orcid":false,"given":"Dhruv","family":"Choudhary","sequence":"additional","affiliation":[{"name":"Facebook, Menlo Park, CA, USA"}]},{"given":"Raghuraman","family":"Krishnamoorthi","sequence":"additional","affiliation":[{"name":"Facebook, Menlo Park, CA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9032-7239","authenticated-orcid":false,"given":"Carole-Jean","family":"Wu","sequence":"additional","affiliation":[{"name":"Facebook, Menlo Park, CA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8065-3401","authenticated-orcid":false,"given":"Satish","family":"Nadathur","sequence":"additional","affiliation":[{"name":"Facebook, Menlo Park, CA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0283-8371","authenticated-orcid":false,"given":"Changkyu","family":"Kim","sequence":"additional","affiliation":[{"name":"Facebook, Menlo Park, CA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6102-2903","authenticated-orcid":false,"given":"Maxim","family":"Naumov","sequence":"additional","affiliation":[{"name":"Facebook, Menlo Park, CA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6511-1866","authenticated-orcid":false,"given":"Sam","family":"Naghshineh","sequence":"additional","affiliation":[{"name":"Facebook, Menlo Park, CA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2433-6110","authenticated-orcid":false,"given":"Mikhail","family":"Smelyanskiy","sequence":"additional","affiliation":[{"name":"Facebook, Menlo Park, CA, USA"}]}],"member":"263","reference":[{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2018.00059"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1145\/2648584.2648589"},{"year":"2020","author":"inc","key":"ref12"},{"key":"ref13","first-page":"112","article-title":"Trained quantization thresholds for accurate and efficient fixed-point inference of deep neural networks","author":"jain","year":"2020","journal-title":"Proc 3rd Conf Mach Learn Syst"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1145\/3079856.3080246"},{"key":"ref15","article-title":"Quantizing deep convolutional networks for efficient inference: A whitepaper","volume":"abs 1806 8342","author":"krishnamoorthi","year":"2018","journal-title":"CoRR"},{"article-title":"DLRM workloads with implications on hardware and system platforms","year":"2020","author":"naumov","key":"ref16"},{"key":"ref17","first-page":"4486","article-title":"Same, same but different&#x2014;Recovering neural network quantization error through weight factorization","author":"meller","year":"2019","journal-title":"Proc 36th Int Conf Mach Learn"},{"key":"ref18","article-title":"Deep learning recommendation model for personalization and recommendation systems","volume":"abs 1906 91","author":"naumov","year":"2019","journal-title":"CoRR"},{"key":"ref19","article-title":"Deep learning inference in Facebook data centers: Characterization, performance optimizations and hardware implications","volume":"abs 1811 9886","author":"park","year":"0","journal-title":"CoRR"},{"year":"2021","key":"ref4","article-title":"PyTorch numeric suite"},{"year":"2018","key":"ref3","article-title":"FBGEMM"},{"key":"ref6","article-title":"Post-training 4-bit quantization on embedding tables","volume":"abs 1911 2079","author":"guan","year":"2019","journal-title":"CoRR"},{"key":"ref5","article-title":"Training with quantization noise for extreme model compression","volume":"abs 2004 7320","author":"fan","year":"2020","journal-title":"CoRR"},{"key":"ref8","first-page":"488","article-title":"The architectural implications of Facebook&#x2019;s DNN-based personalized recommendation","author":"gupta","year":"2020","journal-title":"Proc Int Symp High Perform Comput Archit"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA45697.2020.00084"},{"year":"2019","key":"ref2","article-title":"DLRM benchmarks"},{"year":"2020","key":"ref1","article-title":"Accelerating AI performance on 3rd Gen Intel Xeon scalable processors with TensorFlow and bfloat16"},{"key":"ref9","first-page":"1478","article-title":"Understanding the impact of precision quantization on the accuracy and energy of neural networks","author":"hashemi","year":"2017","journal-title":"Proc Conf Des Autom Test Eur"},{"article-title":"Training deep learning recommendation model with quantized collective communications","year":"2020","author":"yang","key":"ref20"}],"container-title":["IEEE Micro"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/40\/9536946\/09435938.pdf?arnumber=9435938","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,5,10]],"date-time":"2022-05-10T14:50:48Z","timestamp":1652194248000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9435938\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,9,1]]},"references-count":20,"journal-issue":{"issue":"5"},"URL":"https:\/\/doi.org\/10.1109\/mm.2021.3081981","relation":{},"ISSN":["0272-1732","1937-4143"],"issn-type":[{"type":"print","value":"0272-1732"},{"type":"electronic","value":"1937-4143"}],"subject":[],"published":{"date-parts":[[2021,9,1]]}}}