{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,6]],"date-time":"2026-05-06T15:24:25Z","timestamp":1778081065378,"version":"3.51.4"},"publisher-location":"New York, NY, USA","reference-count":64,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,6,17]],"date-time":"2023-06-17T00:00:00Z","timestamp":1686960000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,6,17]]},"DOI":"10.1145\/3579371.3589350","type":"proceedings-article","created":{"date-parts":[[2023,6,16]],"date-time":"2023-06-16T20:25:28Z","timestamp":1686947128000},"page":"1-14","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":409,"title":["TPU v4: An Optically Reconfigurable Supercomputer for Machine Learning with Hardware Support for Embeddings"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-1765-1929","authenticated-orcid":false,"given":"Norm","family":"Jouppi","sequence":"first","affiliation":[{"name":"Google, Mountain View, CA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3478-518X","authenticated-orcid":false,"given":"George","family":"Kurian","sequence":"additional","affiliation":[{"name":"Google, Mountain View, CA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1068-5261","authenticated-orcid":false,"given":"Sheng","family":"Li","sequence":"additional","affiliation":[{"name":"Google, Mountain View, CA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7264-1443","authenticated-orcid":false,"given":"Peter","family":"Ma","sequence":"additional","affiliation":[{"name":"Google, Mountain View, CA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-2146-8687","authenticated-orcid":false,"given":"Rahul","family":"Nagarajan","sequence":"additional","affiliation":[{"name":"Google, Mountain View, CA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8801-9384","authenticated-orcid":false,"given":"Lifeng","family":"Nai","sequence":"additional","affiliation":[{"name":"Google, Mountain View, CA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6620-0038","authenticated-orcid":false,"given":"Nishant","family":"Patil","sequence":"additional","affiliation":[{"name":"Google, Mountain View, CA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8715-8964","authenticated-orcid":false,"given":"Suvinay","family":"Subramanian","sequence":"additional","affiliation":[{"name":"Google, Mountain View, CA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-7039-8812","authenticated-orcid":false,"given":"Andy","family":"Swing","sequence":"additional","affiliation":[{"name":"Google, Mountain View, CA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0000-9409-7285","authenticated-orcid":false,"given":"Brian","family":"Towles","sequence":"additional","affiliation":[{"name":"Google, Mountain View, CA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2172-1651","authenticated-orcid":false,"given":"Clifford","family":"Young","sequence":"additional","affiliation":[{"name":"Google, Mountain View, CA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0121-6527","authenticated-orcid":false,"given":"Xiang","family":"Zhou","sequence":"additional","affiliation":[{"name":"Google, Mountain View, CA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4991-8424","authenticated-orcid":false,"given":"Zongwei","family":"Zhou","sequence":"additional","affiliation":[{"name":"Google, Mountain View, CA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0902-7093","authenticated-orcid":false,"given":"David A","family":"Patterson","sequence":"additional","affiliation":[{"name":"Google, Mountain View, CA, USA"},{"name":"University of California, Berkeley, CA, USA"}]}],"member":"320","published-online":{"date-parts":[[2023,6,17]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"On the Factory Floor: ML Engineering for Industrial-Scale Ads Recommendation Models. 16th ACM Conference on Recommender Systems.","author":"Anil R.","unstructured":"Anil , R. , Gadanho , S. , Huang , D. , Jacob , N. , Li , Z. , Lin , D. , Phillips , T. , Pop , C. , Regan , K. , Shamir , G.I. and Shivanna , R ., 2022 . On the Factory Floor: ML Engineering for Industrial-Scale Ads Recommendation Models. 16th ACM Conference on Recommender Systems. Anil, R., Gadanho, S., Huang, D., Jacob, N., Li, Z., Lin, D., Phillips, T., Pop, C., Regan, K., Shamir, G.I. and Shivanna, R., 2022. On the Factory Floor: ML Engineering for Industrial-Scale Ads Recommendation Models. 16th ACM Conference on Recommender Systems."},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"crossref","unstructured":"Barroso L.A. and H\u00f6lzle U. 2009 First Edition. The datacenter as a computer: An introduction to the design of warehouse-scale machines. Synthesis lectures on computer architecture 6(3) pp.1--120.  Barroso L.A. and H\u00f6lzle U. 2009 First Edition. The datacenter as a computer: An introduction to the design of warehouse-scale machines. Synthesis lectures on computer architecture 6(3) pp.1--120.","DOI":"10.2200\/S00193ED1V01Y200905CAC006"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"crossref","unstructured":"Barroso L.A. H\u00f6lzle U. and Ranganathan P. 2018. The datacenter as a computer: Designing warehouse-scale machines Third Edition. Synthesis Lectures on Computer Architecture 13(3) pp.i--189.  Barroso L.A. H\u00f6lzle U. and Ranganathan P. 2018. The datacenter as a computer: Designing warehouse-scale machines Third Edition. Synthesis Lectures on Computer Architecture 13(3) pp.i--189.","DOI":"10.2200\/S00874ED3V01Y201809CAC046"},{"key":"e_1_3_2_1_4_1","unstructured":"Bloomberg October 26 2015. Google turning its lucrative web search over to AI machines.  Bloomberg October 26 2015. Google turning its lucrative web search over to AI machines."},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1109\/PROC.1972.8647"},{"key":"e_1_3_2_1_6_1","unstructured":"Brown T. Mann B. Ryder N. Subbiah M. Kaplan J.D. Dhariwal P. Neelakantan A. Shyam P. Sastry G. Askell A. and Agarwal S. 2020. Language models are few-shot learners. Advances in Neural Information Processing Systems 33 (NeurIPS 2020).  Brown T. Mann B. Ryder N. Subbiah M. Kaplan J.D. Dhariwal P. Neelakantan A. Shyam P. Sastry G. Askell A. and Agarwal S. 2020. Language models are few-shot learners. Advances in Neural Information Processing Systems 33 (NeurIPS 2020)."},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPDS.2010.30"},{"issue":"9","key":"e_1_3_2_1_8_1","doi-asserted-by":"crossref","first-page":"2506","DOI":"10.1109\/TPDS.2014.2355827","article-title":"Lattice graphs for high-scale interconnection topologies","volume":"26","author":"Camarero C.","year":"2014","unstructured":"Camarero , C. , Martinez , C. and Beivide , R. , 2014 . Lattice graphs for high-scale interconnection topologies . IEEE Transactions on Parallel and Distributed Systems , 26 ( 9 ), pp. 2506 -- 2519 . Camarero, C., Martinez, C. and Beivide, R., 2014. Lattice graphs for high-scale interconnection topologies. IEEE Transactions on Parallel and Distributed Systems, 26(9), pp. 2506--2519.","journal-title":"IEEE Transactions on Parallel and Distributed Systems"},{"key":"e_1_3_2_1_9_1","unstructured":"Chowdhery A. Narang S. Devlin J. Bosma M. Mishra G. Roberts A. Barham P. Chung H.W. Sutton C. Gehrmann S. and Schuh P. 2022. PaLM: Scaling language modeling with pathways. arXiv preprint arXiv:2204.02311.  Chowdhery A. Narang S. Devlin J. Bosma M. Mishra G. Roberts A. Barham P. Chung H.W. Sutton C. Gehrmann S. and Schuh P. 2022. PaLM: Scaling language modeling with pathways. arXiv preprint arXiv:2204.02311."},{"key":"e_1_3_2_1_10_1","unstructured":"Colamco 2022. Mellanox QM8790 - Quantum HDR Switch https:\/\/www.colamco.com\/product\/mellanox-infiniband-switch-mqm8790-hs2f-1503410?utm_source=froogle&utm_medium=referral.  Colamco 2022. Mellanox QM8790 - Quantum HDR Switch https:\/\/www.colamco.com\/product\/mellanox-infiniband-switch-mqm8790-hs2f-1503410?utm_source=froogle&utm_medium=referral."},{"key":"e_1_3_2_1_11_1","first-page":"191","volume-title":"Proceedings of the 10th ACM conference on recommender systems","author":"Covington P.","unstructured":"Covington , P. , Adams , J. and Sargin , E ., 2016. Deep neural networks for Youtube recommendations . In Proceedings of the 10th ACM conference on recommender systems , pp. 191 -- 198 . Covington, P., Adams, J. and Sargin, E., 2016. Deep neural networks for Youtube recommendations. In Proceedings of the 10th ACM conference on recommender systems, pp. 191--198."},{"key":"e_1_3_2_1_12_1","unstructured":"Dally W.J. and Towles B.P. 2004. Principles and practices of interconnection networks. Elsevier.  Dally W.J. and Towles B.P. 2004. Principles and practices of interconnection networks. Elsevier."},{"key":"e_1_3_2_1_13_1","unstructured":"Deepmind Nov 18 2019 Advanced machine learning helps Play Store users discover personalised apps.  Deepmind Nov 18 2019 Advanced machine learning helps Play Store users discover personalised apps."},{"key":"e_1_3_2_1_14_1","volume-title":"BERT: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805.","author":"Devlin J.","year":"2018","unstructured":"Devlin , J. , Chang , M.W. , Lee , K. and Toutanova , K. , 2018 . BERT: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805. Devlin, J., Chang, M.W., Lee, K. and Toutanova, K., 2018. BERT: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805."},{"key":"e_1_3_2_1_15_1","unstructured":"Dungworth M. Harrell J. Levine M. Nelson S. Oberlin S. and Reinhardt S.P. 2011. CRAY T3E.  Dungworth M. Harrell J. Levine M. Nelson S. Oberlin S. and Reinhardt S.P. 2011. CRAY T3E."},{"key":"e_1_3_2_1_16_1","first-page":"13","volume-title":"2007 ACM\/IEEE 34th International Symposium on Computer Architecture (ISCA)","author":"Fan X.","unstructured":"Fan , X. , Weber , W.D. and Barroso , L.A ., 2007. Power provisioning for a warehouse-sized computer . In 2007 ACM\/IEEE 34th International Symposium on Computer Architecture (ISCA) , pp. 13 -- 23 . Fan, X., Weber, W.D. and Barroso, L.A., 2007. Power provisioning for a warehouse-sized computer. In 2007 ACM\/IEEE 34th International Symposium on Computer Architecture (ISCA), pp.13--23."},{"key":"e_1_3_2_1_17_1","volume-title":"Proceedings of ACM SIGCOMM","author":"Farrington N., G.","year":"2010","unstructured":"Farrington , N., G. Porter , S. Radhakrishnan , H. H. Bazzaz , V. Subramanya , Y. Fainman , G. Papen , and A. Vahdat . Helios: A hybrid electrical\/optical switch architecture for modular data centers . In Proceedings of ACM SIGCOMM , Aug. 2010 . Farrington, N., G. Porter, S. Radhakrishnan, H. H. Bazzaz, V. Subramanya, Y. Fainman, G. Papen, and A. Vahdat. Helios: A hybrid electrical\/optical switch architecture for modular data centers. In Proceedings of ACM SIGCOMM, Aug. 2010."},{"key":"e_1_3_2_1_18_1","volume-title":"Collide: Recommendation System Model Compression with Learned Hash Functions. arXiv preprint arXiv:2203.15837.","author":"Ghaemmaghami B.","year":"2022","unstructured":"Ghaemmaghami , B. , Ozdal , M. , Komuravelli , R. , Korchev , D. , Mudigere , D. , Nair , K. and Naumov , M. , 2022 . Learning to Collide: Recommendation System Model Compression with Learned Hash Functions. arXiv preprint arXiv:2203.15837. Ghaemmaghami, B., Ozdal, M., Komuravelli, R., Korchev, D., Mudigere, D., Nair, K. and Naumov, M., 2022. Learning to Collide: Recommendation System Model Compression with Learned Hash Functions. arXiv preprint arXiv:2203.15837."},{"key":"e_1_3_2_1_19_1","unstructured":"Google Tracking our carbon free energy progress https:\/\/sustainability.google\/progress\/energy\/.  Google Tracking our carbon free energy progress https:\/\/sustainability.google\/progress\/energy\/."},{"key":"e_1_3_2_1_20_1","unstructured":"Google Google Data Center Efficiency https:\/\/www.google.com\/about\/datacenters\/efficiency\/.  Google Google Data Center Efficiency https:\/\/www.google.com\/about\/datacenters\/efficiency\/."},{"key":"e_1_3_2_1_21_1","unstructured":"Graphcore 2022. IPU-POD64 Reference Design Datasheet docs.graphcore.ai\/projects\/ipu-pod64-datasheet\/en\/latest\/  Graphcore 2022. IPU-POD64 Reference Design Datasheet docs.graphcore.ai\/projects\/ipu-pod64-datasheet\/en\/latest\/"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"crossref","first-page":"10","DOI":"10.1109\/ISSCC.2014.6757323","volume-title":"2014 IEEE International Solid-State Circuits Conference Digest of Technical Papers (ISSCC)","author":"Horowitz M.","year":"2014","unstructured":"Horowitz , M. , 2014 . Computing's energy problem (and what we can do about it) . In 2014 IEEE International Solid-State Circuits Conference Digest of Technical Papers (ISSCC) , pp. 10 -- 14 . Horowitz, M., 2014. Computing's energy problem (and what we can do about it). In 2014 IEEE International Solid-State Circuits Conference Digest of Technical Papers (ISSCC), pp. 10--14."},{"key":"e_1_3_2_1_23_1","unstructured":"Insight 2022. Mellanox Quantum QM8790 - switch - 40 ports https:\/\/www.insight.com\/en_US\/shop\/product\/MQM8790-HS2F\/Mellanox\/MQM8790-HS2F\/MellanoxQuantumQM8790-swit\/.  Insight 2022. Mellanox Quantum QM8790 - switch - 40 ports https:\/\/www.insight.com\/en_US\/shop\/product\/MQM8790-HS2F\/Mellanox\/MQM8790-HS2F\/MellanoxQuantumQM8790-swit\/."},{"key":"e_1_3_2_1_24_1","volume-title":"Status Report","author":"International Energy Agency","year":"2019","unstructured":"International Energy Agency , Global Energy & CO2 Status Report 2019 , Report Extract Emissions . International Energy Agency, Global Energy & CO2 Status Report 2019, Report Extract Emissions."},{"key":"e_1_3_2_1_25_1","first-page":"1","volume-title":"ACM\/IEEE 44th Annual International Symposium on Computer Architecture (ISCA)","author":"Jouppi N.P.","unstructured":"Jouppi , N.P. , Young , C. , Patil , N. , Patterson , D. , Agrawal , G. , Bajwa , R. , Bates , S. , Bhatia , S. , Boden , N. , Borchers , A. , Boyle , R. , Cantin , P. , Chao , C. , Clark , C. , Coriell , J. , Daley , M. , Dau , M. , Dean , J. , Gelb , B. , Ghaemmaghami , T. , Gottipati , R. , Gulland , W. , Hagmann , R. , Ho , C.R. , Hogberg , D. , Hu , J. , Hundt , R. , Hurt , D. , Ibarz , J. , Jaffey , A. , Jaworski , A. , Kaplan , A. , Khaitan , H. , Killebrew , D. , Koch , A. , Kumar , N. , Lacy , S. , Laudon , J. , Law , J. , Le , D. , Leary , C. , Liu , Z. , Lucke , K. , Lundin , A. , MacKean , G. , Maggiore , A. , Mahony , M. , Miller , K. , Nagarajan , R. , Narayanaswami , R. , Ni , R. , Nix , K. , Norrie , T. , Omernick , M. , Penukonda , N. , Phelps , A. , Ross , J. , Ross , M. , Salek , A. , Samadiani , E. , Severn , C. , Sizikov , G. , Snelham , M. , Souter , J. , Steinberg , D. , Swing , A. , Tan , M. , Thorson , G. , Tian , B. , Toma , H. , Tuttle , E. , Vijay Vasudevan , Walter, R., Wang , W. , Wilcox , E. , and Yoon , D.H . 2017. In-datacenter performance analysis of a tensor processing unit . In ACM\/IEEE 44th Annual International Symposium on Computer Architecture (ISCA) , pp. 1 -- 12 . Jouppi, N.P., Young, C., Patil, N., Patterson, D., Agrawal, G., Bajwa, R., Bates, S., Bhatia, S., Boden, N., Borchers, A., Boyle, R., Cantin, P., Chao, C., Clark, C., Coriell, J., Daley, M., Dau, M., Dean, J., Gelb, B., Ghaemmaghami, T., Gottipati, R., Gulland, W., Hagmann, R., Ho, C.R., Hogberg, D., Hu, J., Hundt, R., Hurt, D., Ibarz, J., Jaffey, A., Jaworski, A., Kaplan, A., Khaitan, H., Killebrew, D., Koch, A., Kumar, N., Lacy, S., Laudon, J., Law, J., Le, D., Leary, C., Liu, Z., Lucke, K., Lundin, A., MacKean, G., Maggiore, A., Mahony, M., Miller, K., Nagarajan, R., Narayanaswami, R., Ni, R., Nix, K., Norrie, T., Omernick, M., Penukonda, N., Phelps, A., Ross, J., Ross, M., Salek, A., Samadiani, E., Severn, C., Sizikov, G., Snelham, M., Souter, J., Steinberg, D., Swing, A., Tan, M., Thorson, G., Tian, B., Toma, H., Tuttle, E., Vijay Vasudevan, Walter, R., Wang, W., Wilcox, E., and Yoon, D.H. 2017. In-datacenter performance analysis of a tensor processing unit. In ACM\/IEEE 44th Annual International Symposium on Computer Architecture (ISCA), pp. 1--12."},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1145\/3360307"},{"key":"e_1_3_2_1_27_1","first-page":"1","volume-title":"2021 ACM\/IEEE 48th Annual International Symposium on Computer Architecture (ISCA)","author":"Jouppi N.P.","unstructured":"Jouppi , N.P. , Yoon , D.H. , Ashcraft , M. , Gottscho , M. , Jablin , T.B. , Kurian , G. , Laudon , J. , Li , S. , Ma , P. , Ma , X. , Norrie , T. , Patil , N. , Prasad , S. , Young , C. , Zhou , Z. , and Patterson , D . 2021. Ten lessons from three generations shaped Google's TPUv4i . In 2021 ACM\/IEEE 48th Annual International Symposium on Computer Architecture (ISCA) , pp. 1 -- 14 . Jouppi, N.P., Yoon, D.H., Ashcraft, M., Gottscho, M., Jablin, T.B., Kurian, G., Laudon, J., Li, S., Ma, P., Ma, X., Norrie, T., Patil, N., Prasad, S., Young, C., Zhou, Z., and Patterson, D. 2021. Ten lessons from three generations shaped Google's TPUv4i. In 2021 ACM\/IEEE 48th Annual International Symposium on Computer Architecture (ISCA), pp. 1--14."},{"key":"e_1_3_2_1_28_1","first-page":"183","volume-title":"Proceedings of the 4th Int'l conference on computing frontiers","author":"Kamil S.","unstructured":"Kamil , S. , Pinar , A. , Gunter , D. , Lijewski , M. , Oliker , L. and Shalf , J ., 2007, May. Reconfigurable hybrid interconnection for static and dynamic scientific applications . In Proceedings of the 4th Int'l conference on computing frontiers , pp. 183 -- 194 . Kamil, S., Pinar, A., Gunter, D., Lijewski, M., Oliker, L. and Shalf, J., 2007, May. Reconfigurable hybrid interconnection for static and dynamic scientific applications. In Proceedings of the 4th Int'l conference on computing frontiers, pp. 183--194."},{"issue":"2","key":"e_1_3_2_1_29_1","doi-asserted-by":"crossref","first-page":"188","DOI":"10.1109\/TPDS.2009.61","article-title":"Communication requirements and interconnect optimization for high-end scientific applications","volume":"21","author":"Kamil S.","year":"2009","unstructured":"Kamil , S. , Oliker , L. , Pinar , A. and Shalf , J. , 2009 . Communication requirements and interconnect optimization for high-end scientific applications . IEEE Transactions on Parallel and Distributed Systems , 21 ( 2 ), pp. 188 -- 202 . Kamil, S., Oliker, L., Pinar, A. and Shalf, J., 2009. Communication requirements and interconnect optimization for high-end scientific applications. IEEE Transactions on Parallel and Distributed Systems, 21(2), pp. 188--202.","journal-title":"IEEE Transactions on Parallel and Distributed Systems"},{"key":"e_1_3_2_1_30_1","first-page":"29","volume-title":"2018 ACM\/IEEE 45th Annual International Symposium on Computer Architecture (ISCA)","author":"Karandikar S.","unstructured":"Karandikar , S. , Mao , H. , Kim , D. , Biancolin , D. , Amid , A. , Lee , D. , Pemberton , N. , Amaro , E. , Schmidt , C. , Chopra , A. and Huang , Q ., 2018, June. FireSim: FPGA-accelerated cycle-exact scale-out system simulation in the public cloud . In 2018 ACM\/IEEE 45th Annual International Symposium on Computer Architecture (ISCA) , pp. 29 -- 42 . Karandikar, S., Mao, H., Kim, D., Biancolin, D., Amid, A., Lee, D., Pemberton, N., Amaro, E., Schmidt, C., Chopra, A. and Huang, Q., 2018, June. FireSim: FPGA-accelerated cycle-exact scale-out system simulation in the public cloud. In 2018 ACM\/IEEE 45th Annual International Symposium on Computer Architecture (ISCA), pp. 29--42."},{"key":"e_1_3_2_1_31_1","volume-title":"Proceedings of the 2021 ACM SIGCOMM 2021 Conference (pp. 657--675)","author":"Khani M.","unstructured":"Khani , M. , Ghobadi , M. , Alizadeh , M. , Zhu , Z. , Glick , M. , Bergman , K. , Vahdat , A. , Klenk , B. and Ebrahimi , E ., 2021, August. SiP-ML: high-bandwidth optical network interconnects for machine learning training . In Proceedings of the 2021 ACM SIGCOMM 2021 Conference (pp. 657--675) . Khani, M., Ghobadi, M., Alizadeh, M., Zhu, Z., Glick, M., Bergman, K., Vahdat, A., Klenk, B. and Ebrahimi, E., 2021, August. SiP-ML: high-bandwidth optical network interconnects for machine learning training. In Proceedings of the 2021 ACM SIGCOMM 2021 Conference (pp. 657--675)."},{"key":"e_1_3_2_1_32_1","first-page":"8085","volume-title":"Searching for Fast Model Families on Datacenter Accelerators. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","author":"Li S.","unstructured":"Li , S. , Tan , M. , Pang , R. , Li , A. , Cheng , L. , Le , Q.V. and Jouppi , N.P ., 2021 . Searching for Fast Model Families on Datacenter Accelerators. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition , pp. 8085 -- 8095 . Li, S., Tan, M., Pang, R., Li, A., Cheng, L., Le, Q.V. and Jouppi, N.P., 2021. Searching for Fast Model Families on Datacenter Accelerators. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 8085--8095."},{"key":"e_1_3_2_1_33_1","volume-title":"Proceedings of the 28th International conference on Architectural Support for Programming Languages and Operating Systems (ASPLOS).","author":"Li S.","unstructured":"Li , S. , Andersen , G. , Chen , T. , Cheng , L. , Grady , J. , Huang , D. , Le , Q. , Li , A. , Li , X. , Li , Y. , Liang , C. , Lu , Y. , Ni , Y. , Pang , F. , Ranganathan , P. , Tan , M. , Wicke , M. , Wu , G. , Zhu , S. , and Jouppi , N ., 2023. Hyperscale Hardware Optimized Neural Architecture Search , In Proceedings of the 28th International conference on Architectural Support for Programming Languages and Operating Systems (ASPLOS). Li, S., Andersen, G., Chen, T., Cheng, L., Grady, J., Huang, D., Le, Q., Li, A., Li, X., Li, Y., Liang, C., Lu, Y., Ni, Y., Pang, F., Ranganathan, P., Tan, M., Wicke, M., Wu, G., Zhu, S., and Jouppi, N., 2023. Hyperscale Hardware Optimized Neural Architecture Search, In Proceedings of the 28th International conference on Architectural Support for Programming Languages and Operating Systems (ASPLOS)."},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1126\/science.aba3758"},{"key":"e_1_3_2_1_35_1","volume-title":"2016 Optical Fiber Communications Conference and Exhibition (OFC) (pp. 1--3). IEEE.","author":"Minkenberg C.","unstructured":"Minkenberg , C. , Rodriguez , G. , Prisacari , B. , Schares , L. , Heidelberger , P. , Chen , D. and Stunkel , C ., 2016, March. Performance benefits of optical circuit switches for large-scale dragonfly networks . In 2016 Optical Fiber Communications Conference and Exhibition (OFC) (pp. 1--3). IEEE. Minkenberg, C., Rodriguez, G., Prisacari, B., Schares, L., Heidelberger, P., Chen, D. and Stunkel, C., 2016, March. Performance benefits of optical circuit switches for large-scale dragonfly networks. In 2016 Optical Fiber Communications Conference and Exhibition (OFC) (pp. 1--3). IEEE."},{"key":"e_1_3_2_1_36_1","unstructured":"MLCommons V 2.0 Results June 29 2022 https:\/\/mlcommons.org\/en\/training-normal-20\/.  MLCommons V 2.0 Results June 29 2022 https:\/\/mlcommons.org\/en\/training-normal-20\/."},{"key":"e_1_3_2_1_37_1","volume-title":"Proceedings of the 49th Annual International Symposium on Computer Architecture (pp. 993--1011)","author":"Mudigere D.","unstructured":"Mudigere , D. , Hao , Y. , Huang , J. , Jia , Z. , Tulloch , A. , Sridharan , S. , Liu , X. , Ozdal , M. , Nie , J. , Park , J. and Luo , L ., 2022, June. Software-hardware co-design for fast and scalable training of deep learning recommendation models . In Proceedings of the 49th Annual International Symposium on Computer Architecture (pp. 993--1011) . Mudigere, D., Hao, Y., Huang, J., Jia, Z., Tulloch, A., Sridharan, S., Liu, X., Ozdal, M., Nie, J., Park, J. and Luo, L., 2022, June. Software-hardware co-design for fast and scalable training of deep learning recommendation models. In Proceedings of the 49th Annual International Symposium on Computer Architecture (pp. 993--1011)."},{"key":"e_1_3_2_1_38_1","volume-title":"MUM: A new AI milestone for understanding information. Google, May, 18.","author":"Nayak P.","year":"2021","unstructured":"Nayak , P. , 2021 . MUM: A new AI milestone for understanding information. Google, May, 18. Nayak, P., 2021. MUM: A new AI milestone for understanding information. Google, May, 18."},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1109\/MM.2021.3058217"},{"key":"e_1_3_2_1_40_1","unstructured":"Nvidia 2020. Nvidia A100 Tensor Core GPU Architecture.  Nvidia 2020. Nvidia A100 Tensor Core GPU Architecture."},{"key":"e_1_3_2_1_41_1","unstructured":"Nvidia 2021. Nvidia DGX SuperPOD: Scalable Infrastructure for AI Leadership Reference Architecture.  Nvidia 2021. Nvidia DGX SuperPOD: Scalable Infrastructure for AI Leadership Reference Architecture."},{"issue":"7","key":"e_1_3_2_1_42_1","doi-asserted-by":"crossref","first-page":"18","DOI":"10.1109\/MC.2022.3148714","article-title":"The carbon footprint of machine learning training will plateau then shrink","volume":"55","author":"Patterson D.","year":"2022","unstructured":"Patterson , D. , Gonzalez , J. , Le , Q. , Liang , C. , Munguia , L.M. , Rothchild , D. , So , D. , Texier , M. and Dean , J. , 2022 . The carbon footprint of machine learning training will plateau then shrink , IEEE Computer , 55 ( 7 ), pp. 18 -- 28 . Patterson, D., Gonzalez, J., Le, Q., Liang, C., Munguia, L.M., Rothchild, D., So, D., Texier, M. and Dean, J., 2022. The carbon footprint of machine learning training will plateau then shrink, IEEE Computer, 55(7), pp. 18--28.","journal-title":"IEEE Computer"},{"key":"e_1_3_2_1_43_1","first-page":"66","volume-title":"Proceedings of the ACM SIGCOMM 2022 Conference","author":"Poutievski L.","unstructured":"Poutievski , L. , Mashayekhi , O. , Ong , J. , Singh , A. , Tariq , M. , Wang , R. , Zhang , J. , Beauregard , V. , Conner , P. , Gribble , S. and Kapoor , R ., 2022. Jupiter evolving: transforming Google's datacenter network via optical circuit switches and software-defined networking . In Proceedings of the ACM SIGCOMM 2022 Conference , pp. 66 -- 85 . Poutievski, L., Mashayekhi, O., Ong, J., Singh, A., Tariq, M., Wang, R., Zhang, J., Beauregard, V., Conner, P., Gribble, S. and Kapoor, R., 2022. Jupiter evolving: transforming Google's datacenter network via optical circuit switches and software-defined networking. In Proceedings of the ACM SIGCOMM 2022 Conference, pp. 66--85."},{"key":"e_1_3_2_1_44_1","first-page":"48","volume-title":"Proceedings of the 13th International Conference on architectural support for programming languages and operating systems (ASPLOS)","author":"Raghavendra R.","unstructured":"Raghavendra , R. , Ranganathan , P. , Talwar , V. , Wang , Z. and Zhu , X ., 2008, March. No \"power\" struggles: coordinated multi-level power management for the data center . In Proceedings of the 13th International Conference on architectural support for programming languages and operating systems (ASPLOS) , pp. 48 -- 59 . Raghavendra, R., Ranganathan, P., Talwar, V., Wang, Z. and Zhu, X., 2008, March. No \"power\" struggles: coordinated multi-level power management for the data center. In Proceedings of the 13th International Conference on architectural support for programming languages and operating systems (ASPLOS), pp. 48--59."},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1145\/3381831"},{"key":"e_1_3_2_1_46_1","first-page":"26","volume-title":"Proceedings of the 7th International conference on Architectural Support for Programming Languages and Operating Systems (ASPLOS)","author":"Scott S.L.","year":"1996","unstructured":"Scott , S.L. , 1996 . Synchronization and communication in the T3E multiprocessor . In Proceedings of the 7th International conference on Architectural Support for Programming Languages and Operating Systems (ASPLOS) , pp. 26 -- 36 . Scott, S.L., 1996. Synchronization and communication in the T3E multiprocessor. In Proceedings of the 7th International conference on Architectural Support for Programming Languages and Operating Systems (ASPLOS), pp. 26--36."},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.5555\/800052.801897"},{"key":"e_1_3_2_1_48_1","unstructured":"Sethi G. Bhattacharya P. Choudhary D. Wu C.-J. and Kozyrakis C. 2022. FlexShard: Flexible Sharding for Industry-Scale Sequence Recommendation Models arxiv preprint arXiv:2301.02959.  Sethi G. Bhattacharya P. Choudhary D. Wu C.-J. and Kozyrakis C. 2022. FlexShard: Flexible Sharding for Industry-Scale Sequence Recommendation Models arxiv preprint arXiv:2301.02959."},{"key":"e_1_3_2_1_49_1","volume-title":"SC'05: Proceedings of the 2005 ACM\/IEEE Conference on Supercomputing (pp. 17--17)","author":"Shalf J.","unstructured":"Shalf , J. , Kamil , S. , Oliker , L. and Skinner , D ., 2005, November. Analyzing ultra-scale application communication requirements for a reconfigurable hybrid interconnect . In SC'05: Proceedings of the 2005 ACM\/IEEE Conference on Supercomputing (pp. 17--17) . Shalf, J., Kamil, S., Oliker, L. and Skinner, D., 2005, November. Analyzing ultra-scale application communication requirements for a reconfigurable hybrid interconnect. In SC'05: Proceedings of the 2005 ACM\/IEEE Conference on Supercomputing (pp. 17--17)."},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"publisher","DOI":"10.1145\/1364782.1364802"},{"key":"e_1_3_2_1_51_1","volume-title":"Proceedings of ACM Hotnets","author":"Singla A.","year":"2010","unstructured":"Singla , A. , Singh , K. Ramachandran , L. Xu , and Y. Zhang . Proteus: A topology malleable data center network . In Proceedings of ACM Hotnets , Oct. 2010 . Singla, A., Singh, K. Ramachandran, L. Xu, and Y. Zhang. Proteus: A topology malleable data center network. In Proceedings of ACM Hotnets, Oct. 2010."},{"key":"e_1_3_2_1_52_1","unstructured":"Taylor P. 2022. Data center average annual power usage effectiveness (PUE) worldwide 2007--2022 Nov 22.  Taylor P. 2022. Data center average annual power usage effectiveness (PUE) worldwide 2007--2022 Nov 22."},{"key":"e_1_3_2_1_53_1","volume-title":"Sept.","author":"Teh M. Y., S.","year":"2020","unstructured":"Teh , M. Y., S. Zhao , P. Cao , and K. Bergman . Couder: Robust topology engineering for optical circuit switched data center networks. arXiv preprint arXiv:2010.00090 , Sept. 2020 . Teh, M. Y., S. Zhao, P. Cao, and K. Bergman. Couder: Robust topology engineering for optical circuit switched data center networks. arXiv preprint arXiv:2010.00090, Sept. 2020."},{"key":"e_1_3_2_1_54_1","unstructured":"Thoppilan R. De Freitas D. Hall J. Shazeer N. Kulshreshtha A. Cheng H.T. Jin A. Bos T. Baker L. Du Y. and Li Y. 2022. LaMDA: Language models for dialog applications. arXiv preprint arXiv:2201.08239.  Thoppilan R. De Freitas D. Hall J. Shazeer N. Kulshreshtha A. Cheng H.T. Jin A. Bos T. Baker L. Du Y. and Li Y. 2022. LaMDA: Language models for dialog applications. arXiv preprint arXiv:2201.08239."},{"key":"e_1_3_2_1_55_1","volume-title":"Mission Apollo: Landing Optical Circuit Switching at Datacenter Scale. arXiv preprint arXiv:2208.10041.","author":"Urata R.","year":"2022","unstructured":"Urata , R. , Liu , H. , Yasumura , K. , Mao , E. , Berger , J. , Zhou , X. , Lam , C. , Bannon , R. , Hutchinson , D. , Nelson , D. and Poutievski, L., 2022 . Mission Apollo: Landing Optical Circuit Switching at Datacenter Scale. arXiv preprint arXiv:2208.10041. Urata, R., Liu, H., Yasumura, K., Mao, E., Berger, J., Zhou, X., Lam, C., Bannon, R., Hutchinson, D., Nelson, D. and Poutievski, L., 2022. Mission Apollo: Landing Optical Circuit Switching at Datacenter Scale. arXiv preprint arXiv:2208.10041."},{"key":"e_1_3_2_1_56_1","unstructured":"US Energy Information Agency 2022. Oklahoma State Profile and Energy Estimates https:\/\/www.eia.gov\/state\/?sid=OK#:~:text=In%202021%2C%20wind%20supplied%2041 electricity%20net%20generation%20from%20wind.  US Energy Information Agency 2022. Oklahoma State Profile and Energy Estimates https:\/\/www.eia.gov\/state\/?sid=OK#:~:text=In%202021%2C%20wind%20supplied%2041 electricity%20net%20generation%20from%20wind."},{"key":"e_1_3_2_1_57_1","volume-title":"Proceedings of the Optical Fiber Communications Conference.","author":"Vahdat A., H.","unstructured":"Vahdat , A., H. Liu , X. Zhao , and C. Johnson , 2011. The emerging optical data center . In Proceedings of the Optical Fiber Communications Conference. Vahdat, A., H. Liu, X. Zhao, and C. Johnson, 2011. The emerging optical data center. In Proceedings of the Optical Fiber Communications Conference."},{"key":"e_1_3_2_1_58_1","unstructured":"Vaswani A. Shazeer N. Parmar N. Uszkoreit J. Jones L. Gomez A.N. Kaiser \u0141. and Polosukhin I. 2017. Attention is all you need. Advances in neural information processing systems 30 (NeurIPS 2017).  Vaswani A. Shazeer N. Parmar N. Uszkoreit J. Jones L. Gomez A.N. Kaiser \u0141. and Polosukhin I. 2017. Attention is all you need. Advances in neural information processing systems 30 (NeurIPS 2017)."},{"key":"e_1_3_2_1_59_1","volume-title":"Proceedings of the 28th International conference on Architectural Support for Programming Languages and Operating Systems (ASPLOS).","author":"Wang S.","unstructured":"Wang , S. , Wei , J. , Sabne , A. , David , A. , Llbeyi , B. , Hechtman , B. , Chen , D. , Murthy , K. S. , Maggioni , M. , Zhang , Q. , Kumar , S. , Guo , T. , Xu , Y. , and Zhou , Z ., 2023. Overlap Communication with Dependent Computation via Decomposition in Large Deep Learning Models , In Proceedings of the 28th International conference on Architectural Support for Programming Languages and Operating Systems (ASPLOS). Wang, S., Wei, J., Sabne, A., David, A., Llbeyi, B., Hechtman, B., Chen, D., Murthy, K. S., Maggioni, M., Zhang, Q., Kumar, S., Guo, T., Xu, Y., and Zhou, Z., 2023. Overlap Communication with Dependent Computation via Decomposition in Large Deep Learning Models, In Proceedings of the 28th International conference on Architectural Support for Programming Languages and Operating Systems (ASPLOS)."},{"key":"e_1_3_2_1_60_1","volume-title":"Topoopt: Co-optimizing network topology and parallelization strategy for distributed training jobs. arXiv preprint arXiv:2202.00433.","author":"Wang W.","year":"2022","unstructured":"Wang , W. , Khazraee , M. , Zhong , Z. , Ghobadi , M. , Jia , Z. , Mudigere , D. , Zhang , Y. and Kewitsch , A. , 2022 . Topoopt: Co-optimizing network topology and parallelization strategy for distributed training jobs. arXiv preprint arXiv:2202.00433. Wang, W., Khazraee, M., Zhong, Z., Ghobadi, M., Jia, Z., Mudigere, D., Zhang, Y. and Kewitsch, A., 2022. Topoopt: Co-optimizing network topology and parallelization strategy for distributed training jobs. arXiv preprint arXiv:2202.00433."},{"key":"e_1_3_2_1_61_1","doi-asserted-by":"publisher","DOI":"10.1145\/1498765.1498785"},{"key":"e_1_3_2_1_62_1","doi-asserted-by":"crossref","first-page":"469","DOI":"10.1109\/ISCA.2016.48","volume-title":"2016 ACM\/IEEE 43rd Annual International Symposium on Computer Architecture (ISCA)","author":"Wu Q.","year":"2016","unstructured":"Wu , Q. , Deng , Q. , Ganesh , L. , Hsu , C.H. , Jin , Y. , Kumar , S. , Li , B. , Meza , J. and Song , Y.J ., 2016. Dynamo: Facebook's Data Center-Wide Power Management System . 2016 ACM\/IEEE 43rd Annual International Symposium on Computer Architecture (ISCA) , Seoul , ( 2016 ), pp. 469 -- 480 . Wu, Q., Deng, Q., Ganesh, L., Hsu, C.H., Jin, Y., Kumar, S., Li, B., Meza, J. and Song, Y.J., 2016. Dynamo: Facebook's Data Center-Wide Power Management System. 2016 ACM\/IEEE 43rd Annual International Symposium on Computer Architecture (ISCA), Seoul, (2016), pp. 469--480."},{"key":"e_1_3_2_1_63_1","unstructured":"Xu Y. Lee H. Chen D. Hechtman B. Huang Y. Joshi R. Krikun M. Lepikhin D. Ly A. Maggioni M. and Pang R. 2021. GSPMD: general and scalable parallelization for ML computation graphs. arXiv preprint arXiv:2105.04663.  Xu Y. Lee H. Chen D. Hechtman B. Huang Y. Joshi R. Krikun M. Lepikhin D. Ly A. Maggioni M. and Pang R. 2021. GSPMD: general and scalable parallelization for ML computation graphs. arXiv preprint arXiv:2105.04663."},{"key":"e_1_3_2_1_64_1","first-page":"1042","volume-title":"Proceedings of the 49th Annual International Symposium on Computer Architecture (ISCA)","author":"Zhao M.","unstructured":"Zhao , M. , Agarwal , N. , Basant , A. , Gedik , B. , Pan , S. , Ozdal , M. , Komuravelli , R. , Pan , J. , Bao , T. , Lu , H. and Narayanan , S ., 2022, June. Understanding data storage and ingestion for large-scale deep recommendation model training: Industrial product . In Proceedings of the 49th Annual International Symposium on Computer Architecture (ISCA) , pp. 1042 -- 1057 . Zhao, M., Agarwal, N., Basant, A., Gedik, B., Pan, S., Ozdal, M., Komuravelli, R., Pan, J., Bao, T., Lu, H. and Narayanan, S., 2022, June. Understanding data storage and ingestion for large-scale deep recommendation model training: Industrial product. In Proceedings of the 49th Annual International Symposium on Computer Architecture (ISCA), pp. 1042--1057."}],"event":{"name":"ISCA '23: 50th Annual International Symposium on Computer Architecture","location":"Orlando FL USA","acronym":"ISCA '23","sponsor":["SIGARCH ACM Special Interest Group on Computer Architecture","IEEE"]},"container-title":["Proceedings of the 50th Annual International Symposium on Computer Architecture"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3579371.3589350","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T16:46:40Z","timestamp":1750178800000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3579371.3589350"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,6,17]]},"references-count":64,"alternative-id":["10.1145\/3579371.3589350","10.1145\/3579371"],"URL":"https:\/\/doi.org\/10.1145\/3579371.3589350","relation":{},"subject":[],"published":{"date-parts":[[2023,6,17]]},"assertion":[{"value":"2023-06-17","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}