{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,27]],"date-time":"2025-12-27T07:30:25Z","timestamp":1766820625165,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":63,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,3,25]],"date-time":"2023-03-25T00:00:00Z","timestamp":1679702400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,3,25]]},"DOI":"10.1145\/3582016.3582049","type":"proceedings-article","created":{"date-parts":[[2023,3,20]],"date-time":"2023-03-20T16:59:03Z","timestamp":1679331543000},"page":"343-358","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":8,"title":["Hyperscale Hardware Optimized Neural Architecture Search"],"prefix":"10.1145","author":[{"given":"Sheng","family":"Li","sequence":"first","affiliation":[{"name":"Google, USA"}]},{"given":"Garrett","family":"Andersen","sequence":"additional","affiliation":[{"name":"Google, USA"}]},{"given":"Tao","family":"Chen","sequence":"additional","affiliation":[{"name":"Google, USA"}]},{"given":"Liqun","family":"Cheng","sequence":"additional","affiliation":[{"name":"Google, USA"}]},{"given":"Julian","family":"Grady","sequence":"additional","affiliation":[{"name":"Google, USA"}]},{"given":"Da","family":"Huang","sequence":"additional","affiliation":[{"name":"Google, USA"}]},{"given":"Quoc V.","family":"Le","sequence":"additional","affiliation":[{"name":"Google, USA"}]},{"given":"Andrew","family":"Li","sequence":"additional","affiliation":[{"name":"Google, USA"}]},{"given":"Xin","family":"Li","sequence":"additional","affiliation":[{"name":"Google, USA"}]},{"given":"Yang","family":"Li","sequence":"additional","affiliation":[{"name":"Google, USA"}]},{"given":"Chen","family":"Liang","sequence":"additional","affiliation":[{"name":"Google, USA"}]},{"given":"Yifeng","family":"Lu","sequence":"additional","affiliation":[{"name":"Google, USA"}]},{"given":"Yun","family":"Ni","sequence":"additional","affiliation":[{"name":"Google, USA"}]},{"given":"Ruoming","family":"Pang","sequence":"additional","affiliation":[{"name":"Apple, USA"}]},{"given":"Mingxing","family":"Tan","sequence":"additional","affiliation":[{"name":"Waymo, USA"}]},{"given":"Martin","family":"Wicke","sequence":"additional","affiliation":[{"name":"Google, USA"}]},{"given":"Gang","family":"Wu","sequence":"additional","affiliation":[{"name":"Google, USA"}]},{"given":"Shengqi","family":"Zhu","sequence":"additional","affiliation":[{"name":"Google, USA"}]},{"given":"Parthasarathy","family":"Ranganathan","sequence":"additional","affiliation":[{"name":"Google, USA"}]},{"given":"Norman P.","family":"Jouppi","sequence":"additional","affiliation":[{"name":"Google, USA"}]}],"member":"320","published-online":{"date-parts":[[2023,3,25]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"Mart\u00edn Abadi Ashish Agarwal Paul Barham Eugene Brevdo Zhifeng Chen Craig Citro Greg S. Corrado Andy Davis Jeffrey Dean Matthieu Devin Sanjay Ghemawat Ian Goodfellow Andrew Harp Geoffrey Irving Michael Isard Yangqing Jia Rafal Jozefowicz Lukasz Kaiser Manjunath Kudlur Josh Levenberg Dandelion Man\u00e9 Rajat Monga Sherry Moore Derek Murray Chris Olah Mike Schuster Jonathon Shlens Benoit Steiner Ilya Sutskever Kunal Talwar Paul Tucker Vincent Vanhoucke Vijay Vasudevan Fernanda Vi\u00e9gas Oriol Vinyals Pete Warden Martin Wattenberg Martin Wicke Yuan Yu and Xiaoqiang Zheng. 2015. TensorFlow: Large-Scale Machine Learning on Heterogeneous Systems. https:\/\/www.tensorflow.org\/ Software available from tensorflow.org \t\t\t\t  Mart\u00edn Abadi Ashish Agarwal Paul Barham Eugene Brevdo Zhifeng Chen Craig Citro Greg S. Corrado Andy Davis Jeffrey Dean Matthieu Devin Sanjay Ghemawat Ian Goodfellow Andrew Harp Geoffrey Irving Michael Isard Yangqing Jia Rafal Jozefowicz Lukasz Kaiser Manjunath Kudlur Josh Levenberg Dandelion Man\u00e9 Rajat Monga Sherry Moore Derek Murray Chris Olah Mike Schuster Jonathon Shlens Benoit Steiner Ilya Sutskever Kunal Talwar Paul Tucker Vincent Vanhoucke Vijay Vasudevan Fernanda Vi\u00e9gas Oriol Vinyals Pete Warden Martin Wattenberg Martin Wicke Yuan Yu and Xiaoqiang Zheng. 2015. TensorFlow: Large-Scale Machine Learning on Heterogeneous Systems. https:\/\/www.tensorflow.org\/ Software available from tensorflow.org"},{"key":"e_1_3_2_1_2_1","volume-title":"Le","author":"Adiwardana Daniel","year":"2020","unstructured":"Daniel Adiwardana , Minh-Thang Luong , David R. So , Jamie Hall , Noah Fiedel , Romal Thoppilan , Zi Yang , Apoorv Kulshreshtha , Gaurav Nemade , Yifeng Lu , and Quoc V . Le . 2020 . Towards a Human-like Open-Domain Chatbot. CoRR , abs\/2001.09977 (2020), arXiv:2001.09977. arxiv:2001.09977 Daniel Adiwardana, Minh-Thang Luong, David R. So, Jamie Hall, Noah Fiedel, Romal Thoppilan, Zi Yang, Apoorv Kulshreshtha, Gaurav Nemade, Yifeng Lu, and Quoc V. Le. 2020. Towards a Human-like Open-Domain Chatbot. CoRR, abs\/2001.09977 (2020), arXiv:2001.09977. arxiv:2001.09977"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICDCS54860.2022.00037"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01433"},{"key":"e_1_3_2_1_5_1","first-page":"1877","article-title":"Language Models are Few-Shot Learners","volume":"33","author":"Brown Tom B.","year":"2020","unstructured":"Tom B. Brown , Benjamin Mann , Nick Ryder , Melanie Subbiah , Jared Kaplan , Prafulla Dhariwal , Arvind Neelakantan , Pranav Shyam , Girish Sastry , Amanda Askell , Sandhini Agarwal , Ariel Herbert-Voss , Gretchen Krueger , Tom Henighan , Rewon Child , Aditya Ramesh , Daniel M. Ziegler , Jeffrey Wu , Clemens Winter , Christopher Hesse , Mark Chen , Eric Sigler , Mateusz Litwin , Scott Gray , Benjamin Chess , Jack Clark , Christopher Berner , Sam McCandlish , Alec Radford , Ilya Sutskever , and Dario Amodei . 2020 . Language Models are Few-Shot Learners . In Advances in Neural Information Processing Systems. 33 , 1877 \u2013 1901 . https:\/\/dl.acm.org\/doi\/abs\/10.5555\/3495724.3495883 Tom B. Brown, Benjamin Mann, Nick Ryder, Melanie Subbiah, Jared Kaplan, Prafulla Dhariwal, Arvind Neelakantan, Pranav Shyam, Girish Sastry, Amanda Askell, Sandhini Agarwal, Ariel Herbert-Voss, Gretchen Krueger, Tom Henighan, Rewon Child, Aditya Ramesh, Daniel M. Ziegler, Jeffrey Wu, Clemens Winter, Christopher Hesse, Mark Chen, Eric Sigler, Mateusz Litwin, Scott Gray, Benjamin Chess, Jack Clark, Christopher Berner, Sam McCandlish, Alec Radford, Ilya Sutskever, and Dario Amodei. 2020. Language Models are Few-Shot Learners. In Advances in Neural Information Processing Systems. 33, 1877\u20131901. https:\/\/dl.acm.org\/doi\/abs\/10.5555\/3495724.3495883","journal-title":"Advances in Neural Information Processing Systems."},{"key":"e_1_3_2_1_6_1","volume-title":"Proceedings of the International Conference on Learning Representations. https:\/\/doi.org\/10","author":"Cai Han","year":"2020","unstructured":"Han Cai , Chuang Gan , and Song Han . 2020 . Once for All: Train One Network and Specialize it for Efficient Deployment . In Proceedings of the International Conference on Learning Representations. https:\/\/doi.org\/10 .48550\/arXiv.1908.09791 10.48550\/arXiv.1908.09791 Han Cai, Chuang Gan, and Song Han. 2020. Once for All: Train One Network and Specialize it for Efficient Deployment. In Proceedings of the International Conference on Learning Representations. https:\/\/doi.org\/10.48550\/arXiv.1908.09791"},{"key":"e_1_3_2_1_7_1","volume-title":"Proceedings of the International Conference on Learning Representations. https:\/\/doi.org\/10","author":"Cai Han","year":"2019","unstructured":"Han Cai , Ligeng Zhu , and Song Han . 2019 . ProxylessNAS: Direct Neural Architecture Search on Target Task and Hardware . In Proceedings of the International Conference on Learning Representations. https:\/\/doi.org\/10 .48550\/arXiv.1812.00332 10.48550\/arXiv.1812.00332 Han Cai, Ligeng Zhu, and Song Han. 2019. ProxylessNAS: Direct Neural Architecture Search on Target Task and Hardware. In Proceedings of the International Conference on Learning Representations. https:\/\/doi.org\/10.48550\/arXiv.1812.00332"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01205"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1109\/MM.2018.022071134"},{"key":"#cr-split#-e_1_3_2_1_10_1.1","unstructured":"Aakanksha Chowdhery Sharan Narang Jacob Devlin Maarten Bosma Gaurav Mishra Adam Roberts Paul Barham Hyung Won Chung Charles Sutton Sebastian Gehrmann Parker Schuh Kensen Shi Sasha Tsvyashchenko Joshua Maynez Abhishek Rao Parker Barnes Yi Tay Noam Shazeer Vinodkumar Prabhakaran Emily Reif Nan Du Ben Hutchinson Reiner Pope James Bradbury Jacob Austin Michael Isard Guy Gur-Ari Pengcheng Yin Toju Duke Anselm Levskaya Sanjay Ghemawat Sunipa Dev Henryk Michalewski Xavier Garcia Vedant Misra Kevin Robinson Liam Fedus Denny Zhou Daphne Ippolito David Luan Hyeontaek Lim Barret Zoph Alexander Spiridonov Ryan Sepassi David Dohan Shivani Agrawal Mark Omernick Andrew M. Dai Thanumalayan Sankaranarayana Pillai Marie Pellat Aitor Lewkowycz Erica Moreira Rewon Child Oleksandr Polozov Katherine Lee Zongwei Zhou Xuezhi Wang Brennan Saeta Mark Diaz Orhan Firat Michele Catasta Jason Wei Kathy Meier-Hellstern Douglas Eck Jeff Dean Slav Petrov and Noah Fiedel. 2022. PaLM: Scaling Language Modeling with Pathways. https:\/\/doi.org\/10.48550\/ARXIV.2204.02311 10.48550\/ARXIV.2204.02311"},{"key":"#cr-split#-e_1_3_2_1_10_1.2","unstructured":"Aakanksha Chowdhery Sharan Narang Jacob Devlin Maarten Bosma Gaurav Mishra Adam Roberts Paul Barham Hyung Won Chung Charles Sutton Sebastian Gehrmann Parker Schuh Kensen Shi Sasha Tsvyashchenko Joshua Maynez Abhishek Rao Parker Barnes Yi Tay Noam Shazeer Vinodkumar Prabhakaran Emily Reif Nan Du Ben Hutchinson Reiner Pope James Bradbury Jacob Austin Michael Isard Guy Gur-Ari Pengcheng Yin Toju Duke Anselm Levskaya Sanjay Ghemawat Sunipa Dev Henryk Michalewski Xavier Garcia Vedant Misra Kevin Robinson Liam Fedus Denny Zhou Daphne Ippolito David Luan Hyeontaek Lim Barret Zoph Alexander Spiridonov Ryan Sepassi David Dohan Shivani Agrawal Mark Omernick Andrew M. Dai Thanumalayan Sankaranarayana Pillai Marie Pellat Aitor Lewkowycz Erica Moreira Rewon Child Oleksandr Polozov Katherine Lee Zongwei Zhou Xuezhi Wang Brennan Saeta Mark Diaz Orhan Firat Michele Catasta Jason Wei Kathy Meier-Hellstern Douglas Eck Jeff Dean Slav Petrov and Noah Fiedel. 2022. PaLM: Scaling Language Modeling with Pathways. https:\/\/doi.org\/10.48550\/ARXIV.2204.02311"},{"key":"e_1_3_2_1_11_1","volume-title":"System Architecture: TPU v4. https:\/\/cloud.google.com\/tpu\/docs\/system-architecture-tpu-vm##tpu_v4","author":"Cloud Google","year":"2022","unstructured":"Google Cloud . 2022 . System Architecture: TPU v4. https:\/\/cloud.google.com\/tpu\/docs\/system-architecture-tpu-vm##tpu_v4 Google Cloud. 2022. System Architecture: TPU v4. https:\/\/cloud.google.com\/tpu\/docs\/system-architecture-tpu-vm##tpu_v4"},{"key":"e_1_3_2_1_12_1","first-page":"6010","article-title":"Funnel-Transformer: Filtering out Sequential Redundancy for Efficient Language Processing","volume":"34","author":"Dai Zihang","year":"2021","unstructured":"Zihang Dai , Guokun Lai , Yiming Yang , and Quoc V. Le . 2021 . Funnel-Transformer: Filtering out Sequential Redundancy for Efficient Language Processing . In Advances in Neural Information Processing Systems. 34 , 6010 \u2013 6022 . https:\/\/dl.acm.org\/doi\/10.5555\/3495724.3496083 Zihang Dai, Guokun Lai, Yiming Yang, and Quoc V. Le. 2021. Funnel-Transformer: Filtering out Sequential Redundancy for Efficient Language Processing. In Advances in Neural Information Processing Systems. 34, 6010\u20136022. https:\/\/dl.acm.org\/doi\/10.5555\/3495724.3496083","journal-title":"Advances in Neural Information Processing Systems."},{"key":"e_1_3_2_1_13_1","first-page":"3965","article-title":"CoAtNet: Marrying Convolution and Attention for All Data Sizes","volume":"34","author":"Dai Zihang","year":"2021","unstructured":"Zihang Dai , Hanxiao Liu , Quoc V. Le , and Mingxing Tan . 2021 . CoAtNet: Marrying Convolution and Attention for All Data Sizes . In Advances in Neural Information Processing Systems. 34 , 3965 \u2013 3977 . https:\/\/doi.org\/10.48550\/arXiv.2106.04803 10.48550\/arXiv.2106.04803 Zihang Dai, Hanxiao Liu, Quoc V. Le, and Mingxing Tan. 2021. CoAtNet: Marrying Convolution and Attention for All Data Sizes. In Advances in Neural Information Processing Systems. 34, 3965\u20133977. https:\/\/doi.org\/10.48550\/arXiv.2106.04803","journal-title":"Advances in Neural Information Processing Systems."},{"key":"#cr-split#-e_1_3_2_1_14_1.1","unstructured":"Jeffrey Dean. 2019. The Deep Learning Revolution and Its Implications for Computer Architecture and Chip Design. arxiv:1911.05289. https:\/\/doi.org\/10.48550\/arXiv.1911.05289 10.48550\/arXiv.1911.05289"},{"key":"#cr-split#-e_1_3_2_1_14_1.2","unstructured":"Jeffrey Dean. 2019. The Deep Learning Revolution and Its Implications for Computer Architecture and Chip Design. arxiv:1911.05289. https:\/\/doi.org\/10.48550\/arXiv.1911.05289"},{"key":"e_1_3_2_1_15_1","volume-title":"Proceedings of the European Conference on Computer Vision. 517\u2013531","author":"Dong Jin-Dong","year":"2018","unstructured":"Jin-Dong Dong , An-Chieh Cheng , Da-Cheng Juan , Wei Wei , and Min Sun . 2018 . Ppp-net: Platform-aware progressive search for pareto-optimal neural architectures . In Proceedings of the European Conference on Computer Vision. 517\u2013531 . Jin-Dong Dong, An-Chieh Cheng, Da-Cheng Juan, Wei Wei, and Min Sun. 2018. Ppp-net: Platform-aware progressive search for pareto-optimal neural architectures. In Proceedings of the European Conference on Computer Vision. 517\u2013531."},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00186"},{"key":"e_1_3_2_1_17_1","volume-title":"Proceedings of the International Conference on Learning Representations. https:\/\/doi.org\/10","author":"Elsken Thomas","year":"2019","unstructured":"Thomas Elsken , Jan Hendrik Metzen , and Frank Hutter . 2019 . Efficient multi-objective neural architecture search via lamarckian evolution . In Proceedings of the International Conference on Learning Representations. https:\/\/doi.org\/10 .48550\/arXiv.1804.09081 10.48550\/arXiv.1804.09081 Thomas Elsken, Jan Hendrik Metzen, and Frank Hutter. 2019. Efficient multi-objective neural architecture search via lamarckian evolution. In Proceedings of the International Conference on Learning Representations. https:\/\/doi.org\/10.48550\/arXiv.1804.09081"},{"key":"e_1_3_2_1_18_1","volume-title":"Proceedings of Machine Learning Research. https:\/\/doi.org\/10","author":"Fedus William","year":"2022","unstructured":"William Fedus , Barret Zoph , and Noam Shazeer . 2022 . Switch Transformers: Scaling to Trillion Parameter Models with Simple and Efficient Sparsity . In Proceedings of Machine Learning Research. https:\/\/doi.org\/10 .48550\/arXiv.2101.03961 10.48550\/arXiv.2101.03961 William Fedus, Barret Zoph, and Noam Shazeer. 2022. Switch Transformers: Scaling to Trillion Parameter Models with Simple and Efficient Sparsity. In Proceedings of Machine Learning Research. https:\/\/doi.org\/10.48550\/arXiv.2101.03961"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58517-4_32"},{"key":"e_1_3_2_1_20_1","volume-title":"On-device Intelligence Workshop, in conjunction with the 3rd SysML Conference. https:\/\/doi.org\/10","author":"Gupta Suyog","year":"2020","unstructured":"Suyog Gupta and Berkin Akin . 2020 . Accelerator-aware Neural Network Design using AutoML . In On-device Intelligence Workshop, in conjunction with the 3rd SysML Conference. https:\/\/doi.org\/10 .48550\/arXiv.2003.02838 10.48550\/arXiv.2003.02838 Suyog Gupta and Berkin Akin. 2020. Accelerator-aware Neural Network Design using AutoML. In On-device Intelligence Workshop, in conjunction with the 3rd SysML Conference. https:\/\/doi.org\/10.48550\/arXiv.2003.02838"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2018.00059"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00140"},{"key":"e_1_3_2_1_23_1","volume-title":"MONAS: Multi-Objective Neural Architecture Search using Reinforcement Learning. arXiv preprint arXiv:1806.10332, https:\/\/doi.org\/10.48550\/arXiv.1806.10332","author":"Hsu Chi-Hung","year":"2018","unstructured":"Chi-Hung Hsu , Shu-Huan Chang , Da-Cheng Juan , Jia-Yu Pan , Yu-Ting Chen , Wei Wei , and Shih-Chieh Chang . 2018 . MONAS: Multi-Objective Neural Architecture Search using Reinforcement Learning. arXiv preprint arXiv:1806.10332, https:\/\/doi.org\/10.48550\/arXiv.1806.10332 10.48550\/arXiv.1806.10332 Chi-Hung Hsu, Shu-Huan Chang, Da-Cheng Juan, Jia-Yu Pan, Yu-Ting Chen, Wei Wei, and Shih-Chieh Chang. 2018. MONAS: Multi-Objective Neural Architecture Search using Reinforcement Learning. arXiv preprint arXiv:1806.10332, https:\/\/doi.org\/10.48550\/arXiv.1806.10332"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA52012.2021.00010"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1145\/3079856.3080246"},{"key":"e_1_3_2_1_26_1","first-page":"07191","article-title":"Neural architecture search with bayesian optimisation and optimal transport","volume":"1802","author":"Kandasamy Kirthevasan","year":"2018","unstructured":"Kirthevasan Kandasamy , Willie Neiswanger , Jeff Schneider , Barnabas Poczos , and Eric Xing . 2018 . Neural architecture search with bayesian optimisation and optimal transport . In Advances in Neural Information Processing Systems. 31, https:\/\/doi.org\/10.48550\/arXiv. 1802 . 07191 10.48550\/arXiv.1802.07191 Kirthevasan Kandasamy, Willie Neiswanger, Jeff Schneider, Barnabas Poczos, and Eric Xing. 2018. Neural architecture search with bayesian optimisation and optimal transport. In Advances in Neural Information Processing Systems. 31, https:\/\/doi.org\/10.48550\/arXiv.1802.07191","journal-title":"Advances in Neural Information Processing Systems. 31, https:\/\/doi.org\/10.48550\/arXiv."},{"key":"e_1_3_2_1_27_1","volume-title":"Differentiable NAS Framework and Application to Ads CTR Prediction. CoRR, abs\/2110.14812","author":"Krishna Ravi","year":"2021","unstructured":"Ravi Krishna , Aravind Kalaiah , Bichen Wu , Maxim Naumov , Dheevatsa Mudigere , Misha Smelyanskiy , and Kurt Keutzer . 2021. Differentiable NAS Framework and Application to Ads CTR Prediction. CoRR, abs\/2110.14812 ( 2021 ), arXiv:2110.14812. arxiv:2110.14812 Ravi Krishna, Aravind Kalaiah, Bichen Wu, Maxim Naumov, Dheevatsa Mudigere, Misha Smelyanskiy, and Kurt Keutzer. 2021. Differentiable NAS Framework and Application to Ads CTR Prediction. CoRR, abs\/2110.14812 (2021), arXiv:2110.14812. arxiv:2110.14812"},{"key":"e_1_3_2_1_28_1","volume-title":"Proceedings of the International Conference on Learning Representations. https:\/\/doi.org\/10","author":"Lepikhin Dmitry","year":"2021","unstructured":"Dmitry Lepikhin , HyoukJoong Lee , Yuanzhong Xu , Dehao Chen , Orhan Firat , Yanping Huang , Maxim Krikun , Noam Shazeer , and Zhifeng Chen . 2021 . GShard: Scaling Giant Models with Conditional Computation and Automatic Sharding . In Proceedings of the International Conference on Learning Representations. https:\/\/doi.org\/10 .48550\/arXiv.2006.16668 10.48550\/arXiv.2006.16668 Dmitry Lepikhin, HyoukJoong Lee, Yuanzhong Xu, Dehao Chen, Orhan Firat, Yanping Huang, Maxim Krikun, Noam Shazeer, and Zhifeng Chen. 2021. GShard: Scaling Giant Models with Conditional Computation and Automatic Sharding. In Proceedings of the International Conference on Learning Representations. https:\/\/doi.org\/10.48550\/arXiv.2006.16668"},{"volume-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. 8085\u20138095","author":"Li Sheng","key":"e_1_3_2_1_29_1","unstructured":"Sheng Li , Mingxing Tan , Ruoming Pang , Andrew Li , Liqun Cheng , Quoc V. Le , and Norman P. Jouppi . 2021. Searching for Fast Model Families on Datacenter Accelerators . In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. 8085\u20138095 . https:\/\/doi.org\/10.48550\/arXiv.2102.05610 10.48550\/arXiv.2102.05610 Sheng Li, Mingxing Tan, Ruoming Pang, Andrew Li, Liqun Cheng, Quoc V. Le, and Norman P. Jouppi. 2021. Searching for Fast Model Families on Datacenter Accelerators. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. 8085\u20138095. https:\/\/doi.org\/10.48550\/arXiv.2102.05610"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00936"},{"key":"e_1_3_2_1_31_1","volume-title":"Proceedings of the International Conference on Learning Representations. https:\/\/doi.org\/10","author":"Liu Hanxiao","year":"2019","unstructured":"Hanxiao Liu , Karen Simonyan , and Yiming Yang . 2019 . DARTS: Differentiable Architecture Search . In Proceedings of the International Conference on Learning Representations. https:\/\/doi.org\/10 .48550\/arXiv.1806.09055 10.48550\/arXiv.1806.09055 Hanxiao Liu, Karen Simonyan, and Yiming Yang. 2019. DARTS: Differentiable Architecture Search. In Proceedings of the International Conference on Learning Representations. https:\/\/doi.org\/10.48550\/arXiv.1806.09055"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1145\/3321707.3321729"},{"key":"e_1_3_2_1_33_1","first-page":"07233","article-title":"Neural architecture optimization","volume":"1808","author":"Luo Renqian","year":"2018","unstructured":"Renqian Luo , Fei Tian , Tao Qin , and Tie-Yan Liu . 2018 . Neural architecture optimization . In Advances in Neural Information Processing Systems. 31, https:\/\/doi.org\/10.48550\/arXiv. 1808 . 07233 10.48550\/arXiv.1808.07233 Renqian Luo, Fei Tian, Tao Qin, and Tie-Yan Liu. 2018. Neural architecture optimization. In Advances in Neural Information Processing Systems. 31, https:\/\/doi.org\/10.48550\/arXiv.1808.07233","journal-title":"Advances in Neural Information Processing Systems. 31, https:\/\/doi.org\/10.48550\/arXiv."},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW50498.2020.00354"},{"key":"e_1_3_2_1_35_1","volume-title":"Proceedings of the IEEE\/ACM International Symposium on Computer Architecture. 993\u20131011","author":"Mudigere Dheevatsa","year":"2022","unstructured":"Dheevatsa Mudigere , Yuchen Hao , Jianyu Huang , Andrew Tulloch , Srinivas Sridharan , Xing Liu , Mustafa Ozdal , Jade Nie , Jongsoo Park , Liang Luo , Jie Amy Yang , Leon Gao , Dmytro Ivchenko , Aarti Basant , Yuxi Hu , Jiyan Yang , Ehsan K. Ardestani , Xiaodong Wang , Rakesh Komuravelli , Ching-Hsiang Chu , Serhat Yilmaz , Huayu Li , Jiyuan Qian , Zhuobo Feng , Yinbin Ma , Junjie Yang , Ellie Wen , Hong Li , Lin Yang , Chonglin Sun , Whitney Zhao , Dimitry Melts , Krishna Dhulipala , K. R. Kishore , Tyler Graf , Assaf Eisenman , Kiran Kumar Matam , Adi Gangidi , Guoqiang Jerry Chen , Manoj Krishnan , Avinash Nayak , Krishnakumar Nair , Bharath Muthiah , Mahmoud khorashadi, Pallab Bhattacharya , Petr Lapukhov , Maxim Naumov , Lin Qiao , Mikhail Smelyanskiy , Bill Jia , and Vijay Rao . 2022 . High-performance, Distributed Training of Large-scale Deep Learning Recommendation Models . In Proceedings of the IEEE\/ACM International Symposium on Computer Architecture. 993\u20131011 . https:\/\/doi.org\/10.48550\/arXiv.2104.05158 10.48550\/arXiv.2104.05158 Dheevatsa Mudigere, Yuchen Hao, Jianyu Huang, Andrew Tulloch, Srinivas Sridharan, Xing Liu, Mustafa Ozdal, Jade Nie, Jongsoo Park, Liang Luo, Jie Amy Yang, Leon Gao, Dmytro Ivchenko, Aarti Basant, Yuxi Hu, Jiyan Yang, Ehsan K. Ardestani, Xiaodong Wang, Rakesh Komuravelli, Ching-Hsiang Chu, Serhat Yilmaz, Huayu Li, Jiyuan Qian, Zhuobo Feng, Yinbin Ma, Junjie Yang, Ellie Wen, Hong Li, Lin Yang, Chonglin Sun, Whitney Zhao, Dimitry Melts, Krishna Dhulipala, K. R. Kishore, Tyler Graf, Assaf Eisenman, Kiran Kumar Matam, Adi Gangidi, Guoqiang Jerry Chen, Manoj Krishnan, Avinash Nayak, Krishnakumar Nair, Bharath Muthiah, Mahmoud khorashadi, Pallab Bhattacharya, Petr Lapukhov, Maxim Naumov, Lin Qiao, Mikhail Smelyanskiy, Bill Jia, and Vijay Rao. 2022. High-performance, Distributed Training of Large-scale Deep Learning Recommendation Models. In Proceedings of the IEEE\/ACM International Symposium on Computer Architecture. 993\u20131011. https:\/\/doi.org\/10.48550\/arXiv.2104.05158"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1145\/3360307"},{"key":"e_1_3_2_1_37_1","unstructured":"NVIDIA. 2020. NVIDIA A100 Tensor Core GPU Architecture. White Paper. \t\t\t\t  NVIDIA. 2020. NVIDIA A100 Tensor Core GPU Architecture. White Paper."},{"key":"e_1_3_2_1_38_1","unstructured":"NVIDIA. 2022. NVIDIA H100 Tensor Core GPU Architecture. White Paper. \t\t\t\t  NVIDIA. 2022. NVIDIA H100 Tensor Core GPU Architecture. White Paper."},{"key":"e_1_3_2_1_39_1","unstructured":"OpenAI. 2018. AI and Compute. https:\/\/openai.com\/blog\/ai-and-compute\/ \t\t\t\t  OpenAI. 2018. AI and Compute. https:\/\/openai.com\/blog\/ai-and-compute\/"},{"key":"e_1_3_2_1_40_1","unstructured":"Opensource. 2023. CoAtNet-H. https:\/\/github.com\/tensorflow\/tpu\/tree\/master\/models\/official\/coatnet\/tpu \t\t\t\t  Opensource. 2023. CoAtNet-H. https:\/\/github.com\/tensorflow\/tpu\/tree\/master\/models\/official\/coatnet\/tpu"},{"key":"e_1_3_2_1_41_1","unstructured":"Opensource. 2023. EfficientNet-H. https:\/\/github.com\/tensorflow\/tpu\/tree\/master\/models\/official\/efficientnet\/tpu \t\t\t\t  Opensource. 2023. EfficientNet-H. https:\/\/github.com\/tensorflow\/tpu\/tree\/master\/models\/official\/efficientnet\/tpu"},{"key":"e_1_3_2_1_42_1","volume-title":"Carbon Emissions and Large Neural Network Training. CoRR, abs\/2104.10350","author":"Patterson David A.","year":"2021","unstructured":"David A. Patterson , Joseph Gonzalez , Quoc V. Le , Chen Liang , Lluis-Miquel Munguia , Daniel Rothchild , David R. So , Maud Texier , and Jeff Dean . 2021. Carbon Emissions and Large Neural Network Training. CoRR, abs\/2104.10350 ( 2021 ), arXiv:2104.10350. arxiv:2104.10350 David A. Patterson, Joseph Gonzalez, Quoc V. Le, Chen Liang, Lluis-Miquel Munguia, Daniel Rothchild, David R. So, Maud Texier, and Jeff Dean. 2021. Carbon Emissions and Large Neural Network Training. CoRR, abs\/2104.10350 (2021), arXiv:2104.10350. arxiv:2104.10350"},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01044"},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.33014780"},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-015-0816-y"},{"key":"e_1_3_2_1_46_1","volume-title":"XLA : Compiling Machine Learning for Peak Performance.","author":"Sabne Amit","year":"2020","unstructured":"Amit Sabne . 2020 . XLA : Compiling Machine Learning for Peak Performance. Amit Sabne. 2020. XLA : Compiling Machine Learning for Peak Performance."},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2013.6638949"},{"key":"e_1_3_2_1_48_1","first-page":"6010","article-title":"Primer: Searching for Efficient Transformers for Language Modeling","volume":"34","author":"So David R.","year":"2021","unstructured":"David R. So , Wojciech Manke , Hanxiao Liu , Zihang Dai , Noam Shazeer , and Quoc V. Le . 2021 . Primer: Searching for Efficient Transformers for Language Modeling . In Advances in Neural Information Processing Systems. 34 , 6010 \u2013 6022 . https:\/\/doi.org\/10.48550\/arXiv.2109.08668 10.48550\/arXiv.2109.08668 David R. So, Wojciech Manke, Hanxiao Liu, Zihang Dai, Noam Shazeer, and Quoc V. Le. 2021. Primer: Searching for Efficient Transformers for Language Modeling. In Advances in Neural Information Processing Systems. 34, 6010\u20136022. https:\/\/doi.org\/10.48550\/arXiv.2109.08668","journal-title":"Advances in Neural Information Processing Systems."},{"key":"e_1_3_2_1_49_1","volume-title":"Single-Path NAS: Device-Aware Efficient ConvNet Design. In Joint Workshop on On-Device Machine Learning & Compact Deep Neural Network Representations (ODML-CDNNR 2019","author":"Stamoulis Dimitrios","year":"1905","unstructured":"Dimitrios Stamoulis , Ruizhou Ding , Di Wang , Dimitrios Lymberopoulos , Bodhi Priyantha , Jie Liu , and Diana Marculescu . [n. d.]. Single-Path NAS: Device-Aware Efficient ConvNet Design. In Joint Workshop on On-Device Machine Learning & Compact Deep Neural Network Representations (ODML-CDNNR 2019 ). https:\/\/doi.org\/10.48550\/arXiv. 1905 .04159 10.48550\/arXiv.1905.04159 Dimitrios Stamoulis, Ruizhou Ding, Di Wang, Dimitrios Lymberopoulos, Bodhi Priyantha, Jie Liu, and Diana Marculescu. [n. d.]. Single-Path NAS: Device-Aware Efficient ConvNet Design. In Joint Workshop on On-Device Machine Learning & Compact Deep Neural Network Representations (ODML-CDNNR 2019). https:\/\/doi.org\/10.48550\/arXiv.1905.04159"},{"key":"e_1_3_2_1_50_1","volume-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. https:\/\/doi.org\/10","author":"Tan Mingxing","year":"1807","unstructured":"Mingxing Tan , Bo Chen , Ruoming Pang , Vijay Vasudevan , Mark Sandler , Andrew Howard , and Quoc V. Le . 2019. MnasNet: Platform-aware neural architecture search for mobile . In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. https:\/\/doi.org\/10 .48550\/arXiv. 1807 .11626 10.48550\/arXiv.1807.11626 Mingxing Tan, Bo Chen, Ruoming Pang, Vijay Vasudevan, Mark Sandler, Andrew Howard, and Quoc V. Le. 2019. MnasNet: Platform-aware neural architecture search for mobile. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. https:\/\/doi.org\/10.48550\/arXiv.1807.11626"},{"key":"e_1_3_2_1_51_1","volume-title":"Proceedings of the International Conference on Machine Learning, https:\/\/doi.org\/10","author":"Tan Mingxing","year":"1905","unstructured":"Mingxing Tan and Quoc V. Le . 2019. EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks . Proceedings of the International Conference on Machine Learning, https:\/\/doi.org\/10 .48550\/arXiv. 1905 .11946 10.48550\/arXiv.1905.11946 Mingxing Tan and Quoc V. Le. 2019. EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks. Proceedings of the International Conference on Machine Learning, https:\/\/doi.org\/10.48550\/arXiv.1905.11946"},{"key":"e_1_3_2_1_52_1","volume-title":"Manso","author":"Thompson Neil C.","year":"2020","unstructured":"Neil C. Thompson , Kristjan H. Greenewald , Keeheon Lee , and Gabriel F . Manso . 2020 . The Computational Limits of Deep Learning. CoRR , abs\/2007.05558 (2020), arXiv:2007.05558. arxiv:2007.05558 Neil C. Thompson, Kristjan H. Greenewald, Keeheon Lee, and Gabriel F. Manso. 2020. The Computational Limits of Deep Learning. CoRR, abs\/2007.05558 (2020), arXiv:2007.05558. arxiv:2007.05558"},{"key":"e_1_3_2_1_53_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.acl-main.686"},{"key":"e_1_3_2_1_54_1","doi-asserted-by":"publisher","DOI":"10.1007\/BF00992696"},{"key":"e_1_3_2_1_55_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.01099"},{"key":"e_1_3_2_1_56_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58571-6_41"},{"key":"e_1_3_2_1_57_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01179"},{"key":"#cr-split#-e_1_3_2_1_58_1.1","unstructured":"Yanqi Zhou Siavash Ebrahimi Sercan \u00d6 Ar\u0131k Haonan Yu Hairong Liu and Greg Diamos. 2018. Resource-efficient neural architect. arXiv preprint arXiv:1806.07912 https:\/\/doi.org\/10.48550\/arXiv.1806.07912 10.48550\/arXiv.1806.07912"},{"key":"#cr-split#-e_1_3_2_1_58_1.2","unstructured":"Yanqi Zhou Siavash Ebrahimi Sercan \u00d6 Ar\u0131k Haonan Yu Hairong Liu and Greg Diamos. 2018. Resource-efficient neural architect. arXiv preprint arXiv:1806.07912 https:\/\/doi.org\/10.48550\/arXiv.1806.07912"},{"key":"e_1_3_2_1_59_1","volume-title":"Proceedings of the International Conference on Learning Representations, https:\/\/doi.org\/10","author":"Zoph Barret","year":"2017","unstructured":"Barret Zoph and Quoc V Le . 2017 . Neural architecture search with reinforcement learning . Proceedings of the International Conference on Learning Representations, https:\/\/doi.org\/10 .48550\/arXiv.1611.01578 10.48550\/arXiv.1611.01578 Barret Zoph and Quoc V Le. 2017. Neural architecture search with reinforcement learning. Proceedings of the International Conference on Learning Representations, https:\/\/doi.org\/10.48550\/arXiv.1611.01578"},{"key":"e_1_3_2_1_60_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00907"}],"event":{"name":"ASPLOS '23: 28th ACM International Conference on Architectural Support for Programming Languages and Operating Systems, Volume 3","sponsor":["SIGARCH ACM Special Interest Group on Computer Architecture","SIGOPS ACM Special Interest Group on Operating Systems","SIGPLAN ACM Special Interest Group on Programming Languages","SIGBED ACM Special Interest Group on Embedded Systems"],"location":"Vancouver BC Canada","acronym":"ASPLOS '23"},"container-title":["Proceedings of the 28th ACM International Conference on Architectural Support for Programming Languages and Operating Systems, Volume 3"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3582016.3582049","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3582016.3582049","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T16:46:45Z","timestamp":1750178805000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3582016.3582049"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,3,25]]},"references-count":63,"alternative-id":["10.1145\/3582016.3582049","10.1145\/3582016"],"URL":"https:\/\/doi.org\/10.1145\/3582016.3582049","relation":{},"subject":[],"published":{"date-parts":[[2023,3,25]]},"assertion":[{"value":"2023-03-25","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}