{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,2]],"date-time":"2026-03-02T14:35:51Z","timestamp":1772462151212,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":67,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,6,17]],"date-time":"2023-06-17T00:00:00Z","timestamp":1686960000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,6,17]]},"DOI":"10.1145\/3579371.3589072","type":"proceedings-article","created":{"date-parts":[[2023,6,16]],"date-time":"2023-06-16T20:25:28Z","timestamp":1686947128000},"page":"1-13","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":8,"title":["Mystique: Enabling Accurate and Scalable Generation of Production AI Benchmarks"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-1973-2557","authenticated-orcid":false,"given":"Mingyu","family":"Liang","sequence":"first","affiliation":[{"name":"Cornell University, Ithaca, New York, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0000-4785-321X","authenticated-orcid":false,"given":"Wenyin","family":"Fu","sequence":"additional","affiliation":[{"name":"Meta, Menlo Park, California, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-2007-1790","authenticated-orcid":false,"given":"Louis","family":"Feng","sequence":"additional","affiliation":[{"name":"Meta, Menlo Park, California, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5992-3913","authenticated-orcid":false,"given":"Zhongyi","family":"Lin","sequence":"additional","affiliation":[{"name":"University of California, Davis, Davis, California, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0000-0318-9951","authenticated-orcid":false,"given":"Pavani","family":"Panakanti","sequence":"additional","affiliation":[{"name":"Meta, Menlo Park, California, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3904-8725","authenticated-orcid":false,"given":"Shengbao","family":"Zheng","sequence":"additional","affiliation":[{"name":"Meta, Menlo Park, California, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-0651-370X","authenticated-orcid":false,"given":"Srinivas","family":"Sridharan","sequence":"additional","affiliation":[{"name":"Meta, Menlo Park, California, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7779-4134","authenticated-orcid":false,"given":"Christina","family":"Delimitrou","sequence":"additional","affiliation":[{"name":"Massachusetts Institute of Technology, Cambridge, Massachusetts, USA"}]}],"member":"320","published-online":{"date-parts":[[2023,6,17]]},"reference":[{"key":"e_1_3_2_1_1_1","first-page":"1","volume-title":"2016 IEEE International Symposium on Workload Characterization (IISWC)","author":"Adolf Robert","year":"2016","unstructured":"Robert Adolf , Saketh Rama , Brandon Reagen , Gu-Yeon Wei , and David Brooks . Fathom : Reference workloads for modern deep learning methods . In 2016 IEEE International Symposium on Workload Characterization (IISWC) , pages 1 -- 10 . IEEE, 2016 . Robert Adolf, Saketh Rama, Brandon Reagen, Gu-Yeon Wei, and David Brooks. Fathom: Reference workloads for modern deep learning methods. In 2016 IEEE International Symposium on Workload Characterization (IISWC), pages 1--10. IEEE, 2016."},{"key":"e_1_3_2_1_2_1","unstructured":"ATen. Aten. https:\/\/github.com\/pytorch\/pytorch\/tree\/master\/aten.  ATen. Aten. https:\/\/github.com\/pytorch\/pytorch\/tree\/master\/aten."},{"key":"e_1_3_2_1_3_1","unstructured":"Baidu. Deepbench. https:\/\/github.com\/baidu-research\/DeepBench.  Baidu. Deepbench. https:\/\/github.com\/baidu-research\/DeepBench."},{"key":"e_1_3_2_1_4_1","volume-title":"Peter Torelli, Jeremy Holleman, Nat Jeffries, Csaba Kiraly, Pietro Montino, David Kanter, Sebastian Ahmed, Danilo Pau, et al. Mlperf tiny benchmark. arXiv preprint arXiv:2106.07597","author":"Banbury Colby","year":"2021","unstructured":"Colby Banbury , Vijay Janapa Reddi , Peter Torelli, Jeremy Holleman, Nat Jeffries, Csaba Kiraly, Pietro Montino, David Kanter, Sebastian Ahmed, Danilo Pau, et al. Mlperf tiny benchmark. arXiv preprint arXiv:2106.07597 , 2021 . Colby Banbury, Vijay Janapa Reddi, Peter Torelli, Jeremy Holleman, Nat Jeffries, Csaba Kiraly, Pietro Montino, David Kanter, Sebastian Ahmed, Danilo Pau, et al. Mlperf tiny benchmark. arXiv preprint arXiv:2106.07597, 2021."},{"key":"e_1_3_2_1_5_1","volume-title":"Tutorial at the International Symposium on Microarchitecture (MICRO)","author":"Beckmann Bradford M","year":"2015","unstructured":"Bradford M Beckmann and Anthony Gutierrez . The amd gem5 apu simulator: Modeling heterogeneous systems in gem5 . In Tutorial at the International Symposium on Microarchitecture (MICRO) , 2015 . Bradford M Beckmann and Anthony Gutierrez. The amd gem5 apu simulator: Modeling heterogeneous systems in gem5. In Tutorial at the International Symposium on Microarchitecture (MICRO), 2015."},{"key":"e_1_3_2_1_6_1","volume-title":"The gem5 simulator. ACM SIGARCH computer architecture news, 39(2):1--7","author":"Binkert Nathan","year":"2011","unstructured":"Nathan Binkert , Bradford Beckmann , Gabriel Black , Steven K Reinhardt , Ali Saidi , Arkaprava Basu , Joel Hestness , Derek R Hower , Tushar Krishna , Somayeh Sardashti , The gem5 simulator. ACM SIGARCH computer architecture news, 39(2):1--7 , 2011 . Nathan Binkert, Bradford Beckmann, Gabriel Black, Steven K Reinhardt, Ali Saidi, Arkaprava Basu, Joel Hestness, Derek R Hower, Tushar Krishna, Somayeh Sardashti, et al. The gem5 simulator. ACM SIGARCH computer architecture news, 39(2):1--7, 2011."},{"key":"e_1_3_2_1_7_1","volume-title":"Language models are few-shot learners. Advances in neural information processing systems, 33:1877--1901","author":"Brown Tom","year":"2020","unstructured":"Tom Brown , Benjamin Mann , Nick Ryder , Melanie Subbiah , Jared D Kaplan , Prafulla Dhariwal , Arvind Neelakantan , Pranav Shyam , Girish Sastry , Amanda Askell , Language models are few-shot learners. Advances in neural information processing systems, 33:1877--1901 , 2020 . Tom Brown, Benjamin Mann, Nick Ryder, Melanie Subbiah, Jared D Kaplan, Prafulla Dhariwal, Arvind Neelakantan, Pranav Shyam, Girish Sastry, Amanda Askell, et al. Language models are few-shot learners. Advances in neural information processing systems, 33:1877--1901, 2020."},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"crossref","first-page":"96","DOI":"10.1109\/ISPASS51385.2021.00027","volume-title":"2021 IEEE International Symposium on Performance Analysis of Systems and Software (ISPASS)","author":"Buch Michael","year":"2021","unstructured":"Michael Buch , Zahra Azad , Ajay Joshi , and Vijay Janapa Reddi . Ai tax in mobile socs: End-to-end performance analysis of machine learning in smartphones . In 2021 IEEE International Symposium on Performance Analysis of Systems and Software (ISPASS) , pages 96 -- 106 . IEEE, 2021 . Michael Buch, Zahra Azad, Ajay Joshi, and Vijay Janapa Reddi. Ai tax in mobile socs: End-to-end performance analysis of machine learning in smartphones. In 2021 IEEE International Symposium on Performance Analysis of Systems and Software (ISPASS), pages 96--106. IEEE, 2021."},{"key":"e_1_3_2_1_9_1","first-page":"1","volume-title":"Proceedings of 2011 International Conference for High Performance Computing, Networking, Storage and Analysis","author":"Carlson Trevor E","year":"2011","unstructured":"Trevor E Carlson , Wim Heirman , and Lieven Eeckhout . Sniper : Exploring the level of abstraction for scalable and accurate parallel multi-core simulation . In Proceedings of 2011 International Conference for High Performance Computing, Networking, Storage and Analysis , pages 1 -- 12 , 2011 . Trevor E Carlson, Wim Heirman, and Lieven Eeckhout. Sniper: Exploring the level of abstraction for scalable and accurate parallel multi-core simulation. In Proceedings of 2011 International Conference for High Performance Computing, Networking, Storage and Analysis, pages 1--12, 2011."},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1145\/2654822.2541967"},{"issue":"101","key":"e_1_3_2_1_11_1","first-page":"102","article-title":"An end-to-end deep learning benchmark and competition","volume":"100","author":"Coleman Cody","year":"2017","unstructured":"Cody Coleman , Deepak Narayanan , Daniel Kang , Tian Zhao , Jian Zhang , Luigi Nardi , Peter Bailis , Kunle Olukotun , Chris R\u00e9 , and Matei Zaharia . Dawnbench : An end-to-end deep learning benchmark and competition . Training , 100 ( 101 ): 102 , 2017 . Cody Coleman, Deepak Narayanan, Daniel Kang, Tian Zhao, Jian Zhang, Luigi Nardi, Peter Bailis, Kunle Olukotun, Chris R\u00e9, and Matei Zaharia. Dawnbench: An end-to-end deep learning benchmark and competition. Training, 100(101):102, 2017.","journal-title":"Training"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1145\/3297858.3304074"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"crossref","first-page":"231","DOI":"10.1109\/ISPASS.2006.1620807","volume-title":"2006 IEEE International Symposium on Performance Analysis of Systems and Software","author":"Del Barrio Victor Moya","year":"2006","unstructured":"Victor Moya Del Barrio , Carlos Gonz\u00e1lez , Jordi Roca , Agust\u00edn Fern\u00e1ndez , and E Espasa . Attila : a cycle-level execution-driven simulator for modern gpu architectures . In 2006 IEEE International Symposium on Performance Analysis of Systems and Software , pages 231 -- 241 . IEEE, 2006 . Victor Moya Del Barrio, Carlos Gonz\u00e1lez, Jordi Roca, Agust\u00edn Fern\u00e1ndez, and E Espasa. Attila: a cycle-level execution-driven simulator for modern gpu architectures. In 2006 IEEE International Symposium on Performance Analysis of Systems and Software, pages 231--241. IEEE, 2006."},{"key":"e_1_3_2_1_14_1","first-page":"2014","article-title":"Resource-Efficient and QoS-Aware Cluster Management. In Proceedings of the Nineteenth International Conference on Architectural Support for Programming Languages and Operating Systems (ASPLOS). Salt Lake City","author":"Delimitrou Christina","year":"2014","unstructured":"Christina Delimitrou and Christos Kozyrakis . Quasar : Resource-Efficient and QoS-Aware Cluster Management. In Proceedings of the Nineteenth International Conference on Architectural Support for Programming Languages and Operating Systems (ASPLOS). Salt Lake City , UT, USA , 2014 , 2014 . Christina Delimitrou and Christos Kozyrakis. Quasar: Resource-Efficient and QoS-Aware Cluster Management. In Proceedings of the Nineteenth International Conference on Architectural Support for Programming Languages and Operating Systems (ASPLOS). Salt Lake City, UT, USA, 2014, 2014.","journal-title":"UT, USA"},{"key":"e_1_3_2_1_15_1","volume-title":"Christos Kozyrakis. ECHO: Recreating Network Traffic Maps for Datacenters of Tens of Thousands of Servers. In Proceedings of the IEEE International Symposium on Workload Characterization (IISWC)","author":"Delimitrou Christina","year":"2012","unstructured":"Christina Delimitrou , Sriram Sankar , Aman Kansal , and Christos Kozyrakis. ECHO: Recreating Network Traffic Maps for Datacenters of Tens of Thousands of Servers. In Proceedings of the IEEE International Symposium on Workload Characterization (IISWC) , 2012 . Christina Delimitrou, Sriram Sankar, Aman Kansal, and Christos Kozyrakis. ECHO: Recreating Network Traffic Maps for Datacenters of Tens of Thousands of Servers. In Proceedings of the IEEE International Symposium on Workload Characterization (IISWC), 2012."},{"key":"e_1_3_2_1_16_1","volume-title":"Bert: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805","author":"Devlin Jacob","year":"2018","unstructured":"Jacob Devlin , Ming-Wei Chang , Kenton Lee , and Kristina Toutanova . Bert: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805 , 2018 . Jacob Devlin, Ming-Wei Chang, Kenton Lee, and Kristina Toutanova. Bert: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805, 2018."},{"key":"e_1_3_2_1_17_1","unstructured":"Facebook. Fbgemm. https:\/\/github.com\/pytorch\/FBGEMM.  Facebook. Fbgemm. https:\/\/github.com\/pytorch\/FBGEMM."},{"key":"e_1_3_2_1_18_1","unstructured":"Facebook. Param benchmarks. https:\/\/github.com\/facebookresearch\/param.  Facebook. Param benchmarks. https:\/\/github.com\/facebookresearch\/param."},{"key":"e_1_3_2_1_19_1","unstructured":"Facebook. torchrec. https:\/\/github.com\/pytorch\/torchrec.  Facebook. torchrec. https:\/\/github.com\/pytorch\/torchrec."},{"key":"e_1_3_2_1_20_1","unstructured":"Facebook. torchvision models. https:\/\/pytorch.org\/vision\/stable\/models.html.  Facebook. torchvision models. https:\/\/pytorch.org\/vision\/stable\/models.html."},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1109\/LCA.2018.2839189"},{"key":"e_1_3_2_1_22_1","first-page":"135","volume-title":"Proceedings of the 26th ACM International Conference on Architectural Support for Programming Languages and Operating Systems, ASPLOS 2021","author":"Gan Yu","year":"2021","unstructured":"Yu Gan , Mingyu Liang , Sundar Dev , David Lo , and Christina Delimitrou . Sage : Practical and scalable ml-driven performance debugging in microservices . In Proceedings of the 26th ACM International Conference on Architectural Support for Programming Languages and Operating Systems, ASPLOS 2021 , page 135 -- 151 , New York, NY, USA , April 2021 . Association for Computing Machinery. Yu Gan, Mingyu Liang, Sundar Dev, David Lo, and Christina Delimitrou. Sage: Practical and scalable ml-driven performance debugging in microservices. In Proceedings of the 26th ACM International Conference on Architectural Support for Programming Languages and Operating Systems, ASPLOS 2021, page 135--151, New York, NY, USA, April 2021. Association for Computing Machinery."},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1145\/3297858.3304013"},{"issue":"4","key":"e_1_3_2_1_24_1","doi-asserted-by":"crossref","first-page":"833","DOI":"10.1109\/TC.2013.36","article-title":"Automatic generation of miniaturized synthetic proxies for target applications to efficiently design multicore processors","volume":"63","author":"Ganesan Karthik","year":"2013","unstructured":"Karthik Ganesan and Lizy Kurian John . Automatic generation of miniaturized synthetic proxies for target applications to efficiently design multicore processors . IEEE Transactions on Computers , 63 ( 4 ): 833 -- 846 , 2013 . Karthik Ganesan and Lizy Kurian John. Automatic generation of miniaturized synthetic proxies for target applications to efficiently design multicore processors. IEEE Transactions on Computers, 63(4):833--846, 2013.","journal-title":"IEEE Transactions on Computers"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1109\/MM.2022.3163226"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA47549.2020.00047"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1109\/IISWC50251.2020.00024"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1145\/3079856.3080246"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"crossref","first-page":"3873","DOI":"10.1109\/BigData.2018.8622396","volume-title":"2018 IEEE international conference on big data (Big Data)","author":"Justus Daniel","year":"2018","unstructured":"Daniel Justus , John Brennan , Stephen Bonner , and Andrew Stephen McGough . Predicting the computational cost of deep learning models . In 2018 IEEE international conference on big data (Big Data) , pages 3873 -- 3882 . IEEE, 2018 . Daniel Justus, John Brennan, Stephen Bonner, and Andrew Stephen McGough. Predicting the computational cost of deep learning models. In 2018 IEEE international conference on big data (Big Data), pages 3873--3882. IEEE, 2018."},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA45697.2020.00047"},{"key":"e_1_3_2_1_32_1","first-page":"943","volume-title":"2020 IEEE 40th International Conference on Distributed Computing Systems (ICDCS)","author":"Li Shijian","year":"2020","unstructured":"Shijian Li , Robert J Walls , and Tian Guo . Characterizing and modeling distributed training with transient cloud gpu servers . In 2020 IEEE 40th International Conference on Distributed Computing Systems (ICDCS) , pages 943 -- 953 . IEEE, 2020 . Shijian Li, Robert J Walls, and Tian Guo. Characterizing and modeling distributed training with transient cloud gpu servers. In 2020 IEEE 40th International Conference on Distributed Computing Systems (ICDCS), pages 943--953. IEEE, 2020."},{"key":"e_1_3_2_1_33_1","volume-title":"Proceedings of the Twenty Eighth International Conference on Architectural Support for Programming Languages and Operating Systems (ASPLOS)","author":"Liang Mingyu","year":"2023","unstructured":"Mingyu Liang , Yu Gan , Yueying Li , Carlos Torres , Abhishek Dhanotia , Mahesh Ketkar , and Christina Delimitrou . Ditto: End-to-End Application Cloning for Networked Cloud Services . In Proceedings of the Twenty Eighth International Conference on Architectural Support for Programming Languages and Operating Systems (ASPLOS) , April 2023 . Mingyu Liang, Yu Gan, Yueying Li, Carlos Torres, Abhishek Dhanotia, Mahesh Ketkar, and Christina Delimitrou. Ditto: End-to-End Application Cloning for Networked Cloud Services. In Proceedings of the Twenty Eighth International Conference on Architectural Support for Programming Languages and Operating Systems (ASPLOS), April 2023."},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"crossref","first-page":"153","DOI":"10.1109\/ICS51289.2020.00039","volume-title":"2020 International Computer Symposium (ICS)","author":"Liao Ying-Chiao","year":"2020","unstructured":"Ying-Chiao Liao , Chuan-Chi Wang , Chia-Heng Tu , Ming-Chang Kao , Wen-Yew Liang , and Shih-Hao Hung . Perfnetrt : Platform-aware performance modeling for optimized deep neural networks . In 2020 International Computer Symposium (ICS) , pages 153 -- 158 . IEEE, 2020 . Ying-Chiao Liao, Chuan-Chi Wang, Chia-Heng Tu, Ming-Chang Kao, Wen-Yew Liang, and Shih-Hao Hung. Perfnetrt: Platform-aware performance modeling for optimized deep neural networks. In 2020 International Computer Symposium (ICS), pages 153--158. IEEE, 2020."},{"key":"e_1_3_2_1_35_1","volume-title":"Building a performance model for deep learning recommendation model training on gpus. arXiv preprint arXiv:2201.07821","author":"Lin Zhongyi","year":"2022","unstructured":"Zhongyi Lin , Louis Feng , Ehsan K Ardestani , Jaewon Lee , John Lundell , Changkyu Kim , Arun Kejariwal , and John D Owens . Building a performance model for deep learning recommendation model training on gpus. arXiv preprint arXiv:2201.07821 , 2022 . Zhongyi Lin, Louis Feng, Ehsan K Ardestani, Jaewon Lee, John Lundell, Changkyu Kim, Arun Kejariwal, and John D Owens. Building a performance model for deep learning recommendation model training on gpus. arXiv preprint arXiv:2201.07821, 2022."},{"key":"e_1_3_2_1_36_1","first-page":"336","article-title":"Mlperf training benchmark","volume":"2","author":"Mattson Peter","year":"2020","unstructured":"Peter Mattson , Christine Cheng , Gregory Diamos , Cody Coleman , Paulius Micikevicius , David Patterson , Hanlin Tang , Gu-Yeon Wei , Peter Bailis , Victor Bittorf , Mlperf training benchmark . Proceedings of Machine Learning and Systems , 2 : 336 -- 349 , 2020 . Peter Mattson, Christine Cheng, Gregory Diamos, Cody Coleman, Paulius Micikevicius, David Patterson, Hanlin Tang, Gu-Yeon Wei, Peter Bailis, Victor Bittorf, et al. Mlperf training benchmark. Proceedings of Machine Learning and Systems, 2:336--349, 2020.","journal-title":"Proceedings of Machine Learning and Systems"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1109\/MM.2020.2974843"},{"key":"e_1_3_2_1_38_1","unstructured":"MLCommons. Mlperf training benchmarks. https:\/\/mlcommons.org\/en\/training-normal-21\/.  MLCommons. Mlperf training benchmarks. https:\/\/mlcommons.org\/en\/training-normal-21\/."},{"key":"e_1_3_2_1_39_1","unstructured":"Maxim Naumov Dheevatsa Mudigere Hao-Jun Michael Shi Jianyu Huang Narayanan Sundaraman Jongsoo Park Xiaodong Wang Udit Gupta Carole-Jean Wu Alisson G Azzolini et al. Deep learning recommendation model for personalization and recommendation systems. arXiv preprint arXiv:1906.00091 2019.  Maxim Naumov Dheevatsa Mudigere Hao-Jun Michael Shi Jianyu Huang Narayanan Sundaraman Jongsoo Park Xiaodong Wang Udit Gupta Carole-Jean Wu Alisson G Azzolini et al. Deep learning recommendation model for personalization and recommendation systems. arXiv preprint arXiv:1906.00091 2019."},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1109\/MM.2021.3058217"},{"key":"e_1_3_2_1_41_1","unstructured":"NVIDIA. nccl. https:\/\/developer.nvidia.com\/nccl.  NVIDIA. nccl. https:\/\/developer.nvidia.com\/nccl."},{"key":"e_1_3_2_1_42_1","unstructured":"OpenMPI. mpirun. https:\/\/www.open-mpi.org\/doc\/current\/man1\/mpirun.1.php.  OpenMPI. mpirun. https:\/\/www.open-mpi.org\/doc\/current\/man1\/mpirun.1.php."},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/N19-4009"},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"crossref","first-page":"105","DOI":"10.1109\/PACT.2017.44","volume-title":"2017 26th International Conference on Parallel Architectures and Compilation Techniques (PACT)","author":"Panda Reena","year":"2017","unstructured":"Reena Panda and Lizy Kurian John . Proxy benchmarks for emerging big-data workloads . In 2017 26th International Conference on Parallel Architectures and Compilation Techniques (PACT) , pages 105 -- 116 . IEEE, 2017 . Reena Panda and Lizy Kurian John. Proxy benchmarks for emerging big-data workloads. In 2017 26th International Conference on Parallel Architectures and Compilation Techniques (PACT), pages 105--116. IEEE, 2017."},{"key":"e_1_3_2_1_45_1","first-page":"337","volume-title":"Automation & Test in Europe Conference & Exhibition (DATE)","author":"Panda Reena","year":"2018","unstructured":"Reena Panda , Xinnian Zheng , Andreas Gerstlauer , and Lizy Kurian John . Camp : Accurate modeling of core and memory locality for proxy generation of big-data applications. In 2018 Design , Automation & Test in Europe Conference & Exhibition (DATE) , pages 337 -- 342 . IEEE, 2018 . Reena Panda, Xinnian Zheng, Andreas Gerstlauer, and Lizy Kurian John. Camp: Accurate modeling of core and memory locality for proxy generation of big-data applications. In 2018 Design, Automation & Test in Europe Conference & Exhibition (DATE), pages 337--342. IEEE, 2018."},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.1145\/3061639.3062320"},{"issue":"1","key":"e_1_3_2_1_47_1","doi-asserted-by":"crossref","first-page":"34","DOI":"10.1109\/LCA.2014.2299539","article-title":"A heterogeneous cpu-gpu simulator","volume":"14","author":"Power Jason","year":"2014","unstructured":"Jason Power , Joel Hestness , Marc S Orr , Mark D Hill , and David A Wood . gem5-gpu : A heterogeneous cpu-gpu simulator . IEEE Computer Architecture Letters , 14 ( 1 ): 34 -- 36 , 2014 . Jason Power, Joel Hestness, Marc S Orr, Mark D Hill, and David A Wood. gem5-gpu: A heterogeneous cpu-gpu simulator. IEEE Computer Architecture Letters, 14(1):34--36, 2014.","journal-title":"IEEE Computer Architecture Letters"},{"key":"e_1_3_2_1_48_1","unstructured":"PyTorch. c10d. https:\/\/pytorch.org\/docs\/stable\/distributed.html.  PyTorch. c10d. https:\/\/pytorch.org\/docs\/stable\/distributed.html."},{"key":"e_1_3_2_1_49_1","unstructured":"PyTorch. Pytorch profiler. https:\/\/pytorch.org\/tutorials\/recipes\/recipes\/profiler_recipe.html.  PyTorch. Pytorch profiler. https:\/\/pytorch.org\/tutorials\/recipes\/recipes\/profiler_recipe.html."},{"key":"e_1_3_2_1_50_1","volume-title":"Hierarchical text-conditional image generation with clip latents. arXiv preprint arXiv:2204.06125","author":"Ramesh Aditya","year":"2022","unstructured":"Aditya Ramesh , Prafulla Dhariwal , Alex Nichol , Casey Chu , and Mark Chen . Hierarchical text-conditional image generation with clip latents. arXiv preprint arXiv:2204.06125 , 2022 . Aditya Ramesh, Prafulla Dhariwal, Alex Nichol, Casey Chu, and Mark Chen. Hierarchical text-conditional image generation with clip latents. arXiv preprint arXiv:2204.06125, 2022."},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"crossref","first-page":"70","DOI":"10.1109\/ISPASS51385.2021.00018","volume-title":"2021 IEEE International Symposium on Performance Analysis of Systems and Software (ISPASS)","author":"Ravi Gokul Subramanian","year":"2021","unstructured":"Gokul Subramanian Ravi , Ramon Bertran , Pradip Bose , and Mikko Lipasti . Micrograd : A centralized framework for workload cloning and stress testing . In 2021 IEEE International Symposium on Performance Analysis of Systems and Software (ISPASS) , pages 70 -- 72 . IEEE, 2021 . Gokul Subramanian Ravi, Ramon Bertran, Pradip Bose, and Mikko Lipasti. Micrograd: A centralized framework for workload cloning and stress testing. In 2021 IEEE International Symposium on Performance Analysis of Systems and Software (ISPASS), pages 70--72. IEEE, 2021."},{"key":"e_1_3_2_1_52_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA45697.2020.00045"},{"key":"e_1_3_2_1_53_1","doi-asserted-by":"crossref","first-page":"515","DOI":"10.1109\/HPCA47549.2020.00049","volume-title":"2020 IEEE International Symposium on High Performance Computer Architecture (HPCA)","author":"Richins Daniel","year":"2020","unstructured":"Daniel Richins , Dharmisha Doshi , Matthew Blackmore , Aswathy Thulaseedharan Nair , Neha Pathapati , Ankit Patel , Brainard Daguman , Daniel Dobrijalowski , Ramesh Illikkal , Kevin Long , Missing the forest for the trees: End-to-end ai application performance in edge data centers . In 2020 IEEE International Symposium on High Performance Computer Architecture (HPCA) , pages 515 -- 528 . IEEE, 2020 . Daniel Richins, Dharmisha Doshi, Matthew Blackmore, Aswathy Thulaseedharan Nair, Neha Pathapati, Ankit Patel, Brainard Daguman, Daniel Dobrijalowski, Ramesh Illikkal, Kevin Long, et al. Missing the forest for the trees: End-to-end ai application performance in edge data centers. In 2020 IEEE International Symposium on High Performance Computer Architecture (HPCA), pages 515--528. IEEE, 2020."},{"key":"e_1_3_2_1_54_1","volume-title":"Burcu Karagol Ayan, S Sara Mahdavi, Rapha Gontijo Lopes, et al. Photorealistic text-to-image diffusion models with deep language understanding. arXiv preprint arXiv:2205.11487","author":"Saharia Chitwan","year":"2022","unstructured":"Chitwan Saharia , William Chan , Saurabh Saxena , Lala Li , Jay Whang , Emily Denton , Seyed Kamyar Seyed Ghasemipour , Burcu Karagol Ayan, S Sara Mahdavi, Rapha Gontijo Lopes, et al. Photorealistic text-to-image diffusion models with deep language understanding. arXiv preprint arXiv:2205.11487 , 2022 . Chitwan Saharia, William Chan, Saurabh Saxena, Lala Li, Jay Whang, Emily Denton, Seyed Kamyar Seyed Ghasemipour, Burcu Karagol Ayan, S Sara Mahdavi, Rapha Gontijo Lopes, et al. Photorealistic text-to-image diffusion models with deep language understanding. arXiv preprint arXiv:2205.11487, 2022."},{"key":"e_1_3_2_1_55_1","volume-title":"Julian Schrittwieser, Ioannis Antonoglou, Veda Panneershelvam, Marc Lanctot, et al. Mastering the game of go with deep neural networks and tree search. nature, 529(7587):484--489","author":"Silver David","year":"2016","unstructured":"David Silver , Aja Huang , Chris J Maddison , Arthur Guez , Laurent Sifre , George Van Den Driessche , Julian Schrittwieser, Ioannis Antonoglou, Veda Panneershelvam, Marc Lanctot, et al. Mastering the game of go with deep neural networks and tree search. nature, 529(7587):484--489 , 2016 . David Silver, Aja Huang, Chris J Maddison, Arthur Guez, Laurent Sifre, George Van Den Driessche, Julian Schrittwieser, Ioannis Antonoglou, Veda Panneershelvam, Marc Lanctot, et al. Mastering the game of go with deep neural networks and tree search. nature, 529(7587):484--489, 2016."},{"key":"e_1_3_2_1_56_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298594"},{"key":"e_1_3_2_1_57_1","doi-asserted-by":"crossref","first-page":"201","DOI":"10.1145\/1346281.1346307","volume-title":"Proceedings of the 13th international conference on Architectural support for programming languages and operating systems","author":"Ertvelde Luk Van","year":"2008","unstructured":"Luk Van Ertvelde and Lieven Eeckhout . Dispersing proprietary applications as benchmarks through code mutation . In Proceedings of the 13th international conference on Architectural support for programming languages and operating systems , pages 201 -- 210 , 2008 . Luk Van Ertvelde and Lieven Eeckhout. Dispersing proprietary applications as benchmarks through code mutation. In Proceedings of the 13th international conference on Architectural support for programming languages and operating systems, pages 201--210, 2008."},{"key":"e_1_3_2_1_58_1","doi-asserted-by":"crossref","first-page":"24","DOI":"10.1109\/ISPASS48437.2020.00013","volume-title":"2020 IEEE International Symposium on Performance Analysis of Systems and Software (ISPASS)","author":"Verma Snehil","year":"2020","unstructured":"Snehil Verma , Qinzhe Wu , Bagus Hanindhito , Gunjan Jha , Eugene B John , Ramesh Radhakrishnan , and Lizy K John . Demystifying the mlperf training benchmark suite . In 2020 IEEE International Symposium on Performance Analysis of Systems and Software (ISPASS) , pages 24 -- 33 . IEEE, 2020 . Snehil Verma, Qinzhe Wu, Bagus Hanindhito, Gunjan Jha, Eugene B John, Ramesh Radhakrishnan, and Lizy K John. Demystifying the mlperf training benchmark suite. In 2020 IEEE International Symposium on Performance Analysis of Systems and Software (ISPASS), pages 24--33. IEEE, 2020."},{"key":"e_1_3_2_1_59_1","doi-asserted-by":"publisher","DOI":"10.1038\/s41586-019-1724-z"},{"key":"e_1_3_2_1_60_1","doi-asserted-by":"crossref","first-page":"97","DOI":"10.1109\/ISPASS.2017.7975274","volume-title":"2017 IEEE International Symposium on Performance Analysis of Systems and Software (ISPASS)","author":"Wang Yipeng","year":"2017","unstructured":"Yipeng Wang , Amro Awad , and Yan Solihin . Clone morphing: creating new workload behavior from existing applications . In 2017 IEEE International Symposium on Performance Analysis of Systems and Software (ISPASS) , pages 97 -- 108 . IEEE, 2017 . Yipeng Wang, Amro Awad, and Yan Solihin. Clone morphing: creating new workload behavior from existing applications. In 2017 IEEE International Symposium on Performance Analysis of Systems and Software (ISPASS), pages 97--108. IEEE, 2017."},{"key":"e_1_3_2_1_61_1","first-page":"795","article-title":"Sustainable ai: Environmental implications, challenges and opportunities","volume":"4","author":"Wu Carole-Jean","year":"2022","unstructured":"Carole-Jean Wu , Ramya Raghavendra , Udit Gupta , Bilge Acun , Newsha Ardalani , Kiwan Maeng , Gloria Chang , Fiona Aga , Jinshi Huang , Charles Bai , Sustainable ai: Environmental implications, challenges and opportunities . Proceedings of Machine Learning and Systems , 4 : 795 -- 813 , 2022 . Carole-Jean Wu, Ramya Raghavendra, Udit Gupta, Bilge Acun, Newsha Ardalani, Kiwan Maeng, Gloria Chang, Fiona Aga, Jinshi Huang, Charles Bai, et al. Sustainable ai: Environmental implications, challenges and opportunities. Proceedings of Machine Learning and Systems, 4:795--813, 2022.","journal-title":"Proceedings of Machine Learning and Systems"},{"key":"e_1_3_2_1_62_1","volume-title":"Computational performance predictions for deep neural network training: A runtime-based approach. CoRR","author":"Yu GX","year":"2021","unstructured":"GX Yu , Y Gao , P Golikov , and G Pekhimenko . Computational performance predictions for deep neural network training: A runtime-based approach. CoRR , vol. abs\/ 2102 .00527, 2021 . GX Yu, Y Gao, P Golikov, and G Pekhimenko. Computational performance predictions for deep neural network training: A runtime-based approach. CoRR, vol. abs\/2102.00527, 2021."},{"key":"e_1_3_2_1_63_1","doi-asserted-by":"publisher","DOI":"10.1109\/TC.2015.2395427"},{"key":"e_1_3_2_1_64_1","doi-asserted-by":"publisher","DOI":"10.1145\/3470496.3533044"},{"key":"e_1_3_2_1_65_1","doi-asserted-by":"publisher","DOI":"10.1145\/3219819.3219823"},{"key":"e_1_3_2_1_66_1","doi-asserted-by":"publisher","DOI":"10.1109\/IISWC.2018.8573476"},{"key":"e_1_3_2_1_67_1","first-page":"337","volume-title":"2020 USENIX Annual Technical Conference (USENIX ATC 20)","author":"Zhu Hongyu","year":"2020","unstructured":"Hongyu Zhu , Amar Phanishayee , and Gennady Pekhimenko . Daydream : Accurately estimating the efficacy of optimizations for {DNN} training . In 2020 USENIX Annual Technical Conference (USENIX ATC 20) , pages 337 -- 352 , 2020 . Hongyu Zhu, Amar Phanishayee, and Gennady Pekhimenko. Daydream: Accurately estimating the efficacy of optimizations for {DNN} training. In 2020 USENIX Annual Technical Conference (USENIX ATC 20), pages 337--352, 2020."}],"event":{"name":"ISCA '23: 50th Annual International Symposium on Computer Architecture","location":"Orlando FL USA","acronym":"ISCA '23","sponsor":["SIGARCH ACM Special Interest Group on Computer Architecture","IEEE"]},"container-title":["Proceedings of the 50th Annual International Symposium on Computer Architecture"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3579371.3589072","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T16:46:39Z","timestamp":1750178799000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3579371.3589072"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,6,17]]},"references-count":67,"alternative-id":["10.1145\/3579371.3589072","10.1145\/3579371"],"URL":"https:\/\/doi.org\/10.1145\/3579371.3589072","relation":{},"subject":[],"published":{"date-parts":[[2023,6,17]]},"assertion":[{"value":"2023-06-17","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}