{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,10]],"date-time":"2026-01-10T08:26:19Z","timestamp":1768033579099,"version":"3.49.0"},"publisher-location":"New York, NY, USA","reference-count":56,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,3,6]],"date-time":"2024-03-06T00:00:00Z","timestamp":1709683200000},"content-version":"vor","delay-in-days":3,"URL":"http:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"Department of Energy","award":["DE-AC52-07NA27344, DE-SC0022182"],"award-info":[{"award-number":["DE-AC52-07NA27344, DE-SC0022182"]}]},{"DOI":"10.13039\/501100006374","name":"National Science Foundation","doi-asserted-by":"publisher","award":["2119348"],"award-info":[{"award-number":["2119348"]}],"id":[{"id":"10.13039\/501100006374","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,3,3]]},"DOI":"10.1145\/3649169.3649246","type":"proceedings-article","created":{"date-parts":[[2024,3,6]],"date-time":"2024-03-06T06:04:07Z","timestamp":1709705047000},"page":"22-31","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":2,"title":["MUPPET"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-7511-0269","authenticated-orcid":false,"given":"Dolores","family":"Miao","sequence":"first","affiliation":[{"name":"University of California, Davis, Davis, CA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9374-4433","authenticated-orcid":false,"given":"Ignacio","family":"Laguna","sequence":"additional","affiliation":[{"name":"Lawrence Livermore National Laboratory, Livermore, CA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6542-3555","authenticated-orcid":false,"given":"Giorgis","family":"Georgakoudis","sequence":"additional","affiliation":[{"name":"Lawrence Livermore National Laboratory, Livermore, CA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8258-9693","authenticated-orcid":false,"given":"Konstantinos","family":"Parasyris","sequence":"additional","affiliation":[{"name":"Lawrence Livermore National Laboratory, Livermore, CA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0861-3763","authenticated-orcid":false,"given":"Cindy","family":"Rubio-Gonz\u00e1lez","sequence":"additional","affiliation":[{"name":"University of California, Davis, Davis, CA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2024,3,6]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"2018. Performance Engineering of Software Systems. https:\/\/ocw.mit.edu\/courses\/6-172-performance-engineering-of-software-systems-fall-2018\/"},{"key":"e_1_3_2_1_2_1","volume-title":"Abu-Sufah and Asma Abdel Karim","author":"Walid","year":"2013","unstructured":"Walid A. Abu-Sufah and Asma Abdel Karim. 2013. Auto-tuning of Sparse Matrix-Vector Multiplication on Graphics Processors. In ISC (Lecture Notes in Computer Science), Vol. 7905. Springer, 151--164."},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.5555\/1753228.1753233"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"crossref","unstructured":"Jason Ansel Shoaib Kamil Kalyan Veeramachaneni et al. 2014. Open-Tuner: an extensible framework for program autotuning. In PACT. ACM 303--316.","DOI":"10.1145\/2628071.2628092"},{"key":"e_1_3_2_1_5_1","volume-title":"Nafees Iqbal, Foyzul Hassan, et al.","year":"2023","unstructured":"Md. Abul Kalam Azad, Nafees Iqbal, Foyzul Hassan, et al. 2023. An Empirical Study of High Performance Computing (HPC) Performance Bugs. In MSR. IEEE, 194--206."},{"key":"e_1_3_2_1_6_1","volume-title":"Apollo: Reusable Models for Fast, Dynamic Tuning of Input-Dependent Code","author":"Beckingsale David","year":"2017","unstructured":"David Beckingsale, Olga Pearce, Ignacio Laguna, et al. 2017. Apollo: Reusable Models for Fast, Dynamic Tuning of Input-Dependent Code. In IPDPS. IEEE Computer Society, 307--316."},{"key":"e_1_3_2_1_7_1","first-page":"1","article-title":"Using automated performance modeling to find scalability bugs in complex codes","volume":"45","author":"Calotoiu Alexandru","year":"2013","unstructured":"Alexandru Calotoiu, Torsten Hoefler, Marius Poke, et al. 2013. Using automated performance modeling to find scalability bugs in complex codes. In SC. ACM, 45:1--45:12.","journal-title":"SC. ACM"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.jss.2018.05.010"},{"key":"e_1_3_2_1_9_1","unstructured":"Chun Chen Jacqueline Chame and Mary Hall. 2008. CHiLL: A framework for composing high-level loop transformations. Technical Report. Citeseer."},{"key":"e_1_3_2_1_10_1","volume-title":"Sergio Segura, et al.","author":"Delgado-P\u00e9rez Pedro","year":"2020","unstructured":"Pedro Delgado-P\u00e9rez, Ana Bel\u00e9n S\u00e1nchez, Sergio Segura, et al. 2020. Performance mutation testing. Software Testing, Verification and Reliability (2020), e1728."},{"key":"e_1_3_2_1_11_1","volume-title":"Mull It Over: Mutation Testing Based on LLVM. In ICST Workshops. IEEE Computer Society, 25--31","author":"Denisov Alex","year":"2018","unstructured":"Alex Denisov and Stanislav Pankevich. 2018. Mull It Over: Mutation Testing Based on LLVM. In ICST Workshops. IEEE Computer Society, 25--31."},{"key":"e_1_3_2_1_12_1","volume-title":"PMBS@SC","author":"Ding Nan","unstructured":"Nan Ding and Samuel Williams. 2019. An Instruction Roofline Model for GPUs. In PMBS@SC. IEEE, 7--18."},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/JPROC.2018.2868961"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1177\/1094342015593158"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"crossref","unstructured":"Akash Dutta Jordi Alcaraz Ali TehraniJamsaz et al. 2023. Performance Optimization using Multimodal Modeling and Heterogeneous GNN. In HPDC. ACM 45--57.","DOI":"10.1145\/3588195.3592984"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"crossref","unstructured":"Akash Dutta Jordi Alcaraz Ali TehraniJamsaz et al. 2022. Pattern-based Autotuning of OpenMP Loops using Graph Neural Networks. In AI4S. IEEE 26--31.","DOI":"10.1109\/AI4S56813.2022.00010"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-40698-0_13"},{"key":"e_1_3_2_1_18_1","unstructured":"The Exascale Co-Design Center for Materials in Extreme Environments (ExMatEx). 2013. CoMD - Classical molecular dynamics proxy application. https:\/\/github.com\/ECP-copa\/CoMD."},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1109\/JPROC.2004.840301"},{"key":"e_1_3_2_1_20_1","volume-title":"Autotuning Batch Cholesky Factorization in CUDA with Interleaved Layout of Matrices. In IPDPS Workshops. IEEE Computer Society, 1408--1417","author":"Gates Mark","year":"2017","unstructured":"Mark Gates, Jakub Kurzak, Piotr Luszczek, et al. 2017. Autotuning Batch Cholesky Factorization in CUDA with Interleaved Layout of Matrices. In IPDPS Workshops. IEEE Computer Society, 1408--1417."},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1002\/cpe.1556"},{"key":"e_1_3_2_1_22_1","volume-title":"FAROS: A Framework to Analyze OpenMP Compilation Through Benchmarking and Compiler Optimization Analysis. In IWOMP (Lecture Notes in Computer Science)","author":"Georgakoudis Giorgis","year":"2020","unstructured":"Giorgis Georgakoudis, Johannes Doerfert, Ignacio Laguna, et al. 2020. FAROS: A Framework to Analyze OpenMP Compilation Through Benchmarking and Compiler Optimization Analysis. In IWOMP (Lecture Notes in Computer Science), Vol. 12295. Springer, 3--17."},{"key":"e_1_3_2_1_23_1","unstructured":"Giorgis Georgakoudis Konstantinos Parasyris Chunhua Liao et al. 2023. Machine Learning-Driven Adaptive OpenMP For Portable Performance on Heterogeneous Systems. arXiv:cs.PL\/2303.08873"},{"key":"e_1_3_2_1_24_1","unstructured":"Tim Head Manoj Kumar Holger Nahrstaedt et al. 2021. scikit-optimize\/scikit-optimize."},{"key":"e_1_3_2_1_25_1","volume-title":"An analysis and survey of the development of mutation testing","author":"Jia Yue","year":"2010","unstructured":"Yue Jia and Mark Harman. 2010. An analysis and survey of the development of mutation testing. IEEE transactions on software engineering 37, 5 (2010), 649--678."},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"crossref","unstructured":"Ian Karlin Abhinav Bhatele Jeff Keasler et al. 2013. Exploring Traditional and Emerging Parallel Programming Models Using a Proxy Application. In IPDPS. IEEE Computer Society 919--932.","DOI":"10.1109\/IPDPS.2013.115"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPDS.2015.2481890"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-20656-7_12"},{"key":"e_1_3_2_1_29_1","volume-title":"International Workshop on Languages and Compilers for Parallel Computing. Springer, 308--322","author":"Liao Chunhua","year":"2009","unstructured":"Chunhua Liao, Daniel J Quinlan, Richard Vuduc, et al. 2009. Effective source-to-source outlining to support whole program empirical optimization. In International Workshop on Languages and Compilers for Parallel Computing. Springer, 308--322."},{"key":"e_1_3_2_1_30_1","volume-title":"International Workshop on Performance Modeling, Benchmarking and Simulation of High Performance Computer Systems. Springer, 129--148","author":"Lo Yu Jung","year":"2014","unstructured":"Yu Jung Lo, Samuel Williams, Brian Van Straalen, et al. 2014. Roofline model toolkit: A practical tool for architectural and program analysis. In International Workshop on Performance Modeling, Benchmarking and Simulation of High Performance Computer Systems. Springer, 129--148."},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.future.2021.07.021"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"crossref","unstructured":"Diogo Marques Helder Duarte Aleksandar Ilic et al. 2017. Performance Analysis with Cache-Aware Roofline Model in Intel Advisor. In HPCS. IEEE 898--907.","DOI":"10.1109\/HPCS.2017.150"},{"key":"e_1_3_2_1_33_1","volume-title":"Mohamed Wahib, et al.","author":"Matsumura Kazuaki","year":"2020","unstructured":"Kazuaki Matsumura, Hamid Reza Zohouri, Mohamed Wahib, et al. 2020. AN5D: automated stencil framework for high-degree temporal blocking on GPUs. In CGO. ACM, 199--211."},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1007\/BF01099263"},{"key":"e_1_3_2_1_35_1","volume-title":"Proceedings of the department of defense HPCMP users group conference","volume":"710","author":"Mucci Philip J","year":"1999","unstructured":"Philip J Mucci, Shirley Browne, Christine Deane, et al. 1999. PAPI: A portable interface to hardware performance counters. In Proceedings of the department of defense HPCMP users group conference, Vol. 710. Citeseer."},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1109\/GREENCOMP.2010.5598315"},{"key":"e_1_3_2_1_37_1","volume-title":"6th International Workshop on Parallel Matrix Algorithms and Applications (PMAA'10)","author":"Nath Rajib","year":"2010","unstructured":"Rajib Nath, Stanimire Tomov, Jack Dongarra, et al. 2010. Autotuning dense linear algebra libraries on gpus and overview of the magma library. In 6th International Workshop on Parallel Matrix Algorithms and Applications (PMAA'10)."},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1109\/MCSoC.2015.10"},{"key":"e_1_3_2_1_39_1","first-page":"1","article-title":"Scalable Tuning of (OpenMP) GPU Applications via Kernel Record and Replay","volume":"28","author":"Parasyris Konstantinos","year":"2023","unstructured":"Konstantinos Parasyris, Giorgis Georgakoudis, Esteban Rangel, et al. 2023. Scalable Tuning of (OpenMP) GPU Applications via Kernel Record and Replay. In SC. ACM, 28:1--28:14.","journal-title":"SC. ACM"},{"key":"e_1_3_2_1_40_1","volume-title":"Cetus users and compiler infrastructure workshop, in conjunction with PACT","author":"Quinlan Dan","unstructured":"Dan Quinlan and Chunhua Liao. 2011. The ROSE source-to-source compiler infrastructure. In Cetus users and compiler infrastructure workshop, in conjunction with PACT, Vol. 2011. Citeseer, 1."},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1109\/JPROC.2018.2862896"},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"crossref","unstructured":"Prashant Singh Rawat Miheer Vaidya Aravind Sukumaran-Rajam et al. 2019. On Optimizing Complex Stencils on GPUs. In IPDPS. IEEE 641--652.","DOI":"10.1109\/IPDPS.2019.00073"},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"crossref","unstructured":"Rohan Basu Roy Tirthak Patel Vijay Gadepally et al. 2021. Bliss: auto-tuning complex applications using a pool of diverse lightweight learning models. In PLDI. ACM 1280--1295.","DOI":"10.1145\/3453483.3454109"},{"key":"e_1_3_2_1_44_1","volume-title":"Hong Diep Nguyen, et al","author":"Rubio-Gonz\u00e1lez Cindy","year":"2013","unstructured":"Cindy Rubio-Gonz\u00e1lez, Cuong Nguyen, Hong Diep Nguyen, et al. 2013. Precimonious: tuning assistant for floating-point precision. In SC, William Gropp and Satoshi Matsuoka (Eds.). ACM, 27."},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1177\/1094342006064482"},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.1109\/LATW.2012.6261240"},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-28596-8_4"},{"key":"e_1_3_2_1_48_1","volume-title":"Hollingsworth","author":"Tapus Cristian","year":"2002","unstructured":"Cristian Tapus, I-Hsin Chung, and Jeffrey K. Hollingsworth. 2002. Active Harmony: Towards Automated Performance Tuning. In Proceedings of the 2002 ACM\/IEEE Conference on Supercomputing (SC '02). IEEE Computer Society Press, Washington, DC, USA, 1--11."},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"crossref","unstructured":"Jayaraman J. Thiagarajan Nikhil Jain Rushil Anirudh et al. 2018. Bootstrapping Parameter Space Exploration for Fast Tuning. In ICS. ACM 385--395.","DOI":"10.1145\/3205289.3205321"},{"key":"e_1_3_2_1_50_1","volume-title":"SC'98: Proceedings of the 1998 ACM\/IEEE conference on Supercomputing. IEEE, 38--38","author":"Clinton Whaley R","year":"1998","unstructured":"R Clinton Whaley and Jack J Dongarra. 1998. Automatically tuned linear algebra software. In SC'98: Proceedings of the 1998 ACM\/IEEE conference on Supercomputing. IEEE, 38--38."},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"publisher","DOI":"10.1145\/1498765.1498785"},{"key":"e_1_3_2_1_52_1","volume-title":"Artemis: Automatic Runtime Tuning of Parallel Execution Parameters Using Machine Learning. In ISC (Lecture Notes in Computer Science)","author":"Wood Chad","year":"2021","unstructured":"Chad Wood, Giorgis Georgakoudis, David Beckingsale, et al. 2021. Artemis: Automatic Runtime Tuning of Parallel Execution Parameters Using Machine Learning. In ISC (Lecture Notes in Computer Science), Vol. 12728. Springer, 453--472."},{"key":"e_1_3_2_1_53_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICPP.2012.30"},{"key":"e_1_3_2_1_54_1","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS.2007.370637"},{"key":"e_1_3_2_1_55_1","doi-asserted-by":"crossref","unstructured":"Xin You Hailong Yang Zhonghui Jiang et al. 2021. DRStencil: Exploiting Data Reuse within Low-order Stencil on GPU. In HPCC\/DSS\/SmartCity\/DependSys. IEEE 63--70.","DOI":"10.1109\/HPCC-DSS-SmartCity-DependSys53884.2021.00036"},{"key":"e_1_3_2_1_56_1","doi-asserted-by":"publisher","DOI":"10.1109\/32.988498"}],"event":{"name":"PPoPP '24: The 29th ACM SIGPLAN Annual Symposium on Principles and Practice of Parallel Programming","location":"Edinburgh United Kingdom","acronym":"PPoPP '24","sponsor":["SIGPLAN ACM Special Interest Group on Programming Languages","SIGHPC ACM Special Interest Group on High Performance Computing, Special Interest Group on High Performance Computing"]},"container-title":["Proceedings of the 15th International Workshop on Programming Models and Applications for Multicores and Manycores"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3649169.3649246","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3649169.3649246","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3649169.3649246","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,23]],"date-time":"2025-08-23T20:27:51Z","timestamp":1755980871000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3649169.3649246"}},"subtitle":["Optimizing Performance in OpenMP via Mutation Testing"],"short-title":[],"issued":{"date-parts":[[2024,3,3]]},"references-count":56,"alternative-id":["10.1145\/3649169.3649246","10.1145\/3649169"],"URL":"https:\/\/doi.org\/10.1145\/3649169.3649246","relation":{},"subject":[],"published":{"date-parts":[[2024,3,3]]},"assertion":[{"value":"2024-03-06","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}