{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,7,1]],"date-time":"2025-07-01T11:33:12Z","timestamp":1751369592176,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":56,"publisher":"ACM","license":[{"start":{"date-parts":[[2022,3,28]],"date-time":"2022-03-28T00:00:00Z","timestamp":1648425600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc\/4.0\/"}],"funder":[{"name":"ShuiMu Tsinghua Scholar fellowship","award":["2019SM131"],"award-info":[{"award-number":["2019SM131"]}]},{"name":"Tsinghua University Initiative Scientific Research Program","award":["20191080594"],"award-info":[{"award-number":["20191080594"]}]},{"name":"National Key R&D Program of China","award":["2021YFB0300300"],"award-info":[{"award-number":["2021YFB0300300"]}]},{"name":"National Natural Science Foundation of China","award":["U20A20226"],"award-info":[{"award-number":["U20A20226"]}]},{"name":"University of Sydney faculty startup funding"},{"name":"SOAR fellowship"},{"name":"Australia Research Council (ARC) Discovery Project","award":["DP210101984"],"award-info":[{"award-number":["DP210101984"]}]},{"name":"Beijing Natural Science Foundation","award":["4202031"],"award-info":[{"award-number":["4202031"]}]},{"name":"China Postdoctoral Science Foundation","award":["2020TQ0169"],"award-info":[{"award-number":["2020TQ0169"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2022,4,2]]},"DOI":"10.1145\/3503221.3508411","type":"proceedings-article","created":{"date-parts":[[2022,3,28]],"date-time":"2022-03-28T13:58:22Z","timestamp":1648475902000},"page":"150-162","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":10,"title":["Vapro"],"prefix":"10.1145","author":[{"given":"Liyan","family":"Zheng","sequence":"first","affiliation":[{"name":"Tsinghua University"}]},{"given":"Jidong","family":"Zhai","sequence":"additional","affiliation":[{"name":"Tsinghua University"}]},{"given":"Xiongchao","family":"Tang","sequence":"additional","affiliation":[{"name":"Tsinghua University and Sangfor Technologies Inc."}]},{"given":"Haojie","family":"Wang","sequence":"additional","affiliation":[{"name":"Tsinghua University"}]},{"given":"Teng","family":"Yu","sequence":"additional","affiliation":[{"name":"Tsinghua University"}]},{"given":"Yuyang","family":"Jin","sequence":"additional","affiliation":[{"name":"Tsinghua University"}]},{"given":"Shuaiwen Leon","family":"Song","sequence":"additional","affiliation":[{"name":"University of Sydney"}]},{"given":"Wenguang","family":"Chen","sequence":"additional","affiliation":[{"name":"Tsinghua University and BNRist"}]}],"member":"320","published-online":{"date-parts":[[2022,3,28]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"[n.d.]. The cuBERT framework. https:\/\/github.com\/zhihu\/cuBERT."},{"key":"e_1_3_2_1_2_1","unstructured":"[n.d.]. The MapReduce framework. https:\/\/github.com\/sysprog21\/mapreduce."},{"key":"e_1_3_2_1_3_1","unstructured":"[n.d.]. The Nekbone program. https:\/\/github.com\/Nek5000\/Nekbone."},{"key":"e_1_3_2_1_4_1","unstructured":"[n.d.]. The parallel PageRank program. https:\/\/github.com\/nikos912000\/parallel-pagerank."},{"key":"e_1_3_2_1_5_1","unstructured":"[n.d.]. stress. https:\/\/packages.debian.org\/buster\/stress"},{"key":"e_1_3_2_1_6_1","volume-title":"[n.d.]. Compilers: Principles, Techniques, and Tools","author":"Aho Alfred V.","unstructured":"Alfred V. Aho, Monica S. Lam, Ravi Sethi, and Jeffrey D. Ullman. [n.d.]. Compilers: Principles, Techniques, and Tools (2nd Edition). Addison-Wesley Longman Publishing Co., Inc.","edition":"2"},{"key":"e_1_3_2_1_7_1","volume-title":"21st IEEE International Parallel & Distributed Processing Symposium (IPDPS'07)","author":"Arnold Dorian C","year":"2007","unstructured":"Dorian C Arnold, Dong H Ahn, BR De Supinski, Gregory Lee, BP Miller, and Martin Schulz. 2007. Stack trace analysis for large scale applications. In 21st IEEE International Parallel & Distributed Processing Symposium (IPDPS'07), Long Beach, CA."},{"key":"e_1_3_2_1_8_1","unstructured":"Mona Attariyan Michael Chow and Jason Flinn. 2012. X-ray: Automating root-cause diagnosis of performance anomalies in production software. In Presented as part of the 10th USENIX Symposium on Operating Systems Design and Implementation (OSDI'12). 307--320."},{"key":"e_1_3_2_1_9_1","unstructured":"D. Bailey T. Harris W. Saphir R. V. D. Wijngaart A. Woo and M. Yarrow. 1995. The NAS Parallel Benchmarks 2.0. NAS Systems Division NASA Ames Research Center Moffett Field CA."},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1145\/2018436.2018465"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/CLUSTR.2006.311846"},{"volume-title":"Grouping Multidimensional Data: Recent Advances in Clustering","author":"Berkhin P.","key":"e_1_3_2_1_12_1","unstructured":"P. Berkhin. 2006. A Survey of Clustering Data Mining Techniques. In Grouping Multidimensional Data: Recent Advances in Clustering, Jacob Kogan, Charles Nicholas, and Marc Teboulle (Eds.). Springer."},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1145\/2024569.2024572"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1145\/1454115.1454128"},{"key":"e_1_3_2_1_15_1","volume-title":"MRNet: A scalable infrastructure for the development of parallel tools and applications","author":"Brim Michael J","year":"2010","unstructured":"Michael J Brim, Luiz DeRose, Barton P Miller, Ramya Olichandran, and Philip C Roth. 2010. MRNet: A scalable infrastructure for the development of parallel tools and applications. Cray User Group (2010)."},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPDS.2018.2858800"},{"volume-title":"Slides from Linux Kongress","author":"De Melo Arnaldo Carvalho","key":"e_1_3_2_1_17_1","unstructured":"Arnaldo Carvalho De Melo. 2010. The new linux perf tools. In Slides from Linux Kongress, Vol. 18. 1--42."},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1145\/2371536.2371572"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1145\/2670979.2670987"},{"key":"e_1_3_2_1_20_1","volume-title":"The LINPACK benchmark: past, present and future. Concurrency and Computation: practice and experience 15, 9","author":"Dongarra Jack J","year":"2003","unstructured":"Jack J Dongarra, Piotr Luszczek, and Antoine Petitet. 2003. The LINPACK benchmark: past, present and future. Concurrency and Computation: practice and experience 15, 9 (2003), 803--820."},{"key":"e_1_3_2_1_21_1","volume-title":"Multicollinearity in regression analysis: the problem revisited. The Review of Economic and Statistics","author":"Farrar Donald E","year":"1967","unstructured":"Donald E Farrar and Robert R Glauber. 1967. Multicollinearity in regression analysis: the problem revisited. The Review of Economic and Statistics (1967), 92--107."},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1007\/s10586-011-0178-3"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1109\/SC.2014.85"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1145\/44483.44488"},{"key":"e_1_3_2_1_25_1","first-page":"23","article-title":"Fail-slow at scale: Evidence of hardware performance faults in large production systems","volume":"14","author":"Gunawi Haryadi S","year":"2018","unstructured":"Haryadi S Gunawi, Riza O Suminto, Russell Sears, Casey Golliher, Swaminathan Sundararaman, Xing Lin, Tim Emami, Weiguang Sheng, Nematollah Bidokhti, Caitie McCaffrey, et al. 2018. Fail-slow at scale: Evidence of hardware performance faults in large production systems. ACM Transactions on Storage (TOS) 14, 3 (2018), 23.","journal-title":"ACM Transactions on Storage (TOS)"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1109\/SC.2010.12"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1145\/177424.178042"},{"key":"e_1_3_2_1_28_1","unstructured":"Intel. 2018. Addressing Potential DGEMM\/HPL Perf Variability on 24-Core Intel Xeon Processor Scalable Family. White paper number 606269 revision 1.0."},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1175\/BAMS-D-13-00255.1"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPDS.2014.2314100"},{"key":"e_1_3_2_1_32_1","volume-title":"13th USENIX Symposium on Operating Systems Design and Implementation (OSDI'18)","author":"Maricq Aleksander","year":"2018","unstructured":"Aleksander Maricq, Dmitry Duplyakin, Ivo Jimenez, Carlos Maltzahn, Ryan Stutsman, and Robert Ricci. 2018. Taming performance variability. In 13th USENIX Symposium on Operating Systems Design and Implementation (OSDI'18). 409--425."},{"volume-title":"Memory Bandwidth: STREAM Benchmark Performance Results. https:\/\/www.cs.virginia.edu\/stream\/","year":"2018","key":"e_1_3_2_1_33_1","unstructured":"John. McCalpin. 2018. Memory Bandwidth: STREAM Benchmark Performance Results. https:\/\/www.cs.virginia.edu\/stream\/"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1109\/SC.2018.00021"},{"volume-title":"ACM SIGPLAN Notices (PLDI'14)","author":"Mitra Subrata","key":"e_1_3_2_1_35_1","unstructured":"Subrata Mitra, Ignacio Laguna, Dong H Ahn, Saurabh Bagchi, Martin Schulz, and Todd Gamblin. 2014. Accurate application progress analysis for large-scale parallel debugging. In ACM SIGPLAN Notices (PLDI'14), Vol. 49. ACM, 193--203."},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.5555\/3014904.3014949"},{"key":"e_1_3_2_1_37_1","volume-title":"2019 USENIX Annual Technical Conference (USENIX ATC'19)","author":"Panda Biswaranjan","year":"2019","unstructured":"Biswaranjan Panda, Deepthi Srinivasan, Huan Ke, Karan Gupta, Vinayak Khot, and Haryadi S Gunawi. 2019. IASO: a fail-slow detection and mitigation framework for distributed storage services. In 2019 USENIX Annual Technical Conference (USENIX ATC'19). 47--62."},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1145\/1048935.1050204"},{"key":"e_1_3_2_1_39_1","volume-title":"Proceedings of the 2007 joint conference on empirical methods in natural language processing and computational natural language learning (EMNLP-CoNLL). 410--420","author":"Rosenberg Andrew","year":"2007","unstructured":"Andrew Rosenberg and Julia Hirschberg. 2007. V-measure: A conditional entropy-based external cluster evaluation measure. In Proceedings of the 2007 joint conference on empirical methods in natural language processing and computational natural language learning (EMNLP-CoNLL). 410--420."},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1145\/2451116.2451131"},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.14778\/1920841.1920902"},{"key":"e_1_3_2_1_42_1","volume-title":"4th USENIX Workshop on Hot Topics in Cloud Computing (HotCloud'12)","author":"Schwarzkopf Malte","year":"2012","unstructured":"Malte Schwarzkopf, Derek G Murray, and Steven Hand. 2012. The seven deadly sins of cloud computing research. In 4th USENIX Workshop on Hot Topics in Cloud Computing (HotCloud'12)."},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-96983-1_4"},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1109\/PACT.2001.953283"},{"key":"e_1_3_2_1_45_1","volume-title":"ACM SIGARCH Computer Architecture News","volume":"31","author":"Sherwood Timothy","year":"2003","unstructured":"Timothy Sherwood, Suleyman Sair, and Brad Calder. 2003. Phase tracking and prediction. In ACM SIGARCH Computer Architecture News, Vol. 31. ACM, 336--349."},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.1093\/bioinformatics\/btl446"},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.1145\/3295500.3356167"},{"key":"e_1_3_2_1_48_1","volume-title":"Proceedings of the 23rd ACM SIGPLAN symposium on principles and practice of parallel programming (PPoPP'18)","author":"Tang Xiongchao","year":"2018","unstructured":"Xiongchao Tang, Jidong Zhai, Xuehai Qian, Bingsheng He, Wei Xue, and Wenguang Chen. 2018. vSensor: leveraging fixed-workload snippets of programs for performance variance detection. In Proceedings of the 23rd ACM SIGPLAN symposium on principles and practice of parallel programming (PPoPP'18). 124--136."},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"publisher","DOI":"10.1145\/511399.511364"},{"key":"e_1_3_2_1_50_1","unstructured":"Jeffrey Vetter and Chris Chambreau. 2005. mpip: Lightweight scalable mpi profiling. (2005)."},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISPASS.2013.6557172"},{"key":"e_1_3_2_1_52_1","volume-title":"Performance measurement and analysis of large-scale parallel applications on leadership computing systems. Scientific programming 16, 2--3 (April","author":"Wylie Brian J. N.","year":"2008","unstructured":"Brian J. N. Wylie, Markus Geimer, and Felix Wolf. 2008. Performance measurement and analysis of large-scale parallel applications on leadership computing systems. Scientific programming 16, 2--3 (April 2008), 167--181."},{"key":"e_1_3_2_1_53_1","doi-asserted-by":"publisher","DOI":"10.1016\/S0168-9274(01)00115-5"},{"key":"e_1_3_2_1_54_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISPASS.2014.6844459"},{"key":"e_1_3_2_1_55_1","volume-title":"Large-Scale Automatic K-Means Clustering for Heterogeneous Many-Core Supercomputer","author":"Yu Teng","year":"2019","unstructured":"Teng Yu, Wenlai Zhao, Pan Liu, Vladimir Janjic, Xiaohan Yan, Shicai Wang, Haohuan Fu, Guangwen Yang, and John Thomson. 2019. Large-Scale Automatic K-Means Clustering for Heterogeneous Many-Core Supercomputer. IEEE Transactions on Parallel and Distributed Systems (TPDS) (2019)."},{"key":"e_1_3_2_1_56_1","doi-asserted-by":"publisher","DOI":"10.1109\/SC.2014.17"},{"key":"e_1_3_2_1_57_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPDS.2016.2613524"}],"event":{"name":"PPoPP '22: 27th ACM SIGPLAN Symposium on Principles and Practice of Parallel Programming","sponsor":["SIGPLAN ACM Special Interest Group on Programming Languages","SIGHPC ACM Special Interest Group on High Performance Computing, Special Interest Group on High Performance Computing"],"location":"Seoul Republic of Korea","acronym":"PPoPP '22"},"container-title":["Proceedings of the 27th ACM SIGPLAN Symposium on Principles and Practice of Parallel Programming"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3503221.3508411","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3503221.3508411","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T19:00:49Z","timestamp":1750186849000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3503221.3508411"}},"subtitle":["performance variance detection and diagnosis for production-run parallel applications"],"short-title":[],"issued":{"date-parts":[[2022,3,28]]},"references-count":56,"alternative-id":["10.1145\/3503221.3508411","10.1145\/3503221"],"URL":"https:\/\/doi.org\/10.1145\/3503221.3508411","relation":{},"subject":[],"published":{"date-parts":[[2022,3,28]]},"assertion":[{"value":"2022-03-28","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}