{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,10]],"date-time":"2026-01-10T00:46:20Z","timestamp":1768005980060,"version":"3.49.0"},"publisher-location":"New York, NY, USA","reference-count":55,"publisher":"ACM","license":[{"start":{"date-parts":[[2025,10,21]],"date-time":"2025-10-21T00:00:00Z","timestamp":1761004800000},"content-version":"vor","delay-in-days":84,"URL":"http:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/100000001","name":"National Science Foundation","doi-asserted-by":"publisher","award":["2330961"],"award-info":[{"award-number":["2330961"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,7,29]]},"DOI":"10.1145\/3736731.3746158","type":"proceedings-article","created":{"date-parts":[[2025,10,21]],"date-time":"2025-10-21T11:58:19Z","timestamp":1761047899000},"page":"143-156","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Helical: A High Level Language Framework for Specifying Hypotheses and Experiments"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-2333-8034","authenticated-orcid":false,"given":"Emma","family":"Tosch","sequence":"first","affiliation":[{"name":"College of Arts, Media, and Design, Northeastern University, Boston, MA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-8694-3422","authenticated-orcid":false,"given":"Gwen","family":"Lincroft","sequence":"additional","affiliation":[{"name":"Khoury College of Computing, Northeastern University, Boston, MA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2025,10,21]]},"reference":[{"key":"e_1_3_3_2_2_2","doi-asserted-by":"publisher","DOI":"10.1145\/3641525.3663648"},{"key":"e_1_3_3_2_3_2","doi-asserted-by":"publisher","DOI":"10.1145\/1645953.1646031"},{"key":"e_1_3_3_2_4_2","volume-title":"2018 NeurIPS Workshop on Systems for ML","author":"Bakshy Eytan","year":"2018","unstructured":"Eytan Bakshy, Lili Dworkin, Brian Karrer, Konstantin Kashin, Benjamin Letham, Ashwin Murthy, and Shaun Singh. 2018. AE: A domain-agnostic platform for adaptive experimentation. In 2018 NeurIPS Workshop on Systems for ML."},{"key":"e_1_3_3_2_5_2","doi-asserted-by":"publisher","DOI":"10.1145\/2566486.2567967"},{"key":"e_1_3_3_2_6_2","unstructured":"Maximilian Balandat Brian Karrer Daniel Jiang Samuel Daulton Ben Letham Andrew\u00a0G Wilson and Eytan Bakshy. 2020. BoTorch: a framework for efficient Monte-Carlo Bayesian optimization. Advances in neural information processing systems 33 (2020) 21524\u201321538."},{"key":"e_1_3_3_2_7_2","unstructured":"Daniel Barowy Charlie Curtsinger Emma Tosch John Vilk and Emery Berger. 2015. HOWTO for AEC Submitters. (2015)."},{"key":"e_1_3_3_2_8_2","doi-asserted-by":"crossref","unstructured":"Emery\u00a0D Berger Celeste Hollenbeck Petr Maj Olga Vitek and Jan Vitek. 2019. On the impact of programming languages on code quality: A reproduction study. ACM Transactions on Programming Languages and Systems (TOPLAS) 41 4 (2019) 1\u201324.","DOI":"10.1145\/3340571"},{"key":"e_1_3_3_2_9_2","doi-asserted-by":"publisher","DOI":"10.1145\/3641525.3663617"},{"key":"e_1_3_3_2_10_2","doi-asserted-by":"crossref","unstructured":"Charlie Curtsinger and Emery\u00a0D Berger. 2013. Stabilizer: Statistically sound performance evaluation. ACM SIGARCH Computer Architecture News 41 1 (2013) 219\u2013228.","DOI":"10.1145\/2490301.2451141"},{"key":"e_1_3_3_2_11_2","volume-title":"Titanic: Triumph and tragedy","author":"Eaton John\u00a0P","year":"1995","unstructured":"John\u00a0P Eaton and Charles Haas. 1995. Titanic: Triumph and tragedy. WW Norton & Company."},{"key":"e_1_3_3_2_12_2","doi-asserted-by":"crossref","unstructured":"Frederick Eberhardt. 2016. Green and grue causal variables. Synthese 193 4 (2016) 1029\u20131046.","DOI":"10.1007\/s11229-015-0832-z"},{"key":"e_1_3_3_2_13_2","unstructured":"Logan Engstrom and Andrew Ilyas. 2019. Cox: A Lightweight Experimental Design Library. (2019). https:\/\/github.com\/MadryLab\/cox"},{"key":"e_1_3_3_2_14_2","volume-title":"International Conference on Learning Representations","author":"Engstrom Logan","year":"2020","unstructured":"Logan Engstrom, Andrew Ilyas, Shibani Santurkar, Dimitris Tsipras, Firdaus Janoos, Larry Rudolph, and Aleksander Madry. 2020. Implementation Matters in Deep Policy Gradients: A Case Study on PPO and TRPO. In International Conference on Learning Representations."},{"key":"e_1_3_3_2_15_2","doi-asserted-by":"crossref","unstructured":"Carlo\u00a0A Furia Richard Torkar and Robert Feldt. 2023. Towards causal analysis of empirical software engineering data: The impact of programming languages on coding competitions. ACM Transactions on Software Engineering and Methodology 33 1 (2023) 1\u201335.","DOI":"10.1145\/3611667"},{"key":"e_1_3_3_2_16_2","volume-title":"The Stanford Encyclopedia of Philosophy (Fall 2022 ed.)","author":"Gallow J.\u00a0Dmitri","year":"2022","unstructured":"J.\u00a0Dmitri Gallow. 2022. The Metaphysics of Causation. In The Stanford Encyclopedia of Philosophy (Fall 2022 ed.), Edward\u00a0N. Zalta and Uri Nodelman (Eds.). Metaphysics Research Lab, Stanford University."},{"key":"e_1_3_3_2_17_2","doi-asserted-by":"crossref","unstructured":"Benjamin Haibe-Kains George\u00a0Alexandru Adam Ahmed Hosny Farnoosh Khodakarami Levi Waldron Bo Wang Chris McIntosh Anna Goldenberg Anshul Kundaje Casey\u00a0S Greene et\u00a0al. 2020. Transparency and reproducibility in artificial intelligence. Nature 586 7829 (2020) E14\u2013E16.","DOI":"10.1038\/s41586-020-2766-y"},{"key":"e_1_3_3_2_18_2","unstructured":"Frank\u00a0E Harrell\u00a0Jr.2002. Titanic Data. https:\/\/hbiostat.org\/data\/repo\/titanic.html."},{"key":"e_1_3_3_2_19_2","volume-title":"AAAI Conference on Artificial Intelligence (AAAI)","author":"Henderson Peter","year":"2017","unstructured":"Peter Henderson, Riashat Islam, Philip Bachman, Joelle Pineau, Doina Precup, and David Meger. 2017. Deep Reinforcement Learning that Matters. In AAAI Conference on Artificial Intelligence (AAAI). arXiv preprint 1709.06560."},{"key":"e_1_3_3_2_20_2","doi-asserted-by":"crossref","unstructured":"John\u00a0L Henning. 2006. SPEC CPU2006 benchmark descriptions. ACM SIGARCH Computer Architecture News 34 4 (2006) 1\u201317.","DOI":"10.1145\/1186736.1186737"},{"key":"e_1_3_3_2_21_2","doi-asserted-by":"publisher","DOI":"10.1145\/3368089.3409767"},{"key":"e_1_3_3_2_22_2","doi-asserted-by":"crossref","unstructured":"John\u00a0PA Ioannidis. 2005. Why most published research findings are false. PLoS medicine 2 8 (2005) e124.","DOI":"10.1371\/journal.pmed.0020124"},{"key":"e_1_3_3_2_23_2","unstructured":"Ivo Jimenez Michael Sevilla Noah Watkins Carlos Maltzahn Jay Lofstead Kathryn Mohror Andrea Arpaci-Dusseau and Remzi Arpaci-Dusseau. 2016. Standing on the shoulders of giants by managing scientific experiments like software. USENIX; login 41 4 (2016) 20\u201326."},{"key":"e_1_3_3_2_24_2","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPSW.2017.157"},{"key":"e_1_3_3_2_25_2","doi-asserted-by":"publisher","DOI":"10.1145\/2635868.2635929"},{"key":"e_1_3_3_2_26_2","doi-asserted-by":"crossref","unstructured":"Ron Kohavi and Roger Longbotham. 2015. Online controlled experiments and A\/B tests. Encyclopedia of Machine Learning and Data Mining C. Sammut and G. Webb Eds.","DOI":"10.1007\/978-1-4899-7502-7_891-1"},{"key":"e_1_3_3_2_27_2","doi-asserted-by":"crossref","unstructured":"Ron Kohavi Roger Longbotham Dan Sommerfield and Randal\u00a0M Henne. 2009. Controlled experiments on the Web: survey and practical guide. Data Mining and Knowledge Discovery 18 1 (2009) 140\u2013181.","DOI":"10.1007\/s10618-008-0114-1"},{"key":"e_1_3_3_2_28_2","doi-asserted-by":"publisher","DOI":"10.5555\/1795555"},{"key":"e_1_3_3_2_29_2","doi-asserted-by":"crossref","unstructured":"Shriram Krishnamurthi. 2013. Artifact evaluation for software conferences. ACM SIGSOFT Software Engineering Notes 38 3 (2013) 7\u201310.","DOI":"10.1145\/2464526.2464530"},{"key":"e_1_3_3_2_30_2","doi-asserted-by":"crossref","unstructured":"Shriram Krishnamurthi and Jan Vitek. 2015. The real software crisis: Repeatability as a core value. Commun. ACM 58 3 (2015) 34\u201336.","DOI":"10.1145\/2658987"},{"key":"e_1_3_3_2_31_2","doi-asserted-by":"crossref","unstructured":"Jennifer McDonald. 2025. Causal models and causal relativism. Synthese 205 3 (2025) 108.","DOI":"10.1007\/s11229-024-04893-5"},{"key":"e_1_3_3_2_32_2","doi-asserted-by":"crossref","unstructured":"Daniel M\u00e9ndez\u00a0Fern\u00e1ndez Wolfgang B\u00f6hm Andreas Vogelsang Jakob Mund Manfred Broy Marco Kuhrmann and Thorsten Weyer. 2019. Artefacts in software engineering: a fundamental positioning. Software & Systems Modeling 18 (2019) 2777\u20132786.","DOI":"10.1007\/s10270-019-00714-3"},{"key":"e_1_3_3_2_33_2","doi-asserted-by":"publisher","DOI":"10.1145\/2384592.2384597"},{"key":"e_1_3_3_2_34_2","first-page":"1073","volume-title":"Advances in Neural Information Processing Systems","author":"Minka Tom","year":"2009","unstructured":"Tom Minka and John Winn. 2009. Gates. In Advances in Neural Information Processing Systems. 1073\u20131080."},{"key":"e_1_3_3_2_35_2","doi-asserted-by":"crossref","unstructured":"Todd Mytkowicz Amer Diwan Matthias Hauswirth and Peter\u00a0F Sweeney. 2009. Producing wrong data without doing anything obviously wrong! ACM Sigplan Notices 44 3 (2009) 265\u2013276.","DOI":"10.1145\/1508284.1508275"},{"key":"e_1_3_3_2_36_2","volume-title":"CogSci","author":"Ouyang Long","year":"2018","unstructured":"Long Ouyang, Michael\u00a0Henry Tessler, Daniel Ly, and Noah\u00a0D Goodman. 2018. webppl-oed: A practical optimal experiment design system.. In CogSci."},{"key":"e_1_3_3_2_37_2","unstructured":"Rohan Padhye. 2019. Artifact Evaluation: Tips for Authors. (2019)."},{"key":"e_1_3_3_2_38_2","doi-asserted-by":"publisher","DOI":"10.1017\/CBO9780511803161"},{"key":"e_1_3_3_2_39_2","doi-asserted-by":"publisher","DOI":"10.1145\/2635868.2635922"},{"key":"e_1_3_3_2_40_2","doi-asserted-by":"crossref","unstructured":"Donald\u00a0B Rubin. 2007. The design versus the analysis of observational studies for causal effects: parallels with the design of randomized trials. Statistics in medicine 26 1 (2007) 20\u201336.","DOI":"10.1002\/sim.2739"},{"key":"e_1_3_3_2_41_2","doi-asserted-by":"publisher","DOI":"10.1145\/3501714.3501755"},{"key":"e_1_3_3_2_42_2","volume-title":"Experimental and quasi-experimental designs for generalized causal inference","author":"Shadish William\u00a0R.","year":"2002","unstructured":"William\u00a0R. Shadish, Thomas\u00a0D. Cook, and Donald\u00a0T. Campbell. 2002. Experimental and quasi-experimental designs for generalized causal inference. Houghton Mifflin Company, Boston, MA, USA."},{"key":"e_1_3_3_2_43_2","volume-title":"3X: A Data Management System for Computational Experiments (Demonstration Proposal)","author":"Shin Jaeho","year":"2013","unstructured":"Jaeho Shin, Andreas Paepcke, and Jennifer Widom. 2013. 3X: A Data Management System for Computational Experiments (Demonstration Proposal). Technical Report. Stanford University. http:\/\/ilpubs.stanford.edu:8090\/1080\/"},{"key":"e_1_3_3_2_44_2","doi-asserted-by":"crossref","unstructured":"Julien Siebert. 2023. Applications of statistical causal inference in software engineering. Information and Software Technology 159 (2023) 107198.","DOI":"10.1016\/j.infsof.2023.107198"},{"key":"e_1_3_3_2_45_2","doi-asserted-by":"publisher","unstructured":"Byran Smucker Martin Krzywinski and Naomi Altman. 2018. Optimal experimental design. Nature Methods 15 8 (2018) 559\u2013560. 10.1038\/s41592-018-0083-2","DOI":"10.1038\/s41592-018-0083-2"},{"key":"e_1_3_3_2_46_2","unstructured":"Peter Spirtes. 2009. Variable definition and causal inference. (2009)."},{"key":"e_1_3_3_2_47_2","first-page":"197","volume-title":"Joel on Software: And on Diverse and Occasionally Related Matters That Will Prove of Interest to Software Developers, Designers, and Managers, and to Those Who, Whether by Good Fortune or Ill Luck, Work with Them in Some Capacity","author":"Spolsky Joel","year":"2002","unstructured":"Joel Spolsky. 2002. The law of leaky abstractions. In Joel on Software: And on Diverse and Occasionally Related Matters That Will Prove of Interest to Software Developers, Designers, and Managers, and to Those Who, Whether by Good Fortune or Ill Luck, Work with Them in Some Capacity. Springer, 197\u2013202."},{"key":"e_1_3_3_2_48_2","doi-asserted-by":"publisher","DOI":"10.1145\/2815400.2815401"},{"key":"e_1_3_3_2_49_2","doi-asserted-by":"publisher","DOI":"10.1145\/1835804.1835810"},{"key":"e_1_3_3_2_50_2","unstructured":"Johannes Textor Benito van\u00a0der Zander Mark\u00a0K. Gilthorpe Maciej Liskiewicz and George\u00a0T.H. Ellison. 2016. Robust causal inference using directed acyclic graphs: the R package \u2019dagitty\u2019. International Journal of Epidemiology 45 (2016) 1887\u20131894. Issue 6."},{"key":"e_1_3_3_2_51_2","doi-asserted-by":"crossref","unstructured":"Emma Tosch Eytan Bakshy Emery\u00a0D Berger David\u00a0D Jensen and J\u00a0Eliot\u00a0B Moss. 2019. PlanAlyzer: Assessing threats to the validity of online experiments. Proceedings of the ACM on Programming Languages 3 OOPSLA (2019) 1\u201330.","DOI":"10.1145\/3360608"},{"key":"e_1_3_3_2_52_2","volume-title":"Proceedings of the Sixth Conference on Uncertainty in Artificial Intelligence, 1991","author":"Verma Thomas","year":"1991","unstructured":"Thomas Verma. 1991. Equivalence and Synthesis of Causal Models. In Proceedings of the Sixth Conference on Uncertainty in Artificial Intelligence, 1991. Elsevier."},{"key":"e_1_3_3_2_53_2","doi-asserted-by":"publisher","DOI":"10.1145\/2576195.2576216"},{"key":"e_1_3_3_2_54_2","volume-title":"Making things happen: A theory of causal explanation","author":"Woodward James","year":"2005","unstructured":"James Woodward. 2005. Making things happen: A theory of causal explanation. Oxford university press."},{"key":"e_1_3_3_2_55_2","doi-asserted-by":"crossref","unstructured":"James Woodward. 2016. The problem of variable choice. Synthese 193 (2016) 1047\u20131072.","DOI":"10.1007\/s11229-015-0810-5"},{"key":"e_1_3_3_2_56_2","unstructured":"Sewall Wright. 1921. Correlation and causation. Journal of agricultural research 20 7 (1921) 557\u2013585."}],"event":{"name":"ACM REP '25: ACM Conference on Reproducibility and Replicability","location":"Vancouver Canada","acronym":"ACM REP '25","sponsor":["EIGREP Emerging Interest Group on Reproducibility and Replicability"]},"container-title":["Proceedings of the 3rd ACM Conference on Reproducibility and Replicability"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3736731.3746158","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3736731.3746158","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,1,9]],"date-time":"2026-01-09T18:04:23Z","timestamp":1767981863000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3736731.3746158"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,7,29]]},"references-count":55,"alternative-id":["10.1145\/3736731.3746158","10.1145\/3736731"],"URL":"https:\/\/doi.org\/10.1145\/3736731.3746158","relation":{},"subject":[],"published":{"date-parts":[[2025,7,29]]},"assertion":[{"value":"2025-10-21","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}