{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,28]],"date-time":"2026-02-28T21:14:45Z","timestamp":1772313285963,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":37,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,11,30]],"date-time":"2023-11-30T00:00:00Z","timestamp":1701302400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"MCIN\/AEI\/10.13039\/501100011033, ERDF A way of making Europe","award":["PID2022-137846NB-I00"],"award-info":[{"award-number":["PID2022-137846NB-I00"]}]},{"name":"NSF","award":["2019239"],"award-info":[{"award-number":["2019239"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,11,30]]},"DOI":"10.1145\/3611643.3616320","type":"proceedings-article","created":{"date-parts":[[2023,11,30]],"date-time":"2023-11-30T23:14:38Z","timestamp":1701386078000},"page":"528-540","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":3,"title":["Pitfalls in Experiments with DNN4SE: An Analysis of the State of the Practice"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-8535-9386","authenticated-orcid":false,"given":"Sira","family":"Vegas","sequence":"first","affiliation":[{"name":"Universidad Polit\u00e9cnica de Madrid, Madrid, Spain"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9592-1352","authenticated-orcid":false,"given":"Sebastian","family":"Elbaum","sequence":"additional","affiliation":[{"name":"University of Virginia, Charlottesville, USA"}]}],"member":"320","published-online":{"date-parts":[[2023,11,30]]},"reference":[{"key":"e_1_3_2_2_1_1","volume-title":"The 37th AAAI Conference on Artificial Intelligence Reproducibility Checklist. accessed","year":"2022","unstructured":"2022. The 37th AAAI Conference on Artificial Intelligence Reproducibility Checklist. accessed August 26, 2022."},{"key":"e_1_3_2_2_2_1","doi-asserted-by":"publisher","DOI":"10.1038\/nmeth.3224"},{"key":"e_1_3_2_2_3_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.infsof.2018.10.006"},{"key":"e_1_3_2_2_4_1","doi-asserted-by":"publisher","DOI":"10.1145\/1985793.1985795"},{"key":"e_1_3_2_2_5_1","volume-title":"Proceedings of Machine Learning and Systems. 747\u2013769","author":"Bouthillier Xavier","year":"2021","unstructured":"Xavier Bouthillier, Pierre Delaunay, Mirko Bronzi, Assya Trofimov, Brennan Nichyporuk, Justin Szeto, Nazanin Mohammadi Sepahvand, Edward Raff, Kanika Madan, Vikram Voleti, Samira Ebrahimi Kahou, Vincent Michalski, Tal Arbel, Chris Pal, Gael Varoquaux, and Pascal Vincent. 2021. Accounting for Variance in Machine Learning Benchmarks. In Proceedings of Machine Learning and Systems. 747\u2013769."},{"key":"e_1_3_2_2_6_1","unstructured":"Prem Devanbu Matthew Dwyer Sebastian Elbaum Michael Lowry Kevin Moran Denys Poshyvanyk Baishakhi Ray Rishabh Singh and Xiangyu Zhang. 2020. Deep Learning & Software Engineering: State of Research and Future Directions. arXiv:2009.08525."},{"key":"e_1_3_2_2_7_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.infsof.2005.08.009"},{"key":"e_1_3_2_2_8_1","unstructured":"Association for Computing Machinery. 2020. Artifact Review and Badging. https:\/\/www.acm.org\/publications\/policies\/artifact-review-and-badging-current"},{"key":"e_1_3_2_2_9_1","volume-title":"Proceedings of the 25th European Symposium on Artificial Neural Networks.","author":"Gallicchio Claudio","year":"2017","unstructured":"Claudio Gallicchio, Jos\u00e9 Mart\u00edn-Guerrero, Alessio Micheli, and Emilio Olivas. 2017. Randomized Machine Learning Approaches: Recent Developments and Challenges. In Proceedings of the 25th European Symposium on Artificial Neural Networks."},{"key":"e_1_3_2_2_10_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.infsof.2014.04.004"},{"key":"e_1_3_2_2_11_1","volume-title":"Deep Learning","author":"Goodfellow Ian","unstructured":"Ian Goodfellow, Yoshua Bengio, and Aaron Courville. 2016. Deep Learning. The MIT Press."},{"key":"e_1_3_2_2_12_1","unstructured":"Odd Erik Gundersen Kevin Coakley Christine Kirkpatrick and Yolanda Gil. 2023. Sources of Irreproducibility in Machine Learning: A Review. arXiv:2204.07610v2."},{"key":"e_1_3_2_2_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/TSE.2007.12"},{"key":"e_1_3_2_2_14_1","volume-title":"Guide to Advanced Empirical Software Engineering, Forrest Shull, Janice Singer, and Dag I","author":"Jedlitschka Andreas","unstructured":"Andreas Jedlitschka, Marcus Ciolkowski, and Dietmar Pfahl. 2008. Reporting Experiments in Software Engineering. In Guide to Advanced Empirical Software Engineering, Forrest Shull, Janice Singer, and Dag I.K. Sj\u00f8berg (Eds.). Springer, 201\u2013228."},{"key":"e_1_3_2_2_15_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.jss.2015.03.065"},{"key":"e_1_3_2_2_16_1","volume-title":"Basics of software engineering experimentation","author":"Juristo Natalia","unstructured":"Natalia Juristo and Ana M Moreno. 2011. Basics of software engineering experimentation. Springer Science & Business Media."},{"key":"e_1_3_2_2_17_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.infsof.2007.02.015"},{"key":"e_1_3_2_2_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/TSE.2022.3174092"},{"key":"e_1_3_2_2_19_1","unstructured":"Michael A. Lones. 2023. How to avoid machine learning pitfalls: a guide for academic researchers. arXiv:2108.02497v3."},{"key":"e_1_3_2_2_20_1","volume-title":"The Machine Learning Reproducibility Checklist v2.0. accessed","year":"2022","unstructured":"2020. The Machine Learning Reproducibility Checklist v2.0. accessed August 26, 2022."},{"key":"e_1_3_2_2_21_1","volume-title":"Machine Learning","author":"Mitchell Tom","unstructured":"Tom Mitchell. 2019. Machine Learning. McGraw-Hill Education."},{"key":"e_1_3_2_2_22_1","volume-title":"Design and Analysis of Experiments","author":"Montgomery Douglas C.","unstructured":"Douglas C. Montgomery. 2019. Design and Analysis of Experiments. John Wiley & Sons Inc."},{"key":"e_1_3_2_2_23_1","doi-asserted-by":"publisher","DOI":"10.1145\/2491411.2491415"},{"key":"e_1_3_2_2_24_1","volume-title":"The 36th Conference on Neural Information Processing Systems PaperChecklist Guidelines. accessed","year":"2022","unstructured":"2022. The 36th Conference on Neural Information Processing Systems PaperChecklist Guidelines. accessed August 26, 2022."},{"key":"e_1_3_2_2_25_1","doi-asserted-by":"publisher","DOI":"10.1145\/3324884.3416545"},{"key":"e_1_3_2_2_26_1","unstructured":"Paul Ralph Nauman bin Ali Sebastian Baltes Domenico Bianculli Jessica Diaz Yvonne Dittrich Neil Ernst Michael Felderer Robert Feldt Antonio Filieri Breno Bernard Nicolau de Fran\u00e7a Carlo Alberto Furia Greg Gay Nicolas Gold Daniel Graziotin Pinjia He Rashina Hoda Natalia Juristo Barbara Kitchenham Valentina Lenarduzzi Jorge Mart\u00ednez Jorge Melegati Daniel Mendez Tim Menzies Jefferson Molleri Dietmar Pfahl Romain Robbes Daniel Russo Nyyti Saarim\u00e4ki Federica Sarro Davide Taibi Janet Siegmund Diomidis Spinellis Miroslaw Staron Klaas Stol Margaret-Anne Storey Damian Tamburri Marco Torchiano Christoph Treude Burak Turhan Xiaofeng Wang and Sira Vegas. 2021. Empirical Standards for Software Engineering Research. arXiv:2010.03525v2."},{"key":"e_1_3_2_2_27_1","doi-asserted-by":"publisher","DOI":"10.1145\/3180155.3180161"},{"key":"e_1_3_2_2_28_1","volume-title":"Some studies in machine learning using the game of checkers. IBM Journal of research and development, 3, 3","author":"Samuel Arthur","year":"1959","unstructured":"Arthur Samuel. 1959. Some studies in machine learning using the game of checkers. IBM Journal of research and development, 3, 3 (1959), 210\u2013229."},{"key":"e_1_3_2_2_29_1","volume-title":"Campbell","author":"Shadish William R.","year":"2002","unstructured":"William R. Shadish, Thomas D. Cook, and Donald T. Campbell. 2002. Experimental and Quasi-Experimental Designs for Generalized Causal Inference. Wadsworth, Cengage Learning."},{"key":"e_1_3_2_2_30_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-33607-3_12"},{"key":"e_1_3_2_2_31_1","doi-asserted-by":"publisher","DOI":"10.1109\/TSE.2022.3176725"},{"key":"e_1_3_2_2_32_1","volume-title":"Proceedings of the 38th International Conference on Machine Learning. 9913\u20139922","author":"Summers Cecilia","unstructured":"Cecilia Summers and Michael J. Dinneen. 2021. Nondeterminism and Instability in Neural Network Optimization. In Proceedings of the 38th International Conference on Machine Learning. 9913\u20139922."},{"key":"e_1_3_2_2_33_1","doi-asserted-by":"publisher","DOI":"10.1007\/s10664-021-09973-5"},{"key":"e_1_3_2_2_34_1","doi-asserted-by":"publisher","unstructured":"Sira Vegas and Sebastian Elbaum. 2023. Badge Artifact for the paper Pitfalls in Experiments with DNN4SE: An Analysis of the State of the Practice. https:\/\/doi.org\/10.5281\/zenodo.10075778 10.5281\/zenodo.10075778","DOI":"10.5281\/zenodo.10075778"},{"key":"e_1_3_2_2_35_1","doi-asserted-by":"crossref","unstructured":"Sira Vegas and Sebastian Elbaum. 2023. Repository for the Paper Pitfalls in Experiments with DNN4SE: An Analysis of the State of the Practice. https:\/\/github.com\/GRISE-UPM\/Pitfalls_Experiments_DNN4SE","DOI":"10.1145\/3611643.3616320"},{"key":"e_1_3_2_2_36_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4615-4625-2"},{"key":"e_1_3_2_2_37_1","volume-title":"Proceedings of the 5th Conference on Machine Learning and Systems.","author":"Zhuang Donglin","year":"2022","unstructured":"Donglin Zhuang, Xingyao Zhang, Shuaiwen Leon Song, and Sara Hooker. 2022. Randomness In Neural Network Training: Characterizing The Impact of Tooling. In Proceedings of the 5th Conference on Machine Learning and Systems."}],"event":{"name":"ESEC\/FSE '23: 31st ACM Joint European Software Engineering Conference and Symposium on the Foundations of Software Engineering","location":"San Francisco CA USA","acronym":"ESEC\/FSE '23","sponsor":["SIGSOFT ACM Special Interest Group on Software Engineering"]},"container-title":["Proceedings of the 31st ACM Joint European Software Engineering Conference and Symposium on the Foundations of Software Engineering"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3611643.3616320","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3611643.3616320","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T16:36:11Z","timestamp":1750178171000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3611643.3616320"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,11,30]]},"references-count":37,"alternative-id":["10.1145\/3611643.3616320","10.1145\/3611643"],"URL":"https:\/\/doi.org\/10.1145\/3611643.3616320","relation":{},"subject":[],"published":{"date-parts":[[2023,11,30]]},"assertion":[{"value":"2023-11-30","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}