{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,10]],"date-time":"2026-01-10T00:39:37Z","timestamp":1768005577463,"version":"3.49.0"},"publisher-location":"New York, NY, USA","reference-count":45,"publisher":"ACM","license":[{"start":{"date-parts":[[2026,10,21]],"date-time":"2026-10-21T00:00:00Z","timestamp":1792540800000},"content-version":"vor","delay-in-days":449,"URL":"http:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"U.S. National Science Foundation","award":["2516579"],"award-info":[{"award-number":["2516579"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,7,29]]},"DOI":"10.1145\/3736731.3746159","type":"proceedings-article","created":{"date-parts":[[2025,10,21]],"date-time":"2025-10-21T11:58:19Z","timestamp":1761047899000},"page":"157-167","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Similarity-Based Assessment of Computational Reproducibility in Jupyter Notebooks"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0008-3229-8821","authenticated-orcid":false,"given":"A S M Shahadat","family":"Hossain","sequence":"first","affiliation":[{"name":"University of Missouri, Columbia, Missouri, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2214-2726","authenticated-orcid":false,"given":"Colin","family":"Brown","sequence":"additional","affiliation":[{"name":"Northern Illinois University, DeKalb, Illinois, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4422-6162","authenticated-orcid":false,"given":"David","family":"Koop","sequence":"additional","affiliation":[{"name":"Northern Illinois University, DeKalb, Illinois, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-9656-727X","authenticated-orcid":false,"given":"Tanu","family":"Malik","sequence":"additional","affiliation":[{"name":"University of Missouri, Columbia, Missouri, USA"}]}],"member":"320","published-online":{"date-parts":[[2025,10,21]]},"reference":[{"key":"e_1_3_3_1_2_2","doi-asserted-by":"publisher","DOI":"10.1109\/eScience55777.2022.00015"},{"key":"e_1_3_3_1_3_2","doi-asserted-by":"publisher","DOI":"10.1109\/eScience55777.2022.00079"},{"key":"e_1_3_3_1_4_2","doi-asserted-by":"crossref","unstructured":"Marijan Beg Juliette Taka Thomas Kluyver Alexander Konovalov Min Ragan-Kelley Nicolas\u00a0M Thi\u00e9ry and Hans Fangohr. 2021. Using Jupyter for Reproducible Scientific Workflows. Comput. Sci. Eng. 23 2 (2021) 36\u201346.","DOI":"10.1109\/MCSE.2021.3052101"},{"key":"e_1_3_3_1_5_2","unstructured":"Anya Belz. 2021. Quantifying Reproducibility in NLP and ML. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2109.01211 (Sept. 2021). arxiv:https:\/\/arXiv.org\/abs\/2109.01211\u00a0[cs.CL]"},{"key":"e_1_3_3_1_6_2","doi-asserted-by":"publisher","DOI":"10.1109\/VL\/HCC51201.2021.9576363"},{"key":"e_1_3_3_1_7_2","unstructured":"Morakot Choetkiertikul Apirak Hoonlor Chaiyong Ragkhitwetsagul Siripen Pongpaichet Thanwadee Sunetnanta Tasha Settewong Vacharavich Jiravatvanich and Urisayar Kaewpichai. 2023. Mining the Characteristics of Jupyter Notebooks in Data Science Projects. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2304.05325 (April 2023). arxiv:https:\/\/arXiv.org\/abs\/2304.05325\u00a0[cs.SE]"},{"key":"e_1_3_3_1_8_2","unstructured":"Fran\u00e7ois Chollet et\u00a0al. 2015. Keras. https:\/\/keras.io."},{"key":"e_1_3_3_1_9_2","doi-asserted-by":"crossref","unstructured":"Jean-Daniel Fekete Juliana Freire and Theresa-Marie Rhyne. 2020. Exploring Reproducibility in Visualization. IEEE Comput. Graph. Appl. 40 5 (2020) 108\u2013119.","DOI":"10.1109\/MCG.2020.3006412"},{"key":"e_1_3_3_1_10_2","doi-asserted-by":"crossref","unstructured":"Ludmilla Figueiredo C\u00e9dric Scherer and Juliano\u00a0Sarmento Cabral. 2022. A Simple Kit to Use Computational Notebooks for More Openness Reproducibility and Productivity in Research. PLoS Comput. Biol. 18 9 (Sept. 2022) e1010356.","DOI":"10.1371\/journal.pcbi.1010356"},{"key":"e_1_3_3_1_11_2","doi-asserted-by":"crossref","unstructured":"Milo\u0161 Fi\u0161ar Ben Greiner Christoph Huber Elena Katok Ali\u00a0I Ozkes and Management Science Reproducibility Collaboration. 2024. Reproducibility in Management Science. Manage. Sci. 70 3 (March 2024) 1343\u20131356.","DOI":"10.1287\/mnsc.2023.03556"},{"key":"e_1_3_3_1_12_2","unstructured":"RA Fisher. 1936. Iris data set. UCI machine learning repository."},{"key":"e_1_3_3_1_13_2","doi-asserted-by":"publisher","DOI":"10.1145\/3524842.3528447"},{"key":"e_1_3_3_1_14_2","doi-asserted-by":"publisher","unstructured":"Charles\u00a0R. Harris K.\u00a0Jarrod Millman St\u00e9fan\u00a0J. van\u00a0der Walt Ralf Gommers Pauli Virtanen David Cournapeau Eric Wieser Julian Taylor Sebastian Berg Nathaniel\u00a0J. Smith Robert Kern Matti Picus Stephan Hoyer Marten\u00a0H. van Kerkwijk Matthew Brett Allan Haldane Jaime\u00a0Fern\u00e1ndez del R\u00edo Mark Wiebe Pearu Peterson Pierre G\u00e9rard-Marchant Kevin Sheppard Tyler Reddy Warren Weckesser Hameer Abbasi Christoph Gohlke and Travis\u00a0E. Oliphant. 2020. Array Programming with NumPy. Nature 585 7825 (Sept. 2020) 357\u2013362. 10.1038\/s41586-020-2649-2","DOI":"10.1038\/s41586-020-2649-2"},{"key":"e_1_3_3_1_15_2","unstructured":"IPyWidgets. 2024. IPyWidgets. https:\/\/github.com\/jupyter-widgets\/ipywidgets. Accessed: 2024-4-12."},{"key":"e_1_3_3_1_16_2","doi-asserted-by":"publisher","DOI":"10.1145\/3368308.3415397"},{"key":"e_1_3_3_1_17_2","unstructured":"Kaggle. 2024. Kaggle: Your Machine Learning and Data Science Community. https:\/\/www.kaggle.com\/. Accessed: 2024-4-12."},{"key":"e_1_3_3_1_18_2","doi-asserted-by":"crossref","unstructured":"Donald\u00a0Ervin Knuth. 1984. Literate Programming. Comput. J. 27 2 (Jan. 1984) 97\u2013111.","DOI":"10.1093\/comjnl\/27.2.97"},{"key":"e_1_3_3_1_19_2","doi-asserted-by":"crossref","unstructured":"Yann LeCun L\u00e9on Bottou Yoshua Bengio and Patrick Haffner. 1998. Gradient-based learning applied to document recognition. Proc. IEEE 86 11 (1998) 2278\u20132324.","DOI":"10.1109\/5.726791"},{"key":"e_1_3_3_1_20_2","unstructured":"Dirk Merkel and Others. 2014. Docker: Lightweight Linux Containers for Consistent Development and Deployment. Linux J. 239 2 (2014) 2."},{"key":"e_1_3_3_1_21_2","doi-asserted-by":"crossref","unstructured":"James\u00a0W Moody Lisa\u00a0A Keister and Maria\u00a0C Ramos. 2022. Reproducibility in the Social Sciences. Annu. Rev. Sociol. 48 1 (July 2022) 65\u201385.","DOI":"10.1146\/annurev-soc-090221-035954"},{"key":"e_1_3_3_1_22_2","volume-title":"Reproducibility and Replicability in Science","author":"Medicine National Academies of Sciences, Engineering, and","year":"2019","unstructured":"National Academies of Sciences, Engineering, and Medicine. 2019. Reproducibility and Replicability in Science. National Academies Press (US)."},{"key":"e_1_3_3_1_23_2","unstructured":"NumPy. 2024. numpy.set_printoptions. https:\/\/numpy.org\/doc\/stable\/reference\/generated\/numpy.set_printoptions.html. Accessed: 2024-03-26."},{"key":"e_1_3_3_1_24_2","doi-asserted-by":"crossref","unstructured":"Priti Oli Rabin Banjade Lasang\u00a0Jimba Tamang and Vasile Rus. 2021. Automated Assessment of Quality of Jupyter Notebooks Using Artificial Intelligence and Big Code. The International FLAIRS Conference Proceedings 34 1 (April 2021).","DOI":"10.32473\/flairs.v34i1.128560"},{"key":"e_1_3_3_1_25_2","unstructured":"F. Pedregosa G. Varoquaux A. Gramfort V. Michel B. Thirion O. Grisel M. Blondel P. Prettenhofer R. Weiss V. Dubourg J. Vanderplas A. Passos D. Cournapeau M. Brucher M. Perrot and E. Duchesnay. 2011. Scikit-learn: Machine Learning in Python. Journal of Machine Learning Research 12 (2011) 2825\u20132830."},{"key":"e_1_3_3_1_26_2","doi-asserted-by":"publisher","DOI":"10.1109\/MSR.2019.00077"},{"key":"e_1_3_3_1_27_2","doi-asserted-by":"crossref","unstructured":"Jo\u00e3o\u00a0Felipe Pimentel Leonardo Murta Vanessa Braganholo and Juliana Freire. 2021. Understanding and Improving the Quality and Reproducibility of Jupyter Notebooks. Empir. Softw. Eng. 26 4 (May 2021) 65.","DOI":"10.1007\/s10664-021-09961-9"},{"key":"e_1_3_3_1_28_2","first-page":"113","volume-title":"Proceedings of the 17th Python in Science Conference","author":"Ragan-Kelley Benjamin","year":"2018","unstructured":"Benjamin Ragan-Kelley, Carol Willing, F Akici, D Lippa, D Niederhut, and M Pacer. 2018. Binder 2.0-Reproducible, interactive, sharable environments for science at scale. In Proceedings of the 17th Python in Science Conference. F. Akici, D. Lippa, D. Niederhut, and M. Pacer, eds., 113\u2013120."},{"key":"e_1_3_3_1_29_2","unstructured":"Leonard Richardson. 2007. Beautiful Soup Documentation. (April 2007)."},{"key":"e_1_3_3_1_30_2","doi-asserted-by":"crossref","unstructured":"Francisco Rowe Gunther Maier Daniel Arribas-Bel and Sergio\u00a0J Rey. 2020. The Potential of Notebooks for Scientific Publication: Reproducibility and Dissemination. Reg. Anaesth. 7 3 (2020).","DOI":"10.18335\/region.v7i3.357"},{"key":"e_1_3_3_1_31_2","unstructured":"Adam Rule Amanda Birmingham Cristal Zuniga Ilkay Altintas Shih-Cheng Huang Rob Knight Niema Moshiri Mai\u00a0H Nguyen Sara\u00a0Brin Rosenthal Fernando P\u00e9rez and Peter\u00a0W Rose. 2018. Ten Simple Rules for Reproducible Research in Jupyter Notebooks. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1810.08055 (Oct. 2018). arxiv:https:\/\/arXiv.org\/abs\/1810.08055\u00a0[cs.OH]"},{"key":"e_1_3_3_1_32_2","doi-asserted-by":"publisher","DOI":"10.1145\/3173574.3173606"},{"key":"e_1_3_3_1_33_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-80960-7_12"},{"key":"e_1_3_3_1_34_2","doi-asserted-by":"crossref","unstructured":"Sheeba Samuel and Daniel Mietchen. 2024. Computational Reproducibility of Jupyter Notebooks from Biomedical Publications. GigaScience 13 (2024) giad113.","DOI":"10.1093\/gigascience\/giad113"},{"key":"e_1_3_3_1_35_2","unstructured":"Max Schr\u00f6der Frank Kr\u00fcger and Sascha Spors. 2019. Reproducible Research is More than Publishing Research Artefacts: A Systematic Analysis of Jupyter Notebooks from Research Articles. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1905.00092 (April 2019). arxiv:https:\/\/arXiv.org\/abs\/1905.00092\u00a0[cs.DL]"},{"key":"e_1_3_3_1_36_2","doi-asserted-by":"crossref","unstructured":"Michael\u00a0J Steventon Christopher A-L Jackson Matt Hall Mark\u00a0T Ireland Marcus Munafo and Kathryn\u00a0J Roberts. 2022. Reproducibility in Subsurface Geoscience. Earth Sci. Syst. Soc. 2 (Sept. 2022).","DOI":"10.3389\/esss.2022.10051"},{"key":"e_1_3_3_1_37_2","doi-asserted-by":"publisher","DOI":"10.5281\/zenodo.3509134"},{"key":"e_1_3_3_1_38_2","doi-asserted-by":"publisher","unstructured":"St\u00e9fan van\u00a0der Walt Johannes\u00a0L. Sch\u00f6nberger Juan Nunez-Iglesias Fran\u00e7ois Boulogne Joshua\u00a0D. Warner Neil Yager Emmanuelle Gouillart Tony Yu and the scikit-image contributors. 2014. scikit-image: image processing in Python. PeerJ 2 (6 2014) e453. 10.7717\/peerj.453","DOI":"10.7717\/peerj.453"},{"key":"e_1_3_3_1_39_2","doi-asserted-by":"publisher","DOI":"10.1145\/3411763.3451617"},{"key":"e_1_3_3_1_40_2","series-title":"(ASE \u201920)","first-page":"138","volume-title":"Proceedings of the 35th IEEE\/ACM International Conference on Automated Software Engineering","author":"Wang Jiawei","year":"2021","unstructured":"Jiawei Wang, Tzu-Yang Kuo, Li Li, and Andreas Zeller. 2021. Assessing and Restoring Reproducibility of Jupyter Notebooks. In Proceedings of the 35th IEEE\/ACM International Conference on Automated Software Engineering(ASE \u201920). 138\u2013149."},{"key":"e_1_3_3_1_41_2","first-page":"53","volume-title":"Proceedings of the ACM\/IEEE 42nd International Conference on Software Engineering: New Ideas and Emerging Results","author":"Wang Jiawei","year":"2020","unstructured":"Jiawei Wang, Li Li, and Andreas Zeller. 2020. Better Code, Better Sharing: On the Need of Analyzing Jupyter Notebooks. In Proceedings of the ACM\/IEEE 42nd International Conference on Software Engineering: New Ideas and Emerging Results. 53\u201356."},{"key":"e_1_3_3_1_42_2","doi-asserted-by":"crossref","unstructured":"Zhou Wang and Alan\u00a0C Bovik. 2009. Mean squared error: Love it or leave it? A new look at signal fidelity measures. IEEE signal processing magazine 26 1 (2009) 98\u2013117.","DOI":"10.1109\/MSP.2008.930649"},{"key":"e_1_3_3_1_43_2","doi-asserted-by":"crossref","unstructured":"Zhou Wang Alan\u00a0Conrad Bovik Hamid\u00a0Rahim Sheikh and Eero\u00a0P Simoncelli. 2004. Image quality assessment: from error visibility to structural similarity. IEEE Trans. Image Process. 13 4 (April 2004) 600\u2013612.","DOI":"10.1109\/TIP.2003.819861"},{"key":"e_1_3_3_1_44_2","doi-asserted-by":"publisher","unstructured":"Michael\u00a0L. Waskom. 2021. seaborn: statistical data visualization. Journal of Open Source Software 6 60 (2021) 3021. 10.21105\/joss.03021","DOI":"10.21105\/joss.03021"},{"key":"e_1_3_3_1_45_2","unstructured":"William\u00a0E Winkler. 1990. String comparator metrics and enhanced decision rules in the fellegi-sunter model of record linkage. (1990)."},{"key":"e_1_3_3_1_46_2","doi-asserted-by":"crossref","unstructured":"Dandong Yin Yan Liu Hao Hu Jeff Terstriep Xingchen Hong Anand Padmanabhan and Shaowen Wang. 2019. CyberGIS\u2010Jupyter for reproducible and scalable geospatial analytics. Concurr. Comput. 31 11 (June 2019) e5040.","DOI":"10.1002\/cpe.5040"}],"event":{"name":"ACM REP '25: ACM Conference on Reproducibility and Replicability","location":"Vancouver Canada","acronym":"ACM REP '25","sponsor":["EIGREP Emerging Interest Group on Reproducibility and Replicability"]},"container-title":["Proceedings of the 3rd ACM Conference on Reproducibility and Replicability"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3736731.3746159","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3736731.3746159","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,1,9]],"date-time":"2026-01-09T18:04:39Z","timestamp":1767981879000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3736731.3746159"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,7,29]]},"references-count":45,"alternative-id":["10.1145\/3736731.3746159","10.1145\/3736731"],"URL":"https:\/\/doi.org\/10.1145\/3736731.3746159","relation":{},"subject":[],"published":{"date-parts":[[2025,7,29]]},"assertion":[{"value":"2025-10-21","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}