{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,9,17]],"date-time":"2025-09-17T15:02:30Z","timestamp":1758121350567,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":13,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,6,4]],"date-time":"2023-06-04T00:00:00Z","timestamp":1685836800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,6,4]]},"DOI":"10.1145\/3555041.3589717","type":"proceedings-article","created":{"date-parts":[[2023,6,5]],"date-time":"2023-06-05T16:25:14Z","timestamp":1685982314000},"page":"123-126","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":2,"title":["Demonstration of Geyser: Provenance Extraction and Applications over Data Science Scripts"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-4110-5813","authenticated-orcid":false,"given":"Fotis","family":"Psallidas","sequence":"first","affiliation":[{"name":"Microsoft, Redmond, WA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8065-7763","authenticated-orcid":false,"given":"Megan Eileen","family":"Leszczynski","sequence":"additional","affiliation":[{"name":"Stanford University, Stanford, CA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5589-7818","authenticated-orcid":false,"given":"Mohammad Hossein","family":"Namaki","sequence":"additional","affiliation":[{"name":"Microsoft, Redmond, WA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-5760-8657","authenticated-orcid":false,"given":"Avrilia","family":"Floratou","sequence":"additional","affiliation":[{"name":"Microsoft, Redmond, WA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-7862-0995","authenticated-orcid":false,"given":"Ashvin","family":"Agrawal","sequence":"additional","affiliation":[{"name":"Microsoft, Redmond, WA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-6975-2568","authenticated-orcid":false,"given":"Konstantinos","family":"Karanasos","sequence":"additional","affiliation":[{"name":"Meta, Menlo Park, CA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-8534-0889","authenticated-orcid":false,"given":"Subru","family":"Krishnan","sequence":"additional","affiliation":[{"name":"Microsoft, Redmond, WA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6536-3932","authenticated-orcid":false,"given":"Pavle","family":"Subotic","sequence":"additional","affiliation":[{"name":"Microsoft, Redmond, WA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-2620-663X","authenticated-orcid":false,"given":"Markus","family":"Weimer","sequence":"additional","affiliation":[{"name":"Microsoft, Redmond, WA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3991-5155","authenticated-orcid":false,"given":"Yinghui","family":"Wu","sequence":"additional","affiliation":[{"name":"Case Western Reserve University, Cleveland, OH, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-6857-7505","authenticated-orcid":false,"given":"Yiwen","family":"Zhu","sequence":"additional","affiliation":[{"name":"Microsoft, Redmond, WA, USA"}]}],"member":"320","published-online":{"date-parts":[[2023,6,5]]},"reference":[{"key":"e_1_3_2_3_1_1","unstructured":"2022. Apache Atlas. https:\/\/atlas.apache.org\/"},{"key":"e_1_3_2_3_2_1","doi-asserted-by":"crossref","unstructured":"Nathalie Baracaldo Bryant Chen Heiko Ludwig Amir Safavi and Rui Zhang. 2018. Detecting Poisoning Attacks on Machine Learning in IoT Environments. In ICIOT. 57--64.","DOI":"10.1109\/ICIOT.2018.00015"},{"key":"e_1_3_2_3_3_1","doi-asserted-by":"publisher","DOI":"10.14778\/3554821.3554857"},{"key":"e_1_3_2_3_4_1","volume-title":"Runtime Provenance Refinement for Notebooks. In TaPP '22","author":"Deo Nachiket","year":"2022","unstructured":"Nachiket Deo, Boris Glavic, and Oliver Kennedy. 2022. Runtime Provenance Refinement for Notebooks. In TaPP '22."},{"key":"e_1_3_2_3_5_1","volume-title":"Perm: Processing provenance and data on the same data model through query rewriting. In ICDE.","author":"Glavic Boris","year":"2009","unstructured":"Boris Glavic and Gustavo Alonso. 2009. Perm: Processing provenance and data on the same data model through query rewriting. In ICDE."},{"volume-title":"Retrieved","year":"2022","key":"e_1_3_2_3_6_1","unstructured":"Kaggle. 2022. Kaggle Heart Disease Competition. Retrieved December 14, 2022 from https:\/\/www.kaggle.com\/datasets\/rishidamarla\/heart-disease-prediction"},{"key":"e_1_3_2_3_7_1","doi-asserted-by":"publisher","DOI":"10.1145\/1807167.1807269"},{"key":"e_1_3_2_3_8_1","doi-asserted-by":"publisher","DOI":"10.14778\/3447689.3447712"},{"key":"e_1_3_2_3_9_1","doi-asserted-by":"publisher","DOI":"10.1145\/2452376.2452478"},{"key":"e_1_3_2_3_10_1","volume-title":"Vamsa: Automated Provenance Tracking in Data Science Scripts. In SIGKDD '20","author":"Namaki Mohammad Hossein","year":"2020","unstructured":"Mohammad Hossein Namaki, Avrilia Floratou, Fotis Psallidas, Subru Krishnan, Ashvin Agrawal, Yinghui Wu, Yiwen Zhu, and MarkusWeimer. 2020. Vamsa: Automated Provenance Tracking in Data Science Scripts. In SIGKDD '20. 1542--1551."},{"key":"e_1_3_2_3_11_1","volume-title":"Beat Buesser, Ambrish Rawat, Martin Wistuba, Valentina Zantedeschi, Nathalie Baracaldo, Bryant Chen, Heiko Ludwig, Ian Molloy, and Ben Edwards.","author":"Nicolae Maria-Irina","year":"2018","unstructured":"Maria-Irina Nicolae, Mathieu Sinn, Minh Ngoc Tran, Beat Buesser, Ambrish Rawat, Martin Wistuba, Valentina Zantedeschi, Nathalie Baracaldo, Bryant Chen, Heiko Ludwig, Ian Molloy, and Ben Edwards. 2018. Adversarial Robustness Toolbox v1.2.0. CoRR 1807.01069 (2018). https:\/\/arxiv.org\/pdf\/1807.01069"},{"key":"e_1_3_2_3_12_1","first-page":"6","article-title":"Smoke: Fine-Grained Lineage at Interactive Speed","volume":"11","author":"Psallidas Fotis","year":"2019","unstructured":"Fotis Psallidas and EugeneWu. 2019. Smoke: Fine-Grained Lineage at Interactive Speed. PVLDB 11, 6 (jan 2019), 719--732.","journal-title":"PVLDB"},{"key":"e_1_3_2_3_13_1","first-page":"2","article-title":"Data Science Through the Looking Glass: Analysis of Millions of GitHub Notebooks and ML.","volume":"51","author":"Psallidas Fotis","year":"2022","unstructured":"Fotis Psallidas, Yiwen Zhu, Bojan Karlas, Jordan Henkel, Matteo Interlandi, Subru Krishnan, Brian Kroth, Venkatesh Emani, Wentao Wu, Ce Zhang, Markus Weimer, Avrilia Floratou, Carlo Curino, and Konstantinos Karanasos. 2022. Data Science Through the Looking Glass: Analysis of Millions of GitHub Notebooks and ML.NET Pipelines. SIGMOD Record 51, 2 (jul 2022), 30--37.","journal-title":"NET Pipelines. SIGMOD Record"}],"event":{"name":"SIGMOD\/PODS '23: International Conference on Management of Data","sponsor":["SIGMOD ACM Special Interest Group on Management of Data"],"location":"Seattle WA USA","acronym":"SIGMOD\/PODS '23"},"container-title":["Companion of the 2023 International Conference on Management of Data"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3555041.3589717","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3555041.3589717","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T18:43:58Z","timestamp":1750272238000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3555041.3589717"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,6,4]]},"references-count":13,"alternative-id":["10.1145\/3555041.3589717","10.1145\/3555041"],"URL":"https:\/\/doi.org\/10.1145\/3555041.3589717","relation":{},"subject":[],"published":{"date-parts":[[2023,6,4]]},"assertion":[{"value":"2023-06-05","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}