{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,14]],"date-time":"2026-03-14T09:51:01Z","timestamp":1773481861058,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":51,"publisher":"ACM","license":[{"start":{"date-parts":[[2020,8,20]],"date-time":"2020-08-20T00:00:00Z","timestamp":1597881600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/100004318","name":"Microsoft","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100004318","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2020,8,23]]},"DOI":"10.1145\/3394486.3403205","type":"proceedings-article","created":{"date-parts":[[2020,8,20]],"date-time":"2020-08-20T23:03:55Z","timestamp":1597964635000},"page":"1542-1551","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":41,"title":["Vamsa: Automated Provenance Tracking in Data Science Scripts"],"prefix":"10.1145","author":[{"given":"Mohammad Hossein","family":"Namaki","sequence":"first","affiliation":[{"name":"Washington State University, Pullman, WA, USA"}]},{"given":"Avrilia","family":"Floratou","sequence":"additional","affiliation":[{"name":"Microsoft, Sunnyvale, CA, USA"}]},{"given":"Fotis","family":"Psallidas","sequence":"additional","affiliation":[{"name":"Microsoft, Redmond, CA, USA"}]},{"given":"Subru","family":"Krishnan","sequence":"additional","affiliation":[{"name":"Microsoft, Sunnyvale, CA, USA"}]},{"given":"Ashvin","family":"Agrawal","sequence":"additional","affiliation":[{"name":"Microsoft, Sunnyvale, CA, USA"}]},{"given":"Yinghui","family":"Wu","sequence":"additional","affiliation":[{"name":"Case Western Reserve University, Cleveland, OH, USA"}]},{"given":"Yiwen","family":"Zhu","sequence":"additional","affiliation":[{"name":"Microsoft, Sunnyvale, CA, USA"}]},{"given":"Markus","family":"Weimer","sequence":"additional","affiliation":[{"name":"Microsoft, Sunnyvale, WA, USA"}]}],"member":"320","published-online":{"date-parts":[[2020,8,20]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"https:\/\/xgboost.readthedocs.io\/en\/latest\/index.html","year":"2014","unstructured":"Xgboost. https:\/\/xgboost.readthedocs.io\/en\/latest\/index.html , 2014 . Xgboost. https:\/\/xgboost.readthedocs.io\/en\/latest\/index.html, 2014."},{"key":"e_1_3_2_1_2_1","volume-title":"https:\/\/ec.europa.eu\/commission\/priorities\/justice-and-fundamental-rights\/data-protection\/2018-reform-eu-data-protection-rules\/eu-data-protection-rules_en","author":"Regulations EU","year":"2018","unstructured":"EU GDPR Regulations . https:\/\/ec.europa.eu\/commission\/priorities\/justice-and-fundamental-rights\/data-protection\/2018-reform-eu-data-protection-rules\/eu-data-protection-rules_en , 2018 . EU GDPR Regulations. https:\/\/ec.europa.eu\/commission\/priorities\/justice-and-fundamental-rights\/data-protection\/2018-reform-eu-data-protection-rules\/eu-data-protection-rules_en, 2018."},{"key":"e_1_3_2_1_3_1","volume-title":"https:\/\/www.kaggle.com\/ronitf\/heart-disease-uci","year":"2018","unstructured":"Kaggle Heart Disease. https:\/\/www.kaggle.com\/ronitf\/heart-disease-uci , 2018 . Kaggle Heart Disease. https:\/\/www.kaggle.com\/ronitf\/heart-disease-uci, 2018."},{"key":"e_1_3_2_1_4_1","volume-title":"https:\/\/www.kaggle.com\/kaggle\/kaggle-survey-2018","year":"2018","unstructured":"Kaggle survey. https:\/\/www.kaggle.com\/kaggle\/kaggle-survey-2018 , 2018 . Kaggle survey. https:\/\/www.kaggle.com\/kaggle\/kaggle-survey-2018, 2018."},{"key":"e_1_3_2_1_5_1","volume-title":"https:\/\/github.com\/Kaggle\/kaggle-api","author":"API.","year":"2018","unstructured":"Official Kaggle API. https:\/\/github.com\/Kaggle\/kaggle-api , 2018 . Official Kaggle API. https:\/\/github.com\/Kaggle\/kaggle-api, 2018."},{"key":"e_1_3_2_1_6_1","volume-title":"https:\/\/docs.python.org\/3\/library\/ast.html","year":"2019","unstructured":"Abstract syntax trees. https:\/\/docs.python.org\/3\/library\/ast.html , 2019 . Abstract syntax trees. https:\/\/docs.python.org\/3\/library\/ast.html, 2019."},{"key":"e_1_3_2_1_7_1","volume-title":"https:\/\/sites.google.com\/view\/kdd19-explainable-ai-tutorial","author":"Industry AI","year":"2019","unstructured":"Explainable AI in Industry . https:\/\/sites.google.com\/view\/kdd19-explainable-ai-tutorial , 2019 . Explainable AI in Industry. https:\/\/sites.google.com\/view\/kdd19-explainable-ai-tutorial, 2019."},{"key":"e_1_3_2_1_8_1","volume-title":"https:\/\/xai.kdd2019.a.intuit.com\/","author":"Accountability ML","year":"2019","unstructured":"Explainable AI\/ ML (XAI) for Accountability , Fairness, and Transparency. https:\/\/xai.kdd2019.a.intuit.com\/ , 2019 . Explainable AI\/ML (XAI) for Accountability, Fairness, and Transparency. https:\/\/xai.kdd2019.a.intuit.com\/, 2019."},{"key":"e_1_3_2_1_9_1","volume-title":"Practical Challenges and Lessons learned. https:\/\/sites.google.com\/view\/kdd19-fairness-tutorial","year":"2019","unstructured":"Fairness-Aware Machine Learning : Practical Challenges and Lessons learned. https:\/\/sites.google.com\/view\/kdd19-fairness-tutorial , 2019 . Fairness-Aware Machine Learning: Practical Challenges and Lessons learned. https:\/\/sites.google.com\/view\/kdd19-fairness-tutorial, 2019."},{"key":"e_1_3_2_1_10_1","volume-title":"https:\/\/www.kubeflow.org\/","year":"2019","unstructured":"Kubeflow. https:\/\/www.kubeflow.org\/ , 2019 . Kubeflow. https:\/\/www.kubeflow.org\/, 2019."},{"key":"e_1_3_2_1_11_1","volume-title":"https:\/\/github.com\/mlflow\/mlflow\/","year":"2019","unstructured":"Mlflow. https:\/\/github.com\/mlflow\/mlflow\/ , 2019 . Mlflow. https:\/\/github.com\/mlflow\/mlflow\/, 2019."},{"key":"e_1_3_2_1_12_1","volume-title":"https:\/\/greentreesnakes.readthedocs.io\/en\/latest\/","author":"AST","year":"2019","unstructured":"Python AST docs. https:\/\/greentreesnakes.readthedocs.io\/en\/latest\/ , 2019 . Python AST docs. https:\/\/greentreesnakes.readthedocs.io\/en\/latest\/, 2019."},{"key":"e_1_3_2_1_13_1","volume-title":"https:\/\/towardsdatascience.com\/programming-languages-for-data-scientists-afde2eaf5cc5","year":"2019","unstructured":"Python language. https:\/\/towardsdatascience.com\/programming-languages-for-data-scientists-afde2eaf5cc5 , 2019 . Python language. https:\/\/towardsdatascience.com\/programming-languages-for-data-scientists-afde2eaf5cc5, 2019."},{"key":"e_1_3_2_1_14_1","volume-title":"https:\/\/pytorch.org\/","year":"2019","unstructured":"PyTorch. https:\/\/pytorch.org\/ , 2019 . PyTorch. https:\/\/pytorch.org\/, 2019."},{"key":"e_1_3_2_1_15_1","volume-title":"https:\/\/github.com\/python\/typeshed","year":"2019","unstructured":"Typeshed. https:\/\/github.com\/python\/typeshed , 2019 . Typeshed. https:\/\/github.com\/python\/typeshed, 2019."},{"key":"e_1_3_2_1_16_1","volume-title":"aka.ms\/vamsa","year":"2020","unstructured":"Vamsa. aka.ms\/vamsa , 2020 . Vamsa. aka.ms\/vamsa, 2020."},{"key":"e_1_3_2_1_17_1","volume-title":"TaPP","author":"Angelino E.","year":"2011","unstructured":"E. Angelino Provenance integration requires reconciliation . In TaPP , 2011 . E. Angelino et al. Provenance integration requires reconciliation. In TaPP, 2011."},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-17819-1_27"},{"key":"e_1_3_2_1_19_1","first-page":"379","volume-title":"TRDB","author":"Cheney J.","year":"2009","unstructured":"J. Cheney Provenance in databases: Why, how, and where . TRDB , pages 379 -- 474 , 2009 . J. Cheney et al. Provenance in databases: Why, how, and where. TRDB, pages 379--474, 2009."},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1145\/1066157.1066296"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1145\/357775.357777"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.14778\/3055540.3055550"},{"key":"e_1_3_2_1_23_1","volume-title":"Provenance in scientific workflow systems","author":"Freire J.","year":"2007","unstructured":"J. Freire and M. Anand . Provenance in scientific workflow systems . IEEE Data Engineering Bulletin , 2007 . J. Freire and M. Anand. Provenance in scientific workflow systems. IEEE Data Engineering Bulletin, 2007."},{"key":"e_1_3_2_1_24_1","volume-title":"KDD CMI Workshop","author":"Garcia R.","year":"2018","unstructured":"R. Garcia : The missing piece in the machine learning lifecycle . In KDD CMI Workshop , 2018 . R. Garcia et al. Context: The missing piece in the machine learning lifecycle. In KDD CMI Workshop, 2018."},{"key":"e_1_3_2_1_25_1","volume-title":"Datasheets for datasets","author":"Gebru T.","year":"2018","unstructured":"T. Gebru Datasheets for datasets , 2018 . T. Gebru et al. Datasheets for datasets, 2018."},{"key":"e_1_3_2_1_26_1","volume-title":"Stanford InfoLab","author":"Ikeda R.","year":"2009","unstructured":"R. Ikeda and J. Widom . Data lineage: A survey. Technical report , Stanford InfoLab , 2009 . R. Ikeda and J. Widom. Data lineage: A survey. Technical report, Stanford InfoLab, 2009."},{"key":"e_1_3_2_1_27_1","volume-title":"CIDR","author":"Ives Z.","year":"2019","unstructured":"Z. Ives Dataset relationship management . In CIDR , 2019 . Z. Ives et al. Dataset relationship management. In CIDR, 2019."},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1145\/3219819.3219905"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.2218\/ijdc.v10i1.370"},{"key":"e_1_3_2_1_30_1","first-page":"1235","volume-title":"JMLR","author":"Meng X.","year":"2016","unstructured":"X. Meng : Machine learning in apache spark . JMLR , pages 1235 -- 1241 , 2016 . X. Meng et al. Mllib: Machine learning in apache spark. JMLR, pages 1235--1241, 2016."},{"key":"e_1_3_2_1_31_1","first-page":"26","volume-title":"IEEE Data Eng. Bull.","author":"Miao H.","year":"2018","unstructured":"H. Miao and A. Deshpande . Provdb: Provenance-enabled lifecycle management of collaborative data analysis workflows . IEEE Data Eng. Bull. , pages 26 -- 38 , 2018 . H. Miao and A. Deshpande. Provdb: Provenance-enabled lifecycle management of collaborative data analysis workflows. IEEE Data Eng. Bull., pages 26--38, 2018."},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICDE.2017.192"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICDE.2017.112"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1145\/3299869.3319890"},{"key":"e_1_3_2_1_35_1","volume-title":"Vamsa: Tracking provenance in data science scripts (technical report). arXiv preprint arXiv:2001.01861","author":"Namaki M. H.","year":"2020","unstructured":"M. H. Namaki Vamsa: Tracking provenance in data science scripts (technical report). arXiv preprint arXiv:2001.01861 , 2020 . M. H. Namaki et al. Vamsa: Tracking provenance in data science scripts (technical report). arXiv preprint arXiv:2001.01861, 2020."},{"key":"e_1_3_2_1_36_1","first-page":"2825","volume-title":"JMLR","author":"Pedregosa F.","year":"2011","unstructured":"F. Pedregosa : Machine learning in python . JMLR , pages 2825 -- 2830 , 2011 . F. Pedregosa et al. Scikit-learn: Machine learning in python. JMLR, pages 2825--2830, 2011."},{"key":"e_1_3_2_1_37_1","volume-title":"VLDB","author":"Pimentel J. F.","year":"2017","unstructured":"J. F. Pimentel : a tool for collecting, analyzing, and managing provenance from python scripts . VLDB , 2017 . J. F. Pimentel et al. noworkflow: a tool for collecting, analyzing, and managing provenance from python scripts. VLDB, 2017."},{"key":"e_1_3_2_1_38_1","volume-title":"NIPS","author":"Prokhorenkova L.","year":"2018","unstructured":"L. Prokhorenkova : unbiased boosting with categorical features . In NIPS , 2018 . L. Prokhorenkova et al. Catboost: unbiased boosting with categorical features. In NIPS, 2018."},{"key":"e_1_3_2_1_39_1","volume-title":"Data science through the looking glass and what we found there. arXiv preprint arXiv:1912.09536","author":"Psallidas F.","year":"2019","unstructured":"F. Psallidas Data science through the looking glass and what we found there. arXiv preprint arXiv:1912.09536 , 2019 . F. Psallidas et al. Data science through the looking glass and what we found there. arXiv preprint arXiv:1912.09536, 2019."},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1145\/3209900.3209904"},{"key":"e_1_3_2_1_41_1","first-page":"719","volume-title":"VLDB","author":"Psallidas F.","year":"2018","unstructured":"F. Psallidas and E. Wu . Smoke: Fine-grained lineage at interactive speed . VLDB , pages 719 -- 732 , 2018 . F. Psallidas and E. Wu. Smoke: Fine-grained lineage at interactive speed. VLDB, pages 719--732, 2018."},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1109\/TVCG.2015.2467551"},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1145\/3173574.3173606"},{"key":"e_1_3_2_1_44_1","volume-title":"Machine Learning Systems workshop at NIPS","author":"Schelter S.","year":"2017","unstructured":"S. Schelter Automatically tracking metadata and provenance of machine learning experiments . In Machine Learning Systems workshop at NIPS , 2017 . S. Schelter et al. Automatically tracking metadata and provenance of machine learning experiments. In Machine Learning Systems workshop at NIPS, 2017."},{"key":"e_1_3_2_1_45_1","first-page":"5","volume-title":"IEEE Data Eng. Bull.","author":"Schelter S.","year":"2018","unstructured":"S. Schelter On challenges in machine learning model management . IEEE Data Eng. Bull. , pages 5 -- 15 , 2018 . S. Schelter et al. On challenges in machine learning model management. IEEE Data Eng. Bull., pages 5--15, 2018."},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.1145\/3357223.3362716"},{"key":"e_1_3_2_1_47_1","first-page":"395","volume-title":"ACM SIGKDD","author":"Shu K.","year":"2019","unstructured":"K. Shu : Explainable Fake News Detection . In ACM SIGKDD , pages 395 -- 405 . ACM, 2019 . K. Shu et al. dEFEND: Explainable Fake News Detection. In ACM SIGKDD, pages 395--405. ACM, 2019."},{"key":"e_1_3_2_1_48_1","volume-title":"Engineering A Compiler","author":"Torczon L.","year":"2011","unstructured":"L. Torczon and K. Cooper . Engineering A Compiler . Morgan Kaufmann Publishers Inc ., San Francisco, CA, USA, 2 nd edition, 2011 . L. Torczon and K. Cooper. Engineering A Compiler. Morgan Kaufmann Publishers Inc., San Francisco, CA, USA, 2nd edition, 2011.","edition":"2"},{"key":"e_1_3_2_1_49_1","volume-title":"HILDA","author":"Vartak M.","year":"2016","unstructured":"M. Vartak : a system for machine learning model management . In HILDA , 2016 . M. Vartak et al. Modeldb: a system for machine learning model management. In HILDA, 2016."},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"publisher","DOI":"10.1145\/3183713.3196934"},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"publisher","DOI":"10.14778\/2536354.2536356"}],"event":{"name":"KDD '20: The 26th ACM SIGKDD Conference on Knowledge Discovery and Data Mining","location":"Virtual Event CA USA","acronym":"KDD '20","sponsor":["SIGMOD ACM Special Interest Group on Management of Data","SIGKDD ACM Special Interest Group on Knowledge Discovery in Data"]},"container-title":["Proceedings of the 26th ACM SIGKDD International Conference on Knowledge Discovery &amp; Data Mining"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3394486.3403205","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3394486.3403205","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T22:01:46Z","timestamp":1750197706000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3394486.3403205"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020,8,20]]},"references-count":51,"alternative-id":["10.1145\/3394486.3403205","10.1145\/3394486"],"URL":"https:\/\/doi.org\/10.1145\/3394486.3403205","relation":{},"subject":[],"published":{"date-parts":[[2020,8,20]]},"assertion":[{"value":"2020-08-20","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}