{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,9,2]],"date-time":"2025-09-02T12:40:02Z","timestamp":1756816802868,"version":"3.44.0"},"publisher-location":"New York, NY, USA","reference-count":26,"publisher":"ACM","content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,6,22]]},"DOI":"10.1145\/3736229.3736269","type":"proceedings-article","created":{"date-parts":[[2025,9,2]],"date-time":"2025-09-02T10:07:28Z","timestamp":1756807648000},"page":"57-61","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Fast Capture of Cell-Level Provenance in Numpy"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-8536-3510","authenticated-orcid":false,"given":"Jinjin","family":"Zhao","sequence":"first","affiliation":[{"name":"University of Chicago, Chicago, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6968-4090","authenticated-orcid":false,"given":"Sanjay","family":"Krishnan","sequence":"additional","affiliation":[{"name":"University of Chicago, Chicago, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2025,9,2]]},"reference":[{"key":"e_1_3_3_1_2_2","volume-title":"Very Large Data Bases Conference","author":"Alexe Bogdan","year":"2006","unstructured":"Bogdan Alexe, Laura Chiticariu, and Wang\u00a0Chiew Tan. 2006. SPIDER: a schema mapPIng DEbuggeR. In Very Large Data Bases Conference. https:\/\/api.semanticscholar.org\/CorpusID:11896236"},{"key":"e_1_3_3_1_3_2","doi-asserted-by":"crossref","unstructured":"Adriane\u00a0P. Chapman Paolo Missier Giuliano Simonelli and Riccardo Torlone. 2020. Capturing and querying fine-grained provenance of preprocessing pipelines in data science. Proc. VLDB Endow. 14 (2020) 507\u2013520. https:\/\/api.semanticscholar.org\/CorpusID:231842112","DOI":"10.14778\/3436905.3436911"},{"key":"e_1_3_3_1_4_2","doi-asserted-by":"crossref","unstructured":"James Cheney Laura Chiticariu and Wang\u00a0Chiew Tan. 2009. Provenance in Databases: Why How and Where. Found. Trends Databases 1 (2009) 379\u2013474. https:\/\/api.semanticscholar.org\/CorpusID:1778556","DOI":"10.1561\/1900000006"},{"key":"e_1_3_3_1_5_2","doi-asserted-by":"publisher","DOI":"10.1145\/1066157.1066296"},{"key":"e_1_3_3_1_6_2","unstructured":"Yingwei Cui and Jennifer Widom. 2000. Practical lineage tracing in data warehouses. Proceedings of 16th International Conference on Data Engineering (Cat. No.00CB37073) (2000) 367\u2013378. https:\/\/api.semanticscholar.org\/CorpusID:15647832"},{"key":"e_1_3_3_1_7_2","volume-title":"Conference on Innovative Data Systems Research","author":"Grafberger Stefan","year":"2021","unstructured":"Stefan Grafberger, Julia Stoyanovich, and Sebastian Schelter. 2021. Lightweight Inspection of Data Preprocessing in Native Machine Learning Pipelines. In Conference on Innovative Data Systems Research. https:\/\/api.semanticscholar.org\/CorpusID:232203702"},{"key":"e_1_3_3_1_8_2","doi-asserted-by":"publisher","DOI":"10.1145\/1265530.1265535"},{"key":"e_1_3_3_1_9_2","doi-asserted-by":"crossref","unstructured":"Charles\u00a0R. Harris K.\u00a0Jarrod Millman St\u00e9fan\u00a0J. van\u00a0der Walt Ralf Gommers Pauli Virtanen David Cournapeau Eric Wieser Julian Taylor Sebastian Berg Nathaniel\u00a0J. Smith Robert Kern Matti Picus Stephan Hoyer Marten\u00a0H. van Kerkwijk Matthew Brett Allan Haldane Jaime\u00a0Fern\u00e1ndez del R\u00edo Mark Wiebe Pearu Peterson Pierre G\u00e9rard-Marchant Kevin Sheppard Tyler Reddy Warren Weckesser Hameer Abbasi Christoph Gohlke and Travis\u00a0E. Oliphant. 2020. Array programming with NumPy. Nature 585 7825 (Sept. 2020) 357\u2013362. https:\/\/doi.org\/10.1038\/s41586-020-2649-2","DOI":"10.1038\/s41586-020-2649-2"},{"key":"e_1_3_3_1_10_2","doi-asserted-by":"publisher","DOI":"10.1145\/1376616.1376716"},{"key":"e_1_3_3_1_11_2","doi-asserted-by":"crossref","unstructured":"Matteo Interlandi Kshitij Shah Sai\u00a0Deep Tetali Muhammad\u00a0Ali Gulzar Seunghyun Yoo Miryung Kim Todd\u00a0D. Millstein and Tyson Condie. 2015. Titian: Data Provenance Support in Spark. Proceedings of the VLDB Endowment International Conference on Very Large Data Bases 9 (2015) 216 \u2013 227. https:\/\/api.semanticscholar.org\/CorpusID:8433820","DOI":"10.14778\/2850583.2850595"},{"key":"e_1_3_3_1_12_2","doi-asserted-by":"crossref","unstructured":"Zachary\u00a0G. Ives Todd\u00a0J. Green Grigoris Karvounarakis Nicholas\u00a0E. Taylor Val Tannen Partha\u00a0Pratim Talukdar Marie Jacob and Fernando Pereira. 2008. The ORCHESTRA Collaborative Data Sharing System. SIGMOD Rec. 37 3 (Sept. 2008) 26\u201332. https:\/\/doi.org\/10.1145\/1462571.1462577","DOI":"10.1145\/1462571.1462577"},{"key":"e_1_3_3_1_13_2","doi-asserted-by":"crossref","unstructured":"Shachar Kaufman Saharon Rosset Claudia Perlich and Ori Stitelman. 2012. Leakage in data mining: Formulation detection and avoidance. 6 4 Article 15 (Dec. 2012) 21\u00a0pages. https:\/\/doi.org\/10.1145\/2382577.2382579","DOI":"10.1145\/2382577.2382579"},{"key":"e_1_3_3_1_14_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-540-89965-5_4"},{"key":"e_1_3_3_1_15_2","unstructured":"Mohammad\u00a0Hossein Namaki Avrilia Floratou Fotis Psallidas Subru Krishnan Ashvin Agrawal and Yinghui Wu. 2020. Vamsa: Tracking Provenance in Data Science Scripts. ArXiv abs\/2001.01861 (2020). https:\/\/api.semanticscholar.org\/CorpusID:210023441"},{"key":"e_1_3_3_1_16_2","first-page":"8024","volume-title":"Advances in Neural Information Processing Systems 32","author":"Paszke Adam","year":"2019","unstructured":"Adam Paszke, Sam Gross, Francisco Massa, Adam Lerer, James Bradbury, Gregory Chanan, Trevor Killeen, Zeming Lin, Natalia Gimelshein, Luca Antiga, Alban Desmaison, Andreas Kopf, Edward Yang, Zachary DeVito, Martin Raison, Alykhan Tejani, Sasank Chilamkurthy, Benoit Steiner, Lu Fang, Junjie Bai, and Soumith Chintala. 2019. PyTorch: An Imperative Style, High-Performance Deep Learning Library. In Advances in Neural Information Processing Systems 32. Curran Associates, Inc., 8024\u20138035. http:\/\/papers.neurips.cc\/paper\/9015-pytorch-an-imperative-style-high-performance-deep-learning-library.pdf"},{"key":"e_1_3_3_1_17_2","unstructured":"Devin Petersohn William\u00a0W. Ma Doris Jung\u00a0Lin Lee Stephen Macke Doris Xin Xiangxi Mo Joseph\u00a0E. Gonzalez Joseph\u00a0M. Hellerstein Anthony\u00a0D. Joseph and Aditya\u00a0G. Parameswaran. 2020. Towards Scalable Dataframe Systems. CoRR abs\/2001.00888 (2020). arXiv:https:\/\/arXiv.org\/abs\/2001.00888http:\/\/arxiv.org\/abs\/2001.00888"},{"key":"e_1_3_3_1_18_2","unstructured":"D\u00e9bora\u00a0B. Pina Adriane\u00a0P. Chapman Daniel de Oliveira and Marta Mattoso. 2023. Deep Learning Provenance Data Integration: a Practical Approach. Companion Proceedings of the ACM Web Conference 2023 (2023). https:\/\/api.semanticscholar.org\/CorpusID:258377574"},{"key":"e_1_3_3_1_19_2","doi-asserted-by":"crossref","unstructured":"Fotis Psallidas and Eugene Wu. 2018. Smoke: Fine-grained Lineage at Interactive Speed. Proc. VLDB Endow. 11 (2018) 719\u2013732. https:\/\/api.semanticscholar.org\/CorpusID:3591285","DOI":"10.14778\/3184470.3184475"},{"key":"e_1_3_3_1_20_2","doi-asserted-by":"crossref","unstructured":"Manasi Vartak Joana M.\u00a0F. da Trindade Samuel Madden and Matei\u00a0A. Zaharia. 2018. MISTIQUE: A System to Store and Query Model Intermediates for Model Diagnosis. Proceedings of the 2018 International Conference on Management of Data (2018). https:\/\/api.semanticscholar.org\/CorpusID:13815684","DOI":"10.1145\/3183713.3196934"},{"key":"e_1_3_3_1_21_2","doi-asserted-by":"publisher","DOI":"10.25080\/Majora-92bf1922-00a"},{"key":"e_1_3_3_1_22_2","doi-asserted-by":"crossref","unstructured":"Eugene Wu Samuel Madden and Michael Stonebraker. 2013. SubZero: A fine-grained lineage system for scientific databases. 2013 IEEE 29th International Conference on Data Engineering (ICDE) (2013) 865\u2013876. https:\/\/api.semanticscholar.org\/CorpusID:11139063","DOI":"10.1109\/ICDE.2013.6544881"},{"key":"e_1_3_3_1_23_2","volume-title":"Workshop on the Theory and Practice of Provenance","author":"Yan Zhepeng","year":"2016","unstructured":"Zhepeng Yan, Val Tannen, and Zachary\u00a0G. Ives. 2016. Fine-grained Provenance for Linear Algebra Operators. In Workshop on the Theory and Practice of Provenance. https:\/\/api.semanticscholar.org\/CorpusID:14082705"},{"key":"e_1_3_3_1_24_2","doi-asserted-by":"crossref","unstructured":"Zhao Zhang Evan\u00a0R. Sparks and Michael\u00a0J. Franklin. 2017. Diagnosing Machine Learning Pipelines with Fine-grained Lineage. Proceedings of the 26th International Symposium on High-Performance Parallel and Distributed Computing (2017). https:\/\/api.semanticscholar.org\/CorpusID:66162","DOI":"10.1145\/3078597.3078603"},{"key":"e_1_3_3_1_25_2","unstructured":"Jinjin Zhao Avidgor Gal and Sanjay Krishnan. 2024. A System for Quantifying Data Science Workflows with Fine-Grained Procedural Logging and a Pilot Study. arxiv:https:\/\/arXiv.org\/abs\/2405.17845\u00a0[cs.HC] https:\/\/arxiv.org\/abs\/2405.17845"},{"key":"e_1_3_3_1_26_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICDE60146.2024.00281"},{"key":"e_1_3_3_1_27_2","unstructured":"Jinjin Zhao and Sanjay Krishnan. 2024. Compression and In-Situ Query Processing for Fine-Grained Array Lineage. arxiv:https:\/\/arXiv.org\/abs\/2405.17701\u00a0[cs.DB] https:\/\/arxiv.org\/abs\/2405.17701"}],"event":{"name":"PW' 25: International Conference on Management of Data","sponsor":["SIGMOD ACM Special Interest Group on Management of Data"],"location":"Berlin Germany","acronym":"PW' 25"},"container-title":["Proceedings of the ProvenanceWeek 2025"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3736229.3736269","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,2]],"date-time":"2025-09-02T11:58:49Z","timestamp":1756814329000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3736229.3736269"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,6,22]]},"references-count":26,"alternative-id":["10.1145\/3736229.3736269","10.1145\/3736229"],"URL":"https:\/\/doi.org\/10.1145\/3736229.3736269","relation":{},"subject":[],"published":{"date-parts":[[2025,6,22]]},"assertion":[{"value":"2025-09-02","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}