{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,9,11]],"date-time":"2025-09-11T22:53:09Z","timestamp":1757631189946,"version":"3.44.0"},"publisher-location":"New York, NY, USA","reference-count":99,"publisher":"ACM","content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,6,22]]},"DOI":"10.1145\/3722212.3725636","type":"proceedings-article","created":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T09:00:26Z","timestamp":1750150826000},"page":"813-820","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Navigating Data Errors in Machine Learning Pipelines: Identify, Debug, and Learn"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-6462-3579","authenticated-orcid":false,"given":"Bojan","family":"Karla\u0161","sequence":"first","affiliation":[{"name":"Harvard University, Cambridge, MA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8763-8354","authenticated-orcid":false,"given":"Babak","family":"Salimi","sequence":"additional","affiliation":[{"name":"University of California, San Diego, San Diego, CA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4722-5840","authenticated-orcid":false,"given":"Sebastian","family":"Schelter","sequence":"additional","affiliation":[{"name":"BIFOLD, Berlin, Germany and TU Berlin, Berlin, Germany"}]}],"member":"320","published-online":{"date-parts":[[2025,6,22]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"https:\/\/gdpr-info.eu\/. (2016). [Online","author":"2016. Regulation (EU) 2016\/679 of the European Parliament and of the Council of 27 April 2016 on the protection of natural persons with regard to the processing of personal data and on the free movement of such data, and repealing Directive 95\/46\/EC (General Data Protection Regulation).","year":"2019","unstructured":"2016. Regulation (EU) 2016\/679 of the European Parliament and of the Council of 27 April 2016 on the protection of natural persons with regard to the processing of personal data and on the free movement of such data, and repealing Directive 95\/46\/EC (General Data Protection Regulation). https:\/\/gdpr-info.eu\/. (2016). [Online; accessed 17-Feb-2019]."},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.14778\/2824032.2824076"},{"key":"e_1_3_2_1_3_1","unstructured":"Amazon Web Services. 2020. SageMaker Pipelines. https:\/\/aws.amazon.com\/sagemaker\/pipelines\/."},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1109\/IISWC63097.2024.00013"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1007\/s00778-021-00671-8"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1145\/3097983.3098021"},{"key":"e_1_3_2_1_7_1","volume-title":"Bertrand Melenberg, and Gijs Rennen.","author":"Ben-Tal Aharon","year":"2011","unstructured":"Aharon Ben-Tal, Dick den Hertog, Anja De Waegenaere, Bertrand Melenberg, and Gijs Rennen. 2011. Robust Solutions of Optimization Problems Affected by Uncertain Probabilities. Advanced Risk & Portfolio Management\u00ae Research Paper Series (2011). https:\/\/api.semanticscholar.org\/CorpusID:761793"},{"key":"e_1_3_2_1_8_1","volume-title":"Verified Code Transpilation with LLMs. NeurIPS","author":"Bhatia Sahil","year":"2024","unstructured":"Sahil Bhatia, Jie Qiu, Niranjan Hasabnis, Sanjit A Seshia, and Alvin Cheung. 2024. Verified Code Transpilation with LLMs. NeurIPS (2024)."},{"key":"e_1_3_2_1_9_1","unstructured":"Matthias Boehm et al . 2020. SystemDS: A Declarative Machine Learning System for the End-to-End Data Science Lifecycle. CIDR (2020)."},{"key":"e_1_3_2_1_10_1","volume-title":"The effects of data quality on machine learning performance. arXiv preprint arXiv:2207.14529","author":"Budach Lukas","year":"2022","unstructured":"Lukas Budach, Moritz Feuerpfeil, Nina Ihde, Andrea Nathansen, Nele Noack, Hendrik Patzlaff, Felix Naumann, and Hazar Harmouch. 2022. The effects of data quality on machine learning performance. arXiv preprint arXiv:2207.14529 (2022)."},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1007\/s10618-010-0190-x"},{"key":"e_1_3_2_1_12_1","unstructured":"California Consumer Privacy Act (CCPA). 2024. Requests to Delete. https:\/\/oag.ca.gov\/privacy\/ccpa#sectiond."},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1145\/2882903.2912574"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-540-87987-9_8"},{"key":"e_1_3_2_1_15_1","volume-title":"Stochastic Amortization: A Unified Approach to Accelerate Feature and Data Attribution. NeurIPS","author":"Covert Ian","year":"2024","unstructured":"Ian Covert, Chanwoo Kim, Su-In Lee, James Zou, and Tatsunori Hashimoto. 2024. Stochastic Amortization: A Unified Approach to Accelerate Feature and Data Attribution. NeurIPS (2024)."},{"key":"e_1_3_2_1_16_1","unstructured":"Databricks. 2022. Mlflow Recipes. https:\/\/mlflow.org\/docs\/latest\/recipes.html."},{"key":"e_1_3_2_1_17_1","volume-title":"Robust fairness-aware learning under sample selection bias. arXiv preprint arXiv:2105.11570","author":"Du Wei","year":"2021","unstructured":"Wei Du and Xintao Wu. 2021. Robust fairness-aware learning under sample selection bias. arXiv preprint arXiv:2105.11570 (2021)."},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"crossref","unstructured":"Sebastian Schelter et al. 2021. HedgeCut: Maintaining Randomised Trees for Low-Latency Machine Unlearning. In SIGMOD.","DOI":"10.1145\/3448016.3457239"},{"key":"e_1_3_2_1_19_1","unstructured":"EU AI Act. 2024. Article 10: Data and Data Governance. https:\/\/artificialintelligenceact.eu\/article\/10\/."},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"crossref","unstructured":"Raul Castro Fernandez et al . 2023. How large language models will disrupt data management. VLDB (2023).","DOI":"10.14778\/3611479.3611527"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1145\/3514221.3517849"},{"key":"e_1_3_2_1_22_1","volume-title":"International conference on machine learning. PMLR, 2242--2251","author":"Ghorbani Amirata","year":"2019","unstructured":"Amirata Ghorbani and James Zou. 2019. Data shapley: Equitable valuation of data for machine learning. In International conference on machine learning. PMLR, 2242--2251."},{"key":"e_1_3_2_1_23_1","unstructured":"Google. 2021. Vertex AI Pipelines. https:\/\/cloud.google.com\/vertex-ai\/docs\/pipelines."},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1145\/3589273"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1007\/s00778-021-00726-w"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1145\/3448016.3452759"},{"key":"e_1_3_2_1_27_1","first-page":"63","article-title":"Red Onions, Soft Cheese and Data: From Food Safety to Data Traceability for Responsible AI","volume":"47","author":"Grafberger Stefan","year":"2024","unstructured":"Stefan Grafberger, Zeyu Zhang, Sebastian Schelter, and Ce Zhang. 2024. Red Onions, Soft Cheese and Data: From Food Safety to Data Traceability for Responsible AI. IEEE Data Eng. Bull. 47, 1 (2024), 63--81.","journal-title":"IEEE Data Eng. Bull."},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1145\/1265530.1265535"},{"key":"e_1_3_2_1_29_1","volume-title":"Julia Stoyanovich, and Sebastian Schelter.","author":"Guha Shubha","year":"2022","unstructured":"Shubha Guha, Falaah Arif Khan, Julia Stoyanovich, and Sebastian Schelter. 2022. Automated Data Cleaning Can Hurt Fairness in Machine Learning-based Decision Making. ICDE (2022)."},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1007\/s10994-023-06495-7"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1145\/3290605.3300830"},{"key":"e_1_3_2_1_32_1","volume-title":"Correcting sample selection bias by unlabeled data. Advances in neural information processing systems 19","author":"Huang Jiayuan","year":"2006","unstructured":"Jiayuan Huang, Arthur Gretton, Karsten Borgwardt, Bernhard Sch\u00f6lkopf, and Alex Smola. 2006. Correcting sample selection bias by unlabeled data. Advances in neural information processing systems 19 (2006)."},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v35i9.16971"},{"key":"e_1_3_2_1_34_1","volume-title":"Nezihe Merve Gurel, Bo Li, Ce Zhang, Costas J Spanos, and Dawn Song.","author":"Jia Ruoxi","year":"2019","unstructured":"Ruoxi Jia, David Dao, Boxin Wang, Frances Ann Hubis, Nezihe Merve Gurel, Bo Li, Ce Zhang, Costas J Spanos, and Dawn Song. 2019. Efficient task-specific data valuation for nearest neighbor algorithms. arXiv preprint arXiv:1908.08619 (2019)."},{"key":"e_1_3_2_1_35_1","volume-title":"The 22nd International Conference on Artificial Intelligence and Statistics. PMLR, 1167--1176","author":"Jia Ruoxi","year":"2019","unstructured":"Ruoxi Jia, David Dao, Boxin Wang, Frances Ann Hubis, Nick Hynes, Nezihe Merve G\u00fcrel, Bo Li, Ce Zhang, Dawn Song, and Costas J Spanos. 2019. Towards efficient data valuation based on the shapley value. In The 22nd International Conference on Artificial Intelligence and Statistics. PMLR, 1167--1176."},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00814"},{"key":"e_1_3_2_1_37_1","volume-title":"International Conference on Artificial Intelligence and Statistics. PMLR, 702--712","author":"Jiang Heinrich","year":"2020","unstructured":"Heinrich Jiang and Ofir Nachum. 2020. Identifying and correcting label bias in machine learning. In International Conference on Artificial Intelligence and Statistics. PMLR, 702--712."},{"key":"e_1_3_2_1_38_1","volume-title":"Opendataval: a unified benchmark for data valuation. Advances in Neural Information Processing Systems 36","author":"Jiang Kevin","year":"2023","unstructured":"Kevin Jiang, Weixin Liang, James Y Zou, and Yongchan Kwon. 2023. Opendataval: a unified benchmark for data valuation. Advances in Neural Information Processing Systems 36 (2023)."},{"key":"e_1_3_2_1_39_1","volume-title":"Data preprocessing techniques for classification without discrimination. Knowledge and information systems 33, 1","author":"Kamiran Faisal","year":"2012","unstructured":"Faisal Kamiran and Toon Calders. 2012. Data preprocessing techniques for classification without discrimination. Knowledge and information systems 33, 1 (2012), 1--33."},{"key":"e_1_3_2_1_40_1","volume-title":"The Twelfth International Conference on Learning Representations.","author":"Karla\u0161 Bojan","year":"2023","unstructured":"Bojan Karla\u0161, David Dao, Matteo Interlandi, Sebastian Schelter, Wentao Wu, and Ce Zhang. 2023. Data Debugging with Shapley Importance over Machine Learning Pipelines. In The Twelfth International Conference on Learning Representations."},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.5555\/3430915.3442426"},{"key":"e_1_3_2_1_42_1","volume-title":"Xu Chu, Wentao Wu, and Ce Zhang.","author":"Karla\u0161 Bojan","year":"2020","unstructured":"Bojan Karla\u0161, Peng Li, Renzhi Wu, Nezihe Merve G\u00fcrel, Xu Chu, Wentao Wu, and Ce Zhang. 2020. Nearest neighbor classifiers over incomplete information: From certain answers to certain predictions. VLDB (2020)."},{"key":"e_1_3_2_1_43_1","volume-title":"International Conference on Machine Learning. PMLR","author":"Koh Pang Wei","year":"2017","unstructured":"Pang Wei Koh and Percy Liang. 2017. Understanding black-box predictions via influence functions. In International Conference on Machine Learning. PMLR, 1885--1894."},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.14778\/2994509.2994514"},{"key":"e_1_3_2_1_45_1","first-page":"8","article-title":"Data Anamnesis: Admitting Raw Data into an Organization","volume":"39","author":"Kruse Sebastian","year":"2016","unstructured":"Sebastian Kruse, Thorsten Papenbrock, Hazar Harmouch, and Felix Naumann. 2016. Data Anamnesis: Admitting Raw Data into an Organization. IEEE Data Eng. Bull. 39, 2 (2016), 8--20.","journal-title":"IEEE Data Eng. Bull."},{"key":"e_1_3_2_1_46_1","volume-title":"Beta shapley: a unified and noise-reduced data valuation framework for machine learning. arXiv preprint arXiv:2110.14049","author":"Kwon Yongchan","year":"2021","unstructured":"Yongchan Kwon and James Zou. 2021. Beta shapley: a unified and noise-reduced data valuation framework for machine learning. arXiv preprint arXiv:2110.14049 (2021)."},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.1145\/3589328"},{"key":"e_1_3_2_1_48_1","volume-title":"Robust classification under sample selection bias. Advances in neural information processing systems 27","author":"Liu Anqi","year":"2014","unstructured":"Anqi Liu and Brian Ziebart. 2014. Robust classification under sample selection bias. Advances in neural information processing systems 27 (2014)."},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"publisher","DOI":"10.1145\/3626246.3654680"},{"key":"e_1_3_2_1_50_1","volume-title":"Improving retrieval-augmented large language models via data importance learning. arXiv preprint arXiv:2307.03027","author":"Lyu Xiaozhong","year":"2023","unstructured":"Xiaozhong Lyu, Stefan Grafberger, Samantha Biegel, Shaopeng Wei, Meng Cao, Sebastian Schelter, and Ce Zhang. 2023. Improving retrieval-augmented large language models via data importance learning. arXiv preprint arXiv:2307.03027 (2023)."},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"publisher","DOI":"10.14778\/3407790.3407801"},{"key":"e_1_3_2_1_52_1","volume-title":"Sudnya Diamos, Greg Diamos, Lynn He, Alicia Parrish, Hannah Rose Kirk, et al.","author":"Mazumder Mark","year":"2024","unstructured":"Mark Mazumder, Colby Banbury, Xiaozhe Yao, Bojan Karla\u0161, William Gaviria Rojas, Sudnya Diamos, Greg Diamos, Lynn He, Alicia Parrish, Hannah Rose Kirk, et al. 2024. Dataperf: Benchmarks for data-centric ai development. Advances in Neural Information Processing Systems 36 (2024)."},{"key":"e_1_3_2_1_53_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v35i17.17817"},{"key":"e_1_3_2_1_54_1","doi-asserted-by":"publisher","DOI":"10.1145\/3457607"},{"key":"e_1_3_2_1_55_1","first-page":"1","article-title":"Mllib: Machine learning in apache spark","volume":"17","author":"Meng Xiangrui","year":"2016","unstructured":"Xiangrui Meng, Joseph Bradley, Burak Yavuz, Evan Sparks, Shivaram Venkataraman, Davies Liu, Jeremy Freeman, DB Tsai, Manish Amde, Sean Owen, et al. 2016. Mllib: Machine learning in apache spark. Journal of Machine Learning Research 17, 34 (2016), 1--7.","journal-title":"Journal of Machine Learning Research"},{"key":"e_1_3_2_1_56_1","unstructured":"Metaflow.org. 2024. A framework for real-life ML AI and data science. https:\/\/metaflow.org."},{"key":"e_1_3_2_1_57_1","volume-title":"Certifying Robustness to Programmable Data Bias in Decision Trees. Advances in Neural Information Processing Systems 34","author":"Meyer Anna","year":"2021","unstructured":"Anna Meyer, Aws Albarghouthi, and Loris D'Antoni. 2021. Certifying Robustness to Programmable Data Bias in Decision Trees. Advances in Neural Information Processing Systems 34 (2021)."},{"key":"e_1_3_2_1_58_1","doi-asserted-by":"publisher","DOI":"10.1145\/3593013.3593988"},{"key":"e_1_3_2_1_59_1","unstructured":"Microsoft. 2018. Azure Machine Learning Pipelines. https:\/\/learn.microsoft.com\/en-us\/azure\/machine-learning\/concept-ml-pipelines."},{"key":"e_1_3_2_1_60_1","doi-asserted-by":"publisher","DOI":"10.1145\/3394486.3403205"},{"key":"e_1_3_2_1_61_1","volume-title":"Garnett (Eds.)","volume":"29","author":"Namkoong Hongseok","year":"2016","unstructured":"Hongseok Namkoong and John C Duchi. 2016. Stochastic Gradient Methods for Distributionally Robust Optimization with f-divergences. In Advances in Neural Information Processing Systems, D. Lee, M. Sugiyama, U. Luxburg, I. Guyon, and R. Garnett (Eds.), Vol. 29. Curran Associates, Inc. https:\/\/proceedings.neurips.cc\/paper_files\/paper\/2016\/file\/4588e674d3f0faf985047d4c3f13ed0d-Paper.pdf"},{"key":"e_1_3_2_1_62_1","doi-asserted-by":"publisher","DOI":"10.1613\/jair.1.12125"},{"key":"e_1_3_2_1_63_1","doi-asserted-by":"publisher","DOI":"10.1145\/3514221.3526141"},{"key":"e_1_3_2_1_64_1","volume-title":"Scikit-learn: Machine learning in Python. the Journal of machine Learning research 12","author":"Pedregosa Fabian","year":"2011","unstructured":"Fabian Pedregosa, Ga\u00ebl Varoquaux, Alexandre Gramfort, Vincent Michel, Bertrand Thirion, Olivier Grisel, Mathieu Blondel, Peter Prettenhofer, Ron Weiss, Vincent Dubourg, et al. 2011. Scikit-learn: Machine learning in Python. the Journal of machine Learning research 12 (2011), 2825--2830."},{"key":"e_1_3_2_1_65_1","doi-asserted-by":"publisher","DOI":"10.1145\/3654963"},{"key":"e_1_3_2_1_66_1","first-page":"17044","article-title":"Identifying mislabeled data using the area under the margin ranking","volume":"33","author":"Pleiss Geoff","year":"2020","unstructured":"Geoff Pleiss, Tianyi Zhang, Ethan Elenberg, and Kilian Q Weinberger. 2020. Identifying mislabeled data using the area under the margin ranking. Advances in Neural Information Processing Systems 33 (2020), 17044--17056.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_67_1","volume-title":"Proceedings of machine learning and systems 1","author":"Polyzotis Neoklis","year":"2019","unstructured":"Neoklis Polyzotis, Martin Zinkevich, Sudip Roy, Eric Breck, and Steven Whang. 2019. Data validation for machine learning. Proceedings of machine learning and systems 1 (2019), 334--347."},{"key":"e_1_3_2_1_68_1","doi-asserted-by":"publisher","DOI":"10.1145\/3514221.3522564"},{"key":"e_1_3_2_1_69_1","doi-asserted-by":"publisher","DOI":"10.1145\/3514221.3517886"},{"key":"e_1_3_2_1_70_1","doi-asserted-by":"publisher","DOI":"10.1145\/3552490.3552496"},{"key":"e_1_3_2_1_71_1","doi-asserted-by":"publisher","DOI":"10.14778\/3137628.3137631"},{"key":"e_1_3_2_1_72_1","volume-title":"Robust Fairness under Covariate Shift. arXiv preprint arXiv:2010.05166","author":"Rezaei Ashkan","year":"2020","unstructured":"Ashkan Rezaei, Anqi Liu, Omid Memarrast, and Brian Ziebart. 2020. Robust Fairness under Covariate Shift. arXiv preprint arXiv:2010.05166 (2020)."},{"key":"e_1_3_2_1_73_1","volume-title":"International Conference on Machine Learning. PMLR, 8230--8241","author":"Rosenfeld Elan","year":"2020","unstructured":"Elan Rosenfeld, Ezra Winston, Pradeep Ravikumar, and Zico Kolter. 2020. Certified robustness to label-flipping attacks via randomized smoothing. In International Conference on Machine Learning. PMLR, 8230--8241."},{"key":"e_1_3_2_1_74_1","unstructured":"Sebastian Schelter and Stefan Grafberger. 2024. Messy Code Makes Managing ML Pipelines Difficult? Just Let LLMs Rewrite the Code! arXiv:2409.10081 [cs.DB] https:\/\/arxiv.org\/abs\/2409.10081"},{"key":"e_1_3_2_1_75_1","doi-asserted-by":"publisher","DOI":"10.1145\/3555041.3589682"},{"key":"e_1_3_2_1_76_1","unstructured":"Maximilian E Sch\u00fcle Luca Scalerandi Alfons Kemper and Thomas Neumann. 2023. Blue Elephants Inspecting Pandas: Inspection and Execution of Machine Learning Pipelines in SQL.. In EDBT. 40--52."},{"key":"e_1_3_2_1_77_1","unstructured":"Scikit-learn. 2024. Pipelines and composite estimators. https:\/\/scikit-learn.org\/stable\/modules\/compose.html."},{"volume-title":"Proceedings of the VLDB Endowment 13","author":"Shastri Supreeth","key":"e_1_3_2_1_78_1","unstructured":"Supreeth Shastri, Vinay Banakar, Melissa Wasserman, Arun Kumar, and Vijay Chidambaram. [n. d.]. Understanding and Benchmarking the Impact of GDPR on Database Systems. Proceedings of the VLDB Endowment 13, 7 ([n. d.])."},{"key":"e_1_3_2_1_79_1","doi-asserted-by":"publisher","DOI":"10.1145\/3617338"},{"key":"e_1_3_2_1_80_1","volume-title":"Pang Wei Koh, and Percy Liang","author":"Steinhardt Jacob","year":"2017","unstructured":"Jacob Steinhardt, Pang Wei Koh, and Percy Liang. 2017. Certified defenses for data poisoning attacks. In NeurIPS."},{"key":"e_1_3_2_1_81_1","doi-asserted-by":"publisher","DOI":"10.1145\/3488717"},{"key":"e_1_3_2_1_82_1","unstructured":"DigiChina Stanford University. [n. d.]. Internet Information Service Algorithmic Recommendation Management Provisions. https:\/\/digichina.stanford.edu\/work\/translation-internet-information-service-algorithmic-recommendation-management-provisions-opinon-seeking-draft\/"},{"key":"e_1_3_2_1_83_1","volume-title":"CAESURA: Language Models as Multi-Modal Query Planners. CIDR","author":"Urban Matthias","year":"2024","unstructured":"Matthias Urban and Carsten Binnig. 2024. CAESURA: Language Models as Multi-Modal Query Planners. CIDR (2024)."},{"key":"e_1_3_2_1_84_1","volume-title":"International Conference on Artificial Intelligence and Statistics. PMLR, 6388--6421","author":"Wang Jiachen T","year":"2023","unstructured":"Jiachen T Wang and Ruoxi Jia. 2023. Data banzhaf: A robust data valuation framework for machine learning. In International Conference on Artificial Intelligence and Statistics. PMLR, 6388--6421."},{"key":"e_1_3_2_1_85_1","doi-asserted-by":"publisher","DOI":"10.25080\/Majora-92bf1922-00a"},{"key":"e_1_3_2_1_86_1","doi-asserted-by":"publisher","DOI":"10.14778\/3415478.3415562"},{"key":"e_1_3_2_1_87_1","doi-asserted-by":"publisher","DOI":"10.1145\/3318464.3389696"},{"key":"e_1_3_2_1_88_1","doi-asserted-by":"publisher","DOI":"10.1145\/3448016.3457566"},{"key":"e_1_3_2_1_89_1","doi-asserted-by":"publisher","DOI":"10.14778\/3485450.3485452"},{"key":"e_1_3_2_1_90_1","volume-title":"2nd USENIX workshop on hot topics in cloud computing (HotCloud 10)","author":"Zaharia Matei","year":"2010","unstructured":"Matei Zaharia, Mosharaf Chowdhury, Michael J Franklin, Scott Shenker, and Ion Stoica. 2010. Spark: Cluster computing with working sets. In 2nd USENIX workshop on hot topics in cloud computing (HotCloud 10)."},{"volume-title":"ICML (3) (JMLR Workshop and Conference Proceedings","author":"Zemel Richard S.","key":"e_1_3_2_1_91_1","unstructured":"Richard S. Zemel, Yu Wu, Kevin Swersky, Toniann Pitassi, and Cynthia Dwork. 2013. Learning Fair Representations. In ICML (3) (JMLR Workshop and Conference Proceedings, Vol. 28). JMLR.org, 325--333."},{"key":"e_1_3_2_1_92_1","doi-asserted-by":"publisher","DOI":"10.1145\/3278721.3278779"},{"key":"e_1_3_2_1_93_1","doi-asserted-by":"publisher","DOI":"10.1145\/3588688"},{"key":"e_1_3_2_1_94_1","doi-asserted-by":"crossref","unstructured":"Xuezhou Zhang Xiaojin Zhu and Stephen Wright. 2018. Training set debugging using trusted items. In AAAI.","DOI":"10.1609\/aaai.v32i1.11610"},{"key":"e_1_3_2_1_95_1","volume-title":"Mitigating Label Bias in Machine Learning: Fairness through Confident Learning. arXiv preprint arXiv:2312.08749","author":"Zhang Yixuan","year":"2023","unstructured":"Yixuan Zhang, Boyu Li, Zenan Ling, and Feng Zhou. 2023. Mitigating Label Bias in Machine Learning: Fairness through Confident Learning. arXiv preprint arXiv:2312.08749 (2023)."},{"key":"e_1_3_2_1_96_1","doi-asserted-by":"publisher","DOI":"10.1145\/3654929"},{"key":"e_1_3_2_1_97_1","volume-title":"Learning from Uncertain Data: From Possible Worlds to Possible Models. NeurIPS","author":"Zhu Jiongli","year":"2024","unstructured":"Jiongli Zhu, Su Feng, Boris Glavic, and Babak Salimi. 2024. Learning from Uncertain Data: From Possible Worlds to Possible Models. NeurIPS (2024)."},{"key":"e_1_3_2_1_98_1","volume-title":"Consistent range approximation for fair predictive modeling. VLDB","author":"Zhu Jiongli","year":"2023","unstructured":"Jiongli Zhu, Sainyam Galhotra, Nazanin Sabri, and Babak Salimi. 2023. Consistent range approximation for fair predictive modeling. VLDB (2023)."},{"key":"e_1_3_2_1_99_1","doi-asserted-by":"publisher","DOI":"10.1145\/3514221.3520170"}],"event":{"name":"SIGMOD\/PODS '25: International Conference on Management of Data","sponsor":["SIGMOD ACM Special Interest Group on Management of Data"],"location":"Berlin Germany","acronym":"SIGMOD\/PODS '25"},"container-title":["Companion of the 2025 International Conference on Management of Data"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3722212.3725636","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,10]],"date-time":"2025-09-10T22:39:26Z","timestamp":1757543966000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3722212.3725636"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,6,22]]},"references-count":99,"alternative-id":["10.1145\/3722212.3725636","10.1145\/3722212"],"URL":"https:\/\/doi.org\/10.1145\/3722212.3725636","relation":{},"subject":[],"published":{"date-parts":[[2025,6,22]]},"assertion":[{"value":"2025-06-22","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}