{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,9]],"date-time":"2026-03-09T14:17:25Z","timestamp":1773065845889,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":51,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,7,10]],"date-time":"2024-07-10T00:00:00Z","timestamp":1720569600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0\/"}],"funder":[{"name":"U.S. Department of Energy, Office of Science","award":["DE-AC02-06CH11357 (ANL), GR134836 (OSU), and DE-AC02-05CH11231 (LBNL)"],"award-info":[{"award-number":["DE-AC02-06CH11357 (ANL), GR134836 (OSU), and DE-AC02-05CH11231 (LBNL)"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,7,10]]},"DOI":"10.1145\/3676288.3676296","type":"proceedings-article","created":{"date-parts":[[2024,8,24]],"date-time":"2024-08-24T04:20:18Z","timestamp":1724473218000},"page":"1-12","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":11,"title":["AI Data Readiness Inspector (AIDRIN) for Quantitative Assessment of Data Readiness for AI"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0006-8516-0215","authenticated-orcid":false,"given":"Kaveen","family":"Hiniduma","sequence":"first","affiliation":[{"name":"Department of Computer Science and Engineering, The Ohio State University, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3048-3448","authenticated-orcid":false,"given":"Suren","family":"Byna","sequence":"additional","affiliation":[{"name":"Department of Computer Science and Engineering, The Ohio State University, USA and Scientific Data Division, Lawrence Berkeley National Laboratory, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3915-1135","authenticated-orcid":false,"given":"Jean Luca","family":"Bez","sequence":"additional","affiliation":[{"name":"Scientific Data Division, Lawrence Berkeley National Laboratory, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2130-2887","authenticated-orcid":false,"given":"Ravi","family":"Madduri","sequence":"additional","affiliation":[{"name":"Data Science and Learning, Argonne National Laboratory, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2024,8,23]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"[n. d.]. AIDRIn: AI Data Readiness Inspector. test.pypi.org\/project\/aidrin\/0.5.4"},{"key":"e_1_3_2_1_2_1","volume-title":"d.]. DataCite Metadata Schema. DataCite Schema. https:\/\/schema.datacite.org\/ Accessed","year":"2024","unstructured":"[n. d.]. DataCite Metadata Schema. DataCite Schema. https:\/\/schema.datacite.org\/ Accessed 18 Feb. 2024."},{"key":"e_1_3_2_1_3_1","volume-title":"d.]. DCAT-US Schema v1.1 (Project Open Data Metadata Schema). Project Open Data Metadata Schema. resources.data.gov\/resources\/dcat-us\/","year":"2024","unstructured":"[n. d.]. DCAT-US Schema v1.1 (Project Open Data Metadata Schema). Project Open Data Metadata Schema. resources.data.gov\/resources\/dcat-us\/ Feb. 2024."},{"key":"e_1_3_2_1_4_1","unstructured":"K. Aas M. Jullum and A. L\u00f8land. 2019. Explaining Individual Predictions When Features Are Dependent: More Accurate Approximations to Shapley Values. arXiv preprint arXiv:1903.10464 [cs stat] (March 2019). http:\/\/arxiv.org\/abs\/1903.10464"},{"key":"e_1_3_2_1_5_1","volume-title":"Data Readiness Report. In IEEE Int. Conference on Smart Data Services (SMDS). 42\u201351","author":"Afzal S.","unstructured":"S. Afzal, C. Rajmohan, M. Kesarwani, S. Mehta, and H. Patel. 2020. Data Readiness Report. In IEEE Int. Conference on Smart Data Services (SMDS). 42\u201351."},{"key":"e_1_3_2_1_6_1","unstructured":"F. Alberto S. Garc\u00eda M. Galar R. Prati B. Krawczyk and F. Herrera. 2018. Learning from Imbalanced Data Sets. Springer."},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1145\/3552433"},{"key":"e_1_3_2_1_8_1","volume-title":"AI Fairness 360: An Extensible Toolkit for Detecting, Understanding, and Mitigating Unwanted Algorithmic Bias. arXiv preprint arXiv:1810.01943","author":"Rachel","year":"2018","unstructured":"Rachel K.\u00a0E. Bellamy 2018. AI Fairness 360: An Extensible Toolkit for Detecting, Understanding, and Mitigating Unwanted Algorithmic Bias. arXiv preprint arXiv:1810.01943 (2018)."},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1145\/1891879.1891881"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1145\/3190578"},{"key":"e_1_3_2_1_11_1","volume-title":"LOF: Identifying Density-Based Local Outliers. In ACM SIGMOD Int. Conf. Manage. Data.","author":"Breunig M.","unstructured":"M.\u00a0M. Breunig, H.-P. Kriegel, R.\u00a0T. Ng, and J. Sander. 2000. LOF: Identifying Density-Based Local Outliers. In ACM SIGMOD Int. Conf. Manage. Data."},{"key":"e_1_3_2_1_12_1","volume-title":"The Privacy Onion Effect: Memorization is Relative. arXiv:2206.10469 [cs.LG]","author":"Carlini N.","year":"2022","unstructured":"N. Carlini, M. Jagielski, C. Zhang, N. Papernot, 2022. The Privacy Onion Effect: Memorization is Relative. arXiv:2206.10469 [cs.LG] (2022)."},{"key":"e_1_3_2_1_13_1","volume-title":"Data Preprocessing to Mitigate Bias: A Maximum Entropy Based Approach. arXiv:1906.02164 [cs.LG]","author":"Celis E.","year":"2020","unstructured":"L.\u00a0E. Celis, V. Keswani, and N.\u00a0K. Vishnoi. 2020. Data Preprocessing to Mitigate Bias: A Maximum Entropy Based Approach. arXiv:1906.02164 [cs.LG] (2020)."},{"key":"e_1_3_2_1_14_1","volume-title":"Mathematical Methods of Statistics","author":"Cram\u00e9r Harald","unstructured":"Harald Cram\u00e9r. 1946. Mathematical Methods of Statistics. Princeton University Press, Princeton. 282 pages."},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","unstructured":"N. Davari B. Veloso R. Ribeiro and J. Gama. 2023. MetroPT-3 Dataset. UCI Machine Learning Repository. https:\/\/doi.org\/10.24432\/C5VW3R","DOI":"10.24432\/C5VW3R"},{"key":"e_1_3_2_1_16_1","unstructured":"V. Duddu S. Szyller and N. Asokan. 2022. SHAPr: An Efficient and Versatile Membership Privacy Risk Metric for Machine Learning. arXiv:2112.02230 (2022)."},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.21105\/joss.02283"},{"key":"e_1_3_2_1_18_1","unstructured":"S.\u00a0Cholia et al.2024. ESS-DIVE Overview: A Scalable User-Focused Repository for Earth and Environmental Science Data. https:\/\/ess-dive.lbl.gov\/"},{"key":"e_1_3_2_1_19_1","volume-title":"d.]. FAIRassist.Org. https:\/\/fairassist.org","year":"2024","unstructured":"FAIRassist.org. [n. d.]. FAIRassist.Org. https:\/\/fairassist.org. Jan. 6, 2024."},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"crossref","unstructured":"M. Feldman S.\u00a0A. Friedler J. Moeller C. Scheidegger and S. Venkatasubramanian. 2015. Certifying and Removing Disparate Impact. In 21st ACM SIGKDD.","DOI":"10.1145\/2783258.2783311"},{"key":"e_1_3_2_1_21_1","volume-title":"Statistics (International Student Edition)","author":"Freedman David","unstructured":"David Freedman, Robert Pisani, and Roger Purves. 2007. Statistics (International Student Edition) (4th ed.). WW Norton & Company, New York.","edition":"4"},{"key":"e_1_3_2_1_22_1","unstructured":"C. Frye I. Feige and C. Rowat. 2019. Asymmetric Shapley Values: Incorporating Causal Knowledge into Model-Agnostic Explainability."},{"key":"e_1_3_2_1_23_1","unstructured":"GO FAIR. 2022. GO FAIR Principles. https:\/\/www.go-fair.org\/fair-principles\/."},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1056\/NEJMp1607591"},{"key":"e_1_3_2_1_25_1","volume-title":"Data Quality Toolkit: Automatic Assessment of Data Quality and Remediation for Machine Learning Datasets. arXiv preprint arXiv:2108.05935","author":"Gupta Nitin","year":"2021","unstructured":"Nitin Gupta, Hima Patel, 2021. Data Quality Toolkit: Automatic Assessment of Data Quality and Remediation for Machine Learning Datasets. arXiv preprint arXiv:2108.05935 (2021)."},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1186\/s40537-019-0274-4"},{"key":"e_1_3_2_1_27_1","unstructured":"Kaveen Hiniduma Suren Byna and Jean\u00a0Luca Bez. 2024. Data Readiness for AI: A 360-Degree Survey. arxiv:2404.05779"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.24432\/C5NC77"},{"key":"e_1_3_2_1_29_1","unstructured":"Sarah Holland Ahmed Hosny Sarah Newman Joshua Joseph and Kasia Chmielinski. 2018. The Dataset Nutrition Label: A Framework To Drive Higher Data Quality Standards. (2018). arxiv:arXiv:1805.03677\u00a0[cs.DB]"},{"key":"e_1_3_2_1_30_1","volume-title":"Accessed","year":"2024","unstructured":"Informatica. [n. d.]. Data Quality Metrics & Measures - All You Need to Know. Accessed Jan. 10, 2024."},{"key":"e_1_3_2_1_31_1","unstructured":"M. Kelly R. Longjohn and K. Nottingham. [n. d.]. The UCI Machine Learning Repository. https:\/\/archive.ics.uci.edu."},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1109\/e-Science58273.2023.10254842"},{"key":"e_1_3_2_1_33_1","first-page":"I","article-title":"A Unified Approach to Interpreting Model Predictions","volume":"30","author":"Lundberg M","year":"2017","unstructured":"Scott\u00a0M Lundberg and Su-In Lee. 2017. A Unified Approach to Interpreting Model Predictions. In Advances in Neural Information Processing Systems 30, I.\u00a0Guyon, U.\u00a0V. Luxburg, S.\u00a0Bengio, H.\u00a0Wallach, R.\u00a0Fergus, S.\u00a0Vishwanathan, and R.\u00a0Garnett (Eds.). Curran Associates, Inc., 4765\u20134774.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_34_1","unstructured":"D.\u00a0Mar\u00edn L\u00f3pez D. Mar\u00edn J. Fonollosa J. Llano A. Perera and Z. Haddi. 2023. Single Elder Home Monitoring: Gas and Position. UCI ML Repository."},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","unstructured":"R. Marcinkevi\u010ds 2023. Regensburg Pediatric Appendicitis Dataset (1.01) [Data set]. Zenodo. https:\/\/doi.org\/10.5281\/zenodo.7669442","DOI":"10.5281\/zenodo.7669442"},{"key":"e_1_3_2_1_36_1","unstructured":"Ninareh Mehrabi 2022. A Survey on Bias and Fairness in Machine Learning. arxiv:1908.09635\u00a0[cs.LG]"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"crossref","unstructured":"L. Merrick and A. Taly. 2019. The Explanation Game: Explaining Machine Learning Models with Cooperative Game Theory. arXiv:1909.08128 (2019).","DOI":"10.1007\/978-3-030-57321-8_2"},{"key":"e_1_3_2_1_38_1","unstructured":"MIDRC. [n. d.]. The Medical Imaging and Data Resource Center (MIDRC). https:\/\/www.midrc.org\/"},{"key":"e_1_3_2_1_39_1","unstructured":"National Cancer Institute Center for Biomedical Informatics and Information Technology. n.d.. CRDC insights. https:\/\/datacommons.cancer.gov\/news\/nci-crdc-artificial-intelligence-data-readiness-aidr-challenge."},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.patrec.2017.08.002"},{"key":"e_1_3_2_1_41_1","volume-title":"Incremental Local Outlier Detection for Data Streams. In IEEE Symp. Comput. Intell. Data Mining. 504\u2013515","author":"Pokrajac D.","year":"2007","unstructured":"D. Pokrajac, A. Lazarevic, and L.\u00a0J. Latecki. 2007. Incremental Local Outlier Detection for Data Streams. In IEEE Symp. Comput. Intell. Data Mining. 504\u2013515."},{"key":"e_1_3_2_1_42_1","volume-title":"Improving Fairness in AI Models on Electronic Health Records: The Case for Federated Learning Methods. arXiv preprint arXiv:2305.11386","author":"Raphael Poulain","year":"2023","unstructured":"Raphael Poulain 2023. Improving Fairness in AI Models on Electronic Health Records: The Case for Federated Learning Methods. arXiv preprint arXiv:2305.11386 (2023)."},{"key":"e_1_3_2_1_43_1","volume-title":"The FAIR Cookbook - The Essential Resource for and by FAIR Doers. Sci Data 10","author":"Rocca-Serra P.","year":"2023","unstructured":"P. Rocca-Serra, W. Gu, V. Ioannidis, 2023. The FAIR Cookbook - The Essential Resource for and by FAIR Doers. Sci Data 10 (2023)."},{"key":"e_1_3_2_1_44_1","unstructured":"M. Ryu [n. d.]. APPFL: Advanced Privacy-Preserving Federated Learning."},{"key":"e_1_3_2_1_45_1","volume-title":"APPFL: Open-Source Software Framework for Privacy-Preserving Federated Learning. In IPDPS Workshops. IEEE, 1074\u20131083","author":"Ryu Minseok","year":"2022","unstructured":"Minseok Ryu 2022. APPFL: Open-Source Software Framework for Privacy-Preserving Federated Learning. In IPDPS Workshops. IEEE, 1074\u20131083."},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.14778\/3229863.3229867"},{"key":"e_1_3_2_1_47_1","volume-title":"DQLearn: A Toolkit for Structured Data Quality Learning. In International Conference on Big Data (Big Data). 1644\u20131653","author":"Shrivastava S.","year":"2020","unstructured":"S. Shrivastava 2020. DQLearn: A Toolkit for Structured Data Quality Learning. In International Conference on Big Data (Big Data). 1644\u20131653."},{"key":"e_1_3_2_1_48_1","volume-title":"Systematic Evaluation of Privacy Risks of Machine Learning Models. In 30th USENIX Security Symposium (USENIX Security 21)","author":"Song L.","unstructured":"L. Song and P. Mittal. 2021. Systematic Evaluation of Privacy Risks of Machine Learning Models. In 30th USENIX Security Symposium (USENIX Security 21)."},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"crossref","unstructured":"H. Theil. 1992. Some Reflections on Static Programming under Uncertainty. In Henri Theil\u2019s Contributions to Economics and Econometrics B.\u00a0Raj and J.\u00a0Koerts (Eds.). Advanced Studies in Theoretical and Applied Econometrics Vol.\u00a024.","DOI":"10.1007\/978-94-011-2410-2_13"},{"key":"e_1_3_2_1_50_1","volume-title":"Privacy Risk Quantification in Education Data Using Markov Model. British Journal of Educational Technology","author":"Dinusha Vatsalan","year":"2022","unstructured":"Dinusha Vatsalan 2022. Privacy Risk Quantification in Education Data Using Markov Model. British Journal of Educational Technology (2022), 804\u2013821."},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.patrec.2018.09.012"}],"event":{"name":"SSDBM 2024: 36th International Conference on Scientific and Statistical Database Management","location":"Rennes France","acronym":"SSDBM 2024"},"container-title":["Proceedings of the 36th International Conference on Scientific and Statistical Database Management"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3676288.3676296","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3676288.3676296","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T00:05:35Z","timestamp":1750291535000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3676288.3676296"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,7,10]]},"references-count":51,"alternative-id":["10.1145\/3676288.3676296","10.1145\/3676288"],"URL":"https:\/\/doi.org\/10.1145\/3676288.3676296","relation":{},"subject":[],"published":{"date-parts":[[2024,7,10]]},"assertion":[{"value":"2024-08-23","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}