{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,8]],"date-time":"2025-10-08T15:51:55Z","timestamp":1759938715229,"version":"3.28.0"},"reference-count":30,"publisher":"IEEE","license":[{"start":{"date-parts":[[2020,12,10]],"date-time":"2020-12-10T00:00:00Z","timestamp":1607558400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2020,12,10]],"date-time":"2020-12-10T00:00:00Z","timestamp":1607558400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2020,12,10]],"date-time":"2020-12-10T00:00:00Z","timestamp":1607558400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2020,12,10]]},"DOI":"10.1109\/bigdata50022.2020.9378296","type":"proceedings-article","created":{"date-parts":[[2021,3,19]],"date-time":"2021-03-19T21:10:21Z","timestamp":1616188221000},"page":"1644-1653","source":"Crossref","is-referenced-by-count":9,"title":["DQLearn : A Toolkit for Structured Data Quality Learning"],"prefix":"10.1109","author":[{"given":"Shrey","family":"Shrivastava","sequence":"first","affiliation":[]},{"given":"Dhaval","family":"Patel","sequence":"additional","affiliation":[]},{"given":"Nianjun","family":"Zhou","sequence":"additional","affiliation":[]},{"given":"Arun","family":"Iyengar","sequence":"additional","affiliation":[]},{"given":"Anuradha","family":"Bhamidipaty","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"journal-title":"sparse dataframe","year":"2020","key":"ref30"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.14778\/3229863.3229867"},{"journal-title":"Great Expectations github","year":"2020","key":"ref11"},{"journal-title":"Spark DF Profiling github","year":"2020","key":"ref12"},{"journal-title":"Pandas Profiling github","year":"2020","key":"ref13"},{"journal-title":"Drunken Data Quality github","year":"2020","key":"ref14"},{"journal-title":"Test Driven Data Analysis","year":"2020","key":"ref15"},{"journal-title":"Friction less Data","year":"2020","key":"ref16"},{"journal-title":"Data Clean panda","year":"2020","key":"ref17"},{"journal-title":"Datadiff","year":"2020","key":"ref18"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/BigData47090.2019.9006187"},{"year":"0","key":"ref28","article-title":"Mlops: Continuous delivery and automation pipelines in machine learning"},{"journal-title":"Big data and the supply chain The big-supply-chain analytics landscape (Part 1)","year":"2020","key":"ref4"},{"article-title":"Crisp-dm: towards a standard process modell for data mining","year":"2000","author":"wirth","key":"ref27"},{"article-title":"Enabling a digital and analytics transformation in heavy-industry manufacturing","year":"0","author":"milan korbel","key":"ref3"},{"key":"ref6","article-title":"Data validation for machine learning","author":"breck","year":"2019","journal-title":"Proc of SysML"},{"key":"ref29","doi-asserted-by":"crossref","first-page":"49","DOI":"10.1145\/2641190.2641198","article-title":"Openml: Networked science in machine learning","volume":"15","author":"vanschoren","year":"2013","journal-title":"SIGKDD Explorations"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1016\/j.egypro.2017.07.354"},{"article-title":"Machine learning systems for intelligent services in the iot: A survey","year":"2020","author":"toussaint","key":"ref8"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1145\/3299887.3299891"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2017.2765544"},{"article-title":"Towards crisp-ml(q): A machine learning process model with quality assurance methodology","year":"2020","author":"studer","key":"ref9"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.3991\/ijim.v11i5.7072"},{"journal-title":"Python automatic data quality check","year":"2020","key":"ref20"},{"article-title":"Cleanml: A benchmark for joint data cleaning and machine learning [experiments and analysis]","year":"2019","author":"li","key":"ref22"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1145\/3308558.3313602"},{"key":"ref24","article-title":"Scikit-learn: Machine learning in python","volume":"abs 1201 490","author":"pedregosa","year":"2012","journal-title":"CoRR"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.14778\/2994509.2994514"},{"key":"ref26","article-title":"Mlbase: A distributed machine-learning system","author":"kraska","year":"2013","journal-title":"CIDR"},{"key":"ref25","article-title":"API design for machine learning software: experiences from the scikit-learn project","volume":"abs 1309 238","author":"buitinck","year":"2013","journal-title":"CoRR"}],"event":{"name":"2020 IEEE International Conference on Big Data (Big Data)","start":{"date-parts":[[2020,12,10]]},"location":"Atlanta, GA, USA","end":{"date-parts":[[2020,12,13]]}},"container-title":["2020 IEEE International Conference on Big Data (Big Data)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9377717\/9377728\/09378296.pdf?arnumber=9378296","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,6,27]],"date-time":"2022-06-27T15:53:14Z","timestamp":1656345194000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9378296\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020,12,10]]},"references-count":30,"URL":"https:\/\/doi.org\/10.1109\/bigdata50022.2020.9378296","relation":{},"subject":[],"published":{"date-parts":[[2020,12,10]]}}}