{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,1]],"date-time":"2026-04-01T18:39:48Z","timestamp":1775068788936,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":66,"publisher":"ACM","license":[{"start":{"date-parts":[[2021,6,9]],"date-time":"2021-06-09T00:00:00Z","timestamp":1623196800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/100000001","name":"NSF (National Science Foundation)","doi-asserted-by":"publisher","award":["IIS-1850195"],"award-info":[{"award-number":["IIS-1850195"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]},{"name":"NSERC","award":["Discovery Grant & CRD Grant"],"award-info":[{"award-number":["Discovery Grant & CRD Grant"]}]},{"DOI":"10.13039\/501100004489","name":"Mitacs","doi-asserted-by":"publisher","award":["Accelerate Grant"],"award-info":[{"award-number":["Accelerate Grant"]}],"id":[{"id":"10.13039\/501100004489","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2021,6,9]]},"DOI":"10.1145\/3448016.3457330","type":"proceedings-article","created":{"date-parts":[[2021,6,18]],"date-time":"2021-06-18T17:22:30Z","timestamp":1624036950000},"page":"2271-2280","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":29,"title":["DataPrep.EDA: Task-Centric Exploratory Data Analysis for Statistical Modeling in Python"],"prefix":"10.1145","author":[{"given":"Jinglin","family":"Peng","sequence":"first","affiliation":[{"name":"Simon Fraser University, Burnaby, BC, Canada"}]},{"given":"Weiyuan","family":"Wu","sequence":"additional","affiliation":[{"name":"Simon Fraser University, Burnaby, Canada"}]},{"given":"Brandon","family":"Lockhart","sequence":"additional","affiliation":[{"name":"Simon Fraser University, Burnaby, Canada"}]},{"given":"Song","family":"Bian","sequence":"additional","affiliation":[{"name":"The Chinese University of Hong Kong, Hong Kong, China"}]},{"given":"Jing Nathan","family":"Yan","sequence":"additional","affiliation":[{"name":"Cornell University, Ithaca, NY, USA"}]},{"given":"Linghao","family":"Xu","sequence":"additional","affiliation":[{"name":"Simon Fraser University, Burnaby, BC, Canada"}]},{"given":"Zhixuan","family":"Chi","sequence":"additional","affiliation":[{"name":"Simon Fraser University, Burnaby, BC, Canada"}]},{"given":"Jeffrey M.","family":"Rzeszotarski","sequence":"additional","affiliation":[{"name":"Cornell University, Ithaca, NY, USA"}]},{"given":"Jiannan","family":"Wang","sequence":"additional","affiliation":[{"name":"Simon Fraser University, Burnaby, BC, Canada"}]}],"member":"320","published-online":{"date-parts":[[2021,6,18]]},"reference":[{"key":"e_1_3_2_2_1_1","volume-title":"Retrieved","year":"2020","unstructured":"2020. ACLED Asian Conflicts, 2015--2017 . Retrieved September 22, 2020 from https:\/\/www.kaggle.com\/jboysen\/asian-conflicts 2020. ACLED Asian Conflicts, 2015--2017. Retrieved September 22, 2020 from https:\/\/www.kaggle.com\/jboysen\/asian-conflicts"},{"key":"e_1_3_2_2_2_1","volume-title":"Retrieved","year":"2020","unstructured":"2020. Adult Census Income . Retrieved September 22, 2020 from https:\/\/www.kaggle.com\/uciml\/adult-census-income 2020. Adult Census Income. Retrieved September 22, 2020 from https:\/\/www.kaggle.com\/uciml\/adult-census-income"},{"key":"e_1_3_2_2_3_1","volume-title":"Retrieved","year":"2020","unstructured":"2020. Alteryx : Automation that lets data speak and people think . Retrieved September 22, 2020 from https:\/\/www.alteryx.com\/ 2020. Alteryx: Automation that lets data speak and people think. Retrieved September 22, 2020 from https:\/\/www.alteryx.com\/"},{"key":"e_1_3_2_2_4_1","unstructured":"2020. Automobile Dataset. Retrieved September 22 2020 from https:\/\/www.kaggle.com\/toramky\/automobile-dataset  2020. Automobile Dataset. Retrieved September 22 2020 from https:\/\/www.kaggle.com\/toramky\/automobile-dataset"},{"key":"e_1_3_2_2_5_1","volume-title":"Retrieved","year":"2020","unstructured":"2020. AutoViz : Automatically Visualize any dataset, any size with a single line of code . Retrieved September 22, 2020 from https:\/\/github.com\/AutoViML\/AutoViz 2020. AutoViz: Automatically Visualize any dataset, any size with a single line of code. Retrieved September 22, 2020 from https:\/\/github.com\/AutoViML\/AutoViz"},{"key":"e_1_3_2_2_6_1","volume-title":"Retrieved","year":"2020","unstructured":"2020. Basketball Players Stats per Season - 49 Leagues . Retrieved September 22, 2020 from https:\/\/www.kaggle.com\/jacobbaruch\/basketball-players-stats-perseason-49-leagues 2020. Basketball Players Stats per Season - 49 Leagues. Retrieved September 22, 2020 from https:\/\/www.kaggle.com\/jacobbaruch\/basketball-players-stats-perseason-49-leagues"},{"key":"e_1_3_2_2_7_1","unstructured":"2020. Bitcoin Dataset. Retrieved September 22 2020 from https:\/\/www.kaggle.com\/mczielinski\/bitcoin-historical-data  2020. Bitcoin Dataset. Retrieved September 22 2020 from https:\/\/www.kaggle.com\/mczielinski\/bitcoin-historical-data"},{"key":"e_1_3_2_2_8_1","volume-title":"Retrieved","year":"2020","unstructured":"2020. Chess Game Dataset (Lichess) . Retrieved September 22, 2020 from https:\/\/www.kaggle.com\/datasnaek\/chess 2020. Chess Game Dataset (Lichess). Retrieved September 22, 2020 from https:\/\/www.kaggle.com\/datasnaek\/chess"},{"key":"e_1_3_2_2_9_1","volume-title":"Retrieved","year":"2020","unstructured":"2020. Data science courses on edX . Retrieved September 22, 2020 from https:\/\/www.edx.org\/course\/subject\/data-science#python 2020. Data science courses on edX. Retrieved September 22, 2020 from https:\/\/www.edx.org\/course\/subject\/data-science#python"},{"key":"e_1_3_2_2_10_1","volume-title":"Retrieved","year":"2020","unstructured":"2020. DataExplorer : Automate Data Exploration and Treatment . Retrieved September 22, 2020 from https:\/\/cran.r-project.org\/web\/packages\/DataExplorer\/vignettes\/dataexplorer-intro.html 2020. DataExplorer: Automate Data Exploration and Treatment. Retrieved September 22, 2020 from https:\/\/cran.r-project.org\/web\/packages\/DataExplorer\/vignettes\/dataexplorer-intro.html"},{"key":"e_1_3_2_2_11_1","volume-title":"Retrieved","year":"2020","unstructured":"2020. Default of Credit Card Clients Dataset . Retrieved September 22, 2020 from https:\/\/www.kaggle.com\/uciml\/default-of-credit-card-clients-dataset 2020. Default of Credit Card Clients Dataset. Retrieved September 22, 2020 from https:\/\/www.kaggle.com\/uciml\/default-of-credit-card-clients-dataset"},{"key":"e_1_3_2_2_12_1","unstructured":"2020. Diamonds. Retrieved September 22 2020 from https:\/\/www.kaggle.com\/ shivam2503\/diamonds  2020. Diamonds. Retrieved September 22 2020 from https:\/\/www.kaggle.com\/ shivam2503\/diamonds"},{"key":"e_1_3_2_2_13_1","volume-title":"Retrieved","year":"2020","unstructured":"2020. From Data to Viz . Retrieved September 22, 2020 from https:\/\/www.datato- viz.com\/ 2020. From Data to Viz. Retrieved September 22, 2020 from https:\/\/www.datato- viz.com\/"},{"key":"e_1_3_2_2_14_1","volume-title":"Retrieved","year":"2020","unstructured":"2020. Heart Disease UCI . Retrieved September 22, 2020 from https:\/\/www. kaggle.com\/ronitf\/heart-disease-uci 2020. Heart Disease UCI. Retrieved September 22, 2020 from https:\/\/www. kaggle.com\/ronitf\/heart-disease-uci"},{"key":"e_1_3_2_2_15_1","volume-title":"Retrieved","year":"2020","unstructured":"2020. Hotel booking demand . Retrieved September 22, 2020 from https:\/\/www. kaggle.com\/jessemostipak\/hotel-booking-demand 2020. Hotel booking demand. Retrieved September 22, 2020 from https:\/\/www. kaggle.com\/jessemostipak\/hotel-booking-demand"},{"key":"e_1_3_2_2_16_1","volume-title":"Retrieved","year":"2020","unstructured":"2020. IBM Data Science Professional Certificate . Retrieved September 22, 2020 from https:\/\/www.coursera.org\/professional-certificates\/ibm-data-science 2020. IBM Data Science Professional Certificate. Retrieved September 22, 2020 from https:\/\/www.coursera.org\/professional-certificates\/ibm-data-science"},{"key":"e_1_3_2_2_17_1","volume-title":"Retrieved","year":"2020","unstructured":"2020. IBM SPSS Statistics: Easy-to-Use Data Analysis . Retrieved September 22, 2020 from https:\/\/www.ibm.com\/analytics\/spss-statistics-software 2020. IBM SPSS Statistics: Easy-to-Use Data Analysis. Retrieved September 22, 2020 from https:\/\/www.ibm.com\/analytics\/spss-statistics-software"},{"key":"e_1_3_2_2_18_1","volume-title":"Retrieved","year":"2020","unstructured":"2020. JMP : Statistical Discovery From SAS . Retrieved September 22, 2020 from https:\/\/www.jmp.com 2020. JMP: Statistical Discovery From SAS. Retrieved September 22, 2020 from https:\/\/www.jmp.com"},{"key":"e_1_3_2_2_19_1","volume-title":"Retrieved","year":"2020","unstructured":"2020. Kaggle : Your Machine Learning and Data Science Community . Retrieved September 22, 2020 from https:\/\/www.kaggle.com\/ 2020. Kaggle: Your Machine Learning and Data Science Community. Retrieved September 22, 2020 from https:\/\/www.kaggle.com\/"},{"key":"e_1_3_2_2_20_1","volume-title":"Retrieved","year":"2020","unstructured":"2020. Lux : A Python API for Intelligent Visual Discovery . Retrieved September 22, 2020 from https:\/\/github.com\/lux-org\/lux 2020. Lux: A Python API for Intelligent Visual Discovery. Retrieved September 22, 2020 from https:\/\/github.com\/lux-org\/lux"},{"key":"e_1_3_2_2_21_1","unstructured":"2020. Microsoft Excel:Work together on Excel spreadsheets. Retrieved September 22 2020 from https:\/\/www.microsoft.com\/en-us\/microsoft-365\/excel  2020. Microsoft Excel:Work together on Excel spreadsheets. Retrieved September 22 2020 from https:\/\/www.microsoft.com\/en-us\/microsoft-365\/excel"},{"key":"e_1_3_2_2_22_1","volume-title":"Retrieved","year":"2020","unstructured":"2020. Microsoft Power BI: Data Visualization . Retrieved September 22, 2020 from https:\/\/powerbi.microsoft.com\/en-us\/ 2020. Microsoft Power BI: Data Visualization. Retrieved September 22, 2020 from https:\/\/powerbi.microsoft.com\/en-us\/"},{"key":"e_1_3_2_2_23_1","volume-title":"Retrieved","year":"2020","unstructured":"2020. Pima Indians Diabetes Database . Retrieved September 22, 2020 from https:\/\/www.kaggle.com\/uciml\/pima-indians-diabetes-database 2020. Pima Indians Diabetes Database. Retrieved September 22, 2020 from https:\/\/www.kaggle.com\/uciml\/pima-indians-diabetes-database"},{"key":"e_1_3_2_2_24_1","volume-title":"Retrieved","year":"2020","unstructured":"2020. Python for Data Science and Machine Learning Bootcamp . Retrieved September 22, 2020 from https:\/\/www.udemy.com\/course\/python-for-data-scienceand- machine-learning-bootcamp\/ 2020. Python for Data Science and Machine Learning Bootcamp. Retrieved September 22, 2020 from https:\/\/www.udemy.com\/course\/python-for-data-scienceand- machine-learning-bootcamp\/"},{"key":"e_1_3_2_2_25_1","volume-title":"Retrieved","year":"2020","unstructured":"2020. Qlik : Data Analytics and Data Integration Solutions . Retrieved September 22, 2020 from https:\/\/www.qlik.com\/us\/ 2020. Qlik: Data Analytics and Data Integration Solutions. Retrieved September 22, 2020 from https:\/\/www.qlik.com\/us\/"},{"key":"e_1_3_2_2_26_1","volume-title":"Retrieved","year":"2020","unstructured":"2020. Rain in Australia . Retrieved September 22, 2020 from https:\/\/www.kaggle. com\/jsphyg\/weather-dataset-rattle-package 2020. Rain in Australia. Retrieved September 22, 2020 from https:\/\/www.kaggle. com\/jsphyg\/weather-dataset-rattle-package"},{"key":"e_1_3_2_2_27_1","volume-title":"Artificial Intelligence and Data Management. Retrieved","year":"2020","unstructured":"2020. SAS : Analytics , Artificial Intelligence and Data Management. Retrieved September 22, 2020 from https:\/\/www.sas.com\/ 2020. SAS: Analytics, Artificial Intelligence and Data Management. Retrieved September 22, 2020 from https:\/\/www.sas.com\/"},{"key":"e_1_3_2_2_28_1","volume-title":"Retrieved","year":"2020","unstructured":"2020. Solar Radiation Prediction . Retrieved September 22, 2020 from https: \/\/www.kaggle.com\/dronio\/SolarEnergy 2020. Solar Radiation Prediction. Retrieved September 22, 2020 from https: \/\/www.kaggle.com\/dronio\/SolarEnergy"},{"key":"e_1_3_2_2_29_1","volume-title":"Retrieved","year":"2020","unstructured":"2020. splunk : The Data-to-Everything Platform . Retrieved September 22, 2020 from https:\/\/www.splunk.com\/ 2020. splunk: The Data-to-Everything Platform. Retrieved September 22, 2020 from https:\/\/www.splunk.com\/"},{"key":"e_1_3_2_2_30_1","volume-title":"Retrieved","year":"2020","unstructured":"2020. Suicide Rates Overview 1985 to 2016 . Retrieved September 22, 2020 from https:\/\/www.kaggle.com\/russellyates88\/suicide-rates-overview-1985-to-2016 2020. Suicide Rates Overview 1985 to 2016. Retrieved September 22, 2020 from https:\/\/www.kaggle.com\/russellyates88\/suicide-rates-overview-1985-to-2016"},{"key":"e_1_3_2_2_31_1","volume-title":"Retrieved","year":"2020","unstructured":"2020. Sweetviz : an open source Python library that generates beautiful, highdensity visualizations to kickstart EDA (Exploratory Data Analysis) with a single line of code . Retrieved September 22, 2020 from https:\/\/github.com\/fbdesignpro\/ sweetviz 2020. Sweetviz: an open source Python library that generates beautiful, highdensity visualizations to kickstart EDA (Exploratory Data Analysis) with a single line of code. Retrieved September 22, 2020 from https:\/\/github.com\/fbdesignpro\/ sweetviz"},{"key":"e_1_3_2_2_32_1","volume-title":"Retrieved","year":"2020","unstructured":"2020. Tableau : an interactive data visualization software company . Retrieved September 22, 2020 from https:\/\/www.tableau.com\/ 2020. Tableau: an interactive data visualization software company. Retrieved September 22, 2020 from https:\/\/www.tableau.com\/"},{"key":"e_1_3_2_2_33_1","volume-title":"Retrieved","year":"2020","unstructured":"2020. The TIOBE Programming Community Index . Retrieved September 22, 2020 from https:\/\/www.tiobe.com\/tiobe-index 2020. The TIOBE Programming Community Index. Retrieved September 22, 2020 from https:\/\/www.tiobe.com\/tiobe-index"},{"key":"e_1_3_2_2_34_1","volume-title":"Retrieved","year":"2020","unstructured":"2020. The UC Berkeley Foundations of Data Science Course . Retrieved September 22, 2020 from http:\/\/data8.org\/ 2020. The UC Berkeley Foundations of Data Science Course. Retrieved September 22, 2020 from http:\/\/data8.org\/"},{"key":"e_1_3_2_2_35_1","volume-title":"Retrieved","year":"2020","unstructured":"2020. TIBCO Spotfire Data Visualization and Analytics Software . Retrieved September 22, 2020 from https:\/\/www.tibco.com\/products\/tibco-spotfire 2020. TIBCO Spotfire Data Visualization and Analytics Software. Retrieved September 22, 2020 from https:\/\/www.tibco.com\/products\/tibco-spotfire"},{"key":"e_1_3_2_2_36_1","volume-title":"Retrieved","year":"2020","unstructured":"2020. Titanic : Machine Learning from Disaster . Retrieved September 22, 2020 from https:\/\/www.kaggle.com\/c\/titanic\/data'select=train.csv 2020. Titanic: Machine Learning from Disaster. Retrieved September 22, 2020 from https:\/\/www.kaggle.com\/c\/titanic\/data'select=train.csv"},{"key":"e_1_3_2_2_37_1","volume-title":"Retrieved","year":"2020","unstructured":"2020. Top Women Chess Players . Retrieved September 22, 2020 from https:\/\/www.kaggle.com\/vikasojha98\/top-women-chess-players 2020. Top Women Chess Players. Retrieved September 22, 2020 from https:\/\/www.kaggle.com\/vikasojha98\/top-women-chess-players"},{"key":"e_1_3_2_2_38_1","volume-title":"Koalas: pandas API on Apache Spark. Retrieved","year":"2021","unstructured":"2021. Koalas: pandas API on Apache Spark. Retrieved February 9, 2021 from https:\/\/github.com\/databricks\/koalas 2021. Koalas: pandas API on Apache Spark. Retrieved February 9, 2021 from https:\/\/github.com\/databricks\/koalas"},{"key":"e_1_3_2_2_39_1","doi-asserted-by":"publisher","DOI":"10.1007\/s00778-015-0389-y"},{"key":"e_1_3_2_2_40_1","volume-title":"Computer Graphics Forum","author":"Battle Leilani","unstructured":"Leilani Battle and Jeffrey Heer . 2019. Characterizing exploratory visual analysis: A literature review and evaluation of analytic provenance in tableau . In Computer Graphics Forum , Vol. 38 . Wiley Online Library , 145--159. Leilani Battle and Jeffrey Heer. 2019. Characterizing exploratory visual analysis: A literature review and evaluation of analytic provenance in tableau. In Computer Graphics Forum, Vol. 38. Wiley Online Library, 145--159."},{"key":"e_1_3_2_2_41_1","doi-asserted-by":"publisher","DOI":"10.21105\/joss.00547"},{"key":"e_1_3_2_2_42_1","volume-title":"Bokeh: Python library for interactive visualization. https:\/\/bokeh.pydata.org\/en\/latest\/","author":"Team Bokeh Development","year":"2018","unstructured":"Bokeh Development Team . 2018 . Bokeh: Python library for interactive visualization. https:\/\/bokeh.pydata.org\/en\/latest\/ Bokeh Development Team. 2018. Bokeh: Python library for interactive visualization. https:\/\/bokeh.pydata.org\/en\/latest\/"},{"key":"e_1_3_2_2_43_1","volume-title":"Pandas-profiling: Exploratory Data Analysis for Python. https:\/\/github.com\/pandas-profiling\/pandas-profiling.","author":"Brugman Simon","year":"2019","unstructured":"Simon Brugman . 2019 . Pandas-profiling: Exploratory Data Analysis for Python. https:\/\/github.com\/pandas-profiling\/pandas-profiling. Simon Brugman. 2019. Pandas-profiling: Exploratory Data Analysis for Python. https:\/\/github.com\/pandas-profiling\/pandas-profiling."},{"key":"e_1_3_2_2_44_1","unstructured":"Lars Buitinck Gilles Louppe Mathieu Blondel Fabian Pedregosa Andreas Mueller Olivier Grisel Vlad Niculae Peter Prettenhofer Alexandre Gramfort Jaques Grobler etal 2013. API design for machine learning software: experiences from the scikit-learn project. arXiv preprint arXiv:1309.0238 (2013).  Lars Buitinck Gilles Louppe Mathieu Blondel Fabian Pedregosa Andreas Mueller Olivier Grisel Vlad Niculae Peter Prettenhofer Alexandre Gramfort Jaques Grobler et al. 2013. API design for machine learning software: experiences from the scikit-learn project. arXiv preprint arXiv:1309.0238 (2013)."},{"key":"e_1_3_2_2_45_1","doi-asserted-by":"publisher","DOI":"10.1109\/INFVIS.1997.636792"},{"key":"e_1_3_2_2_46_1","doi-asserted-by":"publisher","DOI":"10.1177\/1473871618806555"},{"key":"e_1_3_2_2_47_1","volume-title":"Foresight: Recommending visual insights. arXiv preprint arXiv:1707.03877","author":"Demiralp \u00c7aatay","year":"2017","unstructured":"\u00c7aatay Demiralp , Peter J Haas , Srinivasan Parthasarathy , and Tejaswini Pedapati . 2017 . Foresight: Recommending visual insights. arXiv preprint arXiv:1707.03877 (2017). \u00c7aatay Demiralp, Peter J Haas, Srinivasan Parthasarathy, and Tejaswini Pedapati. 2017. Foresight: Recommending visual insights. arXiv preprint arXiv:1707.03877 (2017)."},{"key":"e_1_3_2_2_48_1","doi-asserted-by":"publisher","DOI":"10.14778\/3415478.3415508"},{"key":"e_1_3_2_2_49_1","volume-title":"Data2vis: Automatic generation of data visualizations using sequence-to-sequence recurrent neural networks","author":"Dibia Victor","year":"2019","unstructured":"Victor Dibia and \u00c7aatay Demiralp . 2019. Data2vis: Automatic generation of data visualizations using sequence-to-sequence recurrent neural networks . IEEE computer graphics and applications 39, 5 ( 2019 ), 33--46. Victor Dibia and \u00c7aatay Demiralp. 2019. Data2vis: Automatic generation of data visualizations using sequence-to-sequence recurrent neural networks. IEEE computer graphics and applications 39, 5 (2019), 33--46."},{"key":"e_1_3_2_2_50_1","doi-asserted-by":"publisher","DOI":"10.1145\/3299869.3314037"},{"key":"e_1_3_2_2_51_1","doi-asserted-by":"publisher","DOI":"10.1145\/3290605.3300358"},{"key":"e_1_3_2_2_52_1","doi-asserted-by":"publisher","DOI":"10.1109\/MCSE.2007.55"},{"key":"e_1_3_2_2_53_1","doi-asserted-by":"publisher","DOI":"10.1145\/2254556.2254659"},{"key":"e_1_3_2_2_54_1","doi-asserted-by":"publisher","DOI":"10.1145\/3219819.3219867"},{"key":"e_1_3_2_2_55_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICDE.2018.00019"},{"key":"e_1_3_2_2_56_1","volume-title":"Show me: Automatic presentation for visual analysis","author":"Mackinlay Jock","year":"2007","unstructured":"Jock Mackinlay , Pat Hanrahan , and Chris Stolte . 2007. Show me: Automatic presentation for visual analysis . IEEE transactions on visualization and computer graphics 13, 6 ( 2007 ), 1137--1144. Jock Mackinlay, Pat Hanrahan, and Chris Stolte. 2007. Show me: Automatic presentation for visual analysis. IEEE transactions on visualization and computer graphics 13, 6 (2007), 1137--1144."},{"key":"e_1_3_2_2_57_1","volume-title":"Formalizing Visualization Design Knowledge as Constraints: Actionable and Extensible Models in Draco","author":"Moritz Dominik","year":"2019","unstructured":"Dominik Moritz , ChenglongWang, Gregory Nelson , Halden Lin , Adam M. Smith , Bill Howe , and Jeffrey Heer . 2019. Formalizing Visualization Design Knowledge as Constraints: Actionable and Extensible Models in Draco . IEEE Trans. Visualization & Comp. Graphics (Proc. InfoVis) ( 2019 ). http:\/\/idl.cs.washington.edu\/papers\/ draco Dominik Moritz, ChenglongWang, Gregory Nelson, Halden Lin, Adam M. Smith, Bill Howe, and Jeffrey Heer. 2019. Formalizing Visualization Design Knowledge as Constraints: Actionable and Extensible Models in Draco. IEEE Trans. Visualization & Comp. Graphics (Proc. InfoVis) (2019). http:\/\/idl.cs.washington.edu\/papers\/ draco"},{"key":"e_1_3_2_2_58_1","doi-asserted-by":"publisher","DOI":"10.14778\/2824032.2824086"},{"key":"e_1_3_2_2_59_1","unstructured":"Roger Peng. 2012. Exploratory data analysis with R. Lulu. com.  Roger Peng. 2012. Exploratory data analysis with R. Lulu. com."},{"key":"e_1_3_2_2_60_1","volume-title":"Stephen Macke, Doris Xin, Xiangxi Mo, Joseph E. Gonzalez, Joseph M. Hellerstein, Anthony D. Joseph, and Aditya G. Parameswaran.","author":"Petersohn Devin","year":"2020","unstructured":"Devin Petersohn , William W. Ma , Doris Jung Lin Lee , Stephen Macke, Doris Xin, Xiangxi Mo, Joseph E. Gonzalez, Joseph M. Hellerstein, Anthony D. Joseph, and Aditya G. Parameswaran. 2020 . Towards Scalable Dataframe Systems. CoRR abs\/2001.00888 (2020). arXiv:2001.00888 http:\/\/arxiv.org\/abs\/2001.00888 Devin Petersohn, William W. Ma, Doris Jung Lin Lee, Stephen Macke, Doris Xin, Xiangxi Mo, Joseph E. Gonzalez, Joseph M. Hellerstein, Anthony D. Joseph, and Aditya G. Parameswaran. 2020. Towards Scalable Dataframe Systems. CoRR abs\/2001.00888 (2020). arXiv:2001.00888 http:\/\/arxiv.org\/abs\/2001.00888"},{"key":"e_1_3_2_2_61_1","first-page":"381","article-title":"Potter's wheel: An interactive data cleaning system","volume":"1","author":"Raman Vijayshankar","year":"2001","unstructured":"Vijayshankar Raman and Joseph M Hellerstein . 2001 . Potter's wheel: An interactive data cleaning system . In VLDB , Vol. 1. 381 -- 390 . Vijayshankar Raman and Joseph M Hellerstein. 2001. Potter's wheel: An interactive data cleaning system. In VLDB, Vol. 1. 381--390.","journal-title":"VLDB"},{"key":"e_1_3_2_2_62_1","unstructured":"Howard J Seltman. 2012. Experimental design and analysis.  Howard J Seltman. 2012. Experimental design and analysis."},{"key":"e_1_3_2_2_63_1","volume-title":"Proceedings of the 2016 ACM SIGMOD International Conference on Management of Data.","author":"Siddiqui Tarique","year":"2016","unstructured":"Tarique Siddiqui , Albert Kim , John Lee , Karrie Karahalios , and Aditya Parameswaran . 2016 . zenvisage: Effortless visual data exploration . In Proceedings of the 2016 ACM SIGMOD International Conference on Management of Data. Tarique Siddiqui, Albert Kim, John Lee, Karrie Karahalios, and Aditya Parameswaran. 2016. zenvisage: Effortless visual data exploration. In Proceedings of the 2016 ACM SIGMOD International Conference on Management of Data."},{"key":"e_1_3_2_2_64_1","volume-title":"The Landscape of R Packages for Automated Exploratory Data Analysis. arXiv preprint arXiv:1904.02101","author":"Staniak Mateusz","year":"2019","unstructured":"Mateusz Staniak and Przemyslaw Biecek . 2019. The Landscape of R Packages for Automated Exploratory Data Analysis. arXiv preprint arXiv:1904.02101 ( 2019 ). Mateusz Staniak and Przemyslaw Biecek. 2019. The Landscape of R Packages for Automated Exploratory Data Analysis. arXiv preprint arXiv:1904.02101 (2019)."},{"key":"#cr-split#-e_1_3_2_2_65_1.1","doi-asserted-by":"crossref","unstructured":"Bo Tang Shi Han Man Yiu Rui Ding and Dongmei Zhang. 2017. Extracting Top-K Insights from Multi-dimensional Data. 1509--1524. https:\/\/doi.org\/10.1145\/3035918.3035922 10.1145\/3035918.3035922","DOI":"10.1145\/3035918.3035922"},{"key":"#cr-split#-e_1_3_2_2_65_1.2","doi-asserted-by":"crossref","unstructured":"Bo Tang Shi Han Man Yiu Rui Ding and Dongmei Zhang. 2017. Extracting Top-K Insights from Multi-dimensional Data. 1509--1524. https:\/\/doi.org\/10.1145\/3035918.3035922","DOI":"10.1145\/3035918.3035922"}],"event":{"name":"SIGMOD\/PODS '21: International Conference on Management of Data","location":"Virtual Event China","acronym":"SIGMOD\/PODS '21","sponsor":["SIGMOD ACM Special Interest Group on Management of Data"]},"container-title":["Proceedings of the 2021 International Conference on Management of Data"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3448016.3457330","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3448016.3457330","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3448016.3457330","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T21:25:04Z","timestamp":1750195504000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3448016.3457330"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,6,9]]},"references-count":66,"alternative-id":["10.1145\/3448016.3457330","10.1145\/3448016"],"URL":"https:\/\/doi.org\/10.1145\/3448016.3457330","relation":{},"subject":[],"published":{"date-parts":[[2021,6,9]]},"assertion":[{"value":"2021-06-18","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}